diff --git "a/checkpoint-171448/trainer_state.json" "b/checkpoint-171448/trainer_state.json" --- "a/checkpoint-171448/trainer_state.json" +++ "b/checkpoint-171448/trainer_state.json" @@ -1,6 +1,6 @@ { - "best_metric": 0.03627169877290726, - "best_model_checkpoint": "wav2vec2-base-pem123-32-960h-la/checkpoint-64293", + "best_metric": 0.01730768382549286, + "best_model_checkpoint": "wav2vec2-base-pem123-32-960h-la/checkpoint-128586", "epoch": 8.0, "global_step": 171448, "is_hyper_param_search": false, @@ -9,205822 +9,205822 @@ "log_history": [ { "epoch": 0.0, - "learning_rate": 1.0000000000000001e-07, - "loss": 2.5681, + "learning_rate": 1.5000000000000002e-07, + "loss": 2.7078, "step": 5 }, { "epoch": 0.0, - "learning_rate": 2.0000000000000002e-07, - "loss": 3.2419, + "learning_rate": 3.0000000000000004e-07, + "loss": 3.2934, "step": 10 }, { "epoch": 0.0, - "learning_rate": 3.0000000000000004e-07, - "loss": 3.4492, + "learning_rate": 4.5e-07, + "loss": 3.5202, "step": 15 }, { "epoch": 0.0, - "learning_rate": 4.0000000000000003e-07, - "loss": 3.6105, + "learning_rate": 6.000000000000001e-07, + "loss": 3.0796, "step": 20 }, { "epoch": 0.0, - "learning_rate": 5.000000000000001e-07, - "loss": 3.2818, + "learning_rate": 7.5e-07, + "loss": 3.4226, "step": 25 }, { "epoch": 0.0, - "learning_rate": 6.000000000000001e-07, - "loss": 3.3872, + "learning_rate": 9e-07, + "loss": 3.3138, "step": 30 }, { "epoch": 0.0, - "learning_rate": 7.000000000000001e-07, - "loss": 3.4249, + "learning_rate": 1.0500000000000001e-06, + "loss": 3.4498, "step": 35 }, { "epoch": 0.0, - "learning_rate": 8.000000000000001e-07, - "loss": 3.4935, + "learning_rate": 1.2000000000000002e-06, + "loss": 3.3835, "step": 40 }, { "epoch": 0.0, - "learning_rate": 9.000000000000001e-07, - "loss": 3.5878, + "learning_rate": 1.35e-06, + "loss": 3.3571, "step": 45 }, { "epoch": 0.0, - "learning_rate": 1.0000000000000002e-06, - "loss": 3.3044, + "learning_rate": 1.5e-06, + "loss": 3.8856, "step": 50 }, { "epoch": 0.0, - "learning_rate": 1.1e-06, - "loss": 2.1617, + "learning_rate": 1.65e-06, + "loss": 2.0857, "step": 55 }, { "epoch": 0.0, - "learning_rate": 1.2000000000000002e-06, - "loss": 2.8407, + "learning_rate": 1.8e-06, + "loss": 2.6621, "step": 60 }, { "epoch": 0.0, - "learning_rate": 1.3e-06, - "loss": 2.6913, + "learning_rate": 1.95e-06, + "loss": 2.2829, "step": 65 }, { "epoch": 0.0, - "learning_rate": 1.4000000000000001e-06, - "loss": 3.1341, + "learning_rate": 2.1000000000000002e-06, + "loss": 2.2943, "step": 70 }, { "epoch": 0.0, - "learning_rate": 1.5e-06, - "loss": 2.6384, + "learning_rate": 2.25e-06, + "loss": 2.3434, "step": 75 }, { "epoch": 0.0, - "learning_rate": 1.6000000000000001e-06, - "loss": 2.3875, + "learning_rate": 2.4000000000000003e-06, + "loss": 2.4087, "step": 80 }, { "epoch": 0.0, - "learning_rate": 1.7000000000000002e-06, - "loss": 2.6405, + "learning_rate": 2.55e-06, + "loss": 2.1228, "step": 85 }, { "epoch": 0.0, - "learning_rate": 1.8000000000000001e-06, - "loss": 2.6826, + "learning_rate": 2.7e-06, + "loss": 2.2721, "step": 90 }, { "epoch": 0.0, - "learning_rate": 1.9000000000000002e-06, - "loss": 2.6435, + "learning_rate": 2.8500000000000002e-06, + "loss": 2.4505, "step": 95 }, { "epoch": 0.0, - "learning_rate": 2.0000000000000003e-06, - "loss": 2.9612, + "learning_rate": 3e-06, + "loss": 2.6655, "step": 100 }, { "epoch": 0.0, - "learning_rate": 2.1000000000000002e-06, - "loss": 1.6984, + "learning_rate": 3.15e-06, + "loss": 1.4593, "step": 105 }, { "epoch": 0.01, - "learning_rate": 2.2e-06, - "loss": 1.6943, + "learning_rate": 3.3e-06, + "loss": 1.4055, "step": 110 }, { "epoch": 0.01, - "learning_rate": 2.3000000000000004e-06, - "loss": 1.9857, + "learning_rate": 3.4500000000000004e-06, + "loss": 1.485, "step": 115 }, { "epoch": 0.01, - "learning_rate": 2.4000000000000003e-06, - "loss": 2.0135, + "learning_rate": 3.6e-06, + "loss": 1.5149, "step": 120 }, { "epoch": 0.01, - "learning_rate": 2.5e-06, - "loss": 1.9529, + "learning_rate": 3.75e-06, + "loss": 1.3764, "step": 125 }, { "epoch": 0.01, - "learning_rate": 2.6e-06, - "loss": 2.1156, + "learning_rate": 3.9e-06, + "loss": 1.6748, "step": 130 }, { "epoch": 0.01, - "learning_rate": 2.7000000000000004e-06, - "loss": 1.8468, + "learning_rate": 4.05e-06, + "loss": 1.5351, "step": 135 }, { "epoch": 0.01, - "learning_rate": 2.8000000000000003e-06, - "loss": 1.9046, + "learning_rate": 4.2000000000000004e-06, + "loss": 1.6763, "step": 140 }, { "epoch": 0.01, - "learning_rate": 2.9e-06, - "loss": 2.0311, + "learning_rate": 4.35e-06, + "loss": 1.7393, "step": 145 }, { "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 2.0449, + "learning_rate": 4.5e-06, + "loss": 1.6164, "step": 150 }, { "epoch": 0.01, - "learning_rate": 3.1000000000000004e-06, - "loss": 1.3589, + "learning_rate": 4.65e-06, + "loss": 1.3382, "step": 155 }, { "epoch": 0.01, - "learning_rate": 3.2000000000000003e-06, - "loss": 1.2662, + "learning_rate": 4.800000000000001e-06, + "loss": 1.028, "step": 160 }, { "epoch": 0.01, - "learning_rate": 3.3000000000000006e-06, - "loss": 1.1506, + "learning_rate": 4.95e-06, + "loss": 1.1416, "step": 165 }, { "epoch": 0.01, - "learning_rate": 3.4000000000000005e-06, - "loss": 1.3545, + "learning_rate": 5.1e-06, + "loss": 0.978, "step": 170 }, { "epoch": 0.01, - "learning_rate": 3.5e-06, - "loss": 1.4116, + "learning_rate": 5.25e-06, + "loss": 1.169, "step": 175 }, { "epoch": 0.01, - "learning_rate": 3.6000000000000003e-06, - "loss": 1.402, + "learning_rate": 5.4e-06, + "loss": 1.2869, "step": 180 }, { "epoch": 0.01, - "learning_rate": 3.7e-06, - "loss": 1.5328, + "learning_rate": 5.55e-06, + "loss": 1.5859, "step": 185 }, { "epoch": 0.01, - "learning_rate": 3.8000000000000005e-06, - "loss": 1.475, + "learning_rate": 5.7000000000000005e-06, + "loss": 1.2329, "step": 190 }, { "epoch": 0.01, - "learning_rate": 3.900000000000001e-06, - "loss": 1.6633, + "learning_rate": 5.850000000000001e-06, + "loss": 1.4542, "step": 195 }, { "epoch": 0.01, - "learning_rate": 4.000000000000001e-06, - "loss": 2.138, + "learning_rate": 6e-06, + "loss": 1.8286, "step": 200 }, { "epoch": 0.01, - "learning_rate": 4.1e-06, - "loss": 1.2597, + "learning_rate": 6.1499999999999996e-06, + "loss": 1.0484, "step": 205 }, { "epoch": 0.01, - "learning_rate": 4.2000000000000004e-06, - "loss": 1.0349, + "learning_rate": 6.3e-06, + "loss": 0.8858, "step": 210 }, { "epoch": 0.01, - "learning_rate": 4.3e-06, - "loss": 1.0808, + "learning_rate": 6.45e-06, + "loss": 0.9786, "step": 215 }, { "epoch": 0.01, - "learning_rate": 4.4e-06, - "loss": 1.044, + "learning_rate": 6.6e-06, + "loss": 1.153, "step": 220 }, { "epoch": 0.01, - "learning_rate": 4.5e-06, - "loss": 1.2246, + "learning_rate": 6.750000000000001e-06, + "loss": 1.0792, "step": 225 }, { "epoch": 0.01, - "learning_rate": 4.600000000000001e-06, - "loss": 1.112, + "learning_rate": 6.900000000000001e-06, + "loss": 1.2376, "step": 230 }, { "epoch": 0.01, - "learning_rate": 4.7e-06, - "loss": 1.3231, + "learning_rate": 7.049999999999999e-06, + "loss": 1.2282, "step": 235 }, { "epoch": 0.01, - "learning_rate": 4.800000000000001e-06, - "loss": 1.5087, + "learning_rate": 7.2e-06, + "loss": 1.211, "step": 240 }, { "epoch": 0.01, - "learning_rate": 4.9000000000000005e-06, - "loss": 1.5727, + "learning_rate": 7.35e-06, + "loss": 1.3426, "step": 245 }, { "epoch": 0.01, - "learning_rate": 5e-06, - "loss": 1.7501, + "learning_rate": 7.5e-06, + "loss": 1.6511, "step": 250 }, { "epoch": 0.01, - "learning_rate": 5.1e-06, - "loss": 1.1905, + "learning_rate": 7.65e-06, + "loss": 1.1288, "step": 255 }, { "epoch": 0.01, - "learning_rate": 5.2e-06, - "loss": 0.9608, + "learning_rate": 7.8e-06, + "loss": 0.8139, "step": 260 }, { "epoch": 0.01, - "learning_rate": 5.300000000000001e-06, - "loss": 1.1615, + "learning_rate": 7.95e-06, + "loss": 0.985, "step": 265 }, { "epoch": 0.01, - "learning_rate": 5.400000000000001e-06, - "loss": 1.0021, + "learning_rate": 8.1e-06, + "loss": 0.8362, "step": 270 }, { "epoch": 0.01, - "learning_rate": 5.500000000000001e-06, - "loss": 0.9685, + "learning_rate": 8.25e-06, + "loss": 0.9912, "step": 275 }, { "epoch": 0.01, - "learning_rate": 5.600000000000001e-06, - "loss": 1.2904, + "learning_rate": 8.400000000000001e-06, + "loss": 1.2811, "step": 280 }, { "epoch": 0.01, - "learning_rate": 5.7e-06, - "loss": 1.2705, + "learning_rate": 8.55e-06, + "loss": 1.1056, "step": 285 }, { "epoch": 0.01, - "learning_rate": 5.8e-06, - "loss": 1.4223, + "learning_rate": 8.7e-06, + "loss": 1.1996, "step": 290 }, { "epoch": 0.01, - "learning_rate": 5.9e-06, - "loss": 1.5338, + "learning_rate": 8.85e-06, + "loss": 1.4412, "step": 295 }, { "epoch": 0.01, - "learning_rate": 6e-06, - "loss": 1.7456, + "learning_rate": 9e-06, + "loss": 1.9595, "step": 300 }, { "epoch": 0.01, - "learning_rate": 6.1e-06, - "loss": 1.1443, + "learning_rate": 9.15e-06, + "loss": 0.9321, "step": 305 }, { "epoch": 0.01, - "learning_rate": 6.200000000000001e-06, - "loss": 0.8596, + "learning_rate": 9.3e-06, + "loss": 0.682, "step": 310 }, { "epoch": 0.01, - "learning_rate": 6.300000000000001e-06, - "loss": 0.9641, + "learning_rate": 9.450000000000001e-06, + "loss": 0.7705, "step": 315 }, { "epoch": 0.01, - "learning_rate": 6.4000000000000006e-06, - "loss": 0.9228, + "learning_rate": 9.600000000000001e-06, + "loss": 0.8194, "step": 320 }, { "epoch": 0.02, - "learning_rate": 6.5000000000000004e-06, - "loss": 1.0683, + "learning_rate": 9.75e-06, + "loss": 0.9736, "step": 325 }, { "epoch": 0.02, - "learning_rate": 6.600000000000001e-06, - "loss": 0.9611, + "learning_rate": 9.9e-06, + "loss": 1.0385, "step": 330 }, { "epoch": 0.02, - "learning_rate": 6.700000000000001e-06, - "loss": 1.2087, + "learning_rate": 1.005e-05, + "loss": 1.0263, "step": 335 }, { "epoch": 0.02, - "learning_rate": 6.800000000000001e-06, - "loss": 1.1336, + "learning_rate": 1.02e-05, + "loss": 1.1658, "step": 340 }, { "epoch": 0.02, - "learning_rate": 6.9e-06, - "loss": 1.422, + "learning_rate": 1.035e-05, + "loss": 1.4046, "step": 345 }, { "epoch": 0.02, - "learning_rate": 7e-06, - "loss": 1.8331, + "learning_rate": 1.05e-05, + "loss": 1.7697, "step": 350 }, { "epoch": 0.02, - "learning_rate": 7.100000000000001e-06, - "loss": 1.0522, + "learning_rate": 1.065e-05, + "loss": 0.8677, "step": 355 }, { "epoch": 0.02, - "learning_rate": 7.2000000000000005e-06, - "loss": 0.8421, + "learning_rate": 1.08e-05, + "loss": 0.7698, "step": 360 }, { "epoch": 0.02, - "learning_rate": 7.3e-06, - "loss": 0.7651, + "learning_rate": 1.095e-05, + "loss": 0.7114, "step": 365 }, { "epoch": 0.02, - "learning_rate": 7.4e-06, - "loss": 0.9904, + "learning_rate": 1.11e-05, + "loss": 0.8821, "step": 370 }, { "epoch": 0.02, - "learning_rate": 7.500000000000001e-06, - "loss": 0.8832, + "learning_rate": 1.125e-05, + "loss": 0.8893, "step": 375 }, { "epoch": 0.02, - "learning_rate": 7.600000000000001e-06, - "loss": 0.9663, + "learning_rate": 1.1400000000000001e-05, + "loss": 0.8909, "step": 380 }, { "epoch": 0.02, - "learning_rate": 7.7e-06, - "loss": 1.115, + "learning_rate": 1.1550000000000001e-05, + "loss": 1.0615, "step": 385 }, { "epoch": 0.02, - "learning_rate": 7.800000000000002e-06, - "loss": 1.1533, + "learning_rate": 1.1700000000000001e-05, + "loss": 1.026, "step": 390 }, { "epoch": 0.02, - "learning_rate": 7.9e-06, - "loss": 1.2886, + "learning_rate": 1.185e-05, + "loss": 1.1341, "step": 395 }, { "epoch": 0.02, - "learning_rate": 8.000000000000001e-06, - "loss": 2.0128, + "learning_rate": 1.2e-05, + "loss": 1.7984, "step": 400 }, { "epoch": 0.02, - "learning_rate": 8.1e-06, - "loss": 0.9795, + "learning_rate": 1.215e-05, + "loss": 0.946, "step": 405 }, { "epoch": 0.02, - "learning_rate": 8.2e-06, - "loss": 0.7312, + "learning_rate": 1.2299999999999999e-05, + "loss": 0.5768, "step": 410 }, { "epoch": 0.02, - "learning_rate": 8.3e-06, - "loss": 0.7858, + "learning_rate": 1.245e-05, + "loss": 0.6476, "step": 415 }, { "epoch": 0.02, - "learning_rate": 8.400000000000001e-06, - "loss": 0.8727, + "learning_rate": 1.26e-05, + "loss": 0.7286, "step": 420 }, { "epoch": 0.02, - "learning_rate": 8.5e-06, - "loss": 0.8088, + "learning_rate": 1.275e-05, + "loss": 0.7953, "step": 425 }, { "epoch": 0.02, - "learning_rate": 8.6e-06, - "loss": 0.9188, + "learning_rate": 1.29e-05, + "loss": 1.0026, "step": 430 }, { "epoch": 0.02, - "learning_rate": 8.700000000000001e-06, - "loss": 1.1715, + "learning_rate": 1.305e-05, + "loss": 0.8454, "step": 435 }, { "epoch": 0.02, - "learning_rate": 8.8e-06, - "loss": 1.3011, + "learning_rate": 1.32e-05, + "loss": 1.0045, "step": 440 }, { "epoch": 0.02, - "learning_rate": 8.900000000000001e-06, - "loss": 1.3686, + "learning_rate": 1.3350000000000001e-05, + "loss": 1.0515, "step": 445 }, { "epoch": 0.02, - "learning_rate": 9e-06, - "loss": 1.5265, + "learning_rate": 1.3500000000000001e-05, + "loss": 1.4769, "step": 450 }, { "epoch": 0.02, - "learning_rate": 9.100000000000001e-06, - "loss": 0.8523, + "learning_rate": 1.3650000000000001e-05, + "loss": 0.8398, "step": 455 }, { "epoch": 0.02, - "learning_rate": 9.200000000000002e-06, - "loss": 0.6656, + "learning_rate": 1.3800000000000002e-05, + "loss": 0.6949, "step": 460 }, { "epoch": 0.02, - "learning_rate": 9.3e-06, - "loss": 0.8027, + "learning_rate": 1.395e-05, + "loss": 0.6305, "step": 465 }, { "epoch": 0.02, - "learning_rate": 9.4e-06, - "loss": 0.6912, + "learning_rate": 1.4099999999999999e-05, + "loss": 0.6594, "step": 470 }, { "epoch": 0.02, - "learning_rate": 9.5e-06, - "loss": 0.8886, + "learning_rate": 1.4249999999999999e-05, + "loss": 0.7755, "step": 475 }, { "epoch": 0.02, - "learning_rate": 9.600000000000001e-06, - "loss": 0.8875, + "learning_rate": 1.44e-05, + "loss": 0.7899, "step": 480 }, { "epoch": 0.02, - "learning_rate": 9.7e-06, - "loss": 0.9732, + "learning_rate": 1.455e-05, + "loss": 0.9505, "step": 485 }, { "epoch": 0.02, - "learning_rate": 9.800000000000001e-06, - "loss": 1.051, + "learning_rate": 1.47e-05, + "loss": 0.9007, "step": 490 }, { "epoch": 0.02, - "learning_rate": 9.9e-06, - "loss": 1.1972, + "learning_rate": 1.485e-05, + "loss": 0.9164, "step": 495 }, { "epoch": 0.02, - "learning_rate": 1e-05, - "loss": 1.5824, + "learning_rate": 1.5e-05, + "loss": 1.4495, "step": 500 }, { "epoch": 0.02, - "learning_rate": 1.0100000000000002e-05, - "loss": 0.9922, + "learning_rate": 1.515e-05, + "loss": 0.8424, "step": 505 }, { "epoch": 0.02, - "learning_rate": 1.02e-05, - "loss": 0.6785, + "learning_rate": 1.53e-05, + "loss": 0.518, "step": 510 }, { "epoch": 0.02, - "learning_rate": 1.0300000000000001e-05, - "loss": 0.6366, + "learning_rate": 1.545e-05, + "loss": 0.5492, "step": 515 }, { "epoch": 0.02, - "learning_rate": 1.04e-05, - "loss": 0.7643, + "learning_rate": 1.56e-05, + "loss": 0.7048, "step": 520 }, { "epoch": 0.02, - "learning_rate": 1.0500000000000001e-05, - "loss": 0.8867, + "learning_rate": 1.575e-05, + "loss": 0.7184, "step": 525 }, { "epoch": 0.02, - "learning_rate": 1.0600000000000002e-05, - "loss": 0.7378, + "learning_rate": 1.59e-05, + "loss": 0.8037, "step": 530 }, { "epoch": 0.02, - "learning_rate": 1.0700000000000001e-05, - "loss": 0.8769, + "learning_rate": 1.605e-05, + "loss": 0.8041, "step": 535 }, { "epoch": 0.03, - "learning_rate": 1.0800000000000002e-05, - "loss": 1.0011, + "learning_rate": 1.62e-05, + "loss": 0.9346, "step": 540 }, { "epoch": 0.03, - "learning_rate": 1.0900000000000002e-05, - "loss": 1.1308, + "learning_rate": 1.635e-05, + "loss": 1.0695, "step": 545 }, { "epoch": 0.03, - "learning_rate": 1.1000000000000001e-05, - "loss": 1.6817, + "learning_rate": 1.65e-05, + "loss": 1.0438, "step": 550 }, { "epoch": 0.03, - "learning_rate": 1.1100000000000002e-05, - "loss": 0.8498, + "learning_rate": 1.665e-05, + "loss": 0.7821, "step": 555 }, { "epoch": 0.03, - "learning_rate": 1.1200000000000001e-05, - "loss": 0.6052, + "learning_rate": 1.6800000000000002e-05, + "loss": 0.6464, "step": 560 }, { "epoch": 0.03, - "learning_rate": 1.13e-05, - "loss": 0.6782, + "learning_rate": 1.695e-05, + "loss": 0.5224, "step": 565 }, { "epoch": 0.03, - "learning_rate": 1.14e-05, - "loss": 0.6536, + "learning_rate": 1.71e-05, + "loss": 0.5888, "step": 570 }, { "epoch": 0.03, - "learning_rate": 1.15e-05, - "loss": 0.7006, + "learning_rate": 1.725e-05, + "loss": 0.6815, "step": 575 }, { "epoch": 0.03, - "learning_rate": 1.16e-05, - "loss": 0.9329, + "learning_rate": 1.74e-05, + "loss": 0.7166, "step": 580 }, { "epoch": 0.03, - "learning_rate": 1.17e-05, - "loss": 0.7805, + "learning_rate": 1.755e-05, + "loss": 0.7879, "step": 585 }, { "epoch": 0.03, - "learning_rate": 1.18e-05, - "loss": 0.872, + "learning_rate": 1.77e-05, + "loss": 0.9349, "step": 590 }, { "epoch": 0.03, - "learning_rate": 1.1900000000000001e-05, - "loss": 0.8867, + "learning_rate": 1.785e-05, + "loss": 1.1031, "step": 595 }, { "epoch": 0.03, - "learning_rate": 1.2e-05, - "loss": 1.3013, + "learning_rate": 1.8e-05, + "loss": 1.5274, "step": 600 }, { "epoch": 0.03, - "learning_rate": 1.2100000000000001e-05, - "loss": 0.9019, + "learning_rate": 1.815e-05, + "loss": 0.8439, "step": 605 }, { "epoch": 0.03, - "learning_rate": 1.22e-05, - "loss": 0.5602, + "learning_rate": 1.83e-05, + "loss": 0.51, "step": 610 }, { "epoch": 0.03, - "learning_rate": 1.23e-05, - "loss": 0.632, + "learning_rate": 1.845e-05, + "loss": 0.6592, "step": 615 }, { "epoch": 0.03, - "learning_rate": 1.2400000000000002e-05, - "loss": 0.6756, + "learning_rate": 1.86e-05, + "loss": 0.5804, "step": 620 }, { "epoch": 0.03, - "learning_rate": 1.25e-05, - "loss": 0.7106, + "learning_rate": 1.8750000000000002e-05, + "loss": 0.648, "step": 625 }, { "epoch": 0.03, - "learning_rate": 1.2600000000000001e-05, - "loss": 0.7885, + "learning_rate": 1.8900000000000002e-05, + "loss": 0.7396, "step": 630 }, { "epoch": 0.03, - "learning_rate": 1.27e-05, - "loss": 0.8643, + "learning_rate": 1.9050000000000002e-05, + "loss": 0.9051, "step": 635 }, { "epoch": 0.03, - "learning_rate": 1.2800000000000001e-05, - "loss": 0.953, + "learning_rate": 1.9200000000000003e-05, + "loss": 0.9073, "step": 640 }, { "epoch": 0.03, - "learning_rate": 1.2900000000000002e-05, - "loss": 1.2641, + "learning_rate": 1.935e-05, + "loss": 1.0043, "step": 645 }, { "epoch": 0.03, - "learning_rate": 1.3000000000000001e-05, - "loss": 1.4198, + "learning_rate": 1.95e-05, + "loss": 1.1916, "step": 650 }, { "epoch": 0.03, - "learning_rate": 1.3100000000000002e-05, - "loss": 0.82, + "learning_rate": 1.965e-05, + "loss": 0.6345, "step": 655 }, { "epoch": 0.03, - "learning_rate": 1.3200000000000002e-05, - "loss": 0.6065, + "learning_rate": 1.98e-05, + "loss": 0.5134, "step": 660 }, { "epoch": 0.03, - "learning_rate": 1.3300000000000001e-05, - "loss": 0.5797, + "learning_rate": 1.995e-05, + "loss": 0.4761, "step": 665 }, { "epoch": 0.03, - "learning_rate": 1.3400000000000002e-05, - "loss": 0.7074, + "learning_rate": 2.01e-05, + "loss": 0.6114, "step": 670 }, { "epoch": 0.03, - "learning_rate": 1.3500000000000001e-05, - "loss": 0.5688, + "learning_rate": 2.025e-05, + "loss": 0.5128, "step": 675 }, { "epoch": 0.03, - "learning_rate": 1.3600000000000002e-05, - "loss": 0.6267, + "learning_rate": 2.04e-05, + "loss": 0.7642, "step": 680 }, { "epoch": 0.03, - "learning_rate": 1.3700000000000003e-05, - "loss": 0.9009, + "learning_rate": 2.055e-05, + "loss": 0.7798, "step": 685 }, { "epoch": 0.03, - "learning_rate": 1.38e-05, - "loss": 0.86, + "learning_rate": 2.07e-05, + "loss": 0.8385, "step": 690 }, { "epoch": 0.03, - "learning_rate": 1.39e-05, - "loss": 0.9638, + "learning_rate": 2.085e-05, + "loss": 0.8393, "step": 695 }, { "epoch": 0.03, - "learning_rate": 1.4e-05, - "loss": 1.6872, + "learning_rate": 2.1e-05, + "loss": 1.2992, "step": 700 }, { "epoch": 0.03, - "learning_rate": 1.41e-05, - "loss": 0.7373, + "learning_rate": 2.115e-05, + "loss": 0.7748, "step": 705 }, { "epoch": 0.03, - "learning_rate": 1.4200000000000001e-05, - "loss": 0.5019, + "learning_rate": 2.13e-05, + "loss": 0.4805, "step": 710 }, { "epoch": 0.03, - "learning_rate": 1.43e-05, - "loss": 0.6072, + "learning_rate": 2.145e-05, + "loss": 0.4676, "step": 715 }, { "epoch": 0.03, - "learning_rate": 1.4400000000000001e-05, - "loss": 0.6512, + "learning_rate": 2.16e-05, + "loss": 0.4895, "step": 720 }, { "epoch": 0.03, - "learning_rate": 1.45e-05, - "loss": 0.6652, + "learning_rate": 2.175e-05, + "loss": 0.6178, "step": 725 }, { "epoch": 0.03, - "learning_rate": 1.46e-05, - "loss": 0.7577, + "learning_rate": 2.19e-05, + "loss": 0.6627, "step": 730 }, { "epoch": 0.03, - "learning_rate": 1.4700000000000002e-05, - "loss": 1.0377, + "learning_rate": 2.205e-05, + "loss": 0.7188, "step": 735 }, { "epoch": 0.03, - "learning_rate": 1.48e-05, - "loss": 0.8039, + "learning_rate": 2.22e-05, + "loss": 0.7827, "step": 740 }, { "epoch": 0.03, - "learning_rate": 1.4900000000000001e-05, - "loss": 0.9909, + "learning_rate": 2.235e-05, + "loss": 0.9153, "step": 745 }, { "epoch": 0.03, - "learning_rate": 1.5000000000000002e-05, - "loss": 1.2967, + "learning_rate": 2.25e-05, + "loss": 1.4324, "step": 750 }, { "epoch": 0.04, - "learning_rate": 1.5100000000000001e-05, - "loss": 0.7075, + "learning_rate": 2.265e-05, + "loss": 0.5785, "step": 755 }, { "epoch": 0.04, - "learning_rate": 1.5200000000000002e-05, - "loss": 0.535, + "learning_rate": 2.2800000000000002e-05, + "loss": 0.4344, "step": 760 }, { "epoch": 0.04, - "learning_rate": 1.5300000000000003e-05, - "loss": 0.5943, + "learning_rate": 2.2950000000000002e-05, + "loss": 0.4868, "step": 765 }, { "epoch": 0.04, - "learning_rate": 1.54e-05, - "loss": 0.7975, + "learning_rate": 2.3100000000000002e-05, + "loss": 0.4944, "step": 770 }, { "epoch": 0.04, - "learning_rate": 1.55e-05, - "loss": 0.606, + "learning_rate": 2.3250000000000003e-05, + "loss": 0.5598, "step": 775 }, { "epoch": 0.04, - "learning_rate": 1.5600000000000003e-05, - "loss": 0.748, + "learning_rate": 2.3400000000000003e-05, + "loss": 0.5761, "step": 780 }, { "epoch": 0.04, - "learning_rate": 1.5700000000000002e-05, - "loss": 0.714, + "learning_rate": 2.3550000000000003e-05, + "loss": 0.6754, "step": 785 }, { "epoch": 0.04, - "learning_rate": 1.58e-05, - "loss": 0.9922, + "learning_rate": 2.37e-05, + "loss": 0.9604, "step": 790 }, { "epoch": 0.04, - "learning_rate": 1.5900000000000004e-05, - "loss": 0.9972, + "learning_rate": 2.385e-05, + "loss": 1.0913, "step": 795 }, { "epoch": 0.04, - "learning_rate": 1.6000000000000003e-05, - "loss": 1.2826, + "learning_rate": 2.4e-05, + "loss": 1.3051, "step": 800 }, { "epoch": 0.04, - "learning_rate": 1.6100000000000002e-05, - "loss": 0.5911, + "learning_rate": 2.415e-05, + "loss": 0.6032, "step": 805 }, { "epoch": 0.04, - "learning_rate": 1.62e-05, - "loss": 0.5172, + "learning_rate": 2.43e-05, + "loss": 0.403, "step": 810 }, { "epoch": 0.04, - "learning_rate": 1.63e-05, - "loss": 0.455, + "learning_rate": 2.4449999999999998e-05, + "loss": 0.4457, "step": 815 }, { "epoch": 0.04, - "learning_rate": 1.64e-05, - "loss": 0.6032, + "learning_rate": 2.4599999999999998e-05, + "loss": 0.4929, "step": 820 }, { "epoch": 0.04, - "learning_rate": 1.65e-05, - "loss": 0.6292, + "learning_rate": 2.475e-05, + "loss": 0.6268, "step": 825 }, { "epoch": 0.04, - "learning_rate": 1.66e-05, - "loss": 0.658, + "learning_rate": 2.49e-05, + "loss": 0.5645, "step": 830 }, { "epoch": 0.04, - "learning_rate": 1.67e-05, - "loss": 0.7158, + "learning_rate": 2.505e-05, + "loss": 0.6993, "step": 835 }, { "epoch": 0.04, - "learning_rate": 1.6800000000000002e-05, - "loss": 0.8783, + "learning_rate": 2.52e-05, + "loss": 0.7833, "step": 840 }, { "epoch": 0.04, - "learning_rate": 1.69e-05, - "loss": 0.9988, + "learning_rate": 2.535e-05, + "loss": 0.816, "step": 845 }, { "epoch": 0.04, - "learning_rate": 1.7e-05, - "loss": 1.1596, + "learning_rate": 2.55e-05, + "loss": 1.0046, "step": 850 }, { "epoch": 0.04, - "learning_rate": 1.7100000000000002e-05, - "loss": 0.7145, + "learning_rate": 2.565e-05, + "loss": 0.7003, "step": 855 }, { "epoch": 0.04, - "learning_rate": 1.72e-05, - "loss": 0.4501, + "learning_rate": 2.58e-05, + "loss": 0.3647, "step": 860 }, { "epoch": 0.04, - "learning_rate": 1.73e-05, - "loss": 0.5174, + "learning_rate": 2.595e-05, + "loss": 0.441, "step": 865 }, { "epoch": 0.04, - "learning_rate": 1.7400000000000003e-05, - "loss": 0.5951, + "learning_rate": 2.61e-05, + "loss": 0.5558, "step": 870 }, { "epoch": 0.04, - "learning_rate": 1.7500000000000002e-05, - "loss": 0.7152, + "learning_rate": 2.625e-05, + "loss": 0.526, "step": 875 }, { "epoch": 0.04, - "learning_rate": 1.76e-05, - "loss": 0.6649, + "learning_rate": 2.64e-05, + "loss": 0.7098, "step": 880 }, { "epoch": 0.04, - "learning_rate": 1.77e-05, - "loss": 0.6626, + "learning_rate": 2.655e-05, + "loss": 0.5895, "step": 885 }, { "epoch": 0.04, - "learning_rate": 1.7800000000000002e-05, - "loss": 0.7571, + "learning_rate": 2.6700000000000002e-05, + "loss": 0.7329, "step": 890 }, { "epoch": 0.04, - "learning_rate": 1.79e-05, - "loss": 1.1449, + "learning_rate": 2.6850000000000002e-05, + "loss": 1.0144, "step": 895 }, { "epoch": 0.04, - "learning_rate": 1.8e-05, - "loss": 1.3764, + "learning_rate": 2.7000000000000002e-05, + "loss": 1.1521, "step": 900 }, { "epoch": 0.04, - "learning_rate": 1.8100000000000003e-05, - "loss": 0.7015, + "learning_rate": 2.7150000000000003e-05, + "loss": 0.6231, "step": 905 }, { "epoch": 0.04, - "learning_rate": 1.8200000000000002e-05, - "loss": 0.4139, + "learning_rate": 2.7300000000000003e-05, + "loss": 0.361, "step": 910 }, { "epoch": 0.04, - "learning_rate": 1.83e-05, - "loss": 0.5709, + "learning_rate": 2.7450000000000003e-05, + "loss": 0.5285, "step": 915 }, { "epoch": 0.04, - "learning_rate": 1.8400000000000003e-05, - "loss": 0.4689, + "learning_rate": 2.7600000000000003e-05, + "loss": 0.563, "step": 920 }, { "epoch": 0.04, - "learning_rate": 1.8500000000000002e-05, - "loss": 0.5459, + "learning_rate": 2.7750000000000004e-05, + "loss": 0.5804, "step": 925 }, { "epoch": 0.04, - "learning_rate": 1.86e-05, - "loss": 0.6105, + "learning_rate": 2.79e-05, + "loss": 0.5276, "step": 930 }, { "epoch": 0.04, - "learning_rate": 1.8700000000000004e-05, - "loss": 0.7417, + "learning_rate": 2.805e-05, + "loss": 0.5842, "step": 935 }, { "epoch": 0.04, - "learning_rate": 1.88e-05, - "loss": 0.7044, + "learning_rate": 2.8199999999999998e-05, + "loss": 0.7148, "step": 940 }, { "epoch": 0.04, - "learning_rate": 1.8900000000000002e-05, - "loss": 0.8603, + "learning_rate": 2.8349999999999998e-05, + "loss": 0.9909, "step": 945 }, { "epoch": 0.04, - "learning_rate": 1.9e-05, - "loss": 1.2372, + "learning_rate": 2.8499999999999998e-05, + "loss": 1.0674, "step": 950 }, { "epoch": 0.04, - "learning_rate": 1.91e-05, - "loss": 0.6774, + "learning_rate": 2.865e-05, + "loss": 0.6094, "step": 955 }, { "epoch": 0.04, - "learning_rate": 1.9200000000000003e-05, - "loss": 0.3458, + "learning_rate": 2.88e-05, + "loss": 0.3548, "step": 960 }, { "epoch": 0.05, - "learning_rate": 1.93e-05, - "loss": 0.4924, + "learning_rate": 2.895e-05, + "loss": 0.4452, "step": 965 }, { "epoch": 0.05, - "learning_rate": 1.94e-05, - "loss": 0.4699, + "learning_rate": 2.91e-05, + "loss": 0.5272, "step": 970 }, { "epoch": 0.05, - "learning_rate": 1.95e-05, - "loss": 0.5591, + "learning_rate": 2.925e-05, + "loss": 0.4915, "step": 975 }, { "epoch": 0.05, - "learning_rate": 1.9600000000000002e-05, - "loss": 0.7706, + "learning_rate": 2.94e-05, + "loss": 0.5938, "step": 980 }, { "epoch": 0.05, - "learning_rate": 1.97e-05, - "loss": 0.6164, + "learning_rate": 2.955e-05, + "loss": 0.6842, "step": 985 }, { "epoch": 0.05, - "learning_rate": 1.98e-05, - "loss": 0.6035, + "learning_rate": 2.97e-05, + "loss": 0.7153, "step": 990 }, { "epoch": 0.05, - "learning_rate": 1.9900000000000003e-05, - "loss": 0.8199, + "learning_rate": 2.985e-05, + "loss": 0.8872, "step": 995 }, { "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 1.3823, + "learning_rate": 3e-05, + "loss": 1.0897, "step": 1000 }, { "epoch": 0.05, - "learning_rate": 1.999953119872486e-05, - "loss": 0.6102, + "learning_rate": 2.999953193016398e-05, + "loss": 0.6239, "step": 1005 }, { "epoch": 0.05, - "learning_rate": 1.999906239744972e-05, - "loss": 0.3826, + "learning_rate": 2.9999063860327964e-05, + "loss": 0.3869, "step": 1010 }, { "epoch": 0.05, - "learning_rate": 1.999859359617458e-05, - "loss": 0.4675, + "learning_rate": 2.9998595790491944e-05, + "loss": 0.4034, "step": 1015 }, { "epoch": 0.05, - "learning_rate": 1.999812479489944e-05, - "loss": 0.4567, + "learning_rate": 2.9998127720655923e-05, + "loss": 0.5883, "step": 1020 }, { "epoch": 0.05, - "learning_rate": 1.9997655993624305e-05, - "loss": 0.6292, + "learning_rate": 2.9997659650819903e-05, + "loss": 0.5247, "step": 1025 }, { "epoch": 0.05, - "learning_rate": 1.9997187192349165e-05, - "loss": 0.6357, + "learning_rate": 2.9997191580983886e-05, + "loss": 0.5544, "step": 1030 }, { "epoch": 0.05, - "learning_rate": 1.9996718391074025e-05, + "learning_rate": 2.9996723511147863e-05, "loss": 0.666, "step": 1035 }, { "epoch": 0.05, - "learning_rate": 1.9996249589798888e-05, - "loss": 0.6685, + "learning_rate": 2.9996255441311843e-05, + "loss": 0.6601, "step": 1040 }, { "epoch": 0.05, - "learning_rate": 1.9995780788523748e-05, - "loss": 0.8939, + "learning_rate": 2.9995787371475823e-05, + "loss": 0.7471, "step": 1045 }, { "epoch": 0.05, - "learning_rate": 1.9995311987248608e-05, - "loss": 1.0384, + "learning_rate": 2.9995319301639806e-05, + "loss": 1.0622, "step": 1050 }, { "epoch": 0.05, - "learning_rate": 1.9994843185973468e-05, - "loss": 0.4313, + "learning_rate": 2.9994851231803786e-05, + "loss": 0.5234, "step": 1055 }, { "epoch": 0.05, - "learning_rate": 1.9994374384698328e-05, - "loss": 0.3909, + "learning_rate": 2.9994383161967765e-05, + "loss": 0.3457, "step": 1060 }, { "epoch": 0.05, - "learning_rate": 1.9993905583423188e-05, - "loss": 0.4668, + "learning_rate": 2.999391509213175e-05, + "loss": 0.4613, "step": 1065 }, { "epoch": 0.05, - "learning_rate": 1.999343678214805e-05, - "loss": 0.4648, + "learning_rate": 2.999344702229573e-05, + "loss": 0.5267, "step": 1070 }, { "epoch": 0.05, - "learning_rate": 1.999296798087291e-05, - "loss": 0.5502, + "learning_rate": 2.9992978952459708e-05, + "loss": 0.4031, "step": 1075 }, { "epoch": 0.05, - "learning_rate": 1.999249917959777e-05, - "loss": 0.5675, + "learning_rate": 2.9992510882623688e-05, + "loss": 0.657, "step": 1080 }, { "epoch": 0.05, - "learning_rate": 1.999203037832263e-05, - "loss": 0.7048, + "learning_rate": 2.999204281278767e-05, + "loss": 0.5817, "step": 1085 }, { "epoch": 0.05, - "learning_rate": 1.999156157704749e-05, - "loss": 0.7512, + "learning_rate": 2.999157474295165e-05, + "loss": 0.5876, "step": 1090 }, { "epoch": 0.05, - "learning_rate": 1.999109277577235e-05, - "loss": 0.91, + "learning_rate": 2.9991106673115627e-05, + "loss": 0.8619, "step": 1095 }, { "epoch": 0.05, - "learning_rate": 1.999062397449721e-05, - "loss": 1.4923, + "learning_rate": 2.9990638603279607e-05, + "loss": 0.9578, "step": 1100 }, { "epoch": 0.05, - "learning_rate": 1.9990155173222074e-05, - "loss": 0.593, + "learning_rate": 2.999017053344359e-05, + "loss": 0.6637, "step": 1105 }, { "epoch": 0.05, - "learning_rate": 1.9989686371946934e-05, - "loss": 0.412, + "learning_rate": 2.998970246360757e-05, + "loss": 0.4149, "step": 1110 }, { "epoch": 0.05, - "learning_rate": 1.9989217570671794e-05, - "loss": 0.4295, + "learning_rate": 2.998923439377155e-05, + "loss": 0.2971, "step": 1115 }, { "epoch": 0.05, - "learning_rate": 1.9988748769396657e-05, - "loss": 0.5324, + "learning_rate": 2.998876632393553e-05, + "loss": 0.3353, "step": 1120 }, { "epoch": 0.05, - "learning_rate": 1.9988279968121517e-05, - "loss": 0.5146, + "learning_rate": 2.9988298254099513e-05, + "loss": 0.4795, "step": 1125 }, { "epoch": 0.05, - "learning_rate": 1.9987811166846377e-05, - "loss": 0.6231, + "learning_rate": 2.9987830184263493e-05, + "loss": 0.5698, "step": 1130 }, { "epoch": 0.05, - "learning_rate": 1.9987342365571237e-05, - "loss": 0.6736, + "learning_rate": 2.9987362114427473e-05, + "loss": 0.5586, "step": 1135 }, { "epoch": 0.05, - "learning_rate": 1.9986873564296097e-05, - "loss": 0.7737, + "learning_rate": 2.9986894044591456e-05, + "loss": 0.7123, "step": 1140 }, { "epoch": 0.05, - "learning_rate": 1.9986404763020957e-05, - "loss": 0.796, + "learning_rate": 2.9986425974755436e-05, + "loss": 0.7498, "step": 1145 }, { "epoch": 0.05, - "learning_rate": 1.9985935961745817e-05, - "loss": 1.3116, + "learning_rate": 2.9985957904919416e-05, + "loss": 1.1885, "step": 1150 }, { "epoch": 0.05, - "learning_rate": 1.9985467160470677e-05, - "loss": 0.5599, + "learning_rate": 2.9985489835083395e-05, + "loss": 0.5377, "step": 1155 }, { "epoch": 0.05, - "learning_rate": 1.9984998359195536e-05, - "loss": 0.3027, + "learning_rate": 2.9985021765247375e-05, + "loss": 0.3324, "step": 1160 }, { "epoch": 0.05, - "learning_rate": 1.99845295579204e-05, - "loss": 0.4646, + "learning_rate": 2.9984553695411355e-05, + "loss": 0.4179, "step": 1165 }, { "epoch": 0.05, - "learning_rate": 1.998406075664526e-05, - "loss": 0.5406, + "learning_rate": 2.9984085625575335e-05, + "loss": 0.4199, "step": 1170 }, { "epoch": 0.05, - "learning_rate": 1.998359195537012e-05, - "loss": 0.5461, + "learning_rate": 2.9983617555739315e-05, + "loss": 0.5361, "step": 1175 }, { "epoch": 0.06, - "learning_rate": 1.998312315409498e-05, - "loss": 0.6237, + "learning_rate": 2.9983149485903298e-05, + "loss": 0.5003, "step": 1180 }, { "epoch": 0.06, - "learning_rate": 1.9982654352819843e-05, - "loss": 0.6277, + "learning_rate": 2.9982681416067278e-05, + "loss": 0.7324, "step": 1185 }, { "epoch": 0.06, - "learning_rate": 1.9982185551544703e-05, - "loss": 0.6304, + "learning_rate": 2.9982213346231258e-05, + "loss": 0.7964, "step": 1190 }, { "epoch": 0.06, - "learning_rate": 1.9981716750269563e-05, - "loss": 0.8886, + "learning_rate": 2.998174527639524e-05, + "loss": 0.7179, "step": 1195 }, { "epoch": 0.06, - "learning_rate": 1.9981247948994423e-05, - "loss": 1.1062, + "learning_rate": 2.998127720655922e-05, + "loss": 0.9888, "step": 1200 }, { "epoch": 0.06, - "learning_rate": 1.9980779147719283e-05, - "loss": 0.597, + "learning_rate": 2.99808091367232e-05, + "loss": 0.545, "step": 1205 }, { "epoch": 0.06, - "learning_rate": 1.9980310346444146e-05, - "loss": 0.3313, + "learning_rate": 2.998034106688718e-05, + "loss": 0.265, "step": 1210 }, { "epoch": 0.06, - "learning_rate": 1.9979841545169006e-05, - "loss": 0.3761, + "learning_rate": 2.9979872997051163e-05, + "loss": 0.4571, "step": 1215 }, { "epoch": 0.06, - "learning_rate": 1.9979372743893866e-05, - "loss": 0.4524, + "learning_rate": 2.9979404927215143e-05, + "loss": 0.4603, "step": 1220 }, { "epoch": 0.06, - "learning_rate": 1.9978903942618726e-05, - "loss": 0.5068, + "learning_rate": 2.997893685737912e-05, + "loss": 0.4434, "step": 1225 }, { "epoch": 0.06, - "learning_rate": 1.9978435141343586e-05, - "loss": 0.5899, + "learning_rate": 2.99784687875431e-05, + "loss": 0.5181, "step": 1230 }, { "epoch": 0.06, - "learning_rate": 1.9977966340068446e-05, - "loss": 0.5973, + "learning_rate": 2.9978000717707083e-05, + "loss": 0.6107, "step": 1235 }, { "epoch": 0.06, - "learning_rate": 1.9977497538793306e-05, - "loss": 0.7986, + "learning_rate": 2.9977532647871063e-05, + "loss": 0.6827, "step": 1240 }, { "epoch": 0.06, - "learning_rate": 1.9977028737518165e-05, - "loss": 0.749, + "learning_rate": 2.9977064578035042e-05, + "loss": 0.8626, "step": 1245 }, { "epoch": 0.06, - "learning_rate": 1.997655993624303e-05, - "loss": 1.074, + "learning_rate": 2.9976596508199026e-05, + "loss": 1.0847, "step": 1250 }, { "epoch": 0.06, - "learning_rate": 1.997609113496789e-05, - "loss": 0.511, + "learning_rate": 2.9976128438363005e-05, + "loss": 0.5537, "step": 1255 }, { "epoch": 0.06, - "learning_rate": 1.997562233369275e-05, - "loss": 0.3073, + "learning_rate": 2.9975660368526985e-05, + "loss": 0.3262, "step": 1260 }, { "epoch": 0.06, - "learning_rate": 1.9975153532417612e-05, - "loss": 0.3776, + "learning_rate": 2.9975192298690965e-05, + "loss": 0.4132, "step": 1265 }, { "epoch": 0.06, - "learning_rate": 1.9974684731142472e-05, - "loss": 0.458, + "learning_rate": 2.9974724228854948e-05, + "loss": 0.4272, "step": 1270 }, { "epoch": 0.06, - "learning_rate": 1.9974215929867332e-05, - "loss": 0.4236, + "learning_rate": 2.9974256159018928e-05, + "loss": 0.3927, "step": 1275 }, { "epoch": 0.06, - "learning_rate": 1.9973747128592192e-05, - "loss": 0.53, + "learning_rate": 2.9973788089182908e-05, + "loss": 0.4433, "step": 1280 }, { "epoch": 0.06, - "learning_rate": 1.9973278327317052e-05, - "loss": 0.5216, + "learning_rate": 2.9973320019346884e-05, + "loss": 0.5682, "step": 1285 }, { "epoch": 0.06, - "learning_rate": 1.997280952604191e-05, - "loss": 0.8459, + "learning_rate": 2.9972851949510867e-05, + "loss": 0.6579, "step": 1290 }, { "epoch": 0.06, - "learning_rate": 1.997234072476677e-05, - "loss": 1.0025, + "learning_rate": 2.9972383879674847e-05, + "loss": 0.9216, "step": 1295 }, { "epoch": 0.06, - "learning_rate": 1.997187192349163e-05, - "loss": 1.0659, + "learning_rate": 2.9971915809838827e-05, + "loss": 1.0738, "step": 1300 }, { "epoch": 0.06, - "learning_rate": 1.9971403122216495e-05, - "loss": 0.5928, + "learning_rate": 2.9971447740002807e-05, + "loss": 0.5654, "step": 1305 }, { "epoch": 0.06, - "learning_rate": 1.9970934320941355e-05, - "loss": 0.2835, + "learning_rate": 2.997097967016679e-05, + "loss": 0.3394, "step": 1310 }, { "epoch": 0.06, - "learning_rate": 1.9970465519666215e-05, - "loss": 0.4103, + "learning_rate": 2.997051160033077e-05, + "loss": 0.3269, "step": 1315 }, { "epoch": 0.06, - "learning_rate": 1.9969996718391075e-05, - "loss": 0.424, + "learning_rate": 2.997004353049475e-05, + "loss": 0.3893, "step": 1320 }, { "epoch": 0.06, - "learning_rate": 1.9969527917115935e-05, - "loss": 0.505, + "learning_rate": 2.9969575460658733e-05, + "loss": 0.4748, "step": 1325 }, { "epoch": 0.06, - "learning_rate": 1.9969059115840798e-05, - "loss": 0.5272, + "learning_rate": 2.9969107390822713e-05, + "loss": 0.4428, "step": 1330 }, { "epoch": 0.06, - "learning_rate": 1.9968590314565658e-05, - "loss": 0.5859, + "learning_rate": 2.9968639320986693e-05, + "loss": 0.5365, "step": 1335 }, { "epoch": 0.06, - "learning_rate": 1.9968121513290518e-05, - "loss": 0.7147, + "learning_rate": 2.9968171251150672e-05, + "loss": 0.6774, "step": 1340 }, { "epoch": 0.06, - "learning_rate": 1.9967652712015378e-05, - "loss": 0.6935, + "learning_rate": 2.9967703181314656e-05, + "loss": 0.6508, "step": 1345 }, { "epoch": 0.06, - "learning_rate": 1.996718391074024e-05, - "loss": 1.0448, + "learning_rate": 2.9967235111478632e-05, + "loss": 1.013, "step": 1350 }, { "epoch": 0.06, - "learning_rate": 1.99667151094651e-05, - "loss": 0.5598, + "learning_rate": 2.9966767041642612e-05, + "loss": 0.5972, "step": 1355 }, { "epoch": 0.06, - "learning_rate": 1.996624630818996e-05, - "loss": 0.2887, + "learning_rate": 2.996629897180659e-05, + "loss": 0.3359, "step": 1360 }, { "epoch": 0.06, - "learning_rate": 1.996577750691482e-05, - "loss": 0.3524, + "learning_rate": 2.9965830901970575e-05, + "loss": 0.3574, "step": 1365 }, { "epoch": 0.06, - "learning_rate": 1.996530870563968e-05, - "loss": 0.3648, + "learning_rate": 2.9965362832134555e-05, + "loss": 0.3918, "step": 1370 }, { "epoch": 0.06, - "learning_rate": 1.996483990436454e-05, - "loss": 0.5021, + "learning_rate": 2.9964894762298535e-05, + "loss": 0.4044, "step": 1375 }, { "epoch": 0.06, - "learning_rate": 1.99643711030894e-05, - "loss": 0.5777, + "learning_rate": 2.9964426692462518e-05, + "loss": 0.523, "step": 1380 }, { "epoch": 0.06, - "learning_rate": 1.996390230181426e-05, - "loss": 0.5879, + "learning_rate": 2.9963958622626498e-05, + "loss": 0.5226, "step": 1385 }, { "epoch": 0.06, - "learning_rate": 1.9963433500539124e-05, - "loss": 0.6235, + "learning_rate": 2.9963490552790477e-05, + "loss": 0.5051, "step": 1390 }, { "epoch": 0.07, - "learning_rate": 1.9962964699263984e-05, - "loss": 0.5928, + "learning_rate": 2.9963022482954457e-05, + "loss": 0.8244, "step": 1395 }, { "epoch": 0.07, - "learning_rate": 1.9962495897988844e-05, - "loss": 1.172, + "learning_rate": 2.996255441311844e-05, + "loss": 1.2422, "step": 1400 }, { "epoch": 0.07, - "learning_rate": 1.9962027096713704e-05, - "loss": 0.5087, + "learning_rate": 2.996208634328242e-05, + "loss": 0.5015, "step": 1405 }, { "epoch": 0.07, - "learning_rate": 1.9961558295438567e-05, - "loss": 0.3315, + "learning_rate": 2.99616182734464e-05, + "loss": 0.2722, "step": 1410 }, { "epoch": 0.07, - "learning_rate": 1.9961089494163427e-05, - "loss": 0.405, + "learning_rate": 2.9961150203610376e-05, + "loss": 0.3147, "step": 1415 }, { "epoch": 0.07, - "learning_rate": 1.9960620692888287e-05, - "loss": 0.407, + "learning_rate": 2.996068213377436e-05, + "loss": 0.389, "step": 1420 }, { "epoch": 0.07, - "learning_rate": 1.9960151891613147e-05, - "loss": 0.5505, + "learning_rate": 2.996021406393834e-05, + "loss": 0.3931, "step": 1425 }, { "epoch": 0.07, - "learning_rate": 1.9959683090338007e-05, - "loss": 0.5691, + "learning_rate": 2.995974599410232e-05, + "loss": 0.5088, "step": 1430 }, { "epoch": 0.07, - "learning_rate": 1.9959214289062867e-05, - "loss": 0.502, + "learning_rate": 2.9959277924266302e-05, + "loss": 0.6008, "step": 1435 }, { "epoch": 0.07, - "learning_rate": 1.9958745487787727e-05, - "loss": 0.8324, + "learning_rate": 2.9958809854430282e-05, + "loss": 0.6104, "step": 1440 }, { "epoch": 0.07, - "learning_rate": 1.995827668651259e-05, - "loss": 0.6685, + "learning_rate": 2.9958341784594262e-05, + "loss": 0.6142, "step": 1445 }, { "epoch": 0.07, - "learning_rate": 1.995780788523745e-05, - "loss": 1.0913, + "learning_rate": 2.9957873714758242e-05, + "loss": 1.0904, "step": 1450 }, { "epoch": 0.07, - "learning_rate": 1.995733908396231e-05, - "loss": 0.4889, + "learning_rate": 2.9957405644922225e-05, + "loss": 0.5021, "step": 1455 }, { "epoch": 0.07, - "learning_rate": 1.995687028268717e-05, - "loss": 0.3823, + "learning_rate": 2.9956937575086205e-05, + "loss": 0.4151, "step": 1460 }, { "epoch": 0.07, - "learning_rate": 1.995640148141203e-05, - "loss": 0.4376, + "learning_rate": 2.9956469505250185e-05, + "loss": 0.3302, "step": 1465 }, { "epoch": 0.07, - "learning_rate": 1.9955932680136893e-05, - "loss": 0.4689, + "learning_rate": 2.9956001435414165e-05, + "loss": 0.3917, "step": 1470 }, { "epoch": 0.07, - "learning_rate": 1.9955463878861753e-05, - "loss": 0.4048, + "learning_rate": 2.9955533365578144e-05, + "loss": 0.3972, "step": 1475 }, { "epoch": 0.07, - "learning_rate": 1.9954995077586613e-05, - "loss": 0.5842, + "learning_rate": 2.9955065295742124e-05, + "loss": 0.428, "step": 1480 }, { "epoch": 0.07, - "learning_rate": 1.9954526276311473e-05, - "loss": 0.5643, + "learning_rate": 2.9954597225906104e-05, + "loss": 0.5828, "step": 1485 }, { "epoch": 0.07, - "learning_rate": 1.9954057475036336e-05, - "loss": 0.8457, + "learning_rate": 2.9954129156070087e-05, + "loss": 0.6936, "step": 1490 }, { "epoch": 0.07, - "learning_rate": 1.9953588673761196e-05, - "loss": 0.5716, + "learning_rate": 2.9953661086234067e-05, + "loss": 0.7506, "step": 1495 }, { "epoch": 0.07, - "learning_rate": 1.9953119872486056e-05, - "loss": 1.1056, + "learning_rate": 2.9953193016398047e-05, + "loss": 1.1826, "step": 1500 }, { "epoch": 0.07, - "learning_rate": 1.9952651071210916e-05, - "loss": 0.4777, + "learning_rate": 2.9952724946562027e-05, + "loss": 0.4892, "step": 1505 }, { "epoch": 0.07, - "learning_rate": 1.9952182269935776e-05, - "loss": 0.277, + "learning_rate": 2.995225687672601e-05, + "loss": 0.353, "step": 1510 }, { "epoch": 0.07, - "learning_rate": 1.9951713468660636e-05, - "loss": 0.3154, + "learning_rate": 2.995178880688999e-05, + "loss": 0.381, "step": 1515 }, { "epoch": 0.07, - "learning_rate": 1.9951244667385496e-05, - "loss": 0.4537, + "learning_rate": 2.995132073705397e-05, + "loss": 0.2913, "step": 1520 }, { "epoch": 0.07, - "learning_rate": 1.9950775866110356e-05, - "loss": 0.517, + "learning_rate": 2.995085266721795e-05, + "loss": 0.3876, "step": 1525 }, { "epoch": 0.07, - "learning_rate": 1.9950307064835216e-05, - "loss": 0.5247, + "learning_rate": 2.9950384597381933e-05, + "loss": 0.4861, "step": 1530 }, { "epoch": 0.07, - "learning_rate": 1.994983826356008e-05, - "loss": 0.4861, + "learning_rate": 2.9949916527545912e-05, + "loss": 0.5714, "step": 1535 }, { "epoch": 0.07, - "learning_rate": 1.994936946228494e-05, - "loss": 0.5592, + "learning_rate": 2.994944845770989e-05, + "loss": 0.3839, "step": 1540 }, { "epoch": 0.07, - "learning_rate": 1.99489006610098e-05, - "loss": 0.9036, + "learning_rate": 2.994898038787387e-05, + "loss": 0.688, "step": 1545 }, { "epoch": 0.07, - "learning_rate": 1.9948431859734662e-05, - "loss": 1.0261, + "learning_rate": 2.9948512318037852e-05, + "loss": 0.9213, "step": 1550 }, { "epoch": 0.07, - "learning_rate": 1.9947963058459522e-05, - "loss": 0.5694, + "learning_rate": 2.994804424820183e-05, + "loss": 0.5367, "step": 1555 }, { "epoch": 0.07, - "learning_rate": 1.9947494257184382e-05, - "loss": 0.2699, + "learning_rate": 2.994757617836581e-05, + "loss": 0.2404, "step": 1560 }, { "epoch": 0.07, - "learning_rate": 1.9947025455909242e-05, - "loss": 0.3443, + "learning_rate": 2.9947108108529795e-05, + "loss": 0.3007, "step": 1565 }, { "epoch": 0.07, - "learning_rate": 1.9946556654634102e-05, - "loss": 0.563, + "learning_rate": 2.9946640038693775e-05, + "loss": 0.3554, "step": 1570 }, { "epoch": 0.07, - "learning_rate": 1.9946087853358962e-05, - "loss": 0.3772, + "learning_rate": 2.9946171968857754e-05, + "loss": 0.3435, "step": 1575 }, { "epoch": 0.07, - "learning_rate": 1.9945619052083822e-05, - "loss": 0.5209, + "learning_rate": 2.9945703899021734e-05, + "loss": 0.4196, "step": 1580 }, { "epoch": 0.07, - "learning_rate": 1.9945150250808685e-05, - "loss": 0.4973, + "learning_rate": 2.9945235829185717e-05, + "loss": 0.5177, "step": 1585 }, { "epoch": 0.07, - "learning_rate": 1.9944681449533545e-05, - "loss": 0.4882, + "learning_rate": 2.9944767759349697e-05, + "loss": 0.6126, "step": 1590 }, { "epoch": 0.07, - "learning_rate": 1.9944212648258405e-05, - "loss": 0.7922, + "learning_rate": 2.9944299689513677e-05, + "loss": 0.6459, "step": 1595 }, { "epoch": 0.07, - "learning_rate": 1.9943743846983265e-05, - "loss": 0.7772, + "learning_rate": 2.9943831619677657e-05, + "loss": 1.0063, "step": 1600 }, { "epoch": 0.07, - "learning_rate": 1.9943275045708125e-05, - "loss": 0.555, + "learning_rate": 2.9943363549841637e-05, + "loss": 0.4703, "step": 1605 }, { "epoch": 0.08, - "learning_rate": 1.9942806244432985e-05, - "loss": 0.4321, + "learning_rate": 2.9942895480005616e-05, + "loss": 0.2602, "step": 1610 }, { "epoch": 0.08, - "learning_rate": 1.9942337443157848e-05, - "loss": 0.4047, + "learning_rate": 2.9942427410169596e-05, + "loss": 0.3381, "step": 1615 }, { "epoch": 0.08, - "learning_rate": 1.9941868641882708e-05, - "loss": 0.352, + "learning_rate": 2.994195934033358e-05, + "loss": 0.3428, "step": 1620 }, { "epoch": 0.08, - "learning_rate": 1.9941399840607568e-05, - "loss": 0.3804, + "learning_rate": 2.994149127049756e-05, + "loss": 0.5091, "step": 1625 }, { "epoch": 0.08, - "learning_rate": 1.994093103933243e-05, - "loss": 0.4649, + "learning_rate": 2.994102320066154e-05, + "loss": 0.4356, "step": 1630 }, { "epoch": 0.08, - "learning_rate": 1.994046223805729e-05, - "loss": 0.5527, + "learning_rate": 2.994055513082552e-05, + "loss": 0.4724, "step": 1635 }, { "epoch": 0.08, - "learning_rate": 1.993999343678215e-05, - "loss": 0.7, + "learning_rate": 2.9940087060989502e-05, + "loss": 0.6446, "step": 1640 }, { "epoch": 0.08, - "learning_rate": 1.993952463550701e-05, - "loss": 0.7865, + "learning_rate": 2.9939618991153482e-05, + "loss": 0.8065, "step": 1645 }, { "epoch": 0.08, - "learning_rate": 1.993905583423187e-05, - "loss": 1.0613, + "learning_rate": 2.9939150921317462e-05, + "loss": 1.2063, "step": 1650 }, { "epoch": 0.08, - "learning_rate": 1.993858703295673e-05, - "loss": 0.533, + "learning_rate": 2.993868285148144e-05, + "loss": 0.4322, "step": 1655 }, { "epoch": 0.08, - "learning_rate": 1.993811823168159e-05, - "loss": 0.2746, + "learning_rate": 2.9938214781645425e-05, + "loss": 0.2961, "step": 1660 }, { "epoch": 0.08, - "learning_rate": 1.993764943040645e-05, - "loss": 0.3082, + "learning_rate": 2.99377467118094e-05, + "loss": 0.3671, "step": 1665 }, { "epoch": 0.08, - "learning_rate": 1.993718062913131e-05, - "loss": 0.4, + "learning_rate": 2.993727864197338e-05, + "loss": 0.2797, "step": 1670 }, { "epoch": 0.08, - "learning_rate": 1.9936711827856174e-05, - "loss": 0.4761, + "learning_rate": 2.9936810572137364e-05, + "loss": 0.3971, "step": 1675 }, { "epoch": 0.08, - "learning_rate": 1.9936243026581034e-05, - "loss": 0.4022, + "learning_rate": 2.9936342502301344e-05, + "loss": 0.3879, "step": 1680 }, { "epoch": 0.08, - "learning_rate": 1.9935774225305894e-05, - "loss": 0.6181, + "learning_rate": 2.9935874432465324e-05, + "loss": 0.4365, "step": 1685 }, { "epoch": 0.08, - "learning_rate": 1.9935305424030754e-05, - "loss": 0.6173, + "learning_rate": 2.9935406362629304e-05, + "loss": 0.7451, "step": 1690 }, { "epoch": 0.08, - "learning_rate": 1.9934836622755617e-05, - "loss": 0.6797, + "learning_rate": 2.9934938292793287e-05, + "loss": 0.5118, "step": 1695 }, { "epoch": 0.08, - "learning_rate": 1.9934367821480477e-05, - "loss": 1.1921, + "learning_rate": 2.9934470222957267e-05, + "loss": 0.9061, "step": 1700 }, { "epoch": 0.08, - "learning_rate": 1.9933899020205337e-05, - "loss": 0.5224, + "learning_rate": 2.9934002153121247e-05, + "loss": 0.4253, "step": 1705 }, { "epoch": 0.08, - "learning_rate": 1.9933430218930197e-05, - "loss": 0.3617, + "learning_rate": 2.9933534083285226e-05, + "loss": 0.2794, "step": 1710 }, { "epoch": 0.08, - "learning_rate": 1.9932961417655057e-05, - "loss": 0.3178, + "learning_rate": 2.993306601344921e-05, + "loss": 0.2437, "step": 1715 }, { "epoch": 0.08, - "learning_rate": 1.993249261637992e-05, - "loss": 0.4298, + "learning_rate": 2.993259794361319e-05, + "loss": 0.3895, "step": 1720 }, { "epoch": 0.08, - "learning_rate": 1.993202381510478e-05, - "loss": 0.4292, + "learning_rate": 2.993212987377717e-05, + "loss": 0.4005, "step": 1725 }, { "epoch": 0.08, - "learning_rate": 1.993155501382964e-05, - "loss": 0.4192, + "learning_rate": 2.9931661803941146e-05, + "loss": 0.3341, "step": 1730 }, { "epoch": 0.08, - "learning_rate": 1.99310862125545e-05, - "loss": 0.5256, + "learning_rate": 2.993119373410513e-05, + "loss": 0.5051, "step": 1735 }, { "epoch": 0.08, - "learning_rate": 1.993061741127936e-05, - "loss": 0.5433, + "learning_rate": 2.993072566426911e-05, + "loss": 0.6066, "step": 1740 }, { "epoch": 0.08, - "learning_rate": 1.993014861000422e-05, - "loss": 0.7419, + "learning_rate": 2.993025759443309e-05, + "loss": 0.8869, "step": 1745 }, { "epoch": 0.08, - "learning_rate": 1.992967980872908e-05, - "loss": 1.1098, + "learning_rate": 2.992978952459707e-05, + "loss": 1.0657, "step": 1750 }, { "epoch": 0.08, - "learning_rate": 1.992921100745394e-05, - "loss": 0.4264, + "learning_rate": 2.992932145476105e-05, + "loss": 0.3844, "step": 1755 }, { "epoch": 0.08, - "learning_rate": 1.9928742206178803e-05, - "loss": 0.2677, + "learning_rate": 2.992885338492503e-05, + "loss": 0.3359, "step": 1760 }, { "epoch": 0.08, - "learning_rate": 1.9928273404903663e-05, - "loss": 0.3569, + "learning_rate": 2.992838531508901e-05, + "loss": 0.3245, "step": 1765 }, { "epoch": 0.08, - "learning_rate": 1.9927804603628523e-05, - "loss": 0.3524, + "learning_rate": 2.9927917245252994e-05, + "loss": 0.4634, "step": 1770 }, { "epoch": 0.08, - "learning_rate": 1.9927335802353386e-05, - "loss": 0.469, + "learning_rate": 2.9927449175416974e-05, + "loss": 0.3902, "step": 1775 }, { "epoch": 0.08, - "learning_rate": 1.9926867001078246e-05, - "loss": 0.3399, + "learning_rate": 2.9926981105580954e-05, + "loss": 0.4418, "step": 1780 }, { "epoch": 0.08, - "learning_rate": 1.9926398199803106e-05, - "loss": 0.583, + "learning_rate": 2.9926513035744934e-05, + "loss": 0.4923, "step": 1785 }, { "epoch": 0.08, - "learning_rate": 1.9925929398527966e-05, - "loss": 0.5959, + "learning_rate": 2.9926044965908914e-05, + "loss": 0.5632, "step": 1790 }, { "epoch": 0.08, - "learning_rate": 1.9925460597252826e-05, - "loss": 0.7883, + "learning_rate": 2.9925576896072893e-05, + "loss": 0.6368, "step": 1795 }, { "epoch": 0.08, - "learning_rate": 1.9924991795977686e-05, - "loss": 0.8624, + "learning_rate": 2.9925108826236873e-05, + "loss": 0.8357, "step": 1800 }, { "epoch": 0.08, - "learning_rate": 1.9924522994702546e-05, - "loss": 0.4495, + "learning_rate": 2.9924640756400856e-05, + "loss": 0.5503, "step": 1805 }, { "epoch": 0.08, - "learning_rate": 1.9924054193427406e-05, - "loss": 0.2398, + "learning_rate": 2.9924172686564836e-05, + "loss": 0.273, "step": 1810 }, { "epoch": 0.08, - "learning_rate": 1.992358539215227e-05, - "loss": 0.285, + "learning_rate": 2.9923704616728816e-05, + "loss": 0.2966, "step": 1815 }, { "epoch": 0.08, - "learning_rate": 1.992311659087713e-05, - "loss": 0.3657, + "learning_rate": 2.9923236546892796e-05, + "loss": 0.3135, "step": 1820 }, { "epoch": 0.09, - "learning_rate": 1.992264778960199e-05, - "loss": 0.4166, + "learning_rate": 2.992276847705678e-05, + "loss": 0.4249, "step": 1825 }, { "epoch": 0.09, - "learning_rate": 1.992217898832685e-05, - "loss": 0.4724, + "learning_rate": 2.992230040722076e-05, + "loss": 0.4162, "step": 1830 }, { "epoch": 0.09, - "learning_rate": 1.992171018705171e-05, - "loss": 0.5657, + "learning_rate": 2.992183233738474e-05, + "loss": 0.606, "step": 1835 }, { "epoch": 0.09, - "learning_rate": 1.9921241385776572e-05, - "loss": 0.4912, + "learning_rate": 2.992136426754872e-05, + "loss": 0.5282, "step": 1840 }, { "epoch": 0.09, - "learning_rate": 1.9920772584501432e-05, - "loss": 0.8323, + "learning_rate": 2.9920896197712702e-05, + "loss": 0.6356, "step": 1845 }, { "epoch": 0.09, - "learning_rate": 1.9920303783226292e-05, - "loss": 1.2386, + "learning_rate": 2.992042812787668e-05, + "loss": 0.8718, "step": 1850 }, { "epoch": 0.09, - "learning_rate": 1.9919834981951152e-05, - "loss": 0.481, + "learning_rate": 2.9919960058040658e-05, + "loss": 0.6441, "step": 1855 }, { "epoch": 0.09, - "learning_rate": 1.9919366180676015e-05, - "loss": 0.2647, + "learning_rate": 2.991949198820464e-05, + "loss": 0.2587, "step": 1860 }, { "epoch": 0.09, - "learning_rate": 1.9918897379400875e-05, - "loss": 0.3251, + "learning_rate": 2.991902391836862e-05, + "loss": 0.394, "step": 1865 }, { "epoch": 0.09, - "learning_rate": 1.9918428578125735e-05, - "loss": 0.4379, + "learning_rate": 2.99185558485326e-05, + "loss": 0.4004, "step": 1870 }, { "epoch": 0.09, - "learning_rate": 1.9917959776850595e-05, + "learning_rate": 2.991808777869658e-05, "loss": 0.3494, "step": 1875 }, { "epoch": 0.09, - "learning_rate": 1.9917490975575455e-05, - "loss": 0.4675, + "learning_rate": 2.9917619708860564e-05, + "loss": 0.4785, "step": 1880 }, { "epoch": 0.09, - "learning_rate": 1.9917022174300315e-05, - "loss": 0.5453, + "learning_rate": 2.9917151639024544e-05, + "loss": 0.4616, "step": 1885 }, { "epoch": 0.09, - "learning_rate": 1.9916553373025175e-05, - "loss": 0.7557, + "learning_rate": 2.9916683569188524e-05, + "loss": 0.5584, "step": 1890 }, { "epoch": 0.09, - "learning_rate": 1.9916084571750035e-05, - "loss": 0.8384, + "learning_rate": 2.9916215499352503e-05, + "loss": 0.6289, "step": 1895 }, { "epoch": 0.09, - "learning_rate": 1.9915615770474898e-05, - "loss": 0.9389, + "learning_rate": 2.9915747429516487e-05, + "loss": 0.9955, "step": 1900 }, { "epoch": 0.09, - "learning_rate": 1.9915146969199758e-05, - "loss": 0.595, + "learning_rate": 2.9915279359680466e-05, + "loss": 0.4722, "step": 1905 }, { "epoch": 0.09, - "learning_rate": 1.9914678167924618e-05, - "loss": 0.2589, + "learning_rate": 2.9914811289844446e-05, + "loss": 0.3275, "step": 1910 }, { "epoch": 0.09, - "learning_rate": 1.9914209366649478e-05, - "loss": 0.2753, + "learning_rate": 2.9914343220008426e-05, + "loss": 0.266, "step": 1915 }, { "epoch": 0.09, - "learning_rate": 1.991374056537434e-05, - "loss": 0.327, + "learning_rate": 2.9913875150172406e-05, + "loss": 0.3133, "step": 1920 }, { "epoch": 0.09, - "learning_rate": 1.99132717640992e-05, - "loss": 0.4068, + "learning_rate": 2.9913407080336386e-05, + "loss": 0.4386, "step": 1925 }, { "epoch": 0.09, - "learning_rate": 1.991280296282406e-05, - "loss": 0.3982, + "learning_rate": 2.9912939010500365e-05, + "loss": 0.5633, "step": 1930 }, { "epoch": 0.09, - "learning_rate": 1.991233416154892e-05, - "loss": 0.4792, + "learning_rate": 2.991247094066435e-05, + "loss": 0.5064, "step": 1935 }, { "epoch": 0.09, - "learning_rate": 1.991186536027378e-05, - "loss": 0.5322, + "learning_rate": 2.991200287082833e-05, + "loss": 0.6127, "step": 1940 }, { "epoch": 0.09, - "learning_rate": 1.991139655899864e-05, - "loss": 0.5524, + "learning_rate": 2.9911534800992308e-05, + "loss": 0.5771, "step": 1945 }, { "epoch": 0.09, - "learning_rate": 1.99109277577235e-05, - "loss": 0.7718, + "learning_rate": 2.9911066731156288e-05, + "loss": 0.8355, "step": 1950 }, { "epoch": 0.09, - "learning_rate": 1.9910458956448364e-05, - "loss": 0.4615, + "learning_rate": 2.991059866132027e-05, + "loss": 0.6079, "step": 1955 }, { "epoch": 0.09, - "learning_rate": 1.9909990155173224e-05, - "loss": 0.23, + "learning_rate": 2.991013059148425e-05, + "loss": 0.2757, "step": 1960 }, { "epoch": 0.09, - "learning_rate": 1.9909521353898084e-05, - "loss": 0.3236, + "learning_rate": 2.990966252164823e-05, + "loss": 0.2852, "step": 1965 }, { "epoch": 0.09, - "learning_rate": 1.9909052552622944e-05, - "loss": 0.3613, + "learning_rate": 2.990919445181221e-05, + "loss": 0.3789, "step": 1970 }, { "epoch": 0.09, - "learning_rate": 1.9908583751347804e-05, - "loss": 0.4707, + "learning_rate": 2.9908726381976194e-05, + "loss": 0.3595, "step": 1975 }, { "epoch": 0.09, - "learning_rate": 1.9908114950072667e-05, - "loss": 0.4895, + "learning_rate": 2.990825831214017e-05, + "loss": 0.4526, "step": 1980 }, { "epoch": 0.09, - "learning_rate": 1.9907646148797527e-05, - "loss": 0.4998, + "learning_rate": 2.990779024230415e-05, + "loss": 0.469, "step": 1985 }, { "epoch": 0.09, - "learning_rate": 1.9907177347522387e-05, - "loss": 0.4744, + "learning_rate": 2.9907322172468133e-05, + "loss": 0.5819, "step": 1990 }, { "epoch": 0.09, - "learning_rate": 1.9906708546247247e-05, - "loss": 0.6763, + "learning_rate": 2.9906854102632113e-05, + "loss": 0.5658, "step": 1995 }, { "epoch": 0.09, - "learning_rate": 1.990623974497211e-05, - "loss": 0.8183, + "learning_rate": 2.9906386032796093e-05, + "loss": 0.9492, "step": 2000 }, { "epoch": 0.09, - "learning_rate": 1.990577094369697e-05, - "loss": 0.4009, + "learning_rate": 2.9905917962960073e-05, + "loss": 0.4452, "step": 2005 }, { "epoch": 0.09, - "learning_rate": 1.990530214242183e-05, - "loss": 0.2849, + "learning_rate": 2.9905449893124056e-05, + "loss": 0.3381, "step": 2010 }, { "epoch": 0.09, - "learning_rate": 1.990483334114669e-05, - "loss": 0.3281, + "learning_rate": 2.9904981823288036e-05, + "loss": 0.2989, "step": 2015 }, { "epoch": 0.09, - "learning_rate": 1.990436453987155e-05, - "loss": 0.2439, + "learning_rate": 2.9904513753452016e-05, + "loss": 0.3576, "step": 2020 }, { "epoch": 0.09, - "learning_rate": 1.990389573859641e-05, - "loss": 0.4259, + "learning_rate": 2.9904045683615996e-05, + "loss": 0.3346, "step": 2025 }, { "epoch": 0.09, - "learning_rate": 1.990342693732127e-05, - "loss": 0.4708, + "learning_rate": 2.990357761377998e-05, + "loss": 0.4455, "step": 2030 }, { "epoch": 0.09, - "learning_rate": 1.990295813604613e-05, - "loss": 0.4431, + "learning_rate": 2.990310954394396e-05, + "loss": 0.4583, "step": 2035 }, { "epoch": 0.1, - "learning_rate": 1.990248933477099e-05, - "loss": 0.6112, + "learning_rate": 2.990264147410794e-05, + "loss": 0.5651, "step": 2040 }, { "epoch": 0.1, - "learning_rate": 1.9902020533495853e-05, - "loss": 0.7258, + "learning_rate": 2.9902173404271918e-05, + "loss": 0.7309, "step": 2045 }, { "epoch": 0.1, - "learning_rate": 1.9901551732220713e-05, - "loss": 1.1696, + "learning_rate": 2.9901705334435898e-05, + "loss": 1.0832, "step": 2050 }, { "epoch": 0.1, - "learning_rate": 1.9901082930945573e-05, - "loss": 0.4031, + "learning_rate": 2.9901237264599878e-05, + "loss": 0.4419, "step": 2055 }, { "epoch": 0.1, - "learning_rate": 1.9900614129670436e-05, - "loss": 0.2141, + "learning_rate": 2.9900769194763858e-05, + "loss": 0.2238, "step": 2060 }, { "epoch": 0.1, - "learning_rate": 1.9900145328395296e-05, - "loss": 0.2085, + "learning_rate": 2.990030112492784e-05, + "loss": 0.3316, "step": 2065 }, { "epoch": 0.1, - "learning_rate": 1.9899676527120156e-05, - "loss": 0.3854, + "learning_rate": 2.989983305509182e-05, + "loss": 0.3515, "step": 2070 }, { "epoch": 0.1, - "learning_rate": 1.9899207725845016e-05, - "loss": 0.447, + "learning_rate": 2.98993649852558e-05, + "loss": 0.3687, "step": 2075 }, { "epoch": 0.1, - "learning_rate": 1.9898738924569876e-05, - "loss": 0.4033, + "learning_rate": 2.989889691541978e-05, + "loss": 0.3471, "step": 2080 }, { "epoch": 0.1, - "learning_rate": 1.9898270123294736e-05, - "loss": 0.4619, + "learning_rate": 2.9898428845583763e-05, + "loss": 0.4767, "step": 2085 }, { "epoch": 0.1, - "learning_rate": 1.9897801322019596e-05, - "loss": 0.4808, + "learning_rate": 2.9897960775747743e-05, + "loss": 0.4956, "step": 2090 }, { "epoch": 0.1, - "learning_rate": 1.989733252074446e-05, - "loss": 0.7021, + "learning_rate": 2.9897492705911723e-05, + "loss": 0.5889, "step": 2095 }, { "epoch": 0.1, - "learning_rate": 1.989686371946932e-05, - "loss": 1.0386, + "learning_rate": 2.9897024636075706e-05, + "loss": 0.896, "step": 2100 }, { "epoch": 0.1, - "learning_rate": 1.989639491819418e-05, - "loss": 0.4082, + "learning_rate": 2.9896556566239686e-05, + "loss": 0.5322, "step": 2105 }, { "epoch": 0.1, - "learning_rate": 1.989592611691904e-05, - "loss": 0.2731, + "learning_rate": 2.9896088496403663e-05, + "loss": 0.2182, "step": 2110 }, { "epoch": 0.1, - "learning_rate": 1.98954573156439e-05, - "loss": 0.3498, + "learning_rate": 2.9895620426567642e-05, + "loss": 0.2965, "step": 2115 }, { "epoch": 0.1, - "learning_rate": 1.989498851436876e-05, - "loss": 0.3212, + "learning_rate": 2.9895152356731626e-05, + "loss": 0.3261, "step": 2120 }, { "epoch": 0.1, - "learning_rate": 1.9894519713093622e-05, - "loss": 0.3869, + "learning_rate": 2.9894684286895605e-05, + "loss": 0.3001, "step": 2125 }, { "epoch": 0.1, - "learning_rate": 1.9894050911818482e-05, - "loss": 0.3923, + "learning_rate": 2.9894216217059585e-05, + "loss": 0.357, "step": 2130 }, { "epoch": 0.1, - "learning_rate": 1.9893582110543342e-05, - "loss": 0.4625, + "learning_rate": 2.9893748147223565e-05, + "loss": 0.4138, "step": 2135 }, { "epoch": 0.1, - "learning_rate": 1.9893113309268205e-05, - "loss": 0.5782, + "learning_rate": 2.9893280077387548e-05, + "loss": 0.4194, "step": 2140 }, { "epoch": 0.1, - "learning_rate": 1.9892644507993065e-05, - "loss": 0.845, + "learning_rate": 2.9892812007551528e-05, + "loss": 0.6184, "step": 2145 }, { "epoch": 0.1, - "learning_rate": 1.9892175706717925e-05, - "loss": 1.0402, + "learning_rate": 2.9892343937715508e-05, + "loss": 0.9583, "step": 2150 }, { "epoch": 0.1, - "learning_rate": 1.9891706905442785e-05, - "loss": 0.4479, + "learning_rate": 2.9891875867879488e-05, + "loss": 0.6117, "step": 2155 }, { "epoch": 0.1, - "learning_rate": 1.9891238104167645e-05, - "loss": 0.2284, + "learning_rate": 2.989140779804347e-05, + "loss": 0.2569, "step": 2160 }, { "epoch": 0.1, - "learning_rate": 1.9890769302892505e-05, - "loss": 0.2668, + "learning_rate": 2.989093972820745e-05, + "loss": 0.3615, "step": 2165 }, { "epoch": 0.1, - "learning_rate": 1.9890300501617365e-05, - "loss": 0.3342, + "learning_rate": 2.9890471658371427e-05, + "loss": 0.3633, "step": 2170 }, { "epoch": 0.1, - "learning_rate": 1.9889831700342225e-05, - "loss": 0.4317, + "learning_rate": 2.989000358853541e-05, + "loss": 0.3625, "step": 2175 }, { "epoch": 0.1, - "learning_rate": 1.9889362899067085e-05, - "loss": 0.4107, + "learning_rate": 2.988953551869939e-05, + "loss": 0.4699, "step": 2180 }, { "epoch": 0.1, - "learning_rate": 1.9888894097791948e-05, - "loss": 0.3555, + "learning_rate": 2.988906744886337e-05, + "loss": 0.4034, "step": 2185 }, { "epoch": 0.1, - "learning_rate": 1.9888425296516808e-05, - "loss": 0.5225, + "learning_rate": 2.988859937902735e-05, + "loss": 0.5138, "step": 2190 }, { "epoch": 0.1, - "learning_rate": 1.9887956495241668e-05, - "loss": 0.9319, + "learning_rate": 2.9888131309191333e-05, + "loss": 0.5981, "step": 2195 }, { "epoch": 0.1, - "learning_rate": 1.9887487693966528e-05, - "loss": 0.9103, + "learning_rate": 2.9887663239355313e-05, + "loss": 0.8212, "step": 2200 }, { "epoch": 0.1, - "learning_rate": 1.988701889269139e-05, - "loss": 0.5407, + "learning_rate": 2.9887195169519293e-05, + "loss": 0.4884, "step": 2205 }, { "epoch": 0.1, - "learning_rate": 1.988655009141625e-05, - "loss": 0.2933, + "learning_rate": 2.9886727099683272e-05, + "loss": 0.2304, "step": 2210 }, { "epoch": 0.1, - "learning_rate": 1.988608129014111e-05, - "loss": 0.3158, + "learning_rate": 2.9886259029847256e-05, + "loss": 0.2807, "step": 2215 }, { "epoch": 0.1, - "learning_rate": 1.988561248886597e-05, - "loss": 0.3756, + "learning_rate": 2.9885790960011236e-05, + "loss": 0.2353, "step": 2220 }, { "epoch": 0.1, - "learning_rate": 1.988514368759083e-05, - "loss": 0.3151, + "learning_rate": 2.9885322890175215e-05, + "loss": 0.3482, "step": 2225 }, { "epoch": 0.1, - "learning_rate": 1.988467488631569e-05, - "loss": 0.3484, + "learning_rate": 2.98848548203392e-05, + "loss": 0.3445, "step": 2230 }, { "epoch": 0.1, - "learning_rate": 1.9884206085040554e-05, - "loss": 0.514, + "learning_rate": 2.9884386750503175e-05, + "loss": 0.5496, "step": 2235 }, { "epoch": 0.1, - "learning_rate": 1.9883737283765414e-05, - "loss": 0.4266, + "learning_rate": 2.9883918680667155e-05, + "loss": 0.5218, "step": 2240 }, { "epoch": 0.1, - "learning_rate": 1.9883268482490274e-05, - "loss": 0.7312, + "learning_rate": 2.9883450610831135e-05, + "loss": 0.4963, "step": 2245 }, { "epoch": 0.1, - "learning_rate": 1.9882799681215134e-05, - "loss": 0.7081, + "learning_rate": 2.9882982540995118e-05, + "loss": 0.7415, "step": 2250 }, { "epoch": 0.11, - "learning_rate": 1.9882330879939994e-05, - "loss": 0.4765, + "learning_rate": 2.9882514471159098e-05, + "loss": 0.5019, "step": 2255 }, { "epoch": 0.11, - "learning_rate": 1.9881862078664854e-05, - "loss": 0.2093, + "learning_rate": 2.9882046401323077e-05, + "loss": 0.2427, "step": 2260 }, { "epoch": 0.11, - "learning_rate": 1.9881393277389714e-05, - "loss": 0.3321, + "learning_rate": 2.9881578331487057e-05, + "loss": 0.3198, "step": 2265 }, { "epoch": 0.11, - "learning_rate": 1.9880924476114577e-05, - "loss": 0.3887, + "learning_rate": 2.988111026165104e-05, + "loss": 0.3634, "step": 2270 }, { "epoch": 0.11, - "learning_rate": 1.9880455674839437e-05, - "loss": 0.3139, + "learning_rate": 2.988064219181502e-05, + "loss": 0.341, "step": 2275 }, { "epoch": 0.11, - "learning_rate": 1.9879986873564297e-05, - "loss": 0.3986, + "learning_rate": 2.9880174121979e-05, + "loss": 0.4483, "step": 2280 }, { "epoch": 0.11, - "learning_rate": 1.987951807228916e-05, - "loss": 0.502, + "learning_rate": 2.9879706052142983e-05, + "loss": 0.3871, "step": 2285 }, { "epoch": 0.11, - "learning_rate": 1.987904927101402e-05, - "loss": 0.4883, + "learning_rate": 2.9879237982306963e-05, + "loss": 0.4825, "step": 2290 }, { "epoch": 0.11, - "learning_rate": 1.987858046973888e-05, - "loss": 0.5289, + "learning_rate": 2.9878769912470943e-05, + "loss": 0.5595, "step": 2295 }, { "epoch": 0.11, - "learning_rate": 1.987811166846374e-05, - "loss": 1.2296, + "learning_rate": 2.987830184263492e-05, + "loss": 0.8071, "step": 2300 }, { "epoch": 0.11, - "learning_rate": 1.98776428671886e-05, - "loss": 0.4979, + "learning_rate": 2.9877833772798903e-05, + "loss": 0.384, "step": 2305 }, { "epoch": 0.11, - "learning_rate": 1.987717406591346e-05, - "loss": 0.1878, + "learning_rate": 2.9877365702962882e-05, + "loss": 0.1561, "step": 2310 }, { "epoch": 0.11, - "learning_rate": 1.987670526463832e-05, - "loss": 0.3966, + "learning_rate": 2.9876897633126862e-05, + "loss": 0.2556, "step": 2315 }, { "epoch": 0.11, - "learning_rate": 1.987623646336318e-05, - "loss": 0.331, + "learning_rate": 2.9876429563290842e-05, + "loss": 0.3062, "step": 2320 }, { "epoch": 0.11, - "learning_rate": 1.9875767662088043e-05, - "loss": 0.3299, + "learning_rate": 2.9875961493454825e-05, + "loss": 0.3907, "step": 2325 }, { "epoch": 0.11, - "learning_rate": 1.9875298860812903e-05, - "loss": 0.3091, + "learning_rate": 2.9875493423618805e-05, + "loss": 0.2969, "step": 2330 }, { "epoch": 0.11, - "learning_rate": 1.9874830059537763e-05, - "loss": 0.4212, + "learning_rate": 2.9875025353782785e-05, + "loss": 0.4659, "step": 2335 }, { "epoch": 0.11, - "learning_rate": 1.9874361258262623e-05, - "loss": 0.3994, + "learning_rate": 2.9874557283946765e-05, + "loss": 0.5984, "step": 2340 }, { "epoch": 0.11, - "learning_rate": 1.9873892456987483e-05, - "loss": 0.6617, + "learning_rate": 2.9874089214110748e-05, + "loss": 0.6873, "step": 2345 }, { "epoch": 0.11, - "learning_rate": 1.9873423655712346e-05, - "loss": 0.7071, + "learning_rate": 2.9873621144274728e-05, + "loss": 0.9153, "step": 2350 }, { "epoch": 0.11, - "learning_rate": 1.9872954854437206e-05, - "loss": 0.3459, + "learning_rate": 2.9873153074438708e-05, + "loss": 0.4355, "step": 2355 }, { "epoch": 0.11, - "learning_rate": 1.9872486053162066e-05, - "loss": 0.1769, + "learning_rate": 2.9872685004602687e-05, + "loss": 0.2925, "step": 2360 }, { "epoch": 0.11, - "learning_rate": 1.9872017251886926e-05, - "loss": 0.3423, + "learning_rate": 2.9872216934766667e-05, + "loss": 0.3126, "step": 2365 }, { "epoch": 0.11, - "learning_rate": 1.987154845061179e-05, - "loss": 0.4013, + "learning_rate": 2.9871748864930647e-05, + "loss": 0.3362, "step": 2370 }, { "epoch": 0.11, - "learning_rate": 1.987107964933665e-05, - "loss": 0.368, + "learning_rate": 2.9871280795094627e-05, + "loss": 0.2827, "step": 2375 }, { "epoch": 0.11, - "learning_rate": 1.987061084806151e-05, - "loss": 0.3948, + "learning_rate": 2.987081272525861e-05, + "loss": 0.4077, "step": 2380 }, { "epoch": 0.11, - "learning_rate": 1.987014204678637e-05, - "loss": 0.3829, + "learning_rate": 2.987034465542259e-05, + "loss": 0.4149, "step": 2385 }, { "epoch": 0.11, - "learning_rate": 1.986967324551123e-05, - "loss": 0.3998, + "learning_rate": 2.986987658558657e-05, + "loss": 0.3678, "step": 2390 }, { "epoch": 0.11, - "learning_rate": 1.986920444423609e-05, - "loss": 0.5129, + "learning_rate": 2.986940851575055e-05, + "loss": 0.5219, "step": 2395 }, { "epoch": 0.11, - "learning_rate": 1.986873564296095e-05, - "loss": 1.1603, + "learning_rate": 2.9868940445914533e-05, + "loss": 0.8143, "step": 2400 }, { "epoch": 0.11, - "learning_rate": 1.986826684168581e-05, - "loss": 0.4495, + "learning_rate": 2.9868472376078512e-05, + "loss": 0.4312, "step": 2405 }, { "epoch": 0.11, - "learning_rate": 1.9867798040410672e-05, - "loss": 0.2745, + "learning_rate": 2.9868004306242492e-05, + "loss": 0.1985, "step": 2410 }, { "epoch": 0.11, - "learning_rate": 1.9867329239135532e-05, - "loss": 0.3008, + "learning_rate": 2.9867536236406476e-05, + "loss": 0.2725, "step": 2415 }, { "epoch": 0.11, - "learning_rate": 1.9866860437860392e-05, - "loss": 0.3176, + "learning_rate": 2.9867068166570455e-05, + "loss": 0.301, "step": 2420 }, { "epoch": 0.11, - "learning_rate": 1.9866391636585256e-05, - "loss": 0.3361, + "learning_rate": 2.9866600096734432e-05, + "loss": 0.3201, "step": 2425 }, { "epoch": 0.11, - "learning_rate": 1.9865922835310115e-05, - "loss": 0.4109, + "learning_rate": 2.986613202689841e-05, + "loss": 0.2769, "step": 2430 }, { "epoch": 0.11, - "learning_rate": 1.9865454034034975e-05, - "loss": 0.4295, + "learning_rate": 2.9865663957062395e-05, + "loss": 0.4863, "step": 2435 }, { "epoch": 0.11, - "learning_rate": 1.9864985232759835e-05, - "loss": 0.6172, + "learning_rate": 2.9865195887226375e-05, + "loss": 0.4339, "step": 2440 }, { "epoch": 0.11, - "learning_rate": 1.9864516431484695e-05, - "loss": 0.5372, + "learning_rate": 2.9864727817390354e-05, + "loss": 0.6175, "step": 2445 }, { "epoch": 0.11, - "learning_rate": 1.9864047630209555e-05, - "loss": 1.0839, + "learning_rate": 2.9864259747554334e-05, + "loss": 0.7348, "step": 2450 }, { "epoch": 0.11, - "learning_rate": 1.9863578828934415e-05, - "loss": 0.5672, + "learning_rate": 2.9863791677718317e-05, + "loss": 0.3938, "step": 2455 }, { "epoch": 0.11, - "learning_rate": 1.9863110027659275e-05, - "loss": 0.2294, + "learning_rate": 2.9863323607882297e-05, + "loss": 0.3108, "step": 2460 }, { "epoch": 0.12, - "learning_rate": 1.986264122638414e-05, - "loss": 0.4235, + "learning_rate": 2.9862855538046277e-05, + "loss": 0.3003, "step": 2465 }, { "epoch": 0.12, - "learning_rate": 1.9862172425109e-05, - "loss": 0.3189, + "learning_rate": 2.986238746821026e-05, + "loss": 0.4002, "step": 2470 }, { "epoch": 0.12, - "learning_rate": 1.9861703623833858e-05, - "loss": 0.2703, + "learning_rate": 2.986191939837424e-05, + "loss": 0.3234, "step": 2475 }, { "epoch": 0.12, - "learning_rate": 1.9861234822558718e-05, - "loss": 0.4522, + "learning_rate": 2.986145132853822e-05, + "loss": 0.4503, "step": 2480 }, { "epoch": 0.12, - "learning_rate": 1.9860766021283578e-05, - "loss": 0.648, + "learning_rate": 2.9860983258702196e-05, + "loss": 0.5289, "step": 2485 }, { "epoch": 0.12, - "learning_rate": 1.986029722000844e-05, - "loss": 0.4643, + "learning_rate": 2.986051518886618e-05, + "loss": 0.4845, "step": 2490 }, { "epoch": 0.12, - "learning_rate": 1.98598284187333e-05, - "loss": 0.5613, + "learning_rate": 2.986004711903016e-05, + "loss": 0.6735, "step": 2495 }, { "epoch": 0.12, - "learning_rate": 1.985935961745816e-05, - "loss": 0.9657, + "learning_rate": 2.985957904919414e-05, + "loss": 1.0605, "step": 2500 }, { "epoch": 0.12, - "learning_rate": 1.985889081618302e-05, - "loss": 0.3885, + "learning_rate": 2.985911097935812e-05, + "loss": 0.4427, "step": 2505 }, { "epoch": 0.12, - "learning_rate": 1.9858422014907885e-05, - "loss": 0.252, + "learning_rate": 2.9858642909522102e-05, + "loss": 0.1878, "step": 2510 }, { "epoch": 0.12, - "learning_rate": 1.9857953213632744e-05, - "loss": 0.3157, + "learning_rate": 2.9858174839686082e-05, + "loss": 0.2638, "step": 2515 }, { "epoch": 0.12, - "learning_rate": 1.9857484412357604e-05, - "loss": 0.331, + "learning_rate": 2.9857706769850062e-05, + "loss": 0.32, "step": 2520 }, { "epoch": 0.12, - "learning_rate": 1.9857015611082464e-05, - "loss": 0.3348, + "learning_rate": 2.985723870001404e-05, + "loss": 0.4158, "step": 2525 }, { "epoch": 0.12, - "learning_rate": 1.9856546809807324e-05, - "loss": 0.4366, + "learning_rate": 2.9856770630178025e-05, + "loss": 0.4044, "step": 2530 }, { "epoch": 0.12, - "learning_rate": 1.9856078008532184e-05, - "loss": 0.5167, + "learning_rate": 2.9856302560342005e-05, + "loss": 0.5286, "step": 2535 }, { "epoch": 0.12, - "learning_rate": 1.9855609207257044e-05, - "loss": 0.5484, + "learning_rate": 2.9855834490505985e-05, + "loss": 0.5233, "step": 2540 }, { "epoch": 0.12, - "learning_rate": 1.9855140405981904e-05, - "loss": 0.682, + "learning_rate": 2.9855366420669968e-05, + "loss": 0.4839, "step": 2545 }, { "epoch": 0.12, - "learning_rate": 1.9854671604706764e-05, - "loss": 0.9292, + "learning_rate": 2.9854898350833944e-05, + "loss": 1.015, "step": 2550 }, { "epoch": 0.12, - "learning_rate": 1.9854202803431627e-05, - "loss": 0.2851, + "learning_rate": 2.9854430280997924e-05, + "loss": 0.4324, "step": 2555 }, { "epoch": 0.12, - "learning_rate": 1.9853734002156487e-05, - "loss": 0.319, + "learning_rate": 2.9853962211161904e-05, + "loss": 0.1776, "step": 2560 }, { "epoch": 0.12, - "learning_rate": 1.9853265200881347e-05, - "loss": 0.3735, + "learning_rate": 2.9853494141325887e-05, + "loss": 0.2091, "step": 2565 }, { "epoch": 0.12, - "learning_rate": 1.985279639960621e-05, - "loss": 0.3128, + "learning_rate": 2.9853026071489867e-05, + "loss": 0.3604, "step": 2570 }, { "epoch": 0.12, - "learning_rate": 1.985232759833107e-05, - "loss": 0.3411, + "learning_rate": 2.9852558001653847e-05, + "loss": 0.3578, "step": 2575 }, { "epoch": 0.12, - "learning_rate": 1.985185879705593e-05, - "loss": 0.4413, + "learning_rate": 2.9852089931817826e-05, + "loss": 0.3927, "step": 2580 }, { "epoch": 0.12, - "learning_rate": 1.985138999578079e-05, - "loss": 0.5069, + "learning_rate": 2.985162186198181e-05, + "loss": 0.4487, "step": 2585 }, { "epoch": 0.12, - "learning_rate": 1.985092119450565e-05, - "loss": 0.6175, + "learning_rate": 2.985115379214579e-05, + "loss": 0.4263, "step": 2590 }, { "epoch": 0.12, - "learning_rate": 1.985045239323051e-05, - "loss": 0.5128, + "learning_rate": 2.985068572230977e-05, + "loss": 0.653, "step": 2595 }, { "epoch": 0.12, - "learning_rate": 1.984998359195537e-05, - "loss": 0.7707, + "learning_rate": 2.9850217652473752e-05, + "loss": 0.8854, "step": 2600 }, { "epoch": 0.12, - "learning_rate": 1.9849514790680233e-05, - "loss": 0.388, + "learning_rate": 2.9849749582637732e-05, + "loss": 0.464, "step": 2605 }, { "epoch": 0.12, - "learning_rate": 1.9849045989405093e-05, - "loss": 0.2559, + "learning_rate": 2.9849281512801712e-05, + "loss": 0.2695, "step": 2610 }, { "epoch": 0.12, - "learning_rate": 1.9848577188129953e-05, - "loss": 0.3089, + "learning_rate": 2.984881344296569e-05, + "loss": 0.2283, "step": 2615 }, { "epoch": 0.12, - "learning_rate": 1.9848108386854813e-05, - "loss": 0.2978, + "learning_rate": 2.9848345373129672e-05, + "loss": 0.3573, "step": 2620 }, { "epoch": 0.12, - "learning_rate": 1.9847639585579673e-05, - "loss": 0.2533, + "learning_rate": 2.984787730329365e-05, + "loss": 0.2801, "step": 2625 }, { "epoch": 0.12, - "learning_rate": 1.9847170784304533e-05, - "loss": 0.3758, + "learning_rate": 2.984740923345763e-05, + "loss": 0.4285, "step": 2630 }, { "epoch": 0.12, - "learning_rate": 1.9846701983029396e-05, - "loss": 0.2824, + "learning_rate": 2.984694116362161e-05, + "loss": 0.4369, "step": 2635 }, { "epoch": 0.12, - "learning_rate": 1.9846233181754256e-05, - "loss": 0.4697, + "learning_rate": 2.9846473093785594e-05, + "loss": 0.4876, "step": 2640 }, { "epoch": 0.12, - "learning_rate": 1.9845764380479116e-05, - "loss": 0.4773, + "learning_rate": 2.9846005023949574e-05, + "loss": 0.6387, "step": 2645 }, { "epoch": 0.12, - "learning_rate": 1.984529557920398e-05, - "loss": 0.9044, + "learning_rate": 2.9845536954113554e-05, + "loss": 0.9479, "step": 2650 }, { "epoch": 0.12, - "learning_rate": 1.984482677792884e-05, - "loss": 0.3903, + "learning_rate": 2.9845068884277537e-05, + "loss": 0.4644, "step": 2655 }, { "epoch": 0.12, - "learning_rate": 1.98443579766537e-05, - "loss": 0.268, + "learning_rate": 2.9844600814441517e-05, + "loss": 0.1859, "step": 2660 }, { "epoch": 0.12, - "learning_rate": 1.984388917537856e-05, - "loss": 0.205, + "learning_rate": 2.9844132744605497e-05, + "loss": 0.2329, "step": 2665 }, { "epoch": 0.12, - "learning_rate": 1.984342037410342e-05, - "loss": 0.3019, + "learning_rate": 2.9843664674769477e-05, + "loss": 0.324, "step": 2670 }, { "epoch": 0.12, - "learning_rate": 1.984295157282828e-05, - "loss": 0.4408, + "learning_rate": 2.9843196604933457e-05, + "loss": 0.2328, "step": 2675 }, { "epoch": 0.13, - "learning_rate": 1.984248277155314e-05, - "loss": 0.3562, + "learning_rate": 2.9842728535097436e-05, + "loss": 0.403, "step": 2680 }, { "epoch": 0.13, - "learning_rate": 1.9842013970278e-05, - "loss": 0.4502, + "learning_rate": 2.9842260465261416e-05, + "loss": 0.3824, "step": 2685 }, { "epoch": 0.13, - "learning_rate": 1.984154516900286e-05, - "loss": 0.6325, + "learning_rate": 2.9841792395425396e-05, + "loss": 0.547, "step": 2690 }, { "epoch": 0.13, - "learning_rate": 1.9841076367727722e-05, - "loss": 0.734, + "learning_rate": 2.984132432558938e-05, + "loss": 0.5246, "step": 2695 }, { "epoch": 0.13, - "learning_rate": 1.9840607566452582e-05, - "loss": 0.825, + "learning_rate": 2.984085625575336e-05, + "loss": 0.7502, "step": 2700 }, { "epoch": 0.13, - "learning_rate": 1.9840138765177442e-05, - "loss": 0.4505, + "learning_rate": 2.984038818591734e-05, + "loss": 0.4464, "step": 2705 }, { "epoch": 0.13, - "learning_rate": 1.9839669963902302e-05, - "loss": 0.2048, + "learning_rate": 2.983992011608132e-05, + "loss": 0.1972, "step": 2710 }, { "epoch": 0.13, - "learning_rate": 1.9839201162627166e-05, - "loss": 0.2263, + "learning_rate": 2.9839452046245302e-05, + "loss": 0.3288, "step": 2715 }, { "epoch": 0.13, - "learning_rate": 1.9838732361352025e-05, - "loss": 0.2975, + "learning_rate": 2.983898397640928e-05, + "loss": 0.3094, "step": 2720 }, { "epoch": 0.13, - "learning_rate": 1.9838263560076885e-05, - "loss": 0.3431, + "learning_rate": 2.983851590657326e-05, + "loss": 0.3497, "step": 2725 }, { "epoch": 0.13, - "learning_rate": 1.9837794758801745e-05, - "loss": 0.4268, + "learning_rate": 2.9838047836737245e-05, + "loss": 0.35, "step": 2730 }, { "epoch": 0.13, - "learning_rate": 1.9837325957526605e-05, - "loss": 0.4063, + "learning_rate": 2.9837579766901224e-05, + "loss": 0.4482, "step": 2735 }, { "epoch": 0.13, - "learning_rate": 1.9836857156251465e-05, - "loss": 0.4808, + "learning_rate": 2.98371116970652e-05, + "loss": 0.4094, "step": 2740 }, { "epoch": 0.13, - "learning_rate": 1.983638835497633e-05, - "loss": 0.6248, + "learning_rate": 2.983664362722918e-05, + "loss": 0.5456, "step": 2745 }, { "epoch": 0.13, - "learning_rate": 1.983591955370119e-05, - "loss": 0.9324, + "learning_rate": 2.9836175557393164e-05, + "loss": 0.6946, "step": 2750 }, { "epoch": 0.13, - "learning_rate": 1.983545075242605e-05, - "loss": 0.4698, + "learning_rate": 2.9835707487557144e-05, + "loss": 0.3944, "step": 2755 }, { "epoch": 0.13, - "learning_rate": 1.983498195115091e-05, - "loss": 0.2148, + "learning_rate": 2.9835239417721124e-05, + "loss": 0.1891, "step": 2760 }, { "epoch": 0.13, - "learning_rate": 1.9834513149875768e-05, - "loss": 0.2496, + "learning_rate": 2.9834771347885103e-05, + "loss": 0.1876, "step": 2765 }, { "epoch": 0.13, - "learning_rate": 1.9834044348600628e-05, - "loss": 0.3243, + "learning_rate": 2.9834303278049087e-05, + "loss": 0.3824, "step": 2770 }, { "epoch": 0.13, - "learning_rate": 1.983357554732549e-05, - "loss": 0.312, + "learning_rate": 2.9833835208213066e-05, + "loss": 0.3722, "step": 2775 }, { "epoch": 0.13, - "learning_rate": 1.983310674605035e-05, - "loss": 0.3524, + "learning_rate": 2.9833367138377046e-05, + "loss": 0.4569, "step": 2780 }, { "epoch": 0.13, - "learning_rate": 1.983263794477521e-05, - "loss": 0.4036, + "learning_rate": 2.983289906854103e-05, + "loss": 0.5069, "step": 2785 }, { "epoch": 0.13, - "learning_rate": 1.983216914350007e-05, - "loss": 0.4246, + "learning_rate": 2.983243099870501e-05, + "loss": 0.5151, "step": 2790 }, { "epoch": 0.13, - "learning_rate": 1.9831700342224935e-05, - "loss": 0.6055, + "learning_rate": 2.983196292886899e-05, + "loss": 0.5431, "step": 2795 }, { "epoch": 0.13, - "learning_rate": 1.9831231540949795e-05, - "loss": 0.6449, + "learning_rate": 2.983149485903297e-05, + "loss": 1.0826, "step": 2800 }, { "epoch": 0.13, - "learning_rate": 1.9830762739674655e-05, - "loss": 0.3477, + "learning_rate": 2.983102678919695e-05, + "loss": 0.3695, "step": 2805 }, { "epoch": 0.13, - "learning_rate": 1.9830293938399514e-05, - "loss": 0.3349, + "learning_rate": 2.983055871936093e-05, + "loss": 0.2276, "step": 2810 }, { "epoch": 0.13, - "learning_rate": 1.9829825137124374e-05, - "loss": 0.2735, + "learning_rate": 2.983009064952491e-05, + "loss": 0.3133, "step": 2815 }, { "epoch": 0.13, - "learning_rate": 1.9829356335849234e-05, - "loss": 0.294, + "learning_rate": 2.9829622579688888e-05, + "loss": 0.3186, "step": 2820 }, { "epoch": 0.13, - "learning_rate": 1.9828887534574094e-05, - "loss": 0.3747, + "learning_rate": 2.982915450985287e-05, + "loss": 0.3351, "step": 2825 }, { "epoch": 0.13, - "learning_rate": 1.9828418733298954e-05, - "loss": 0.3981, + "learning_rate": 2.982868644001685e-05, + "loss": 0.3789, "step": 2830 }, { "epoch": 0.13, - "learning_rate": 1.9827949932023818e-05, - "loss": 0.5341, + "learning_rate": 2.982821837018083e-05, + "loss": 0.3645, "step": 2835 }, { "epoch": 0.13, - "learning_rate": 1.9827481130748677e-05, - "loss": 0.4484, + "learning_rate": 2.9827750300344814e-05, + "loss": 0.4059, "step": 2840 }, { "epoch": 0.13, - "learning_rate": 1.9827012329473537e-05, - "loss": 0.6313, + "learning_rate": 2.9827282230508794e-05, + "loss": 0.3996, "step": 2845 }, { "epoch": 0.13, - "learning_rate": 1.9826543528198397e-05, - "loss": 0.8526, + "learning_rate": 2.9826814160672774e-05, + "loss": 0.6644, "step": 2850 }, { "epoch": 0.13, - "learning_rate": 1.982607472692326e-05, - "loss": 0.365, + "learning_rate": 2.9826346090836754e-05, + "loss": 0.4051, "step": 2855 }, { "epoch": 0.13, - "learning_rate": 1.982560592564812e-05, - "loss": 0.2652, + "learning_rate": 2.9825878021000737e-05, + "loss": 0.0858, "step": 2860 }, { "epoch": 0.13, - "learning_rate": 1.982513712437298e-05, - "loss": 0.239, + "learning_rate": 2.9825409951164713e-05, + "loss": 0.2712, "step": 2865 }, { "epoch": 0.13, - "learning_rate": 1.982466832309784e-05, - "loss": 0.3384, + "learning_rate": 2.9824941881328693e-05, + "loss": 0.3066, "step": 2870 }, { "epoch": 0.13, - "learning_rate": 1.98241995218227e-05, - "loss": 0.3417, + "learning_rate": 2.9824473811492673e-05, + "loss": 0.2838, "step": 2875 }, { "epoch": 0.13, - "learning_rate": 1.982373072054756e-05, - "loss": 0.348, + "learning_rate": 2.9824005741656656e-05, + "loss": 0.3694, "step": 2880 }, { "epoch": 0.13, - "learning_rate": 1.9823261919272424e-05, - "loss": 0.3826, + "learning_rate": 2.9823537671820636e-05, + "loss": 0.5235, "step": 2885 }, { "epoch": 0.13, - "learning_rate": 1.9822793117997284e-05, - "loss": 0.4936, + "learning_rate": 2.9823069601984616e-05, + "loss": 0.63, "step": 2890 }, { "epoch": 0.14, - "learning_rate": 1.9822324316722143e-05, - "loss": 0.5807, + "learning_rate": 2.98226015321486e-05, + "loss": 0.5719, "step": 2895 }, { "epoch": 0.14, - "learning_rate": 1.9821855515447003e-05, - "loss": 1.0115, + "learning_rate": 2.982213346231258e-05, + "loss": 0.7585, "step": 2900 }, { "epoch": 0.14, - "learning_rate": 1.9821386714171863e-05, - "loss": 0.3353, + "learning_rate": 2.982166539247656e-05, + "loss": 0.3946, "step": 2905 }, { "epoch": 0.14, - "learning_rate": 1.9820917912896723e-05, - "loss": 0.2495, + "learning_rate": 2.982119732264054e-05, + "loss": 0.2283, "step": 2910 }, { "epoch": 0.14, - "learning_rate": 1.9820449111621583e-05, - "loss": 0.2535, + "learning_rate": 2.982072925280452e-05, + "loss": 0.2433, "step": 2915 }, { "epoch": 0.14, - "learning_rate": 1.9819980310346447e-05, - "loss": 0.3094, + "learning_rate": 2.98202611829685e-05, + "loss": 0.2719, "step": 2920 }, { "epoch": 0.14, - "learning_rate": 1.9819511509071306e-05, - "loss": 0.2988, + "learning_rate": 2.981979311313248e-05, + "loss": 0.3201, "step": 2925 }, { "epoch": 0.14, - "learning_rate": 1.9819042707796166e-05, - "loss": 0.3186, + "learning_rate": 2.9819325043296458e-05, + "loss": 0.34, "step": 2930 }, { "epoch": 0.14, - "learning_rate": 1.981857390652103e-05, - "loss": 0.4027, + "learning_rate": 2.981885697346044e-05, + "loss": 0.4069, "step": 2935 }, { "epoch": 0.14, - "learning_rate": 1.981810510524589e-05, - "loss": 0.4593, + "learning_rate": 2.981838890362442e-05, + "loss": 0.4682, "step": 2940 }, { "epoch": 0.14, - "learning_rate": 1.981763630397075e-05, - "loss": 0.6236, + "learning_rate": 2.98179208337884e-05, + "loss": 0.5898, "step": 2945 }, { "epoch": 0.14, - "learning_rate": 1.981716750269561e-05, - "loss": 0.9071, + "learning_rate": 2.981745276395238e-05, + "loss": 0.6208, "step": 2950 }, { "epoch": 0.14, - "learning_rate": 1.981669870142047e-05, - "loss": 0.4223, + "learning_rate": 2.9816984694116364e-05, + "loss": 0.5141, "step": 2955 }, { "epoch": 0.14, - "learning_rate": 1.981622990014533e-05, - "loss": 0.197, + "learning_rate": 2.9816516624280343e-05, + "loss": 0.2397, "step": 2960 }, { "epoch": 0.14, - "learning_rate": 1.981576109887019e-05, - "loss": 0.298, + "learning_rate": 2.9816048554444323e-05, + "loss": 0.211, "step": 2965 }, { "epoch": 0.14, - "learning_rate": 1.981529229759505e-05, - "loss": 0.3526, + "learning_rate": 2.9815580484608306e-05, + "loss": 0.2892, "step": 2970 }, { "epoch": 0.14, - "learning_rate": 1.9814823496319913e-05, - "loss": 0.4274, + "learning_rate": 2.9815112414772286e-05, + "loss": 0.4122, "step": 2975 }, { "epoch": 0.14, - "learning_rate": 1.9814354695044773e-05, - "loss": 0.3132, + "learning_rate": 2.9814644344936266e-05, + "loss": 0.3687, "step": 2980 }, { "epoch": 0.14, - "learning_rate": 1.9813885893769632e-05, - "loss": 0.4685, + "learning_rate": 2.9814176275100246e-05, + "loss": 0.2965, "step": 2985 }, { "epoch": 0.14, - "learning_rate": 1.9813417092494492e-05, - "loss": 0.5327, + "learning_rate": 2.9813708205264226e-05, + "loss": 0.5304, "step": 2990 }, { "epoch": 0.14, - "learning_rate": 1.9812948291219352e-05, - "loss": 0.5887, + "learning_rate": 2.9813240135428206e-05, + "loss": 0.5774, "step": 2995 }, { "epoch": 0.14, - "learning_rate": 1.9812479489944216e-05, - "loss": 0.8516, + "learning_rate": 2.9812772065592185e-05, + "loss": 0.774, "step": 3000 }, { "epoch": 0.14, - "learning_rate": 1.9812010688669076e-05, - "loss": 0.3981, + "learning_rate": 2.9812303995756165e-05, + "loss": 0.4824, "step": 3005 }, { "epoch": 0.14, - "learning_rate": 1.9811541887393936e-05, - "loss": 0.2334, + "learning_rate": 2.981183592592015e-05, + "loss": 0.186, "step": 3010 }, { "epoch": 0.14, - "learning_rate": 1.9811073086118795e-05, - "loss": 0.2776, + "learning_rate": 2.9811367856084128e-05, + "loss": 0.2796, "step": 3015 }, { "epoch": 0.14, - "learning_rate": 1.981060428484366e-05, - "loss": 0.2129, + "learning_rate": 2.9810899786248108e-05, + "loss": 0.2142, "step": 3020 }, { "epoch": 0.14, - "learning_rate": 1.981013548356852e-05, - "loss": 0.3171, + "learning_rate": 2.981043171641209e-05, + "loss": 0.2992, "step": 3025 }, { "epoch": 0.14, - "learning_rate": 1.980966668229338e-05, - "loss": 0.3339, + "learning_rate": 2.980996364657607e-05, + "loss": 0.3176, "step": 3030 }, { "epoch": 0.14, - "learning_rate": 1.980919788101824e-05, - "loss": 0.4108, + "learning_rate": 2.980949557674005e-05, + "loss": 0.5671, "step": 3035 }, { "epoch": 0.14, - "learning_rate": 1.98087290797431e-05, - "loss": 0.4631, + "learning_rate": 2.980902750690403e-05, + "loss": 0.4956, "step": 3040 }, { "epoch": 0.14, - "learning_rate": 1.980826027846796e-05, - "loss": 0.5093, + "learning_rate": 2.9808559437068014e-05, + "loss": 0.5212, "step": 3045 }, { "epoch": 0.14, - "learning_rate": 1.980779147719282e-05, - "loss": 0.8253, + "learning_rate": 2.9808091367231994e-05, + "loss": 0.7228, "step": 3050 }, { "epoch": 0.14, - "learning_rate": 1.980732267591768e-05, - "loss": 0.5317, + "learning_rate": 2.980762329739597e-05, + "loss": 0.3316, "step": 3055 }, { "epoch": 0.14, - "learning_rate": 1.9806853874642538e-05, - "loss": 0.2176, + "learning_rate": 2.980715522755995e-05, + "loss": 0.1685, "step": 3060 }, { "epoch": 0.14, - "learning_rate": 1.98063850733674e-05, - "loss": 0.2664, + "learning_rate": 2.9806687157723933e-05, + "loss": 0.2233, "step": 3065 }, { "epoch": 0.14, - "learning_rate": 1.980591627209226e-05, - "loss": 0.2988, + "learning_rate": 2.9806219087887913e-05, + "loss": 0.3173, "step": 3070 }, { "epoch": 0.14, - "learning_rate": 1.980544747081712e-05, - "loss": 0.3123, + "learning_rate": 2.9805751018051893e-05, + "loss": 0.3252, "step": 3075 }, { "epoch": 0.14, - "learning_rate": 1.9804978669541985e-05, - "loss": 0.3984, + "learning_rate": 2.9805282948215876e-05, + "loss": 0.326, "step": 3080 }, { "epoch": 0.14, - "learning_rate": 1.9804509868266845e-05, - "loss": 0.4429, + "learning_rate": 2.9804814878379856e-05, + "loss": 0.3709, "step": 3085 }, { "epoch": 0.14, - "learning_rate": 1.9804041066991705e-05, - "loss": 0.4721, + "learning_rate": 2.9804346808543836e-05, + "loss": 0.5064, "step": 3090 }, { "epoch": 0.14, - "learning_rate": 1.9803572265716565e-05, - "loss": 0.608, + "learning_rate": 2.9803878738707815e-05, + "loss": 0.5457, "step": 3095 }, { "epoch": 0.14, - "learning_rate": 1.9803103464441424e-05, - "loss": 0.7198, + "learning_rate": 2.98034106688718e-05, + "loss": 1.0209, "step": 3100 }, { "epoch": 0.14, - "learning_rate": 1.9802634663166284e-05, - "loss": 0.4533, + "learning_rate": 2.980294259903578e-05, + "loss": 0.3009, "step": 3105 }, { "epoch": 0.15, - "learning_rate": 1.9802165861891144e-05, - "loss": 0.2521, + "learning_rate": 2.9802474529199758e-05, + "loss": 0.2032, "step": 3110 }, { "epoch": 0.15, - "learning_rate": 1.9801697060616008e-05, - "loss": 0.3518, + "learning_rate": 2.9802006459363738e-05, + "loss": 0.3715, "step": 3115 }, { "epoch": 0.15, - "learning_rate": 1.9801228259340868e-05, - "loss": 0.3765, + "learning_rate": 2.9801538389527718e-05, + "loss": 0.3311, "step": 3120 }, { "epoch": 0.15, - "learning_rate": 1.9800759458065728e-05, - "loss": 0.4222, + "learning_rate": 2.9801070319691698e-05, + "loss": 0.3749, "step": 3125 }, { "epoch": 0.15, - "learning_rate": 1.9800290656790587e-05, - "loss": 0.3578, + "learning_rate": 2.9800602249855678e-05, + "loss": 0.3707, "step": 3130 }, { "epoch": 0.15, - "learning_rate": 1.9799821855515447e-05, - "loss": 0.3973, + "learning_rate": 2.9800134180019657e-05, + "loss": 0.3479, "step": 3135 }, { "epoch": 0.15, - "learning_rate": 1.9799353054240307e-05, - "loss": 0.4282, + "learning_rate": 2.979966611018364e-05, + "loss": 0.5007, "step": 3140 }, { "epoch": 0.15, - "learning_rate": 1.979888425296517e-05, - "loss": 0.5091, + "learning_rate": 2.979919804034762e-05, + "loss": 0.555, "step": 3145 }, { "epoch": 0.15, - "learning_rate": 1.979841545169003e-05, - "loss": 1.0688, + "learning_rate": 2.97987299705116e-05, + "loss": 0.6763, "step": 3150 }, { "epoch": 0.15, - "learning_rate": 1.979794665041489e-05, - "loss": 0.4779, + "learning_rate": 2.9798261900675583e-05, + "loss": 0.3482, "step": 3155 }, { "epoch": 0.15, - "learning_rate": 1.9797477849139754e-05, - "loss": 0.2231, + "learning_rate": 2.9797793830839563e-05, + "loss": 0.1475, "step": 3160 }, { "epoch": 0.15, - "learning_rate": 1.9797009047864614e-05, - "loss": 0.2028, + "learning_rate": 2.9797325761003543e-05, + "loss": 0.1497, "step": 3165 }, { "epoch": 0.15, - "learning_rate": 1.9796540246589474e-05, - "loss": 0.2761, + "learning_rate": 2.9796857691167523e-05, + "loss": 0.2737, "step": 3170 }, { "epoch": 0.15, - "learning_rate": 1.9796071445314334e-05, - "loss": 0.4209, + "learning_rate": 2.9796389621331506e-05, + "loss": 0.3525, "step": 3175 }, { "epoch": 0.15, - "learning_rate": 1.9795602644039194e-05, - "loss": 0.3276, + "learning_rate": 2.9795921551495482e-05, + "loss": 0.3627, "step": 3180 }, { "epoch": 0.15, - "learning_rate": 1.9795133842764054e-05, - "loss": 0.4656, + "learning_rate": 2.9795453481659462e-05, + "loss": 0.5222, "step": 3185 }, { "epoch": 0.15, - "learning_rate": 1.9794665041488913e-05, - "loss": 0.4872, + "learning_rate": 2.9794985411823442e-05, + "loss": 0.5057, "step": 3190 }, { "epoch": 0.15, - "learning_rate": 1.9794196240213773e-05, - "loss": 0.573, + "learning_rate": 2.9794517341987425e-05, + "loss": 0.4937, "step": 3195 }, { "epoch": 0.15, - "learning_rate": 1.9793727438938633e-05, - "loss": 0.9714, + "learning_rate": 2.9794049272151405e-05, + "loss": 0.696, "step": 3200 }, { "epoch": 0.15, - "learning_rate": 1.9793258637663497e-05, - "loss": 0.449, + "learning_rate": 2.9793581202315385e-05, + "loss": 0.5451, "step": 3205 }, { "epoch": 0.15, - "learning_rate": 1.9792789836388357e-05, - "loss": 0.2523, + "learning_rate": 2.9793113132479368e-05, + "loss": 0.1988, "step": 3210 }, { "epoch": 0.15, - "learning_rate": 1.9792321035113217e-05, - "loss": 0.2467, + "learning_rate": 2.9792645062643348e-05, + "loss": 0.1992, "step": 3215 }, { "epoch": 0.15, - "learning_rate": 1.9791852233838076e-05, - "loss": 0.2898, + "learning_rate": 2.9792176992807328e-05, + "loss": 0.263, "step": 3220 }, { "epoch": 0.15, - "learning_rate": 1.979138343256294e-05, - "loss": 0.3189, + "learning_rate": 2.9791708922971308e-05, + "loss": 0.325, "step": 3225 }, { "epoch": 0.15, - "learning_rate": 1.97909146312878e-05, - "loss": 0.4171, + "learning_rate": 2.979124085313529e-05, + "loss": 0.3584, "step": 3230 }, { "epoch": 0.15, - "learning_rate": 1.979044583001266e-05, - "loss": 0.4367, + "learning_rate": 2.979077278329927e-05, + "loss": 0.5023, "step": 3235 }, { "epoch": 0.15, - "learning_rate": 1.978997702873752e-05, - "loss": 0.4654, + "learning_rate": 2.979030471346325e-05, + "loss": 0.6313, "step": 3240 }, { "epoch": 0.15, - "learning_rate": 1.978950822746238e-05, - "loss": 0.5939, + "learning_rate": 2.9789836643627227e-05, + "loss": 0.762, "step": 3245 }, { "epoch": 0.15, - "learning_rate": 1.978903942618724e-05, - "loss": 1.0234, + "learning_rate": 2.978936857379121e-05, + "loss": 0.7333, "step": 3250 }, { "epoch": 0.15, - "learning_rate": 1.9788570624912103e-05, - "loss": 0.4174, + "learning_rate": 2.978890050395519e-05, + "loss": 0.3004, "step": 3255 }, { "epoch": 0.15, - "learning_rate": 1.9788101823636963e-05, - "loss": 0.1468, + "learning_rate": 2.978843243411917e-05, + "loss": 0.1905, "step": 3260 }, { "epoch": 0.15, - "learning_rate": 1.9787633022361823e-05, - "loss": 0.2742, + "learning_rate": 2.9787964364283153e-05, + "loss": 0.2138, "step": 3265 }, { "epoch": 0.15, - "learning_rate": 1.9787164221086683e-05, - "loss": 0.3599, + "learning_rate": 2.9787496294447133e-05, + "loss": 0.3551, "step": 3270 }, { "epoch": 0.15, - "learning_rate": 1.9786695419811542e-05, - "loss": 0.2513, + "learning_rate": 2.9787028224611113e-05, + "loss": 0.2628, "step": 3275 }, { "epoch": 0.15, - "learning_rate": 1.9786226618536402e-05, - "loss": 0.2906, + "learning_rate": 2.9786560154775092e-05, + "loss": 0.3818, "step": 3280 }, { "epoch": 0.15, - "learning_rate": 1.9785757817261266e-05, - "loss": 0.4572, + "learning_rate": 2.9786092084939076e-05, + "loss": 0.4099, "step": 3285 }, { "epoch": 0.15, - "learning_rate": 1.9785289015986126e-05, - "loss": 0.3669, + "learning_rate": 2.9785624015103055e-05, + "loss": 0.4, "step": 3290 }, { "epoch": 0.15, - "learning_rate": 1.9784820214710986e-05, - "loss": 0.5854, + "learning_rate": 2.9785155945267035e-05, + "loss": 0.5284, "step": 3295 }, { "epoch": 0.15, - "learning_rate": 1.9784351413435846e-05, - "loss": 0.8587, + "learning_rate": 2.9784687875431015e-05, + "loss": 0.8593, "step": 3300 }, { "epoch": 0.15, - "learning_rate": 1.978388261216071e-05, - "loss": 0.3697, + "learning_rate": 2.9784219805594998e-05, + "loss": 0.4611, "step": 3305 }, { "epoch": 0.15, - "learning_rate": 1.978341381088557e-05, - "loss": 0.1952, + "learning_rate": 2.9783751735758975e-05, + "loss": 0.1586, "step": 3310 }, { "epoch": 0.15, - "learning_rate": 1.978294500961043e-05, - "loss": 0.252, + "learning_rate": 2.9783283665922954e-05, + "loss": 0.2415, "step": 3315 }, { "epoch": 0.15, - "learning_rate": 1.978247620833529e-05, - "loss": 0.3353, + "learning_rate": 2.9782815596086934e-05, + "loss": 0.2548, "step": 3320 }, { "epoch": 0.16, - "learning_rate": 1.978200740706015e-05, - "loss": 0.3767, + "learning_rate": 2.9782347526250918e-05, + "loss": 0.3687, "step": 3325 }, { "epoch": 0.16, - "learning_rate": 1.978153860578501e-05, - "loss": 0.3057, + "learning_rate": 2.9781879456414897e-05, + "loss": 0.4, "step": 3330 }, { "epoch": 0.16, - "learning_rate": 1.978106980450987e-05, - "loss": 0.4199, + "learning_rate": 2.9781411386578877e-05, + "loss": 0.4192, "step": 3335 }, { "epoch": 0.16, - "learning_rate": 1.978060100323473e-05, - "loss": 0.4672, + "learning_rate": 2.978094331674286e-05, + "loss": 0.3799, "step": 3340 }, { "epoch": 0.16, - "learning_rate": 1.9780132201959592e-05, - "loss": 0.5891, + "learning_rate": 2.978047524690684e-05, + "loss": 0.6527, "step": 3345 }, { "epoch": 0.16, - "learning_rate": 1.977966340068445e-05, - "loss": 1.0011, + "learning_rate": 2.978000717707082e-05, + "loss": 0.8422, "step": 3350 }, { "epoch": 0.16, - "learning_rate": 1.977919459940931e-05, - "loss": 0.3955, + "learning_rate": 2.97795391072348e-05, + "loss": 0.4539, "step": 3355 }, { "epoch": 0.16, - "learning_rate": 1.977872579813417e-05, - "loss": 0.2046, + "learning_rate": 2.9779071037398783e-05, + "loss": 0.2319, "step": 3360 }, { "epoch": 0.16, - "learning_rate": 1.9778256996859035e-05, - "loss": 0.1842, + "learning_rate": 2.9778602967562763e-05, + "loss": 0.1937, "step": 3365 }, { "epoch": 0.16, - "learning_rate": 1.9777788195583895e-05, - "loss": 0.3045, + "learning_rate": 2.977813489772674e-05, + "loss": 0.2484, "step": 3370 }, { "epoch": 0.16, - "learning_rate": 1.9777319394308755e-05, - "loss": 0.3251, + "learning_rate": 2.977766682789072e-05, + "loss": 0.2864, "step": 3375 }, { "epoch": 0.16, - "learning_rate": 1.9776850593033615e-05, - "loss": 0.4542, + "learning_rate": 2.9777198758054702e-05, + "loss": 0.3665, "step": 3380 }, { "epoch": 0.16, - "learning_rate": 1.9776381791758475e-05, - "loss": 0.428, + "learning_rate": 2.9776730688218682e-05, + "loss": 0.4467, "step": 3385 }, { "epoch": 0.16, - "learning_rate": 1.9775912990483335e-05, - "loss": 0.602, + "learning_rate": 2.9776262618382662e-05, + "loss": 0.4694, "step": 3390 }, { "epoch": 0.16, - "learning_rate": 1.9775444189208198e-05, - "loss": 0.472, + "learning_rate": 2.9775794548546645e-05, + "loss": 0.3766, "step": 3395 }, { "epoch": 0.16, - "learning_rate": 1.9774975387933058e-05, - "loss": 0.8672, + "learning_rate": 2.9775326478710625e-05, + "loss": 0.77, "step": 3400 }, { "epoch": 0.16, - "learning_rate": 1.9774506586657918e-05, - "loss": 0.3989, + "learning_rate": 2.9774858408874605e-05, + "loss": 0.3687, "step": 3405 }, { "epoch": 0.16, - "learning_rate": 1.9774037785382778e-05, - "loss": 0.1915, + "learning_rate": 2.9774390339038585e-05, + "loss": 0.2109, "step": 3410 }, { "epoch": 0.16, - "learning_rate": 1.9773568984107638e-05, - "loss": 0.222, + "learning_rate": 2.9773922269202568e-05, + "loss": 0.255, "step": 3415 }, { "epoch": 0.16, - "learning_rate": 1.9773100182832498e-05, - "loss": 0.2807, + "learning_rate": 2.9773454199366548e-05, + "loss": 0.2809, "step": 3420 }, { "epoch": 0.16, - "learning_rate": 1.9772631381557357e-05, - "loss": 0.369, + "learning_rate": 2.9772986129530527e-05, + "loss": 0.3345, "step": 3425 }, { "epoch": 0.16, - "learning_rate": 1.977216258028222e-05, - "loss": 0.4569, + "learning_rate": 2.9772518059694507e-05, + "loss": 0.3559, "step": 3430 }, { "epoch": 0.16, - "learning_rate": 1.977169377900708e-05, - "loss": 0.3986, + "learning_rate": 2.9772049989858487e-05, + "loss": 0.3222, "step": 3435 }, { "epoch": 0.16, - "learning_rate": 1.977122497773194e-05, - "loss": 0.5065, + "learning_rate": 2.9771581920022467e-05, + "loss": 0.4967, "step": 3440 }, { "epoch": 0.16, - "learning_rate": 1.9770756176456804e-05, - "loss": 0.5453, + "learning_rate": 2.9771113850186447e-05, + "loss": 0.5792, "step": 3445 }, { "epoch": 0.16, - "learning_rate": 1.9770287375181664e-05, - "loss": 0.879, + "learning_rate": 2.977064578035043e-05, + "loss": 0.889, "step": 3450 }, { "epoch": 0.16, - "learning_rate": 1.9769818573906524e-05, - "loss": 0.4329, + "learning_rate": 2.977017771051441e-05, + "loss": 0.352, "step": 3455 }, { "epoch": 0.16, - "learning_rate": 1.9769349772631384e-05, - "loss": 0.2126, + "learning_rate": 2.976970964067839e-05, + "loss": 0.1901, "step": 3460 }, { "epoch": 0.16, - "learning_rate": 1.9768880971356244e-05, - "loss": 0.3154, + "learning_rate": 2.976924157084237e-05, + "loss": 0.2705, "step": 3465 }, { "epoch": 0.16, - "learning_rate": 1.9768412170081104e-05, - "loss": 0.314, + "learning_rate": 2.9768773501006353e-05, + "loss": 0.2532, "step": 3470 }, { "epoch": 0.16, - "learning_rate": 1.9767943368805964e-05, - "loss": 0.3373, + "learning_rate": 2.9768305431170332e-05, + "loss": 0.3798, "step": 3475 }, { "epoch": 0.16, - "learning_rate": 1.9767474567530823e-05, - "loss": 0.2566, + "learning_rate": 2.9767837361334312e-05, + "loss": 0.2622, "step": 3480 }, { "epoch": 0.16, - "learning_rate": 1.9767005766255687e-05, - "loss": 0.4568, + "learning_rate": 2.9767369291498292e-05, + "loss": 0.3909, "step": 3485 }, { "epoch": 0.16, - "learning_rate": 1.9766536964980547e-05, - "loss": 0.3373, + "learning_rate": 2.9766901221662275e-05, + "loss": 0.457, "step": 3490 }, { "epoch": 0.16, - "learning_rate": 1.9766068163705407e-05, - "loss": 0.5181, + "learning_rate": 2.9766433151826255e-05, + "loss": 0.4318, "step": 3495 }, { "epoch": 0.16, - "learning_rate": 1.9765599362430267e-05, - "loss": 0.8484, + "learning_rate": 2.976596508199023e-05, + "loss": 0.8586, "step": 3500 }, { "epoch": 0.16, - "learning_rate": 1.9765130561155127e-05, - "loss": 0.3833, + "learning_rate": 2.9765497012154215e-05, + "loss": 0.3323, "step": 3505 }, { "epoch": 0.16, - "learning_rate": 1.976466175987999e-05, - "loss": 0.1497, + "learning_rate": 2.9765028942318194e-05, + "loss": 0.1509, "step": 3510 }, { "epoch": 0.16, - "learning_rate": 1.976419295860485e-05, - "loss": 0.235, + "learning_rate": 2.9764560872482174e-05, + "loss": 0.2893, "step": 3515 }, { "epoch": 0.16, - "learning_rate": 1.976372415732971e-05, - "loss": 0.2564, + "learning_rate": 2.9764092802646154e-05, + "loss": 0.2656, "step": 3520 }, { "epoch": 0.16, - "learning_rate": 1.976325535605457e-05, - "loss": 0.2755, + "learning_rate": 2.9763624732810137e-05, + "loss": 0.3075, "step": 3525 }, { "epoch": 0.16, - "learning_rate": 1.976278655477943e-05, - "loss": 0.416, + "learning_rate": 2.9763156662974117e-05, + "loss": 0.3944, "step": 3530 }, { "epoch": 0.16, - "learning_rate": 1.9762317753504293e-05, - "loss": 0.4785, + "learning_rate": 2.9762688593138097e-05, + "loss": 0.3305, "step": 3535 }, { "epoch": 0.17, - "learning_rate": 1.9761848952229153e-05, - "loss": 0.5485, + "learning_rate": 2.9762220523302077e-05, + "loss": 0.3977, "step": 3540 }, { "epoch": 0.17, - "learning_rate": 1.9761380150954013e-05, - "loss": 0.6251, + "learning_rate": 2.976175245346606e-05, + "loss": 0.5208, "step": 3545 }, { "epoch": 0.17, - "learning_rate": 1.9760911349678873e-05, - "loss": 0.8421, + "learning_rate": 2.976128438363004e-05, + "loss": 0.8924, "step": 3550 }, { "epoch": 0.17, - "learning_rate": 1.9760442548403733e-05, - "loss": 0.3436, + "learning_rate": 2.976081631379402e-05, + "loss": 0.3387, "step": 3555 }, { "epoch": 0.17, - "learning_rate": 1.9759973747128593e-05, - "loss": 0.1759, + "learning_rate": 2.9760348243957996e-05, + "loss": 0.159, "step": 3560 }, { "epoch": 0.17, - "learning_rate": 1.9759504945853453e-05, - "loss": 0.2988, + "learning_rate": 2.975988017412198e-05, + "loss": 0.2022, "step": 3565 }, { "epoch": 0.17, - "learning_rate": 1.9759036144578312e-05, - "loss": 0.2328, + "learning_rate": 2.975941210428596e-05, + "loss": 0.2047, "step": 3570 }, { "epoch": 0.17, - "learning_rate": 1.9758567343303176e-05, - "loss": 0.3053, + "learning_rate": 2.975894403444994e-05, + "loss": 0.3374, "step": 3575 }, { "epoch": 0.17, - "learning_rate": 1.9758098542028036e-05, - "loss": 0.3479, + "learning_rate": 2.9758475964613922e-05, + "loss": 0.3447, "step": 3580 }, { "epoch": 0.17, - "learning_rate": 1.9757629740752896e-05, - "loss": 0.377, + "learning_rate": 2.9758007894777902e-05, + "loss": 0.4563, "step": 3585 }, { "epoch": 0.17, - "learning_rate": 1.975716093947776e-05, - "loss": 0.4114, + "learning_rate": 2.9757539824941882e-05, + "loss": 0.3952, "step": 3590 }, { "epoch": 0.17, - "learning_rate": 1.975669213820262e-05, - "loss": 0.488, + "learning_rate": 2.975707175510586e-05, + "loss": 0.4245, "step": 3595 }, { "epoch": 0.17, - "learning_rate": 1.975622333692748e-05, - "loss": 0.5475, + "learning_rate": 2.9756603685269845e-05, + "loss": 0.5382, "step": 3600 }, { "epoch": 0.17, - "learning_rate": 1.975575453565234e-05, - "loss": 0.5718, + "learning_rate": 2.9756135615433825e-05, + "loss": 0.3683, "step": 3605 }, { "epoch": 0.17, - "learning_rate": 1.97552857343772e-05, - "loss": 0.1882, + "learning_rate": 2.9755667545597804e-05, + "loss": 0.1625, "step": 3610 }, { "epoch": 0.17, - "learning_rate": 1.975481693310206e-05, - "loss": 0.2593, + "learning_rate": 2.9755199475761784e-05, + "loss": 0.2182, "step": 3615 }, { "epoch": 0.17, - "learning_rate": 1.975434813182692e-05, - "loss": 0.2558, + "learning_rate": 2.9754731405925767e-05, + "loss": 0.2643, "step": 3620 }, { "epoch": 0.17, - "learning_rate": 1.9753879330551782e-05, - "loss": 0.2394, + "learning_rate": 2.9754263336089744e-05, + "loss": 0.2336, "step": 3625 }, { "epoch": 0.17, - "learning_rate": 1.9753410529276642e-05, - "loss": 0.4207, + "learning_rate": 2.9753795266253724e-05, + "loss": 0.3078, "step": 3630 }, { "epoch": 0.17, - "learning_rate": 1.9752941728001502e-05, - "loss": 0.4391, + "learning_rate": 2.9753327196417707e-05, + "loss": 0.3142, "step": 3635 }, { "epoch": 0.17, - "learning_rate": 1.9752472926726362e-05, - "loss": 0.7291, + "learning_rate": 2.9752859126581687e-05, + "loss": 0.3788, "step": 3640 }, { "epoch": 0.17, - "learning_rate": 1.975200412545122e-05, - "loss": 0.4494, + "learning_rate": 2.9752391056745667e-05, + "loss": 0.4912, "step": 3645 }, { "epoch": 0.17, - "learning_rate": 1.975153532417608e-05, - "loss": 0.7069, + "learning_rate": 2.9751922986909646e-05, + "loss": 0.9646, "step": 3650 }, { "epoch": 0.17, - "learning_rate": 1.9751066522900945e-05, - "loss": 0.3945, + "learning_rate": 2.975145491707363e-05, + "loss": 0.3359, "step": 3655 }, { "epoch": 0.17, - "learning_rate": 1.9750597721625805e-05, - "loss": 0.2034, + "learning_rate": 2.975098684723761e-05, + "loss": 0.1838, "step": 3660 }, { "epoch": 0.17, - "learning_rate": 1.9750128920350665e-05, - "loss": 0.2243, + "learning_rate": 2.975051877740159e-05, + "loss": 0.1841, "step": 3665 }, { "epoch": 0.17, - "learning_rate": 1.9749660119075528e-05, - "loss": 0.2732, + "learning_rate": 2.975005070756557e-05, + "loss": 0.2018, "step": 3670 }, { "epoch": 0.17, - "learning_rate": 1.9749191317800388e-05, - "loss": 0.3621, + "learning_rate": 2.9749582637729552e-05, + "loss": 0.2357, "step": 3675 }, { "epoch": 0.17, - "learning_rate": 1.9748722516525248e-05, - "loss": 0.3214, + "learning_rate": 2.9749114567893532e-05, + "loss": 0.2938, "step": 3680 }, { "epoch": 0.17, - "learning_rate": 1.9748253715250108e-05, - "loss": 0.5254, + "learning_rate": 2.9748646498057512e-05, + "loss": 0.2645, "step": 3685 }, { "epoch": 0.17, - "learning_rate": 1.9747784913974968e-05, - "loss": 0.5203, + "learning_rate": 2.974817842822149e-05, + "loss": 0.3835, "step": 3690 }, { "epoch": 0.17, - "learning_rate": 1.9747316112699828e-05, - "loss": 0.7242, + "learning_rate": 2.974771035838547e-05, + "loss": 0.6618, "step": 3695 }, { "epoch": 0.17, - "learning_rate": 1.9746847311424688e-05, - "loss": 0.83, + "learning_rate": 2.974724228854945e-05, + "loss": 0.9241, "step": 3700 }, { "epoch": 0.17, - "learning_rate": 1.9746378510149548e-05, - "loss": 0.3907, + "learning_rate": 2.974677421871343e-05, + "loss": 0.4766, "step": 3705 }, { "epoch": 0.17, - "learning_rate": 1.9745909708874408e-05, - "loss": 0.1778, + "learning_rate": 2.9746306148877414e-05, + "loss": 0.1703, "step": 3710 }, { "epoch": 0.17, - "learning_rate": 1.974544090759927e-05, - "loss": 0.2049, + "learning_rate": 2.9745838079041394e-05, + "loss": 0.3016, "step": 3715 }, { "epoch": 0.17, - "learning_rate": 1.974497210632413e-05, - "loss": 0.3739, + "learning_rate": 2.9745370009205374e-05, + "loss": 0.2396, "step": 3720 }, { "epoch": 0.17, - "learning_rate": 1.974450330504899e-05, - "loss": 0.3008, + "learning_rate": 2.9744901939369354e-05, + "loss": 0.2578, "step": 3725 }, { "epoch": 0.17, - "learning_rate": 1.974403450377385e-05, - "loss": 0.2455, + "learning_rate": 2.9744433869533337e-05, + "loss": 0.3347, "step": 3730 }, { "epoch": 0.17, - "learning_rate": 1.9743565702498714e-05, - "loss": 0.4178, + "learning_rate": 2.9743965799697317e-05, + "loss": 0.3658, "step": 3735 }, { "epoch": 0.17, - "learning_rate": 1.9743096901223574e-05, - "loss": 0.5493, + "learning_rate": 2.9743497729861297e-05, + "loss": 0.3817, "step": 3740 }, { "epoch": 0.17, - "learning_rate": 1.9742628099948434e-05, - "loss": 0.4286, + "learning_rate": 2.9743029660025276e-05, + "loss": 0.4796, "step": 3745 }, { "epoch": 0.17, - "learning_rate": 1.9742159298673294e-05, - "loss": 0.6876, + "learning_rate": 2.9742561590189256e-05, + "loss": 0.7048, "step": 3750 }, { "epoch": 0.18, - "learning_rate": 1.9741690497398154e-05, - "loss": 0.3451, + "learning_rate": 2.9742093520353236e-05, + "loss": 0.3976, "step": 3755 }, { "epoch": 0.18, - "learning_rate": 1.9741221696123014e-05, - "loss": 0.1612, + "learning_rate": 2.9741625450517216e-05, + "loss": 0.1921, "step": 3760 }, { "epoch": 0.18, - "learning_rate": 1.9740752894847877e-05, - "loss": 0.2758, + "learning_rate": 2.97411573806812e-05, + "loss": 0.2238, "step": 3765 }, { "epoch": 0.18, - "learning_rate": 1.9740284093572737e-05, - "loss": 0.2768, + "learning_rate": 2.974068931084518e-05, + "loss": 0.2518, "step": 3770 }, { "epoch": 0.18, - "learning_rate": 1.9739815292297597e-05, - "loss": 0.3298, + "learning_rate": 2.974022124100916e-05, + "loss": 0.2936, "step": 3775 }, { "epoch": 0.18, - "learning_rate": 1.9739346491022457e-05, - "loss": 0.3231, + "learning_rate": 2.973975317117314e-05, + "loss": 0.2794, "step": 3780 }, { "epoch": 0.18, - "learning_rate": 1.9738877689747317e-05, - "loss": 0.3359, + "learning_rate": 2.9739285101337122e-05, + "loss": 0.3757, "step": 3785 }, { "epoch": 0.18, - "learning_rate": 1.9738408888472177e-05, - "loss": 0.3687, + "learning_rate": 2.97388170315011e-05, + "loss": 0.2954, "step": 3790 }, { "epoch": 0.18, - "learning_rate": 1.973794008719704e-05, - "loss": 0.5007, + "learning_rate": 2.973834896166508e-05, + "loss": 0.5374, "step": 3795 }, { "epoch": 0.18, - "learning_rate": 1.97374712859219e-05, - "loss": 0.9462, + "learning_rate": 2.973788089182906e-05, + "loss": 0.8038, "step": 3800 }, { "epoch": 0.18, - "learning_rate": 1.973700248464676e-05, - "loss": 0.4636, + "learning_rate": 2.9737412821993044e-05, + "loss": 0.3343, "step": 3805 }, { "epoch": 0.18, - "learning_rate": 1.973653368337162e-05, - "loss": 0.1924, + "learning_rate": 2.9736944752157024e-05, + "loss": 0.2778, "step": 3810 }, { "epoch": 0.18, - "learning_rate": 1.9736064882096483e-05, - "loss": 0.1923, + "learning_rate": 2.9736476682321e-05, + "loss": 0.2653, "step": 3815 }, { "epoch": 0.18, - "learning_rate": 1.9735596080821343e-05, - "loss": 0.2264, + "learning_rate": 2.9736008612484984e-05, + "loss": 0.2064, "step": 3820 }, { "epoch": 0.18, - "learning_rate": 1.9735127279546203e-05, - "loss": 0.3736, + "learning_rate": 2.9735540542648964e-05, + "loss": 0.2617, "step": 3825 }, { "epoch": 0.18, - "learning_rate": 1.9734658478271063e-05, - "loss": 0.4152, + "learning_rate": 2.9735072472812943e-05, + "loss": 0.3189, "step": 3830 }, { "epoch": 0.18, - "learning_rate": 1.9734189676995923e-05, - "loss": 0.4174, + "learning_rate": 2.9734604402976923e-05, + "loss": 0.448, "step": 3835 }, { "epoch": 0.18, - "learning_rate": 1.9733720875720783e-05, - "loss": 0.4885, + "learning_rate": 2.9734136333140907e-05, + "loss": 0.2858, "step": 3840 }, { "epoch": 0.18, - "learning_rate": 1.9733252074445643e-05, - "loss": 0.4311, + "learning_rate": 2.9733668263304886e-05, + "loss": 0.3845, "step": 3845 }, { "epoch": 0.18, - "learning_rate": 1.9732783273170503e-05, - "loss": 0.6807, + "learning_rate": 2.9733200193468866e-05, + "loss": 0.5596, "step": 3850 }, { "epoch": 0.18, - "learning_rate": 1.9732314471895363e-05, - "loss": 0.4367, + "learning_rate": 2.9732732123632846e-05, + "loss": 0.4199, "step": 3855 }, { "epoch": 0.18, - "learning_rate": 1.9731845670620226e-05, - "loss": 0.1888, + "learning_rate": 2.973226405379683e-05, + "loss": 0.1667, "step": 3860 }, { "epoch": 0.18, - "learning_rate": 1.9731376869345086e-05, - "loss": 0.2237, + "learning_rate": 2.973179598396081e-05, + "loss": 0.2107, "step": 3865 }, { "epoch": 0.18, - "learning_rate": 1.9730908068069946e-05, - "loss": 0.2441, + "learning_rate": 2.973132791412479e-05, + "loss": 0.2686, "step": 3870 }, { "epoch": 0.18, - "learning_rate": 1.973043926679481e-05, - "loss": 0.3274, + "learning_rate": 2.973085984428877e-05, + "loss": 0.2633, "step": 3875 }, { "epoch": 0.18, - "learning_rate": 1.972997046551967e-05, - "loss": 0.3156, + "learning_rate": 2.973039177445275e-05, + "loss": 0.2741, "step": 3880 }, { "epoch": 0.18, - "learning_rate": 1.972950166424453e-05, - "loss": 0.3368, + "learning_rate": 2.9729923704616728e-05, + "loss": 0.3799, "step": 3885 }, { "epoch": 0.18, - "learning_rate": 1.972903286296939e-05, - "loss": 0.4301, + "learning_rate": 2.9729455634780708e-05, + "loss": 0.4158, "step": 3890 }, { "epoch": 0.18, - "learning_rate": 1.972856406169425e-05, - "loss": 0.6439, + "learning_rate": 2.972898756494469e-05, + "loss": 0.3757, "step": 3895 }, { "epoch": 0.18, - "learning_rate": 1.972809526041911e-05, - "loss": 0.839, + "learning_rate": 2.972851949510867e-05, + "loss": 0.8281, "step": 3900 }, { "epoch": 0.18, - "learning_rate": 1.9727626459143972e-05, - "loss": 0.4082, + "learning_rate": 2.972805142527265e-05, + "loss": 0.4027, "step": 3905 }, { "epoch": 0.18, - "learning_rate": 1.9727157657868832e-05, - "loss": 0.1958, + "learning_rate": 2.972758335543663e-05, + "loss": 0.18, "step": 3910 }, { "epoch": 0.18, - "learning_rate": 1.9726688856593692e-05, - "loss": 0.1713, + "learning_rate": 2.9727115285600614e-05, + "loss": 0.2759, "step": 3915 }, { "epoch": 0.18, - "learning_rate": 1.9726220055318552e-05, - "loss": 0.2028, + "learning_rate": 2.9726647215764594e-05, + "loss": 0.2724, "step": 3920 }, { "epoch": 0.18, - "learning_rate": 1.9725751254043412e-05, - "loss": 0.2407, + "learning_rate": 2.9726179145928574e-05, + "loss": 0.2124, "step": 3925 }, { "epoch": 0.18, - "learning_rate": 1.9725282452768272e-05, - "loss": 0.4262, + "learning_rate": 2.9725711076092553e-05, + "loss": 0.2936, "step": 3930 }, { "epoch": 0.18, - "learning_rate": 1.972481365149313e-05, - "loss": 0.3567, + "learning_rate": 2.9725243006256537e-05, + "loss": 0.3913, "step": 3935 }, { "epoch": 0.18, - "learning_rate": 1.9724344850217995e-05, - "loss": 0.4333, + "learning_rate": 2.9724774936420513e-05, + "loss": 0.3398, "step": 3940 }, { "epoch": 0.18, - "learning_rate": 1.9723876048942855e-05, - "loss": 0.3769, + "learning_rate": 2.9724306866584493e-05, + "loss": 0.4383, "step": 3945 }, { "epoch": 0.18, - "learning_rate": 1.9723407247667715e-05, - "loss": 0.9714, + "learning_rate": 2.9723838796748476e-05, + "loss": 0.757, "step": 3950 }, { "epoch": 0.18, - "learning_rate": 1.9722938446392578e-05, - "loss": 0.3421, + "learning_rate": 2.9723370726912456e-05, + "loss": 0.3838, "step": 3955 }, { "epoch": 0.18, - "learning_rate": 1.9722469645117438e-05, - "loss": 0.1904, + "learning_rate": 2.9722902657076436e-05, + "loss": 0.2418, "step": 3960 }, { "epoch": 0.19, - "learning_rate": 1.9722000843842298e-05, - "loss": 0.2153, + "learning_rate": 2.9722434587240415e-05, + "loss": 0.2861, "step": 3965 }, { "epoch": 0.19, - "learning_rate": 1.9721532042567158e-05, - "loss": 0.2275, + "learning_rate": 2.97219665174044e-05, + "loss": 0.2855, "step": 3970 }, { "epoch": 0.19, - "learning_rate": 1.9721063241292018e-05, - "loss": 0.3917, + "learning_rate": 2.972149844756838e-05, + "loss": 0.26, "step": 3975 }, { "epoch": 0.19, - "learning_rate": 1.9720594440016878e-05, - "loss": 0.4462, + "learning_rate": 2.972103037773236e-05, + "loss": 0.2921, "step": 3980 }, { "epoch": 0.19, - "learning_rate": 1.9720125638741738e-05, - "loss": 0.319, + "learning_rate": 2.9720562307896338e-05, + "loss": 0.3622, "step": 3985 }, { "epoch": 0.19, - "learning_rate": 1.9719656837466598e-05, - "loss": 0.2997, + "learning_rate": 2.972009423806032e-05, + "loss": 0.4226, "step": 3990 }, { "epoch": 0.19, - "learning_rate": 1.971918803619146e-05, - "loss": 0.601, + "learning_rate": 2.97196261682243e-05, + "loss": 0.4777, "step": 3995 }, { "epoch": 0.19, - "learning_rate": 1.971871923491632e-05, - "loss": 0.6658, + "learning_rate": 2.971915809838828e-05, + "loss": 0.6818, "step": 4000 }, { "epoch": 0.19, - "learning_rate": 1.971825043364118e-05, - "loss": 0.3592, + "learning_rate": 2.971869002855226e-05, + "loss": 0.3419, "step": 4005 }, { "epoch": 0.19, - "learning_rate": 1.971778163236604e-05, - "loss": 0.1744, + "learning_rate": 2.971822195871624e-05, + "loss": 0.1553, "step": 4010 }, { "epoch": 0.19, - "learning_rate": 1.97173128310909e-05, - "loss": 0.2081, + "learning_rate": 2.971775388888022e-05, + "loss": 0.2399, "step": 4015 }, { "epoch": 0.19, - "learning_rate": 1.9716844029815764e-05, - "loss": 0.2472, + "learning_rate": 2.97172858190442e-05, + "loss": 0.2925, "step": 4020 }, { "epoch": 0.19, - "learning_rate": 1.9716375228540624e-05, - "loss": 0.2668, + "learning_rate": 2.9716817749208183e-05, + "loss": 0.3096, "step": 4025 }, { "epoch": 0.19, - "learning_rate": 1.9715906427265484e-05, - "loss": 0.2763, + "learning_rate": 2.9716349679372163e-05, + "loss": 0.3317, "step": 4030 }, { "epoch": 0.19, - "learning_rate": 1.9715437625990344e-05, - "loss": 0.3903, + "learning_rate": 2.9715881609536143e-05, + "loss": 0.3364, "step": 4035 }, { "epoch": 0.19, - "learning_rate": 1.9714968824715204e-05, - "loss": 0.4871, + "learning_rate": 2.9715413539700123e-05, + "loss": 0.3538, "step": 4040 }, { "epoch": 0.19, - "learning_rate": 1.9714500023440067e-05, - "loss": 0.6568, + "learning_rate": 2.9714945469864106e-05, + "loss": 0.5839, "step": 4045 }, { "epoch": 0.19, - "learning_rate": 1.9714031222164927e-05, - "loss": 0.7185, + "learning_rate": 2.9714477400028086e-05, + "loss": 0.8043, "step": 4050 }, { "epoch": 0.19, - "learning_rate": 1.9713562420889787e-05, - "loss": 0.416, + "learning_rate": 2.9714009330192066e-05, + "loss": 0.372, "step": 4055 }, { "epoch": 0.19, - "learning_rate": 1.9713093619614647e-05, - "loss": 0.1623, + "learning_rate": 2.971354126035605e-05, + "loss": 0.1749, "step": 4060 }, { "epoch": 0.19, - "learning_rate": 1.9712624818339507e-05, - "loss": 0.1905, + "learning_rate": 2.9713073190520025e-05, + "loss": 0.2517, "step": 4065 }, { "epoch": 0.19, - "learning_rate": 1.9712156017064367e-05, - "loss": 0.2243, + "learning_rate": 2.9712605120684005e-05, + "loss": 0.2707, "step": 4070 }, { "epoch": 0.19, - "learning_rate": 1.9711687215789227e-05, - "loss": 0.2595, + "learning_rate": 2.9712137050847985e-05, + "loss": 0.2532, "step": 4075 }, { "epoch": 0.19, - "learning_rate": 1.9711218414514087e-05, - "loss": 0.2343, + "learning_rate": 2.9711668981011968e-05, + "loss": 0.3381, "step": 4080 }, { "epoch": 0.19, - "learning_rate": 1.971074961323895e-05, - "loss": 0.3081, + "learning_rate": 2.9711200911175948e-05, + "loss": 0.4232, "step": 4085 }, { "epoch": 0.19, - "learning_rate": 1.971028081196381e-05, - "loss": 0.4051, + "learning_rate": 2.9710732841339928e-05, + "loss": 0.6174, "step": 4090 }, { "epoch": 0.19, - "learning_rate": 1.970981201068867e-05, - "loss": 0.4353, + "learning_rate": 2.9710264771503908e-05, + "loss": 0.5029, "step": 4095 }, { "epoch": 0.19, - "learning_rate": 1.9709343209413533e-05, - "loss": 0.8488, + "learning_rate": 2.970979670166789e-05, + "loss": 0.6542, "step": 4100 }, { "epoch": 0.19, - "learning_rate": 1.9708874408138393e-05, - "loss": 0.3971, + "learning_rate": 2.970932863183187e-05, + "loss": 0.3444, "step": 4105 }, { "epoch": 0.19, - "learning_rate": 1.9708405606863253e-05, - "loss": 0.1639, + "learning_rate": 2.970886056199585e-05, + "loss": 0.2102, "step": 4110 }, { "epoch": 0.19, - "learning_rate": 1.9707936805588113e-05, - "loss": 0.1971, + "learning_rate": 2.9708392492159834e-05, + "loss": 0.19, "step": 4115 }, { "epoch": 0.19, - "learning_rate": 1.9707468004312973e-05, - "loss": 0.1918, + "learning_rate": 2.9707924422323814e-05, + "loss": 0.2002, "step": 4120 }, { "epoch": 0.19, - "learning_rate": 1.9706999203037833e-05, - "loss": 0.2442, + "learning_rate": 2.9707456352487793e-05, + "loss": 0.2521, "step": 4125 }, { "epoch": 0.19, - "learning_rate": 1.9706530401762693e-05, - "loss": 0.3431, + "learning_rate": 2.970698828265177e-05, + "loss": 0.3259, "step": 4130 }, { "epoch": 0.19, - "learning_rate": 1.9706061600487556e-05, - "loss": 0.4143, + "learning_rate": 2.9706520212815753e-05, + "loss": 0.3844, "step": 4135 }, { "epoch": 0.19, - "learning_rate": 1.9705592799212416e-05, - "loss": 0.563, + "learning_rate": 2.9706052142979733e-05, + "loss": 0.3614, "step": 4140 }, { "epoch": 0.19, - "learning_rate": 1.9705123997937276e-05, - "loss": 0.5536, + "learning_rate": 2.9705584073143713e-05, + "loss": 0.3912, "step": 4145 }, { "epoch": 0.19, - "learning_rate": 1.9704655196662136e-05, - "loss": 0.8869, + "learning_rate": 2.9705116003307692e-05, + "loss": 0.7987, "step": 4150 }, { "epoch": 0.19, - "learning_rate": 1.9704186395386996e-05, - "loss": 0.3938, + "learning_rate": 2.9704647933471676e-05, + "loss": 0.4521, "step": 4155 }, { "epoch": 0.19, - "learning_rate": 1.9703717594111856e-05, - "loss": 0.1912, + "learning_rate": 2.9704179863635655e-05, + "loss": 0.2179, "step": 4160 }, { "epoch": 0.19, - "learning_rate": 1.970324879283672e-05, - "loss": 0.2132, + "learning_rate": 2.9703711793799635e-05, + "loss": 0.1927, "step": 4165 }, { "epoch": 0.19, - "learning_rate": 1.970277999156158e-05, - "loss": 0.1632, + "learning_rate": 2.9703243723963615e-05, + "loss": 0.2198, "step": 4170 }, { "epoch": 0.19, - "learning_rate": 1.970231119028644e-05, - "loss": 0.2362, + "learning_rate": 2.97027756541276e-05, + "loss": 0.1865, "step": 4175 }, { "epoch": 0.2, - "learning_rate": 1.97018423890113e-05, - "loss": 0.2287, + "learning_rate": 2.9702307584291578e-05, + "loss": 0.2242, "step": 4180 }, { "epoch": 0.2, - "learning_rate": 1.9701373587736162e-05, - "loss": 0.3168, + "learning_rate": 2.9701839514455558e-05, + "loss": 0.4013, "step": 4185 }, { "epoch": 0.2, - "learning_rate": 1.9700904786461022e-05, - "loss": 0.4876, + "learning_rate": 2.9701371444619538e-05, + "loss": 0.4213, "step": 4190 }, { "epoch": 0.2, - "learning_rate": 1.9700435985185882e-05, - "loss": 0.5353, + "learning_rate": 2.9700903374783518e-05, + "loss": 0.4226, "step": 4195 }, { "epoch": 0.2, - "learning_rate": 1.9699967183910742e-05, - "loss": 0.6848, + "learning_rate": 2.9700435304947497e-05, + "loss": 0.808, "step": 4200 }, { "epoch": 0.2, - "learning_rate": 1.9699498382635602e-05, - "loss": 0.4614, + "learning_rate": 2.9699967235111477e-05, + "loss": 0.3286, "step": 4205 }, { "epoch": 0.2, - "learning_rate": 1.9699029581360462e-05, - "loss": 0.2455, + "learning_rate": 2.969949916527546e-05, + "loss": 0.1521, "step": 4210 }, { "epoch": 0.2, - "learning_rate": 1.9698560780085322e-05, - "loss": 0.1861, + "learning_rate": 2.969903109543944e-05, + "loss": 0.1065, "step": 4215 }, { "epoch": 0.2, - "learning_rate": 1.9698091978810182e-05, - "loss": 0.3312, + "learning_rate": 2.969856302560342e-05, + "loss": 0.223, "step": 4220 }, { "epoch": 0.2, - "learning_rate": 1.9697623177535045e-05, - "loss": 0.2629, + "learning_rate": 2.96980949557674e-05, + "loss": 0.2412, "step": 4225 }, { "epoch": 0.2, - "learning_rate": 1.9697154376259905e-05, - "loss": 0.2331, + "learning_rate": 2.9697626885931383e-05, + "loss": 0.3471, "step": 4230 }, { "epoch": 0.2, - "learning_rate": 1.9696685574984765e-05, - "loss": 0.4012, + "learning_rate": 2.9697158816095363e-05, + "loss": 0.3856, "step": 4235 }, { "epoch": 0.2, - "learning_rate": 1.9696216773709628e-05, - "loss": 0.3765, + "learning_rate": 2.9696690746259343e-05, + "loss": 0.4513, "step": 4240 }, { "epoch": 0.2, - "learning_rate": 1.9695747972434488e-05, - "loss": 0.5471, + "learning_rate": 2.9696222676423326e-05, + "loss": 0.4989, "step": 4245 }, { "epoch": 0.2, - "learning_rate": 1.9695279171159348e-05, - "loss": 0.723, + "learning_rate": 2.9695754606587306e-05, + "loss": 0.7143, "step": 4250 }, { "epoch": 0.2, - "learning_rate": 1.9694810369884208e-05, - "loss": 0.4313, + "learning_rate": 2.9695286536751282e-05, + "loss": 0.3467, "step": 4255 }, { "epoch": 0.2, - "learning_rate": 1.9694341568609068e-05, - "loss": 0.13, + "learning_rate": 2.9694818466915262e-05, + "loss": 0.1694, "step": 4260 }, { "epoch": 0.2, - "learning_rate": 1.9693872767333928e-05, - "loss": 0.2133, + "learning_rate": 2.9694350397079245e-05, + "loss": 0.1576, "step": 4265 }, { "epoch": 0.2, - "learning_rate": 1.9693403966058788e-05, - "loss": 0.2618, + "learning_rate": 2.9693882327243225e-05, + "loss": 0.2404, "step": 4270 }, { "epoch": 0.2, - "learning_rate": 1.969293516478365e-05, - "loss": 0.2428, + "learning_rate": 2.9693414257407205e-05, + "loss": 0.2594, "step": 4275 }, { "epoch": 0.2, - "learning_rate": 1.969246636350851e-05, - "loss": 0.3027, + "learning_rate": 2.9692946187571185e-05, + "loss": 0.3529, "step": 4280 }, { "epoch": 0.2, - "learning_rate": 1.969199756223337e-05, - "loss": 0.3958, + "learning_rate": 2.9692478117735168e-05, + "loss": 0.262, "step": 4285 }, { "epoch": 0.2, - "learning_rate": 1.969152876095823e-05, - "loss": 0.5407, + "learning_rate": 2.9692010047899148e-05, + "loss": 0.4635, "step": 4290 }, { "epoch": 0.2, - "learning_rate": 1.969105995968309e-05, - "loss": 0.5288, + "learning_rate": 2.9691541978063128e-05, + "loss": 0.5061, "step": 4295 }, { "epoch": 0.2, - "learning_rate": 1.969059115840795e-05, - "loss": 0.9622, + "learning_rate": 2.969107390822711e-05, + "loss": 0.8731, "step": 4300 }, { "epoch": 0.2, - "learning_rate": 1.9690122357132814e-05, - "loss": 0.3357, + "learning_rate": 2.969060583839109e-05, + "loss": 0.4071, "step": 4305 }, { "epoch": 0.2, - "learning_rate": 1.9689653555857674e-05, - "loss": 0.2422, + "learning_rate": 2.969013776855507e-05, + "loss": 0.1383, "step": 4310 }, { "epoch": 0.2, - "learning_rate": 1.9689184754582534e-05, - "loss": 0.2317, + "learning_rate": 2.968966969871905e-05, + "loss": 0.3105, "step": 4315 }, { "epoch": 0.2, - "learning_rate": 1.9688715953307397e-05, - "loss": 0.3139, + "learning_rate": 2.968920162888303e-05, + "loss": 0.2413, "step": 4320 }, { "epoch": 0.2, - "learning_rate": 1.9688247152032257e-05, - "loss": 0.2611, + "learning_rate": 2.968873355904701e-05, + "loss": 0.2718, "step": 4325 }, { "epoch": 0.2, - "learning_rate": 1.9687778350757117e-05, - "loss": 0.274, + "learning_rate": 2.968826548921099e-05, + "loss": 0.2647, "step": 4330 }, { "epoch": 0.2, - "learning_rate": 1.9687309549481977e-05, - "loss": 0.4473, + "learning_rate": 2.968779741937497e-05, + "loss": 0.4238, "step": 4335 }, { "epoch": 0.2, - "learning_rate": 1.9686840748206837e-05, - "loss": 0.3167, + "learning_rate": 2.9687329349538953e-05, + "loss": 0.3297, "step": 4340 }, { "epoch": 0.2, - "learning_rate": 1.9686371946931697e-05, - "loss": 0.5854, + "learning_rate": 2.9686861279702932e-05, + "loss": 0.6178, "step": 4345 }, { "epoch": 0.2, - "learning_rate": 1.9685903145656557e-05, - "loss": 0.8531, + "learning_rate": 2.9686393209866912e-05, + "loss": 0.5912, "step": 4350 }, { "epoch": 0.2, - "learning_rate": 1.9685434344381417e-05, - "loss": 0.3545, + "learning_rate": 2.9685925140030892e-05, + "loss": 0.3473, "step": 4355 }, { "epoch": 0.2, - "learning_rate": 1.9684965543106277e-05, - "loss": 0.1751, + "learning_rate": 2.9685457070194875e-05, + "loss": 0.2283, "step": 4360 }, { "epoch": 0.2, - "learning_rate": 1.9684496741831137e-05, - "loss": 0.2515, + "learning_rate": 2.9684989000358855e-05, + "loss": 0.243, "step": 4365 }, { "epoch": 0.2, - "learning_rate": 1.9684027940556e-05, - "loss": 0.2662, + "learning_rate": 2.9684520930522835e-05, + "loss": 0.1972, "step": 4370 }, { "epoch": 0.2, - "learning_rate": 1.968355913928086e-05, - "loss": 0.2842, + "learning_rate": 2.9684052860686818e-05, + "loss": 0.2412, "step": 4375 }, { "epoch": 0.2, - "learning_rate": 1.968309033800572e-05, - "loss": 0.4317, + "learning_rate": 2.9683584790850795e-05, + "loss": 0.3386, "step": 4380 }, { "epoch": 0.2, - "learning_rate": 1.9682621536730583e-05, - "loss": 0.3946, + "learning_rate": 2.9683116721014774e-05, + "loss": 0.3353, "step": 4385 }, { "epoch": 0.2, - "learning_rate": 1.9682152735455443e-05, - "loss": 0.3546, + "learning_rate": 2.9682648651178754e-05, + "loss": 0.395, "step": 4390 }, { "epoch": 0.21, - "learning_rate": 1.9681683934180303e-05, - "loss": 0.4726, + "learning_rate": 2.9682180581342737e-05, + "loss": 0.6366, "step": 4395 }, { "epoch": 0.21, - "learning_rate": 1.9681215132905163e-05, - "loss": 0.6882, + "learning_rate": 2.9681712511506717e-05, + "loss": 0.74, "step": 4400 }, { "epoch": 0.21, - "learning_rate": 1.9680746331630023e-05, - "loss": 0.3417, + "learning_rate": 2.9681244441670697e-05, + "loss": 0.3046, "step": 4405 }, { "epoch": 0.21, - "learning_rate": 1.9680277530354883e-05, - "loss": 0.1884, + "learning_rate": 2.9680776371834677e-05, + "loss": 0.1366, "step": 4410 }, { "epoch": 0.21, - "learning_rate": 1.9679808729079746e-05, - "loss": 0.2584, + "learning_rate": 2.968030830199866e-05, + "loss": 0.1812, "step": 4415 }, { "epoch": 0.21, - "learning_rate": 1.9679339927804606e-05, - "loss": 0.1648, + "learning_rate": 2.967984023216264e-05, + "loss": 0.1934, "step": 4420 }, { "epoch": 0.21, - "learning_rate": 1.9678871126529466e-05, - "loss": 0.3163, + "learning_rate": 2.967937216232662e-05, + "loss": 0.373, "step": 4425 }, { "epoch": 0.21, - "learning_rate": 1.9678402325254326e-05, - "loss": 0.2756, + "learning_rate": 2.9678904092490603e-05, + "loss": 0.2788, "step": 4430 }, { "epoch": 0.21, - "learning_rate": 1.9677933523979186e-05, - "loss": 0.3836, + "learning_rate": 2.9678436022654583e-05, + "loss": 0.4025, "step": 4435 }, { "epoch": 0.21, - "learning_rate": 1.9677464722704046e-05, - "loss": 0.3857, + "learning_rate": 2.9677967952818563e-05, + "loss": 0.319, "step": 4440 }, { "epoch": 0.21, - "learning_rate": 1.9676995921428906e-05, - "loss": 0.5504, + "learning_rate": 2.967749988298254e-05, + "loss": 0.4115, "step": 4445 }, { "epoch": 0.21, - "learning_rate": 1.967652712015377e-05, - "loss": 0.796, + "learning_rate": 2.9677031813146522e-05, + "loss": 0.7493, "step": 4450 }, { "epoch": 0.21, - "learning_rate": 1.967605831887863e-05, - "loss": 0.4518, + "learning_rate": 2.9676563743310502e-05, + "loss": 0.3839, "step": 4455 }, { "epoch": 0.21, - "learning_rate": 1.967558951760349e-05, - "loss": 0.2493, + "learning_rate": 2.9676095673474482e-05, + "loss": 0.1987, "step": 4460 }, { "epoch": 0.21, - "learning_rate": 1.9675120716328352e-05, - "loss": 0.1496, + "learning_rate": 2.967562760363846e-05, + "loss": 0.2261, "step": 4465 }, { "epoch": 0.21, - "learning_rate": 1.9674651915053212e-05, - "loss": 0.2788, + "learning_rate": 2.9675159533802445e-05, + "loss": 0.2989, "step": 4470 }, { "epoch": 0.21, - "learning_rate": 1.9674183113778072e-05, - "loss": 0.2969, + "learning_rate": 2.9674691463966425e-05, + "loss": 0.1901, "step": 4475 }, { "epoch": 0.21, - "learning_rate": 1.9673714312502932e-05, - "loss": 0.3378, + "learning_rate": 2.9674223394130404e-05, + "loss": 0.3234, "step": 4480 }, { "epoch": 0.21, - "learning_rate": 1.9673245511227792e-05, - "loss": 0.5223, + "learning_rate": 2.9673755324294388e-05, + "loss": 0.2385, "step": 4485 }, { "epoch": 0.21, - "learning_rate": 1.9672776709952652e-05, - "loss": 0.4544, + "learning_rate": 2.9673287254458368e-05, + "loss": 0.3662, "step": 4490 }, { "epoch": 0.21, - "learning_rate": 1.9672307908677512e-05, - "loss": 0.6628, + "learning_rate": 2.9672819184622347e-05, + "loss": 0.4362, "step": 4495 }, { "epoch": 0.21, - "learning_rate": 1.9671839107402372e-05, - "loss": 0.898, + "learning_rate": 2.9672351114786327e-05, + "loss": 0.6486, "step": 4500 }, { "epoch": 0.21, - "learning_rate": 1.9671370306127232e-05, - "loss": 0.3878, + "learning_rate": 2.967188304495031e-05, + "loss": 0.3722, "step": 4505 }, { "epoch": 0.21, - "learning_rate": 1.9670901504852095e-05, - "loss": 0.2018, + "learning_rate": 2.9671414975114287e-05, + "loss": 0.1274, "step": 4510 }, { "epoch": 0.21, - "learning_rate": 1.9670432703576955e-05, - "loss": 0.2069, + "learning_rate": 2.9670946905278267e-05, + "loss": 0.15, "step": 4515 }, { "epoch": 0.21, - "learning_rate": 1.9669963902301815e-05, - "loss": 0.242, + "learning_rate": 2.9670478835442246e-05, + "loss": 0.3022, "step": 4520 }, { "epoch": 0.21, - "learning_rate": 1.9669495101026675e-05, - "loss": 0.2806, + "learning_rate": 2.967001076560623e-05, + "loss": 0.2737, "step": 4525 }, { "epoch": 0.21, - "learning_rate": 1.9669026299751538e-05, - "loss": 0.3503, + "learning_rate": 2.966954269577021e-05, + "loss": 0.4052, "step": 4530 }, { "epoch": 0.21, - "learning_rate": 1.9668557498476398e-05, - "loss": 0.2483, + "learning_rate": 2.966907462593419e-05, + "loss": 0.3599, "step": 4535 }, { "epoch": 0.21, - "learning_rate": 1.9668088697201258e-05, - "loss": 0.3632, + "learning_rate": 2.966860655609817e-05, + "loss": 0.4664, "step": 4540 }, { "epoch": 0.21, - "learning_rate": 1.9667619895926118e-05, - "loss": 0.5749, + "learning_rate": 2.9668138486262152e-05, + "loss": 0.5121, "step": 4545 }, { "epoch": 0.21, - "learning_rate": 1.9667151094650978e-05, - "loss": 0.6014, + "learning_rate": 2.9667670416426132e-05, + "loss": 0.804, "step": 4550 }, { "epoch": 0.21, - "learning_rate": 1.966668229337584e-05, - "loss": 0.3388, + "learning_rate": 2.9667202346590112e-05, + "loss": 0.3194, "step": 4555 }, { "epoch": 0.21, - "learning_rate": 1.96662134921007e-05, - "loss": 0.1989, + "learning_rate": 2.9666734276754095e-05, + "loss": 0.1735, "step": 4560 }, { "epoch": 0.21, - "learning_rate": 1.966574469082556e-05, - "loss": 0.2078, + "learning_rate": 2.9666266206918075e-05, + "loss": 0.1628, "step": 4565 }, { "epoch": 0.21, - "learning_rate": 1.966527588955042e-05, - "loss": 0.2163, + "learning_rate": 2.966579813708205e-05, + "loss": 0.2651, "step": 4570 }, { "epoch": 0.21, - "learning_rate": 1.966480708827528e-05, - "loss": 0.2737, + "learning_rate": 2.966533006724603e-05, + "loss": 0.2258, "step": 4575 }, { "epoch": 0.21, - "learning_rate": 1.966433828700014e-05, - "loss": 0.347, + "learning_rate": 2.9664861997410014e-05, + "loss": 0.2476, "step": 4580 }, { "epoch": 0.21, - "learning_rate": 1.9663869485725e-05, - "loss": 0.3929, + "learning_rate": 2.9664393927573994e-05, + "loss": 0.3759, "step": 4585 }, { "epoch": 0.21, - "learning_rate": 1.9663400684449864e-05, - "loss": 0.4723, + "learning_rate": 2.9663925857737974e-05, + "loss": 0.4086, "step": 4590 }, { "epoch": 0.21, - "learning_rate": 1.9662931883174724e-05, - "loss": 0.5059, + "learning_rate": 2.9663457787901954e-05, + "loss": 0.4154, "step": 4595 }, { "epoch": 0.21, - "learning_rate": 1.9662463081899584e-05, - "loss": 0.689, + "learning_rate": 2.9662989718065937e-05, + "loss": 0.5595, "step": 4600 }, { "epoch": 0.21, - "learning_rate": 1.9661994280624444e-05, - "loss": 0.374, + "learning_rate": 2.9662521648229917e-05, + "loss": 0.3738, "step": 4605 }, { "epoch": 0.22, - "learning_rate": 1.9661525479349307e-05, - "loss": 0.1667, + "learning_rate": 2.9662053578393897e-05, + "loss": 0.1435, "step": 4610 }, { "epoch": 0.22, - "learning_rate": 1.9661056678074167e-05, - "loss": 0.1707, + "learning_rate": 2.966158550855788e-05, + "loss": 0.2051, "step": 4615 }, { "epoch": 0.22, - "learning_rate": 1.9660587876799027e-05, - "loss": 0.196, + "learning_rate": 2.966111743872186e-05, + "loss": 0.1334, "step": 4620 }, { "epoch": 0.22, - "learning_rate": 1.9660119075523887e-05, - "loss": 0.3145, + "learning_rate": 2.966064936888584e-05, + "loss": 0.4194, "step": 4625 }, { "epoch": 0.22, - "learning_rate": 1.9659650274248747e-05, - "loss": 0.3389, + "learning_rate": 2.966018129904982e-05, + "loss": 0.2923, "step": 4630 }, { "epoch": 0.22, - "learning_rate": 1.9659181472973607e-05, - "loss": 0.4153, + "learning_rate": 2.96597132292138e-05, + "loss": 0.3075, "step": 4635 }, { "epoch": 0.22, - "learning_rate": 1.9658712671698467e-05, - "loss": 0.3724, + "learning_rate": 2.965924515937778e-05, + "loss": 0.4637, "step": 4640 }, { "epoch": 0.22, - "learning_rate": 1.965824387042333e-05, - "loss": 0.5458, + "learning_rate": 2.965877708954176e-05, + "loss": 0.5122, "step": 4645 }, { "epoch": 0.22, - "learning_rate": 1.965777506914819e-05, - "loss": 0.8993, + "learning_rate": 2.965830901970574e-05, + "loss": 0.6531, "step": 4650 }, { "epoch": 0.22, - "learning_rate": 1.965730626787305e-05, - "loss": 0.3525, + "learning_rate": 2.9657840949869722e-05, + "loss": 0.3787, "step": 4655 }, { "epoch": 0.22, - "learning_rate": 1.965683746659791e-05, - "loss": 0.2098, + "learning_rate": 2.96573728800337e-05, + "loss": 0.1451, "step": 4660 }, { "epoch": 0.22, - "learning_rate": 1.965636866532277e-05, - "loss": 0.1881, + "learning_rate": 2.965690481019768e-05, + "loss": 0.244, "step": 4665 }, { "epoch": 0.22, - "learning_rate": 1.9655899864047633e-05, - "loss": 0.2259, + "learning_rate": 2.9656436740361665e-05, + "loss": 0.2495, "step": 4670 }, { "epoch": 0.22, - "learning_rate": 1.9655431062772493e-05, - "loss": 0.3217, + "learning_rate": 2.9655968670525644e-05, + "loss": 0.3045, "step": 4675 }, { "epoch": 0.22, - "learning_rate": 1.9654962261497353e-05, - "loss": 0.3272, + "learning_rate": 2.9655500600689624e-05, + "loss": 0.189, "step": 4680 }, { "epoch": 0.22, - "learning_rate": 1.9654493460222213e-05, - "loss": 0.3857, + "learning_rate": 2.9655032530853604e-05, + "loss": 0.3852, "step": 4685 }, { "epoch": 0.22, - "learning_rate": 1.9654024658947073e-05, - "loss": 0.4295, + "learning_rate": 2.9654564461017587e-05, + "loss": 0.4357, "step": 4690 }, { "epoch": 0.22, - "learning_rate": 1.9653555857671936e-05, - "loss": 0.4549, + "learning_rate": 2.9654096391181567e-05, + "loss": 0.3422, "step": 4695 }, { "epoch": 0.22, - "learning_rate": 1.9653087056396796e-05, - "loss": 0.6043, + "learning_rate": 2.9653628321345544e-05, + "loss": 0.6239, "step": 4700 }, { "epoch": 0.22, - "learning_rate": 1.9652618255121656e-05, - "loss": 0.3951, + "learning_rate": 2.9653160251509523e-05, + "loss": 0.4151, "step": 4705 }, { "epoch": 0.22, - "learning_rate": 1.9652149453846516e-05, - "loss": 0.1837, + "learning_rate": 2.9652692181673507e-05, + "loss": 0.1538, "step": 4710 }, { "epoch": 0.22, - "learning_rate": 1.9651680652571376e-05, - "loss": 0.2177, + "learning_rate": 2.9652224111837486e-05, + "loss": 0.2, "step": 4715 }, { "epoch": 0.22, - "learning_rate": 1.9651211851296236e-05, - "loss": 0.2921, + "learning_rate": 2.9651756042001466e-05, + "loss": 0.2349, "step": 4720 }, { "epoch": 0.22, - "learning_rate": 1.9650743050021096e-05, - "loss": 0.3297, + "learning_rate": 2.9651287972165446e-05, + "loss": 0.2839, "step": 4725 }, { "epoch": 0.22, - "learning_rate": 1.9650274248745956e-05, - "loss": 0.3355, + "learning_rate": 2.965081990232943e-05, + "loss": 0.2757, "step": 4730 }, { "epoch": 0.22, - "learning_rate": 1.964980544747082e-05, - "loss": 0.4129, + "learning_rate": 2.965035183249341e-05, + "loss": 0.7267, "step": 4735 }, { "epoch": 0.22, - "learning_rate": 1.964933664619568e-05, - "loss": 0.425, + "learning_rate": 2.964988376265739e-05, + "loss": 0.3978, "step": 4740 }, { "epoch": 0.22, - "learning_rate": 1.964886784492054e-05, - "loss": 0.4326, + "learning_rate": 2.9649415692821372e-05, + "loss": 0.3665, "step": 4745 }, { "epoch": 0.22, - "learning_rate": 1.9648399043645402e-05, - "loss": 0.72, + "learning_rate": 2.9648947622985352e-05, + "loss": 0.7552, "step": 4750 }, { "epoch": 0.22, - "learning_rate": 1.9647930242370262e-05, - "loss": 0.4429, + "learning_rate": 2.9648479553149332e-05, + "loss": 0.352, "step": 4755 }, { "epoch": 0.22, - "learning_rate": 1.9647461441095122e-05, - "loss": 0.2009, + "learning_rate": 2.9648011483313308e-05, + "loss": 0.1796, "step": 4760 }, { "epoch": 0.22, - "learning_rate": 1.9646992639819982e-05, - "loss": 0.195, + "learning_rate": 2.964754341347729e-05, + "loss": 0.2277, "step": 4765 }, { "epoch": 0.22, - "learning_rate": 1.9646523838544842e-05, - "loss": 0.2269, + "learning_rate": 2.964707534364127e-05, + "loss": 0.2142, "step": 4770 }, { "epoch": 0.22, - "learning_rate": 1.9646055037269702e-05, - "loss": 0.2516, + "learning_rate": 2.964660727380525e-05, + "loss": 0.2721, "step": 4775 }, { "epoch": 0.22, - "learning_rate": 1.9645586235994562e-05, - "loss": 0.2693, + "learning_rate": 2.964613920396923e-05, + "loss": 0.2925, "step": 4780 }, { "epoch": 0.22, - "learning_rate": 1.9645117434719425e-05, - "loss": 0.4334, + "learning_rate": 2.9645671134133214e-05, + "loss": 0.1989, "step": 4785 }, { "epoch": 0.22, - "learning_rate": 1.9644648633444285e-05, - "loss": 0.4747, + "learning_rate": 2.9645203064297194e-05, + "loss": 0.3872, "step": 4790 }, { "epoch": 0.22, - "learning_rate": 1.9644179832169145e-05, - "loss": 0.5062, + "learning_rate": 2.9644734994461174e-05, + "loss": 0.4552, "step": 4795 }, { "epoch": 0.22, - "learning_rate": 1.9643711030894005e-05, - "loss": 0.627, + "learning_rate": 2.9644266924625157e-05, + "loss": 0.6281, "step": 4800 }, { "epoch": 0.22, - "learning_rate": 1.9643242229618865e-05, - "loss": 0.2878, + "learning_rate": 2.9643798854789137e-05, + "loss": 0.3205, "step": 4805 }, { "epoch": 0.22, - "learning_rate": 1.9642773428343725e-05, - "loss": 0.1459, + "learning_rate": 2.9643330784953116e-05, + "loss": 0.1648, "step": 4810 }, { "epoch": 0.22, - "learning_rate": 1.964230462706859e-05, - "loss": 0.1581, + "learning_rate": 2.9642862715117096e-05, + "loss": 0.1366, "step": 4815 }, { "epoch": 0.22, - "learning_rate": 1.964183582579345e-05, - "loss": 0.2001, + "learning_rate": 2.964239464528108e-05, + "loss": 0.2225, "step": 4820 }, { "epoch": 0.23, - "learning_rate": 1.9641367024518308e-05, - "loss": 0.2857, + "learning_rate": 2.9641926575445056e-05, + "loss": 0.2482, "step": 4825 }, { "epoch": 0.23, - "learning_rate": 1.9640898223243168e-05, - "loss": 0.2737, + "learning_rate": 2.9641458505609036e-05, + "loss": 0.2454, "step": 4830 }, { "epoch": 0.23, - "learning_rate": 1.964042942196803e-05, - "loss": 0.2742, + "learning_rate": 2.9640990435773016e-05, + "loss": 0.4022, "step": 4835 }, { "epoch": 0.23, - "learning_rate": 1.963996062069289e-05, - "loss": 0.4487, + "learning_rate": 2.9640522365937e-05, + "loss": 0.3712, "step": 4840 }, { "epoch": 0.23, - "learning_rate": 1.963949181941775e-05, - "loss": 0.4665, + "learning_rate": 2.964005429610098e-05, + "loss": 0.4711, "step": 4845 }, { "epoch": 0.23, - "learning_rate": 1.963902301814261e-05, - "loss": 0.8322, + "learning_rate": 2.963958622626496e-05, + "loss": 0.704, "step": 4850 }, { "epoch": 0.23, - "learning_rate": 1.963855421686747e-05, - "loss": 0.3451, + "learning_rate": 2.963911815642894e-05, + "loss": 0.3407, "step": 4855 }, { "epoch": 0.23, - "learning_rate": 1.963808541559233e-05, - "loss": 0.1254, + "learning_rate": 2.963865008659292e-05, + "loss": 0.1218, "step": 4860 }, { "epoch": 0.23, - "learning_rate": 1.963761661431719e-05, - "loss": 0.1569, + "learning_rate": 2.96381820167569e-05, + "loss": 0.1539, "step": 4865 }, { "epoch": 0.23, - "learning_rate": 1.963714781304205e-05, - "loss": 0.2125, + "learning_rate": 2.963771394692088e-05, + "loss": 0.2058, "step": 4870 }, { "epoch": 0.23, - "learning_rate": 1.963667901176691e-05, - "loss": 0.2344, + "learning_rate": 2.9637245877084864e-05, + "loss": 0.3064, "step": 4875 }, { "epoch": 0.23, - "learning_rate": 1.9636210210491774e-05, - "loss": 0.2472, + "learning_rate": 2.9636777807248844e-05, + "loss": 0.298, "step": 4880 }, { "epoch": 0.23, - "learning_rate": 1.9635741409216634e-05, - "loss": 0.4773, + "learning_rate": 2.9636309737412824e-05, + "loss": 0.2942, "step": 4885 }, { "epoch": 0.23, - "learning_rate": 1.9635272607941494e-05, - "loss": 0.4222, + "learning_rate": 2.96358416675768e-05, + "loss": 0.376, "step": 4890 }, { "epoch": 0.23, - "learning_rate": 1.9634803806666358e-05, - "loss": 0.4088, + "learning_rate": 2.9635373597740784e-05, + "loss": 0.4057, "step": 4895 }, { "epoch": 0.23, - "learning_rate": 1.9634335005391217e-05, - "loss": 0.4062, + "learning_rate": 2.9634905527904763e-05, + "loss": 0.8207, "step": 4900 }, { "epoch": 0.23, - "learning_rate": 1.9633866204116077e-05, - "loss": 0.3291, + "learning_rate": 2.9634437458068743e-05, + "loss": 0.332, "step": 4905 }, { "epoch": 0.23, - "learning_rate": 1.9633397402840937e-05, - "loss": 0.133, + "learning_rate": 2.9633969388232726e-05, + "loss": 0.1467, "step": 4910 }, { "epoch": 0.23, - "learning_rate": 1.9632928601565797e-05, - "loss": 0.2487, + "learning_rate": 2.9633501318396706e-05, + "loss": 0.1282, "step": 4915 }, { "epoch": 0.23, - "learning_rate": 1.9632459800290657e-05, - "loss": 0.1519, + "learning_rate": 2.9633033248560686e-05, + "loss": 0.2048, "step": 4920 }, { "epoch": 0.23, - "learning_rate": 1.963199099901552e-05, - "loss": 0.3122, + "learning_rate": 2.9632565178724666e-05, + "loss": 0.26, "step": 4925 }, { "epoch": 0.23, - "learning_rate": 1.963152219774038e-05, - "loss": 0.4063, + "learning_rate": 2.963209710888865e-05, + "loss": 0.3335, "step": 4930 }, { "epoch": 0.23, - "learning_rate": 1.963105339646524e-05, - "loss": 0.3803, + "learning_rate": 2.963162903905263e-05, + "loss": 0.2966, "step": 4935 }, { "epoch": 0.23, - "learning_rate": 1.96305845951901e-05, - "loss": 0.4153, + "learning_rate": 2.963116096921661e-05, + "loss": 0.3337, "step": 4940 }, { "epoch": 0.23, - "learning_rate": 1.963011579391496e-05, - "loss": 0.4193, + "learning_rate": 2.963069289938059e-05, + "loss": 0.4369, "step": 4945 }, { "epoch": 0.23, - "learning_rate": 1.962964699263982e-05, - "loss": 0.88, + "learning_rate": 2.963022482954457e-05, + "loss": 0.6286, "step": 4950 }, { "epoch": 0.23, - "learning_rate": 1.962917819136468e-05, - "loss": 0.4152, + "learning_rate": 2.9629756759708548e-05, + "loss": 0.3736, "step": 4955 }, { "epoch": 0.23, - "learning_rate": 1.9628709390089543e-05, - "loss": 0.1861, + "learning_rate": 2.9629288689872528e-05, + "loss": 0.0903, "step": 4960 }, { "epoch": 0.23, - "learning_rate": 1.9628240588814403e-05, - "loss": 0.1928, + "learning_rate": 2.9628820620036508e-05, + "loss": 0.2495, "step": 4965 }, { "epoch": 0.23, - "learning_rate": 1.9627771787539263e-05, - "loss": 0.3147, + "learning_rate": 2.962835255020049e-05, + "loss": 0.3487, "step": 4970 }, { "epoch": 0.23, - "learning_rate": 1.9627302986264127e-05, - "loss": 0.2551, + "learning_rate": 2.962788448036447e-05, + "loss": 0.2783, "step": 4975 }, { "epoch": 0.23, - "learning_rate": 1.9626834184988987e-05, - "loss": 0.3313, + "learning_rate": 2.962741641052845e-05, + "loss": 0.3847, "step": 4980 }, { "epoch": 0.23, - "learning_rate": 1.9626365383713846e-05, - "loss": 0.4429, + "learning_rate": 2.9626948340692434e-05, + "loss": 0.3038, "step": 4985 }, { "epoch": 0.23, - "learning_rate": 1.9625896582438706e-05, - "loss": 0.4012, + "learning_rate": 2.9626480270856414e-05, + "loss": 0.5606, "step": 4990 }, { "epoch": 0.23, - "learning_rate": 1.9625427781163566e-05, - "loss": 0.5365, + "learning_rate": 2.9626012201020393e-05, + "loss": 0.4234, "step": 4995 }, { "epoch": 0.23, - "learning_rate": 1.9624958979888426e-05, - "loss": 0.7345, + "learning_rate": 2.9625544131184373e-05, + "loss": 0.8685, "step": 5000 }, { "epoch": 0.23, - "learning_rate": 1.9624490178613286e-05, - "loss": 0.3755, + "learning_rate": 2.9625076061348356e-05, + "loss": 0.3391, "step": 5005 }, { "epoch": 0.23, - "learning_rate": 1.9624021377338146e-05, - "loss": 0.3096, + "learning_rate": 2.9624607991512336e-05, + "loss": 0.2322, "step": 5010 }, { "epoch": 0.23, - "learning_rate": 1.9623552576063006e-05, - "loss": 0.1869, + "learning_rate": 2.9624139921676313e-05, + "loss": 0.2068, "step": 5015 }, { "epoch": 0.23, - "learning_rate": 1.962308377478787e-05, - "loss": 0.3091, + "learning_rate": 2.9623671851840293e-05, + "loss": 0.257, "step": 5020 }, { "epoch": 0.23, - "learning_rate": 1.962261497351273e-05, - "loss": 0.3136, + "learning_rate": 2.9623203782004276e-05, + "loss": 0.3483, "step": 5025 }, { "epoch": 0.23, - "learning_rate": 1.962214617223759e-05, - "loss": 0.2676, + "learning_rate": 2.9622735712168256e-05, + "loss": 0.1754, "step": 5030 }, { "epoch": 0.23, - "learning_rate": 1.962167737096245e-05, - "loss": 0.3055, + "learning_rate": 2.9622267642332235e-05, + "loss": 0.4093, "step": 5035 }, { "epoch": 0.24, - "learning_rate": 1.9621208569687313e-05, - "loss": 0.3587, + "learning_rate": 2.962179957249622e-05, + "loss": 0.3109, "step": 5040 }, { "epoch": 0.24, - "learning_rate": 1.9620739768412172e-05, - "loss": 0.4906, + "learning_rate": 2.96213315026602e-05, + "loss": 0.4415, "step": 5045 }, { "epoch": 0.24, - "learning_rate": 1.9620270967137032e-05, - "loss": 0.7448, + "learning_rate": 2.9620863432824178e-05, + "loss": 0.8288, "step": 5050 }, { "epoch": 0.24, - "learning_rate": 1.9619802165861892e-05, - "loss": 0.3217, + "learning_rate": 2.9620395362988158e-05, + "loss": 0.3313, "step": 5055 }, { "epoch": 0.24, - "learning_rate": 1.9619333364586752e-05, - "loss": 0.1442, + "learning_rate": 2.961992729315214e-05, + "loss": 0.1508, "step": 5060 }, { "epoch": 0.24, - "learning_rate": 1.9618864563311616e-05, - "loss": 0.2075, + "learning_rate": 2.961945922331612e-05, + "loss": 0.1846, "step": 5065 }, { "epoch": 0.24, - "learning_rate": 1.9618395762036476e-05, - "loss": 0.2914, + "learning_rate": 2.96189911534801e-05, + "loss": 0.1819, "step": 5070 }, { "epoch": 0.24, - "learning_rate": 1.9617926960761335e-05, - "loss": 0.3173, + "learning_rate": 2.9618523083644077e-05, + "loss": 0.255, "step": 5075 }, { "epoch": 0.24, - "learning_rate": 1.9617458159486195e-05, - "loss": 0.367, + "learning_rate": 2.961805501380806e-05, + "loss": 0.2447, "step": 5080 }, { "epoch": 0.24, - "learning_rate": 1.9616989358211055e-05, - "loss": 0.3039, + "learning_rate": 2.961758694397204e-05, + "loss": 0.3081, "step": 5085 }, { "epoch": 0.24, - "learning_rate": 1.9616520556935915e-05, - "loss": 0.4082, + "learning_rate": 2.961711887413602e-05, + "loss": 0.2883, "step": 5090 }, { "epoch": 0.24, - "learning_rate": 1.9616051755660775e-05, - "loss": 0.4816, + "learning_rate": 2.9616650804300003e-05, + "loss": 0.5553, "step": 5095 }, { "epoch": 0.24, - "learning_rate": 1.961558295438564e-05, - "loss": 0.8166, + "learning_rate": 2.9616182734463983e-05, + "loss": 0.5633, "step": 5100 }, { "epoch": 0.24, - "learning_rate": 1.96151141531105e-05, - "loss": 0.3055, + "learning_rate": 2.9615714664627963e-05, + "loss": 0.3469, "step": 5105 }, { "epoch": 0.24, - "learning_rate": 1.961464535183536e-05, - "loss": 0.1351, + "learning_rate": 2.9615246594791943e-05, + "loss": 0.0976, "step": 5110 }, { "epoch": 0.24, - "learning_rate": 1.9614176550560218e-05, - "loss": 0.2588, + "learning_rate": 2.9614778524955926e-05, + "loss": 0.1514, "step": 5115 }, { "epoch": 0.24, - "learning_rate": 1.961370774928508e-05, - "loss": 0.2485, + "learning_rate": 2.9614310455119906e-05, + "loss": 0.2376, "step": 5120 }, { "epoch": 0.24, - "learning_rate": 1.961323894800994e-05, - "loss": 0.3697, + "learning_rate": 2.9613842385283886e-05, + "loss": 0.2272, "step": 5125 }, { "epoch": 0.24, - "learning_rate": 1.96127701467348e-05, - "loss": 0.3124, + "learning_rate": 2.9613374315447865e-05, + "loss": 0.2956, "step": 5130 }, { "epoch": 0.24, - "learning_rate": 1.961230134545966e-05, - "loss": 0.2263, + "learning_rate": 2.961290624561185e-05, + "loss": 0.505, "step": 5135 }, { "epoch": 0.24, - "learning_rate": 1.961183254418452e-05, - "loss": 0.4653, + "learning_rate": 2.9612438175775825e-05, + "loss": 0.279, "step": 5140 }, { "epoch": 0.24, - "learning_rate": 1.961136374290938e-05, - "loss": 0.4411, + "learning_rate": 2.9611970105939805e-05, + "loss": 0.5588, "step": 5145 }, { "epoch": 0.24, - "learning_rate": 1.961089494163424e-05, - "loss": 0.7592, + "learning_rate": 2.9611502036103785e-05, + "loss": 0.7466, "step": 5150 }, { "epoch": 0.24, - "learning_rate": 1.96104261403591e-05, - "loss": 0.3478, + "learning_rate": 2.9611033966267768e-05, + "loss": 0.3743, "step": 5155 }, { "epoch": 0.24, - "learning_rate": 1.9609957339083964e-05, - "loss": 0.2249, + "learning_rate": 2.9610565896431748e-05, + "loss": 0.1645, "step": 5160 }, { "epoch": 0.24, - "learning_rate": 1.9609488537808824e-05, - "loss": 0.164, + "learning_rate": 2.9610097826595728e-05, + "loss": 0.145, "step": 5165 }, { "epoch": 0.24, - "learning_rate": 1.9609019736533684e-05, - "loss": 0.2617, + "learning_rate": 2.960962975675971e-05, + "loss": 0.1843, "step": 5170 }, { "epoch": 0.24, - "learning_rate": 1.9608550935258544e-05, - "loss": 0.3287, + "learning_rate": 2.960916168692369e-05, + "loss": 0.225, "step": 5175 }, { "epoch": 0.24, - "learning_rate": 1.9608082133983408e-05, - "loss": 0.3823, + "learning_rate": 2.960869361708767e-05, + "loss": 0.2798, "step": 5180 }, { "epoch": 0.24, - "learning_rate": 1.9607613332708268e-05, - "loss": 0.3231, + "learning_rate": 2.960822554725165e-05, + "loss": 0.3423, "step": 5185 }, { "epoch": 0.24, - "learning_rate": 1.9607144531433127e-05, - "loss": 0.2896, + "learning_rate": 2.9607757477415633e-05, + "loss": 0.3232, "step": 5190 }, { "epoch": 0.24, - "learning_rate": 1.9606675730157987e-05, - "loss": 0.4749, + "learning_rate": 2.9607289407579613e-05, + "loss": 0.3699, "step": 5195 }, { "epoch": 0.24, - "learning_rate": 1.9606206928882847e-05, - "loss": 0.5387, + "learning_rate": 2.9606821337743593e-05, + "loss": 0.7137, "step": 5200 }, { "epoch": 0.24, - "learning_rate": 1.960573812760771e-05, - "loss": 0.2887, + "learning_rate": 2.960635326790757e-05, + "loss": 0.3644, "step": 5205 }, { "epoch": 0.24, - "learning_rate": 1.960526932633257e-05, - "loss": 0.1177, + "learning_rate": 2.9605885198071553e-05, + "loss": 0.1612, "step": 5210 }, { "epoch": 0.24, - "learning_rate": 1.960480052505743e-05, - "loss": 0.2773, + "learning_rate": 2.9605417128235533e-05, + "loss": 0.2446, "step": 5215 }, { "epoch": 0.24, - "learning_rate": 1.960433172378229e-05, - "loss": 0.2782, + "learning_rate": 2.9604949058399512e-05, + "loss": 0.219, "step": 5220 }, { "epoch": 0.24, - "learning_rate": 1.960386292250715e-05, - "loss": 0.317, + "learning_rate": 2.9604480988563496e-05, + "loss": 0.2171, "step": 5225 }, { "epoch": 0.24, - "learning_rate": 1.960339412123201e-05, - "loss": 0.2609, + "learning_rate": 2.9604012918727475e-05, + "loss": 0.2166, "step": 5230 }, { "epoch": 0.24, - "learning_rate": 1.960292531995687e-05, - "loss": 0.3552, + "learning_rate": 2.9603544848891455e-05, + "loss": 0.3706, "step": 5235 }, { "epoch": 0.24, - "learning_rate": 1.960245651868173e-05, - "loss": 0.3464, + "learning_rate": 2.9603076779055435e-05, + "loss": 0.3719, "step": 5240 }, { "epoch": 0.24, - "learning_rate": 1.9601987717406594e-05, - "loss": 0.5567, + "learning_rate": 2.9602608709219418e-05, + "loss": 0.6137, "step": 5245 }, { "epoch": 0.24, - "learning_rate": 1.9601518916131453e-05, - "loss": 0.6603, + "learning_rate": 2.9602140639383398e-05, + "loss": 0.6093, "step": 5250 }, { "epoch": 0.25, - "learning_rate": 1.9601050114856313e-05, - "loss": 0.4007, + "learning_rate": 2.9601672569547378e-05, + "loss": 0.3343, "step": 5255 }, { "epoch": 0.25, - "learning_rate": 1.9600581313581177e-05, - "loss": 0.1196, + "learning_rate": 2.9601204499711358e-05, + "loss": 0.1317, "step": 5260 }, { "epoch": 0.25, - "learning_rate": 1.9600112512306037e-05, - "loss": 0.1901, + "learning_rate": 2.9600736429875337e-05, + "loss": 0.1908, "step": 5265 }, { "epoch": 0.25, - "learning_rate": 1.9599643711030897e-05, - "loss": 0.2177, + "learning_rate": 2.9600268360039317e-05, + "loss": 0.291, "step": 5270 }, { "epoch": 0.25, - "learning_rate": 1.9599174909755757e-05, - "loss": 0.2707, + "learning_rate": 2.9599800290203297e-05, + "loss": 0.2974, "step": 5275 }, { "epoch": 0.25, - "learning_rate": 1.9598706108480616e-05, - "loss": 0.2405, + "learning_rate": 2.959933222036728e-05, + "loss": 0.2858, "step": 5280 }, { "epoch": 0.25, - "learning_rate": 1.9598237307205476e-05, - "loss": 0.249, + "learning_rate": 2.959886415053126e-05, + "loss": 0.3804, "step": 5285 }, { "epoch": 0.25, - "learning_rate": 1.9597768505930336e-05, - "loss": 0.3312, + "learning_rate": 2.959839608069524e-05, + "loss": 0.3363, "step": 5290 }, { "epoch": 0.25, - "learning_rate": 1.95972997046552e-05, - "loss": 0.4266, + "learning_rate": 2.959792801085922e-05, + "loss": 0.4221, "step": 5295 }, { "epoch": 0.25, - "learning_rate": 1.959683090338006e-05, - "loss": 0.6443, + "learning_rate": 2.9597459941023203e-05, + "loss": 0.584, "step": 5300 }, { "epoch": 0.25, - "learning_rate": 1.959636210210492e-05, - "loss": 0.373, + "learning_rate": 2.9596991871187183e-05, + "loss": 0.2908, "step": 5305 }, { "epoch": 0.25, - "learning_rate": 1.959589330082978e-05, - "loss": 0.1447, + "learning_rate": 2.9596523801351163e-05, + "loss": 0.1704, "step": 5310 }, { "epoch": 0.25, - "learning_rate": 1.959542449955464e-05, - "loss": 0.2002, + "learning_rate": 2.9596055731515142e-05, + "loss": 0.1326, "step": 5315 }, { "epoch": 0.25, - "learning_rate": 1.95949556982795e-05, - "loss": 0.1767, + "learning_rate": 2.9595587661679126e-05, + "loss": 0.2063, "step": 5320 }, { "epoch": 0.25, - "learning_rate": 1.9594486897004363e-05, - "loss": 0.2046, + "learning_rate": 2.9595119591843105e-05, + "loss": 0.2612, "step": 5325 }, { "epoch": 0.25, - "learning_rate": 1.9594018095729223e-05, - "loss": 0.2941, + "learning_rate": 2.9594651522007082e-05, + "loss": 0.2536, "step": 5330 }, { "epoch": 0.25, - "learning_rate": 1.9593549294454082e-05, - "loss": 0.2813, + "learning_rate": 2.9594183452171062e-05, + "loss": 0.3799, "step": 5335 }, { "epoch": 0.25, - "learning_rate": 1.9593080493178942e-05, - "loss": 0.3738, + "learning_rate": 2.9593715382335045e-05, + "loss": 0.3562, "step": 5340 }, { "epoch": 0.25, - "learning_rate": 1.9592611691903806e-05, - "loss": 0.6161, + "learning_rate": 2.9593247312499025e-05, + "loss": 0.4044, "step": 5345 }, { "epoch": 0.25, - "learning_rate": 1.9592142890628666e-05, - "loss": 0.8597, + "learning_rate": 2.9592779242663005e-05, + "loss": 0.8405, "step": 5350 }, { "epoch": 0.25, - "learning_rate": 1.9591674089353526e-05, - "loss": 0.3059, + "learning_rate": 2.9592311172826988e-05, + "loss": 0.258, "step": 5355 }, { "epoch": 0.25, - "learning_rate": 1.9591205288078386e-05, - "loss": 0.1577, + "learning_rate": 2.9591843102990968e-05, + "loss": 0.134, "step": 5360 }, { "epoch": 0.25, - "learning_rate": 1.9590736486803245e-05, - "loss": 0.2368, + "learning_rate": 2.9591375033154947e-05, + "loss": 0.1787, "step": 5365 }, { "epoch": 0.25, - "learning_rate": 1.9590267685528105e-05, - "loss": 0.2593, + "learning_rate": 2.9590906963318927e-05, + "loss": 0.227, "step": 5370 }, { "epoch": 0.25, - "learning_rate": 1.9589798884252965e-05, - "loss": 0.3036, + "learning_rate": 2.959043889348291e-05, + "loss": 0.1671, "step": 5375 }, { "epoch": 0.25, - "learning_rate": 1.9589330082977825e-05, - "loss": 0.3428, + "learning_rate": 2.958997082364689e-05, + "loss": 0.1537, "step": 5380 }, { "epoch": 0.25, - "learning_rate": 1.9588861281702685e-05, - "loss": 0.3202, + "learning_rate": 2.958950275381087e-05, + "loss": 0.3416, "step": 5385 }, { "epoch": 0.25, - "learning_rate": 1.958839248042755e-05, - "loss": 0.3252, + "learning_rate": 2.958903468397485e-05, + "loss": 0.4624, "step": 5390 }, { "epoch": 0.25, - "learning_rate": 1.958792367915241e-05, - "loss": 0.4546, + "learning_rate": 2.958856661413883e-05, + "loss": 0.4494, "step": 5395 }, { "epoch": 0.25, - "learning_rate": 1.958745487787727e-05, - "loss": 0.7591, + "learning_rate": 2.958809854430281e-05, + "loss": 0.4924, "step": 5400 }, { "epoch": 0.25, - "learning_rate": 1.9586986076602132e-05, - "loss": 0.3765, + "learning_rate": 2.958763047446679e-05, + "loss": 0.3339, "step": 5405 }, { "epoch": 0.25, - "learning_rate": 1.958651727532699e-05, - "loss": 0.1274, + "learning_rate": 2.9587162404630773e-05, + "loss": 0.1303, "step": 5410 }, { "epoch": 0.25, - "learning_rate": 1.958604847405185e-05, - "loss": 0.2058, + "learning_rate": 2.9586694334794752e-05, + "loss": 0.1634, "step": 5415 }, { "epoch": 0.25, - "learning_rate": 1.958557967277671e-05, - "loss": 0.3033, + "learning_rate": 2.9586226264958732e-05, + "loss": 0.1576, "step": 5420 }, { "epoch": 0.25, - "learning_rate": 1.958511087150157e-05, - "loss": 0.1879, + "learning_rate": 2.9585758195122712e-05, + "loss": 0.1873, "step": 5425 }, { "epoch": 0.25, - "learning_rate": 1.958464207022643e-05, - "loss": 0.3354, + "learning_rate": 2.9585290125286695e-05, + "loss": 0.2938, "step": 5430 }, { "epoch": 0.25, - "learning_rate": 1.9584173268951295e-05, - "loss": 0.312, + "learning_rate": 2.9584822055450675e-05, + "loss": 0.4097, "step": 5435 }, { "epoch": 0.25, - "learning_rate": 1.9583704467676155e-05, - "loss": 0.3897, + "learning_rate": 2.9584353985614655e-05, + "loss": 0.2995, "step": 5440 }, { "epoch": 0.25, - "learning_rate": 1.9583235666401015e-05, - "loss": 0.5298, + "learning_rate": 2.9583885915778635e-05, + "loss": 0.6294, "step": 5445 }, { "epoch": 0.25, - "learning_rate": 1.9582766865125875e-05, - "loss": 0.8369, + "learning_rate": 2.9583417845942618e-05, + "loss": 0.7594, "step": 5450 }, { "epoch": 0.25, - "learning_rate": 1.9582298063850734e-05, - "loss": 0.2992, + "learning_rate": 2.9582949776106594e-05, + "loss": 0.3177, "step": 5455 }, { "epoch": 0.25, - "learning_rate": 1.9581829262575594e-05, - "loss": 0.1595, + "learning_rate": 2.9582481706270574e-05, + "loss": 0.1492, "step": 5460 }, { "epoch": 0.26, - "learning_rate": 1.9581360461300454e-05, - "loss": 0.1288, + "learning_rate": 2.9582013636434557e-05, + "loss": 0.1839, "step": 5465 }, { "epoch": 0.26, - "learning_rate": 1.9580891660025318e-05, - "loss": 0.2173, + "learning_rate": 2.9581545566598537e-05, + "loss": 0.183, "step": 5470 }, { "epoch": 0.26, - "learning_rate": 1.9580422858750178e-05, - "loss": 0.2598, + "learning_rate": 2.9581077496762517e-05, + "loss": 0.251, "step": 5475 }, { "epoch": 0.26, - "learning_rate": 1.9579954057475038e-05, - "loss": 0.3008, + "learning_rate": 2.9580609426926497e-05, + "loss": 0.3105, "step": 5480 }, { "epoch": 0.26, - "learning_rate": 1.95794852561999e-05, - "loss": 0.4085, + "learning_rate": 2.958014135709048e-05, + "loss": 0.3272, "step": 5485 }, { "epoch": 0.26, - "learning_rate": 1.957901645492476e-05, - "loss": 0.3574, + "learning_rate": 2.957967328725446e-05, + "loss": 0.3467, "step": 5490 }, { "epoch": 0.26, - "learning_rate": 1.957854765364962e-05, - "loss": 0.5726, + "learning_rate": 2.957920521741844e-05, + "loss": 0.4998, "step": 5495 }, { "epoch": 0.26, - "learning_rate": 1.957807885237448e-05, - "loss": 0.7328, + "learning_rate": 2.957873714758242e-05, + "loss": 0.5874, "step": 5500 }, { "epoch": 0.26, - "learning_rate": 1.957761005109934e-05, - "loss": 0.3389, + "learning_rate": 2.9578269077746403e-05, + "loss": 0.4178, "step": 5505 }, { "epoch": 0.26, - "learning_rate": 1.95771412498242e-05, - "loss": 0.1435, + "learning_rate": 2.9577801007910382e-05, + "loss": 0.0913, "step": 5510 }, { "epoch": 0.26, - "learning_rate": 1.957667244854906e-05, - "loss": 0.1596, + "learning_rate": 2.9577332938074362e-05, + "loss": 0.1883, "step": 5515 }, { "epoch": 0.26, - "learning_rate": 1.957620364727392e-05, - "loss": 0.1642, + "learning_rate": 2.9576864868238342e-05, + "loss": 0.2468, "step": 5520 }, { "epoch": 0.26, - "learning_rate": 1.957573484599878e-05, - "loss": 0.2219, + "learning_rate": 2.9576396798402322e-05, + "loss": 0.1948, "step": 5525 }, { "epoch": 0.26, - "learning_rate": 1.9575266044723644e-05, - "loss": 0.2159, + "learning_rate": 2.9575928728566302e-05, + "loss": 0.203, "step": 5530 }, { "epoch": 0.26, - "learning_rate": 1.9574797243448504e-05, - "loss": 0.2886, + "learning_rate": 2.957546065873028e-05, + "loss": 0.2808, "step": 5535 }, { "epoch": 0.26, - "learning_rate": 1.9574328442173363e-05, - "loss": 0.2399, + "learning_rate": 2.9574992588894265e-05, + "loss": 0.3499, "step": 5540 }, { "epoch": 0.26, - "learning_rate": 1.9573859640898223e-05, - "loss": 0.3391, + "learning_rate": 2.9574524519058245e-05, + "loss": 0.4629, "step": 5545 }, { "epoch": 0.26, - "learning_rate": 1.9573390839623087e-05, - "loss": 0.7245, + "learning_rate": 2.9574056449222224e-05, + "loss": 0.6269, "step": 5550 }, { "epoch": 0.26, - "learning_rate": 1.9572922038347947e-05, - "loss": 0.3067, + "learning_rate": 2.9573588379386204e-05, + "loss": 0.3445, "step": 5555 }, { "epoch": 0.26, - "learning_rate": 1.9572453237072807e-05, - "loss": 0.0993, + "learning_rate": 2.9573120309550187e-05, + "loss": 0.1888, "step": 5560 }, { "epoch": 0.26, - "learning_rate": 1.9571984435797667e-05, - "loss": 0.24, + "learning_rate": 2.9572652239714167e-05, + "loss": 0.2382, "step": 5565 }, { "epoch": 0.26, - "learning_rate": 1.9571515634522526e-05, - "loss": 0.1977, + "learning_rate": 2.9572184169878147e-05, + "loss": 0.2049, "step": 5570 }, { "epoch": 0.26, - "learning_rate": 1.957104683324739e-05, - "loss": 0.2716, + "learning_rate": 2.9571716100042127e-05, + "loss": 0.2395, "step": 5575 }, { "epoch": 0.26, - "learning_rate": 1.957057803197225e-05, - "loss": 0.3497, + "learning_rate": 2.9571248030206107e-05, + "loss": 0.2236, "step": 5580 }, { "epoch": 0.26, - "learning_rate": 1.957010923069711e-05, - "loss": 0.2877, + "learning_rate": 2.9570779960370086e-05, + "loss": 0.2339, "step": 5585 }, { "epoch": 0.26, - "learning_rate": 1.956964042942197e-05, - "loss": 0.3118, + "learning_rate": 2.9570311890534066e-05, + "loss": 0.2487, "step": 5590 }, { "epoch": 0.26, - "learning_rate": 1.956917162814683e-05, - "loss": 0.4374, + "learning_rate": 2.956984382069805e-05, + "loss": 0.3833, "step": 5595 }, { "epoch": 0.26, - "learning_rate": 1.956870282687169e-05, - "loss": 0.7082, + "learning_rate": 2.956937575086203e-05, + "loss": 0.5892, "step": 5600 }, { "epoch": 0.26, - "learning_rate": 1.956823402559655e-05, - "loss": 0.2997, + "learning_rate": 2.956890768102601e-05, + "loss": 0.3309, "step": 5605 }, { "epoch": 0.26, - "learning_rate": 1.9567765224321413e-05, - "loss": 0.1337, + "learning_rate": 2.956843961118999e-05, + "loss": 0.1288, "step": 5610 }, { "epoch": 0.26, - "learning_rate": 1.9567296423046273e-05, - "loss": 0.1612, + "learning_rate": 2.9567971541353972e-05, + "loss": 0.1725, "step": 5615 }, { "epoch": 0.26, - "learning_rate": 1.9566827621771133e-05, - "loss": 0.1726, + "learning_rate": 2.9567503471517952e-05, + "loss": 0.2006, "step": 5620 }, { "epoch": 0.26, - "learning_rate": 1.9566358820495996e-05, - "loss": 0.2525, + "learning_rate": 2.9567035401681932e-05, + "loss": 0.2777, "step": 5625 }, { "epoch": 0.26, - "learning_rate": 1.9565890019220856e-05, - "loss": 0.2697, + "learning_rate": 2.956656733184591e-05, + "loss": 0.2572, "step": 5630 }, { "epoch": 0.26, - "learning_rate": 1.9565421217945716e-05, - "loss": 0.3576, + "learning_rate": 2.9566099262009895e-05, + "loss": 0.3117, "step": 5635 }, { "epoch": 0.26, - "learning_rate": 1.9564952416670576e-05, - "loss": 0.3216, + "learning_rate": 2.9565631192173875e-05, + "loss": 0.2662, "step": 5640 }, { "epoch": 0.26, - "learning_rate": 1.9564483615395436e-05, - "loss": 0.4422, + "learning_rate": 2.956516312233785e-05, + "loss": 0.4618, "step": 5645 }, { "epoch": 0.26, - "learning_rate": 1.9564014814120296e-05, - "loss": 0.6881, + "learning_rate": 2.9564695052501834e-05, + "loss": 0.638, "step": 5650 }, { "epoch": 0.26, - "learning_rate": 1.9563546012845156e-05, - "loss": 0.341, + "learning_rate": 2.9564226982665814e-05, + "loss": 0.3631, "step": 5655 }, { "epoch": 0.26, - "learning_rate": 1.9563077211570015e-05, - "loss": 0.2462, + "learning_rate": 2.9563758912829794e-05, + "loss": 0.1522, "step": 5660 }, { "epoch": 0.26, - "learning_rate": 1.9562608410294875e-05, - "loss": 0.1833, + "learning_rate": 2.9563290842993774e-05, + "loss": 0.2193, "step": 5665 }, { "epoch": 0.26, - "learning_rate": 1.956213960901974e-05, - "loss": 0.1736, + "learning_rate": 2.9562822773157757e-05, + "loss": 0.2372, "step": 5670 }, { "epoch": 0.26, - "learning_rate": 1.95616708077446e-05, - "loss": 0.3108, + "learning_rate": 2.9562354703321737e-05, + "loss": 0.2513, "step": 5675 }, { "epoch": 0.27, - "learning_rate": 1.956120200646946e-05, - "loss": 0.1955, + "learning_rate": 2.9561886633485717e-05, + "loss": 0.1802, "step": 5680 }, { "epoch": 0.27, - "learning_rate": 1.956073320519432e-05, - "loss": 0.3466, + "learning_rate": 2.9561418563649696e-05, + "loss": 0.2209, "step": 5685 }, { "epoch": 0.27, - "learning_rate": 1.9560264403919182e-05, - "loss": 0.3074, + "learning_rate": 2.956095049381368e-05, + "loss": 0.3681, "step": 5690 }, { "epoch": 0.27, - "learning_rate": 1.9559795602644042e-05, - "loss": 0.4682, + "learning_rate": 2.956048242397766e-05, + "loss": 0.4923, "step": 5695 }, { "epoch": 0.27, - "learning_rate": 1.95593268013689e-05, - "loss": 0.6463, + "learning_rate": 2.956001435414164e-05, + "loss": 0.5072, "step": 5700 }, { "epoch": 0.27, - "learning_rate": 1.955885800009376e-05, - "loss": 0.327, + "learning_rate": 2.9559546284305622e-05, + "loss": 0.3223, "step": 5705 }, { "epoch": 0.27, - "learning_rate": 1.955838919881862e-05, - "loss": 0.17, + "learning_rate": 2.95590782144696e-05, + "loss": 0.1211, "step": 5710 }, { "epoch": 0.27, - "learning_rate": 1.9557920397543485e-05, - "loss": 0.1442, + "learning_rate": 2.955861014463358e-05, + "loss": 0.2507, "step": 5715 }, { "epoch": 0.27, - "learning_rate": 1.9557451596268345e-05, - "loss": 0.2012, + "learning_rate": 2.955814207479756e-05, + "loss": 0.1866, "step": 5720 }, { "epoch": 0.27, - "learning_rate": 1.9556982794993205e-05, - "loss": 0.2722, + "learning_rate": 2.9557674004961542e-05, + "loss": 0.2197, "step": 5725 }, { "epoch": 0.27, - "learning_rate": 1.9556513993718065e-05, - "loss": 0.2928, + "learning_rate": 2.955720593512552e-05, + "loss": 0.2306, "step": 5730 }, { "epoch": 0.27, - "learning_rate": 1.9556045192442925e-05, - "loss": 0.3275, + "learning_rate": 2.95567378652895e-05, + "loss": 0.2568, "step": 5735 }, { "epoch": 0.27, - "learning_rate": 1.9555576391167785e-05, - "loss": 0.462, + "learning_rate": 2.955626979545348e-05, + "loss": 0.2844, "step": 5740 }, { "epoch": 0.27, - "learning_rate": 1.9555107589892644e-05, - "loss": 0.38, + "learning_rate": 2.9555801725617464e-05, + "loss": 0.346, "step": 5745 }, { "epoch": 0.27, - "learning_rate": 1.9554638788617504e-05, - "loss": 0.7203, + "learning_rate": 2.9555333655781444e-05, + "loss": 0.545, "step": 5750 }, { "epoch": 0.27, - "learning_rate": 1.9554169987342368e-05, - "loss": 0.3981, + "learning_rate": 2.9554865585945424e-05, + "loss": 0.3282, "step": 5755 }, { "epoch": 0.27, - "learning_rate": 1.9553701186067228e-05, - "loss": 0.1104, + "learning_rate": 2.9554397516109404e-05, + "loss": 0.1594, "step": 5760 }, { "epoch": 0.27, - "learning_rate": 1.9553232384792088e-05, - "loss": 0.2247, + "learning_rate": 2.9553929446273387e-05, + "loss": 0.1354, "step": 5765 }, { "epoch": 0.27, - "learning_rate": 1.955276358351695e-05, - "loss": 0.1969, + "learning_rate": 2.9553461376437363e-05, + "loss": 0.2147, "step": 5770 }, { "epoch": 0.27, - "learning_rate": 1.955229478224181e-05, - "loss": 0.2883, + "learning_rate": 2.9552993306601343e-05, + "loss": 0.2238, "step": 5775 }, { "epoch": 0.27, - "learning_rate": 1.955182598096667e-05, - "loss": 0.2871, + "learning_rate": 2.9552525236765326e-05, + "loss": 0.2884, "step": 5780 }, { "epoch": 0.27, - "learning_rate": 1.955135717969153e-05, - "loss": 0.2452, + "learning_rate": 2.9552057166929306e-05, + "loss": 0.1929, "step": 5785 }, { "epoch": 0.27, - "learning_rate": 1.955088837841639e-05, - "loss": 0.3934, + "learning_rate": 2.9551589097093286e-05, + "loss": 0.3401, "step": 5790 }, { "epoch": 0.27, - "learning_rate": 1.955041957714125e-05, - "loss": 0.4091, + "learning_rate": 2.9551121027257266e-05, + "loss": 0.5252, "step": 5795 }, { "epoch": 0.27, - "learning_rate": 1.954995077586611e-05, - "loss": 0.7059, + "learning_rate": 2.955065295742125e-05, + "loss": 0.7707, "step": 5800 }, { "epoch": 0.27, - "learning_rate": 1.954948197459097e-05, - "loss": 0.3207, + "learning_rate": 2.955018488758523e-05, + "loss": 0.3297, "step": 5805 }, { "epoch": 0.27, - "learning_rate": 1.9549013173315834e-05, - "loss": 0.1067, + "learning_rate": 2.954971681774921e-05, + "loss": 0.1562, "step": 5810 }, { "epoch": 0.27, - "learning_rate": 1.9548544372040694e-05, - "loss": 0.1733, + "learning_rate": 2.954924874791319e-05, + "loss": 0.1504, "step": 5815 }, { "epoch": 0.27, - "learning_rate": 1.9548075570765554e-05, - "loss": 0.1885, + "learning_rate": 2.9548780678077172e-05, + "loss": 0.1697, "step": 5820 }, { "epoch": 0.27, - "learning_rate": 1.9547606769490414e-05, - "loss": 0.1855, + "learning_rate": 2.954831260824115e-05, + "loss": 0.3208, "step": 5825 }, { "epoch": 0.27, - "learning_rate": 1.9547137968215274e-05, - "loss": 0.2796, + "learning_rate": 2.954784453840513e-05, + "loss": 0.2643, "step": 5830 }, { "epoch": 0.27, - "learning_rate": 1.9546669166940137e-05, - "loss": 0.3381, + "learning_rate": 2.954737646856911e-05, + "loss": 0.3444, "step": 5835 }, { "epoch": 0.27, - "learning_rate": 1.9546200365664997e-05, - "loss": 0.4255, + "learning_rate": 2.954690839873309e-05, + "loss": 0.3433, "step": 5840 }, { "epoch": 0.27, - "learning_rate": 1.9545731564389857e-05, - "loss": 0.3635, + "learning_rate": 2.954644032889707e-05, + "loss": 0.42, "step": 5845 }, { "epoch": 0.27, - "learning_rate": 1.9545262763114717e-05, - "loss": 0.6507, + "learning_rate": 2.954597225906105e-05, + "loss": 0.5641, "step": 5850 }, { "epoch": 0.27, - "learning_rate": 1.954479396183958e-05, - "loss": 0.3462, + "learning_rate": 2.9545504189225034e-05, + "loss": 0.2726, "step": 5855 }, { "epoch": 0.27, - "learning_rate": 1.954432516056444e-05, - "loss": 0.1309, + "learning_rate": 2.9545036119389014e-05, + "loss": 0.1108, "step": 5860 }, { "epoch": 0.27, - "learning_rate": 1.95438563592893e-05, - "loss": 0.1158, + "learning_rate": 2.9544568049552994e-05, + "loss": 0.2075, "step": 5865 }, { "epoch": 0.27, - "learning_rate": 1.954338755801416e-05, - "loss": 0.1644, + "learning_rate": 2.9544099979716973e-05, + "loss": 0.1593, "step": 5870 }, { "epoch": 0.27, - "learning_rate": 1.954291875673902e-05, - "loss": 0.2895, + "learning_rate": 2.9543631909880957e-05, + "loss": 0.2366, "step": 5875 }, { "epoch": 0.27, - "learning_rate": 1.954244995546388e-05, - "loss": 0.3314, + "learning_rate": 2.9543163840044936e-05, + "loss": 0.3362, "step": 5880 }, { "epoch": 0.27, - "learning_rate": 1.954198115418874e-05, - "loss": 0.2619, + "learning_rate": 2.9542695770208916e-05, + "loss": 0.3316, "step": 5885 }, { "epoch": 0.27, - "learning_rate": 1.95415123529136e-05, - "loss": 0.5068, + "learning_rate": 2.95422277003729e-05, + "loss": 0.3774, "step": 5890 }, { "epoch": 0.28, - "learning_rate": 1.954104355163846e-05, - "loss": 0.3092, + "learning_rate": 2.954175963053688e-05, + "loss": 0.4141, "step": 5895 }, { "epoch": 0.28, - "learning_rate": 1.9540574750363323e-05, - "loss": 0.6608, + "learning_rate": 2.9541291560700856e-05, + "loss": 0.7402, "step": 5900 }, { "epoch": 0.28, - "learning_rate": 1.9540105949088183e-05, - "loss": 0.3003, + "learning_rate": 2.9540823490864835e-05, + "loss": 0.3221, "step": 5905 }, { "epoch": 0.28, - "learning_rate": 1.9539637147813043e-05, - "loss": 0.1948, + "learning_rate": 2.954035542102882e-05, + "loss": 0.1264, "step": 5910 }, { "epoch": 0.28, - "learning_rate": 1.9539168346537906e-05, - "loss": 0.2106, + "learning_rate": 2.95398873511928e-05, + "loss": 0.1677, "step": 5915 }, { "epoch": 0.28, - "learning_rate": 1.9538699545262766e-05, - "loss": 0.1636, + "learning_rate": 2.953941928135678e-05, + "loss": 0.2109, "step": 5920 }, { "epoch": 0.28, - "learning_rate": 1.9538230743987626e-05, - "loss": 0.2327, + "learning_rate": 2.9538951211520758e-05, + "loss": 0.2547, "step": 5925 }, { "epoch": 0.28, - "learning_rate": 1.9537761942712486e-05, - "loss": 0.3433, + "learning_rate": 2.953848314168474e-05, + "loss": 0.2995, "step": 5930 }, { "epoch": 0.28, - "learning_rate": 1.9537293141437346e-05, - "loss": 0.2788, + "learning_rate": 2.953801507184872e-05, + "loss": 0.3493, "step": 5935 }, { "epoch": 0.28, - "learning_rate": 1.9536824340162206e-05, - "loss": 0.3844, + "learning_rate": 2.95375470020127e-05, + "loss": 0.367, "step": 5940 }, { "epoch": 0.28, - "learning_rate": 1.9536355538887066e-05, - "loss": 0.5153, + "learning_rate": 2.953707893217668e-05, + "loss": 0.5445, "step": 5945 }, { "epoch": 0.28, - "learning_rate": 1.953588673761193e-05, - "loss": 0.8266, + "learning_rate": 2.9536610862340664e-05, + "loss": 0.6944, "step": 5950 }, { "epoch": 0.28, - "learning_rate": 1.953541793633679e-05, - "loss": 0.3474, + "learning_rate": 2.9536142792504644e-05, + "loss": 0.3739, "step": 5955 }, { "epoch": 0.28, - "learning_rate": 1.953494913506165e-05, - "loss": 0.1229, + "learning_rate": 2.953567472266862e-05, + "loss": 0.1107, "step": 5960 }, { "epoch": 0.28, - "learning_rate": 1.953448033378651e-05, - "loss": 0.2005, + "learning_rate": 2.9535206652832603e-05, + "loss": 0.2579, "step": 5965 }, { "epoch": 0.28, - "learning_rate": 1.953401153251137e-05, - "loss": 0.1992, + "learning_rate": 2.9534738582996583e-05, + "loss": 0.1703, "step": 5970 }, { "epoch": 0.28, - "learning_rate": 1.9533542731236232e-05, - "loss": 0.2129, + "learning_rate": 2.9534270513160563e-05, + "loss": 0.2861, "step": 5975 }, { "epoch": 0.28, - "learning_rate": 1.9533073929961092e-05, - "loss": 0.2354, + "learning_rate": 2.9533802443324543e-05, + "loss": 0.3869, "step": 5980 }, { "epoch": 0.28, - "learning_rate": 1.9532605128685952e-05, - "loss": 0.3105, + "learning_rate": 2.9533334373488526e-05, + "loss": 0.2519, "step": 5985 }, { "epoch": 0.28, - "learning_rate": 1.9532136327410812e-05, - "loss": 0.3057, + "learning_rate": 2.9532866303652506e-05, + "loss": 0.3323, "step": 5990 }, { "epoch": 0.28, - "learning_rate": 1.9531667526135675e-05, - "loss": 0.4136, + "learning_rate": 2.9532398233816486e-05, + "loss": 0.4471, "step": 5995 }, { "epoch": 0.28, - "learning_rate": 1.9531198724860535e-05, - "loss": 0.6449, + "learning_rate": 2.9531930163980466e-05, + "loss": 0.6038, "step": 6000 }, { "epoch": 0.28, - "learning_rate": 1.9530729923585395e-05, - "loss": 0.2958, + "learning_rate": 2.953146209414445e-05, + "loss": 0.3086, "step": 6005 }, { "epoch": 0.28, - "learning_rate": 1.9530261122310255e-05, - "loss": 0.1949, + "learning_rate": 2.953099402430843e-05, + "loss": 0.214, "step": 6010 }, { "epoch": 0.28, - "learning_rate": 1.9529792321035115e-05, - "loss": 0.2033, + "learning_rate": 2.953052595447241e-05, + "loss": 0.198, "step": 6015 }, { "epoch": 0.28, - "learning_rate": 1.9529323519759975e-05, - "loss": 0.1695, + "learning_rate": 2.953005788463639e-05, + "loss": 0.1601, "step": 6020 }, { "epoch": 0.28, - "learning_rate": 1.9528854718484835e-05, - "loss": 0.2202, + "learning_rate": 2.9529589814800368e-05, + "loss": 0.1962, "step": 6025 }, { "epoch": 0.28, - "learning_rate": 1.9528385917209695e-05, - "loss": 0.4026, + "learning_rate": 2.9529121744964348e-05, + "loss": 0.3013, "step": 6030 }, { "epoch": 0.28, - "learning_rate": 1.9527917115934555e-05, - "loss": 0.2653, + "learning_rate": 2.9528653675128328e-05, + "loss": 0.4289, "step": 6035 }, { "epoch": 0.28, - "learning_rate": 1.9527448314659418e-05, - "loss": 0.4601, + "learning_rate": 2.952818560529231e-05, + "loss": 0.2674, "step": 6040 }, { "epoch": 0.28, - "learning_rate": 1.9526979513384278e-05, - "loss": 0.4971, + "learning_rate": 2.952771753545629e-05, + "loss": 0.4578, "step": 6045 }, { "epoch": 0.28, - "learning_rate": 1.9526510712109138e-05, - "loss": 0.6803, + "learning_rate": 2.952724946562027e-05, + "loss": 0.7972, "step": 6050 }, { "epoch": 0.28, - "learning_rate": 1.9526041910834e-05, - "loss": 0.3679, + "learning_rate": 2.952678139578425e-05, + "loss": 0.3369, "step": 6055 }, { "epoch": 0.28, - "learning_rate": 1.952557310955886e-05, - "loss": 0.0853, + "learning_rate": 2.9526313325948234e-05, + "loss": 0.1701, "step": 6060 }, { "epoch": 0.28, - "learning_rate": 1.952510430828372e-05, - "loss": 0.2384, + "learning_rate": 2.9525845256112213e-05, + "loss": 0.228, "step": 6065 }, { "epoch": 0.28, - "learning_rate": 1.952463550700858e-05, - "loss": 0.1462, + "learning_rate": 2.9525377186276193e-05, + "loss": 0.2577, "step": 6070 }, { "epoch": 0.28, - "learning_rate": 1.952416670573344e-05, - "loss": 0.2165, + "learning_rate": 2.9524909116440176e-05, + "loss": 0.2919, "step": 6075 }, { "epoch": 0.28, - "learning_rate": 1.95236979044583e-05, - "loss": 0.2811, + "learning_rate": 2.9524441046604156e-05, + "loss": 0.1989, "step": 6080 }, { "epoch": 0.28, - "learning_rate": 1.9523229103183164e-05, - "loss": 0.3491, + "learning_rate": 2.9523972976768136e-05, + "loss": 0.3102, "step": 6085 }, { "epoch": 0.28, - "learning_rate": 1.9522760301908024e-05, - "loss": 0.4301, + "learning_rate": 2.9523504906932112e-05, + "loss": 0.413, "step": 6090 }, { "epoch": 0.28, - "learning_rate": 1.9522291500632884e-05, - "loss": 0.4114, + "learning_rate": 2.9523036837096096e-05, + "loss": 0.3839, "step": 6095 }, { "epoch": 0.28, - "learning_rate": 1.9521822699357744e-05, - "loss": 0.5665, + "learning_rate": 2.9522568767260075e-05, + "loss": 0.751, "step": 6100 }, { "epoch": 0.28, - "learning_rate": 1.9521353898082604e-05, - "loss": 0.3065, + "learning_rate": 2.9522100697424055e-05, + "loss": 0.2672, "step": 6105 }, { "epoch": 0.29, - "learning_rate": 1.9520885096807464e-05, - "loss": 0.1459, + "learning_rate": 2.9521632627588035e-05, + "loss": 0.1398, "step": 6110 }, { "epoch": 0.29, - "learning_rate": 1.9520416295532324e-05, - "loss": 0.1675, + "learning_rate": 2.952116455775202e-05, + "loss": 0.2228, "step": 6115 }, { "epoch": 0.29, - "learning_rate": 1.9519947494257187e-05, - "loss": 0.1782, + "learning_rate": 2.9520696487915998e-05, + "loss": 0.1747, "step": 6120 }, { "epoch": 0.29, - "learning_rate": 1.9519478692982047e-05, - "loss": 0.2657, + "learning_rate": 2.9520228418079978e-05, + "loss": 0.2612, "step": 6125 }, { "epoch": 0.29, - "learning_rate": 1.9519009891706907e-05, - "loss": 0.2721, + "learning_rate": 2.9519760348243958e-05, + "loss": 0.261, "step": 6130 }, { "epoch": 0.29, - "learning_rate": 1.951854109043177e-05, - "loss": 0.2553, + "learning_rate": 2.951929227840794e-05, + "loss": 0.3475, "step": 6135 }, { "epoch": 0.29, - "learning_rate": 1.951807228915663e-05, - "loss": 0.3509, + "learning_rate": 2.951882420857192e-05, + "loss": 0.3063, "step": 6140 }, { "epoch": 0.29, - "learning_rate": 1.951760348788149e-05, - "loss": 0.5497, + "learning_rate": 2.95183561387359e-05, + "loss": 0.524, "step": 6145 }, { "epoch": 0.29, - "learning_rate": 1.951713468660635e-05, - "loss": 0.8548, + "learning_rate": 2.951788806889988e-05, + "loss": 0.7232, "step": 6150 }, { "epoch": 0.29, - "learning_rate": 1.951666588533121e-05, - "loss": 0.3924, + "learning_rate": 2.951741999906386e-05, + "loss": 0.2241, "step": 6155 }, { "epoch": 0.29, - "learning_rate": 1.951619708405607e-05, - "loss": 0.2242, + "learning_rate": 2.951695192922784e-05, + "loss": 0.1908, "step": 6160 }, { "epoch": 0.29, - "learning_rate": 1.951572828278093e-05, - "loss": 0.1475, + "learning_rate": 2.951648385939182e-05, + "loss": 0.1571, "step": 6165 }, { "epoch": 0.29, - "learning_rate": 1.951525948150579e-05, - "loss": 0.2086, + "learning_rate": 2.9516015789555803e-05, + "loss": 0.2126, "step": 6170 }, { "epoch": 0.29, - "learning_rate": 1.951479068023065e-05, - "loss": 0.1798, + "learning_rate": 2.9515547719719783e-05, + "loss": 0.2266, "step": 6175 }, { "epoch": 0.29, - "learning_rate": 1.9514321878955513e-05, - "loss": 0.269, + "learning_rate": 2.9515079649883763e-05, + "loss": 0.3131, "step": 6180 }, { "epoch": 0.29, - "learning_rate": 1.9513853077680373e-05, - "loss": 0.2895, + "learning_rate": 2.9514611580047743e-05, + "loss": 0.2176, "step": 6185 }, { "epoch": 0.29, - "learning_rate": 1.9513384276405233e-05, - "loss": 0.3678, + "learning_rate": 2.9514143510211726e-05, + "loss": 0.307, "step": 6190 }, { "epoch": 0.29, - "learning_rate": 1.9512915475130093e-05, - "loss": 0.3819, + "learning_rate": 2.9513675440375706e-05, + "loss": 0.4321, "step": 6195 }, { "epoch": 0.29, - "learning_rate": 1.9512446673854956e-05, - "loss": 0.4726, + "learning_rate": 2.9513207370539685e-05, + "loss": 0.6908, "step": 6200 }, { "epoch": 0.29, - "learning_rate": 1.9511977872579816e-05, - "loss": 0.309, + "learning_rate": 2.951273930070367e-05, + "loss": 0.3091, "step": 6205 }, { "epoch": 0.29, - "learning_rate": 1.9511509071304676e-05, - "loss": 0.1438, + "learning_rate": 2.951227123086765e-05, + "loss": 0.1268, "step": 6210 }, { "epoch": 0.29, - "learning_rate": 1.9511040270029536e-05, - "loss": 0.2037, + "learning_rate": 2.9511803161031625e-05, + "loss": 0.1892, "step": 6215 }, { "epoch": 0.29, - "learning_rate": 1.9510571468754396e-05, - "loss": 0.2466, + "learning_rate": 2.9511335091195605e-05, + "loss": 0.2528, "step": 6220 }, { "epoch": 0.29, - "learning_rate": 1.951010266747926e-05, - "loss": 0.2742, + "learning_rate": 2.9510867021359588e-05, + "loss": 0.2706, "step": 6225 }, { "epoch": 0.29, - "learning_rate": 1.950963386620412e-05, - "loss": 0.26, + "learning_rate": 2.9510398951523568e-05, + "loss": 0.2767, "step": 6230 }, { "epoch": 0.29, - "learning_rate": 1.950916506492898e-05, - "loss": 0.2899, + "learning_rate": 2.9509930881687547e-05, + "loss": 0.3146, "step": 6235 }, { "epoch": 0.29, - "learning_rate": 1.950869626365384e-05, - "loss": 0.4501, + "learning_rate": 2.9509462811851527e-05, + "loss": 0.3389, "step": 6240 }, { "epoch": 0.29, - "learning_rate": 1.95082274623787e-05, - "loss": 0.5071, + "learning_rate": 2.950899474201551e-05, + "loss": 0.4984, "step": 6245 }, { "epoch": 0.29, - "learning_rate": 1.950775866110356e-05, - "loss": 0.6465, + "learning_rate": 2.950852667217949e-05, + "loss": 0.5109, "step": 6250 }, { "epoch": 0.29, - "learning_rate": 1.950728985982842e-05, - "loss": 0.3001, + "learning_rate": 2.950805860234347e-05, + "loss": 0.2741, "step": 6255 }, { "epoch": 0.29, - "learning_rate": 1.950682105855328e-05, - "loss": 0.136, + "learning_rate": 2.9507590532507453e-05, + "loss": 0.1101, "step": 6260 }, { "epoch": 0.29, - "learning_rate": 1.9506352257278142e-05, - "loss": 0.1448, + "learning_rate": 2.9507122462671433e-05, + "loss": 0.1227, "step": 6265 }, { "epoch": 0.29, - "learning_rate": 1.9505883456003002e-05, - "loss": 0.2306, + "learning_rate": 2.9506654392835413e-05, + "loss": 0.2644, "step": 6270 }, { "epoch": 0.29, - "learning_rate": 1.9505414654727862e-05, - "loss": 0.1849, + "learning_rate": 2.9506186322999393e-05, + "loss": 0.1347, "step": 6275 }, { "epoch": 0.29, - "learning_rate": 1.9504945853452725e-05, - "loss": 0.217, + "learning_rate": 2.9505718253163373e-05, + "loss": 0.2553, "step": 6280 }, { "epoch": 0.29, - "learning_rate": 1.9504477052177585e-05, - "loss": 0.3257, + "learning_rate": 2.9505250183327352e-05, + "loss": 0.2612, "step": 6285 }, { "epoch": 0.29, - "learning_rate": 1.9504008250902445e-05, - "loss": 0.4219, + "learning_rate": 2.9504782113491332e-05, + "loss": 0.4636, "step": 6290 }, { "epoch": 0.29, - "learning_rate": 1.9503539449627305e-05, - "loss": 0.4743, + "learning_rate": 2.9504314043655312e-05, + "loss": 0.3807, "step": 6295 }, { "epoch": 0.29, - "learning_rate": 1.9503070648352165e-05, - "loss": 0.6075, + "learning_rate": 2.9503845973819295e-05, + "loss": 0.8262, "step": 6300 }, { "epoch": 0.29, - "learning_rate": 1.9502601847077025e-05, - "loss": 0.3695, + "learning_rate": 2.9503377903983275e-05, + "loss": 0.2826, "step": 6305 }, { "epoch": 0.29, - "learning_rate": 1.9502133045801885e-05, - "loss": 0.1424, + "learning_rate": 2.9502909834147255e-05, + "loss": 0.1274, "step": 6310 }, { "epoch": 0.29, - "learning_rate": 1.9501664244526745e-05, - "loss": 0.2252, + "learning_rate": 2.9502441764311238e-05, + "loss": 0.177, "step": 6315 }, { "epoch": 0.29, - "learning_rate": 1.9501195443251608e-05, - "loss": 0.2399, + "learning_rate": 2.9501973694475218e-05, + "loss": 0.2008, "step": 6320 }, { "epoch": 0.3, - "learning_rate": 1.9500726641976468e-05, - "loss": 0.247, + "learning_rate": 2.9501505624639198e-05, + "loss": 0.2339, "step": 6325 }, { "epoch": 0.3, - "learning_rate": 1.9500257840701328e-05, - "loss": 0.3604, + "learning_rate": 2.9501037554803178e-05, + "loss": 0.2365, "step": 6330 }, { "epoch": 0.3, - "learning_rate": 1.9499789039426188e-05, - "loss": 0.4467, + "learning_rate": 2.950056948496716e-05, + "loss": 0.2761, "step": 6335 }, { "epoch": 0.3, - "learning_rate": 1.9499320238151048e-05, - "loss": 0.4687, + "learning_rate": 2.9500101415131137e-05, + "loss": 0.3615, "step": 6340 }, { "epoch": 0.3, - "learning_rate": 1.949885143687591e-05, - "loss": 0.3686, + "learning_rate": 2.9499633345295117e-05, + "loss": 0.4044, "step": 6345 }, { "epoch": 0.3, - "learning_rate": 1.949838263560077e-05, - "loss": 0.8263, + "learning_rate": 2.9499165275459097e-05, + "loss": 0.6302, "step": 6350 }, { "epoch": 0.3, - "learning_rate": 1.949791383432563e-05, - "loss": 0.3793, + "learning_rate": 2.949869720562308e-05, + "loss": 0.3552, "step": 6355 }, { "epoch": 0.3, - "learning_rate": 1.949744503305049e-05, - "loss": 0.1265, + "learning_rate": 2.949822913578706e-05, + "loss": 0.1164, "step": 6360 }, { "epoch": 0.3, - "learning_rate": 1.9496976231775354e-05, - "loss": 0.1601, + "learning_rate": 2.949776106595104e-05, + "loss": 0.1787, "step": 6365 }, { "epoch": 0.3, - "learning_rate": 1.9496507430500214e-05, - "loss": 0.2208, + "learning_rate": 2.949729299611502e-05, + "loss": 0.1635, "step": 6370 }, { "epoch": 0.3, - "learning_rate": 1.9496038629225074e-05, - "loss": 0.2892, + "learning_rate": 2.9496824926279003e-05, + "loss": 0.2198, "step": 6375 }, { "epoch": 0.3, - "learning_rate": 1.9495569827949934e-05, - "loss": 0.2399, + "learning_rate": 2.9496356856442983e-05, + "loss": 0.1736, "step": 6380 }, { "epoch": 0.3, - "learning_rate": 1.9495101026674794e-05, - "loss": 0.4323, + "learning_rate": 2.9495888786606962e-05, + "loss": 0.1955, "step": 6385 }, { "epoch": 0.3, - "learning_rate": 1.9494632225399654e-05, - "loss": 0.4213, + "learning_rate": 2.9495420716770946e-05, + "loss": 0.4086, "step": 6390 }, { "epoch": 0.3, - "learning_rate": 1.9494163424124514e-05, - "loss": 0.6043, + "learning_rate": 2.9494952646934925e-05, + "loss": 0.3732, "step": 6395 }, { "epoch": 0.3, - "learning_rate": 1.9493694622849374e-05, - "loss": 0.8731, + "learning_rate": 2.9494484577098905e-05, + "loss": 0.581, "step": 6400 }, { "epoch": 0.3, - "learning_rate": 1.9493225821574237e-05, - "loss": 0.2651, + "learning_rate": 2.949401650726288e-05, + "loss": 0.2786, "step": 6405 }, { "epoch": 0.3, - "learning_rate": 1.9492757020299097e-05, - "loss": 0.1896, + "learning_rate": 2.9493548437426865e-05, + "loss": 0.1187, "step": 6410 }, { "epoch": 0.3, - "learning_rate": 1.9492288219023957e-05, - "loss": 0.2098, + "learning_rate": 2.9493080367590845e-05, + "loss": 0.1062, "step": 6415 }, { "epoch": 0.3, - "learning_rate": 1.9491819417748817e-05, - "loss": 0.2555, + "learning_rate": 2.9492612297754824e-05, + "loss": 0.1636, "step": 6420 }, { "epoch": 0.3, - "learning_rate": 1.949135061647368e-05, - "loss": 0.2872, + "learning_rate": 2.9492144227918804e-05, + "loss": 0.2069, "step": 6425 }, { "epoch": 0.3, - "learning_rate": 1.949088181519854e-05, - "loss": 0.306, + "learning_rate": 2.9491676158082787e-05, + "loss": 0.2534, "step": 6430 }, { "epoch": 0.3, - "learning_rate": 1.94904130139234e-05, - "loss": 0.3411, + "learning_rate": 2.9491208088246767e-05, + "loss": 0.2974, "step": 6435 }, { "epoch": 0.3, - "learning_rate": 1.948994421264826e-05, - "loss": 0.3097, + "learning_rate": 2.9490740018410747e-05, + "loss": 0.3632, "step": 6440 }, { "epoch": 0.3, - "learning_rate": 1.948947541137312e-05, - "loss": 0.4879, + "learning_rate": 2.949027194857473e-05, + "loss": 0.4625, "step": 6445 }, { "epoch": 0.3, - "learning_rate": 1.948900661009798e-05, - "loss": 0.6127, + "learning_rate": 2.948980387873871e-05, + "loss": 0.4981, "step": 6450 }, { "epoch": 0.3, - "learning_rate": 1.948853780882284e-05, - "loss": 0.3599, + "learning_rate": 2.948933580890269e-05, + "loss": 0.307, "step": 6455 }, { "epoch": 0.3, - "learning_rate": 1.9488069007547703e-05, - "loss": 0.1394, + "learning_rate": 2.948886773906667e-05, + "loss": 0.1518, "step": 6460 }, { "epoch": 0.3, - "learning_rate": 1.9487600206272563e-05, - "loss": 0.2307, + "learning_rate": 2.948839966923065e-05, + "loss": 0.1984, "step": 6465 }, { "epoch": 0.3, - "learning_rate": 1.9487131404997423e-05, - "loss": 0.2525, + "learning_rate": 2.948793159939463e-05, + "loss": 0.2891, "step": 6470 }, { "epoch": 0.3, - "learning_rate": 1.9486662603722283e-05, - "loss": 0.1769, + "learning_rate": 2.948746352955861e-05, + "loss": 0.2248, "step": 6475 }, { "epoch": 0.3, - "learning_rate": 1.9486193802447143e-05, - "loss": 0.303, + "learning_rate": 2.948699545972259e-05, + "loss": 0.2901, "step": 6480 }, { "epoch": 0.3, - "learning_rate": 1.9485725001172006e-05, - "loss": 0.3339, + "learning_rate": 2.9486527389886572e-05, + "loss": 0.33, "step": 6485 }, { "epoch": 0.3, - "learning_rate": 1.9485256199896866e-05, - "loss": 0.3536, + "learning_rate": 2.9486059320050552e-05, + "loss": 0.2767, "step": 6490 }, { "epoch": 0.3, - "learning_rate": 1.9484787398621726e-05, - "loss": 0.4037, + "learning_rate": 2.9485591250214532e-05, + "loss": 0.3612, "step": 6495 }, { "epoch": 0.3, - "learning_rate": 1.9484318597346586e-05, - "loss": 0.5179, + "learning_rate": 2.9485123180378515e-05, + "loss": 0.644, "step": 6500 }, { "epoch": 0.3, - "learning_rate": 1.948384979607145e-05, - "loss": 0.2462, + "learning_rate": 2.9484655110542495e-05, + "loss": 0.3238, "step": 6505 }, { "epoch": 0.3, - "learning_rate": 1.948338099479631e-05, - "loss": 0.1519, + "learning_rate": 2.9484187040706475e-05, + "loss": 0.1715, "step": 6510 }, { "epoch": 0.3, - "learning_rate": 1.948291219352117e-05, - "loss": 0.2413, + "learning_rate": 2.9483718970870455e-05, + "loss": 0.1669, "step": 6515 }, { "epoch": 0.3, - "learning_rate": 1.948244339224603e-05, - "loss": 0.1308, + "learning_rate": 2.9483250901034438e-05, + "loss": 0.2612, "step": 6520 }, { "epoch": 0.3, - "learning_rate": 1.948197459097089e-05, - "loss": 0.3107, + "learning_rate": 2.9482782831198418e-05, + "loss": 0.2686, "step": 6525 }, { "epoch": 0.3, - "learning_rate": 1.948150578969575e-05, - "loss": 0.3151, + "learning_rate": 2.9482314761362394e-05, + "loss": 0.2823, "step": 6530 }, { "epoch": 0.3, - "learning_rate": 1.948103698842061e-05, - "loss": 0.3446, + "learning_rate": 2.9481846691526374e-05, + "loss": 0.3895, "step": 6535 }, { "epoch": 0.31, - "learning_rate": 1.948056818714547e-05, - "loss": 0.3298, + "learning_rate": 2.9481378621690357e-05, + "loss": 0.3165, "step": 6540 }, { "epoch": 0.31, - "learning_rate": 1.948009938587033e-05, - "loss": 0.3325, + "learning_rate": 2.9480910551854337e-05, + "loss": 0.439, "step": 6545 }, { "epoch": 0.31, - "learning_rate": 1.9479630584595192e-05, - "loss": 0.8412, + "learning_rate": 2.9480442482018317e-05, + "loss": 0.6556, "step": 6550 }, { "epoch": 0.31, - "learning_rate": 1.9479161783320052e-05, - "loss": 0.2412, + "learning_rate": 2.9479974412182296e-05, + "loss": 0.3243, "step": 6555 }, { "epoch": 0.31, - "learning_rate": 1.9478692982044912e-05, - "loss": 0.1557, + "learning_rate": 2.947950634234628e-05, + "loss": 0.1045, "step": 6560 }, { "epoch": 0.31, - "learning_rate": 1.9478224180769775e-05, - "loss": 0.2324, + "learning_rate": 2.947903827251026e-05, + "loss": 0.1191, "step": 6565 }, { "epoch": 0.31, - "learning_rate": 1.9477755379494635e-05, - "loss": 0.2358, + "learning_rate": 2.947857020267424e-05, + "loss": 0.1821, "step": 6570 }, { "epoch": 0.31, - "learning_rate": 1.9477286578219495e-05, - "loss": 0.2939, + "learning_rate": 2.9478102132838223e-05, + "loss": 0.2167, "step": 6575 }, { "epoch": 0.31, - "learning_rate": 1.9476817776944355e-05, - "loss": 0.2902, + "learning_rate": 2.9477634063002202e-05, + "loss": 0.2829, "step": 6580 }, { "epoch": 0.31, - "learning_rate": 1.9476348975669215e-05, - "loss": 0.271, + "learning_rate": 2.9477165993166182e-05, + "loss": 0.3564, "step": 6585 }, { "epoch": 0.31, - "learning_rate": 1.9475880174394075e-05, - "loss": 0.2787, + "learning_rate": 2.9476697923330162e-05, + "loss": 0.5041, "step": 6590 }, { "epoch": 0.31, - "learning_rate": 1.9475411373118935e-05, - "loss": 0.4382, + "learning_rate": 2.9476229853494142e-05, + "loss": 0.3571, "step": 6595 }, { "epoch": 0.31, - "learning_rate": 1.9474942571843798e-05, - "loss": 0.5563, + "learning_rate": 2.947576178365812e-05, + "loss": 0.6696, "step": 6600 }, { "epoch": 0.31, - "learning_rate": 1.9474473770568658e-05, - "loss": 0.3581, + "learning_rate": 2.94752937138221e-05, + "loss": 0.2935, "step": 6605 }, { "epoch": 0.31, - "learning_rate": 1.9474004969293518e-05, - "loss": 0.1465, + "learning_rate": 2.947482564398608e-05, + "loss": 0.1073, "step": 6610 }, { "epoch": 0.31, - "learning_rate": 1.9473536168018378e-05, - "loss": 0.1957, + "learning_rate": 2.9474357574150064e-05, + "loss": 0.1227, "step": 6615 }, { "epoch": 0.31, - "learning_rate": 1.9473067366743238e-05, - "loss": 0.2329, + "learning_rate": 2.9473889504314044e-05, + "loss": 0.1901, "step": 6620 }, { "epoch": 0.31, - "learning_rate": 1.9472598565468098e-05, - "loss": 0.2191, + "learning_rate": 2.9473421434478024e-05, + "loss": 0.1586, "step": 6625 }, { "epoch": 0.31, - "learning_rate": 1.947212976419296e-05, - "loss": 0.2638, + "learning_rate": 2.9472953364642007e-05, + "loss": 0.2283, "step": 6630 }, { "epoch": 0.31, - "learning_rate": 1.947166096291782e-05, - "loss": 0.3851, + "learning_rate": 2.9472485294805987e-05, + "loss": 0.2978, "step": 6635 }, { "epoch": 0.31, - "learning_rate": 1.947119216164268e-05, - "loss": 0.421, + "learning_rate": 2.9472017224969967e-05, + "loss": 0.309, "step": 6640 }, { "epoch": 0.31, - "learning_rate": 1.9470723360367544e-05, - "loss": 0.3695, + "learning_rate": 2.9471549155133947e-05, + "loss": 0.4962, "step": 6645 }, { "epoch": 0.31, - "learning_rate": 1.9470254559092404e-05, - "loss": 0.7012, + "learning_rate": 2.947108108529793e-05, + "loss": 0.639, "step": 6650 }, { "epoch": 0.31, - "learning_rate": 1.9469785757817264e-05, - "loss": 0.3262, + "learning_rate": 2.9470613015461906e-05, + "loss": 0.2725, "step": 6655 }, { "epoch": 0.31, - "learning_rate": 1.9469316956542124e-05, - "loss": 0.0764, + "learning_rate": 2.9470144945625886e-05, + "loss": 0.1545, "step": 6660 }, { "epoch": 0.31, - "learning_rate": 1.9468848155266984e-05, - "loss": 0.2014, + "learning_rate": 2.9469676875789866e-05, + "loss": 0.1472, "step": 6665 }, { "epoch": 0.31, - "learning_rate": 1.9468379353991844e-05, - "loss": 0.2009, + "learning_rate": 2.946920880595385e-05, + "loss": 0.1253, "step": 6670 }, { "epoch": 0.31, - "learning_rate": 1.9467910552716704e-05, - "loss": 0.2012, + "learning_rate": 2.946874073611783e-05, + "loss": 0.2014, "step": 6675 }, { "epoch": 0.31, - "learning_rate": 1.9467441751441564e-05, - "loss": 0.2679, + "learning_rate": 2.946827266628181e-05, + "loss": 0.2666, "step": 6680 }, { "epoch": 0.31, - "learning_rate": 1.9466972950166424e-05, - "loss": 0.2762, + "learning_rate": 2.9467804596445792e-05, + "loss": 0.2151, "step": 6685 }, { "epoch": 0.31, - "learning_rate": 1.9466504148891287e-05, - "loss": 0.4076, + "learning_rate": 2.9467336526609772e-05, + "loss": 0.3208, "step": 6690 }, { "epoch": 0.31, - "learning_rate": 1.9466035347616147e-05, - "loss": 0.3921, + "learning_rate": 2.9466868456773752e-05, + "loss": 0.3027, "step": 6695 }, { "epoch": 0.31, - "learning_rate": 1.9465566546341007e-05, - "loss": 0.6968, + "learning_rate": 2.946640038693773e-05, + "loss": 0.6314, "step": 6700 }, { "epoch": 0.31, - "learning_rate": 1.9465097745065867e-05, - "loss": 0.3596, + "learning_rate": 2.9465932317101715e-05, + "loss": 0.3624, "step": 6705 }, { "epoch": 0.31, - "learning_rate": 1.946462894379073e-05, - "loss": 0.0913, + "learning_rate": 2.9465464247265695e-05, + "loss": 0.1625, "step": 6710 }, { "epoch": 0.31, - "learning_rate": 1.946416014251559e-05, - "loss": 0.1434, + "learning_rate": 2.9464996177429674e-05, + "loss": 0.1313, "step": 6715 }, { "epoch": 0.31, - "learning_rate": 1.946369134124045e-05, - "loss": 0.1815, + "learning_rate": 2.946452810759365e-05, + "loss": 0.2024, "step": 6720 }, { "epoch": 0.31, - "learning_rate": 1.946322253996531e-05, - "loss": 0.2293, + "learning_rate": 2.9464060037757634e-05, + "loss": 0.1811, "step": 6725 }, { "epoch": 0.31, - "learning_rate": 1.946275373869017e-05, - "loss": 0.2578, + "learning_rate": 2.9463591967921614e-05, + "loss": 0.3074, "step": 6730 }, { "epoch": 0.31, - "learning_rate": 1.9462284937415033e-05, - "loss": 0.3253, + "learning_rate": 2.9463123898085594e-05, + "loss": 0.3057, "step": 6735 }, { "epoch": 0.31, - "learning_rate": 1.9461816136139893e-05, - "loss": 0.4554, + "learning_rate": 2.9462655828249573e-05, + "loss": 0.4035, "step": 6740 }, { "epoch": 0.31, - "learning_rate": 1.9461347334864753e-05, - "loss": 0.433, + "learning_rate": 2.9462187758413557e-05, + "loss": 0.4331, "step": 6745 }, { "epoch": 0.31, - "learning_rate": 1.9460878533589613e-05, - "loss": 0.7031, + "learning_rate": 2.9461719688577536e-05, + "loss": 0.5612, "step": 6750 }, { "epoch": 0.32, - "learning_rate": 1.9460409732314473e-05, - "loss": 0.3681, + "learning_rate": 2.9461251618741516e-05, + "loss": 0.2139, "step": 6755 }, { "epoch": 0.32, - "learning_rate": 1.9459940931039333e-05, - "loss": 0.1622, + "learning_rate": 2.94607835489055e-05, + "loss": 0.1331, "step": 6760 }, { "epoch": 0.32, - "learning_rate": 1.9459472129764193e-05, - "loss": 0.1286, + "learning_rate": 2.946031547906948e-05, + "loss": 0.1882, "step": 6765 }, { "epoch": 0.32, - "learning_rate": 1.9459003328489053e-05, - "loss": 0.2374, + "learning_rate": 2.945984740923346e-05, + "loss": 0.2629, "step": 6770 }, { "epoch": 0.32, - "learning_rate": 1.9458534527213916e-05, - "loss": 0.2842, + "learning_rate": 2.945937933939744e-05, + "loss": 0.2592, "step": 6775 }, { "epoch": 0.32, - "learning_rate": 1.9458065725938776e-05, - "loss": 0.2651, + "learning_rate": 2.945891126956142e-05, + "loss": 0.3825, "step": 6780 }, { "epoch": 0.32, - "learning_rate": 1.9457596924663636e-05, - "loss": 0.402, + "learning_rate": 2.94584431997254e-05, + "loss": 0.2151, "step": 6785 }, { "epoch": 0.32, - "learning_rate": 1.94571281233885e-05, - "loss": 0.3272, + "learning_rate": 2.945797512988938e-05, + "loss": 0.2689, "step": 6790 }, { "epoch": 0.32, - "learning_rate": 1.945665932211336e-05, - "loss": 0.4942, + "learning_rate": 2.9457507060053358e-05, + "loss": 0.5211, "step": 6795 }, { "epoch": 0.32, - "learning_rate": 1.945619052083822e-05, - "loss": 0.571, + "learning_rate": 2.945703899021734e-05, + "loss": 0.4612, "step": 6800 }, { "epoch": 0.32, - "learning_rate": 1.945572171956308e-05, - "loss": 0.3568, + "learning_rate": 2.945657092038132e-05, + "loss": 0.3253, "step": 6805 }, { "epoch": 0.32, - "learning_rate": 1.945525291828794e-05, - "loss": 0.1872, + "learning_rate": 2.94561028505453e-05, + "loss": 0.08, "step": 6810 }, { "epoch": 0.32, - "learning_rate": 1.94547841170128e-05, - "loss": 0.2116, + "learning_rate": 2.9455634780709284e-05, + "loss": 0.138, "step": 6815 }, { "epoch": 0.32, - "learning_rate": 1.945431531573766e-05, - "loss": 0.1498, + "learning_rate": 2.9455166710873264e-05, + "loss": 0.2012, "step": 6820 }, { "epoch": 0.32, - "learning_rate": 1.945384651446252e-05, - "loss": 0.2026, + "learning_rate": 2.9454698641037244e-05, + "loss": 0.2973, "step": 6825 }, { "epoch": 0.32, - "learning_rate": 1.9453377713187382e-05, - "loss": 0.2388, + "learning_rate": 2.9454230571201224e-05, + "loss": 0.3456, "step": 6830 }, { "epoch": 0.32, - "learning_rate": 1.9452908911912242e-05, - "loss": 0.3062, + "learning_rate": 2.9453762501365207e-05, + "loss": 0.2108, "step": 6835 }, { "epoch": 0.32, - "learning_rate": 1.9452440110637102e-05, - "loss": 0.4513, + "learning_rate": 2.9453294431529187e-05, + "loss": 0.2841, "step": 6840 }, { "epoch": 0.32, - "learning_rate": 1.9451971309361962e-05, - "loss": 0.4118, + "learning_rate": 2.9452826361693163e-05, + "loss": 0.4899, "step": 6845 }, { "epoch": 0.32, - "learning_rate": 1.9451502508086822e-05, - "loss": 0.5351, + "learning_rate": 2.9452358291857143e-05, + "loss": 0.6438, "step": 6850 }, { "epoch": 0.32, - "learning_rate": 1.9451033706811685e-05, - "loss": 0.2974, + "learning_rate": 2.9451890222021126e-05, + "loss": 0.3009, "step": 6855 }, { "epoch": 0.32, - "learning_rate": 1.9450564905536545e-05, - "loss": 0.1387, + "learning_rate": 2.9451422152185106e-05, + "loss": 0.1273, "step": 6860 }, { "epoch": 0.32, - "learning_rate": 1.9450096104261405e-05, - "loss": 0.1921, + "learning_rate": 2.9450954082349086e-05, + "loss": 0.168, "step": 6865 }, { "epoch": 0.32, - "learning_rate": 1.9449627302986265e-05, - "loss": 0.2271, + "learning_rate": 2.945048601251307e-05, + "loss": 0.1879, "step": 6870 }, { "epoch": 0.32, - "learning_rate": 1.944915850171113e-05, - "loss": 0.2264, + "learning_rate": 2.945001794267705e-05, + "loss": 0.2848, "step": 6875 }, { "epoch": 0.32, - "learning_rate": 1.944868970043599e-05, - "loss": 0.3337, + "learning_rate": 2.944954987284103e-05, + "loss": 0.2592, "step": 6880 }, { "epoch": 0.32, - "learning_rate": 1.9448220899160848e-05, - "loss": 0.34, + "learning_rate": 2.944908180300501e-05, + "loss": 0.3403, "step": 6885 }, { "epoch": 0.32, - "learning_rate": 1.9447752097885708e-05, - "loss": 0.4425, + "learning_rate": 2.944861373316899e-05, + "loss": 0.4345, "step": 6890 }, { "epoch": 0.32, - "learning_rate": 1.9447283296610568e-05, - "loss": 0.5421, + "learning_rate": 2.944814566333297e-05, + "loss": 0.3656, "step": 6895 }, { "epoch": 0.32, - "learning_rate": 1.9446814495335428e-05, - "loss": 0.5687, + "learning_rate": 2.944767759349695e-05, + "loss": 0.4829, "step": 6900 }, { "epoch": 0.32, - "learning_rate": 1.9446345694060288e-05, - "loss": 0.4261, + "learning_rate": 2.944720952366093e-05, + "loss": 0.4176, "step": 6905 }, { "epoch": 0.32, - "learning_rate": 1.9445876892785148e-05, - "loss": 0.0878, + "learning_rate": 2.944674145382491e-05, + "loss": 0.1538, "step": 6910 }, { "epoch": 0.32, - "learning_rate": 1.944540809151001e-05, - "loss": 0.1664, + "learning_rate": 2.944627338398889e-05, + "loss": 0.0787, "step": 6915 }, { "epoch": 0.32, - "learning_rate": 1.944493929023487e-05, - "loss": 0.173, + "learning_rate": 2.944580531415287e-05, + "loss": 0.159, "step": 6920 }, { "epoch": 0.32, - "learning_rate": 1.944447048895973e-05, - "loss": 0.1802, + "learning_rate": 2.9445337244316854e-05, + "loss": 0.1508, "step": 6925 }, { "epoch": 0.32, - "learning_rate": 1.944400168768459e-05, - "loss": 0.3044, + "learning_rate": 2.9444869174480834e-05, + "loss": 0.2665, "step": 6930 }, { "epoch": 0.32, - "learning_rate": 1.9443532886409454e-05, - "loss": 0.2697, + "learning_rate": 2.9444401104644813e-05, + "loss": 0.3905, "step": 6935 }, { "epoch": 0.32, - "learning_rate": 1.9443064085134314e-05, - "loss": 0.3823, + "learning_rate": 2.9443933034808793e-05, + "loss": 0.3491, "step": 6940 }, { "epoch": 0.32, - "learning_rate": 1.9442595283859174e-05, - "loss": 0.5836, + "learning_rate": 2.9443464964972776e-05, + "loss": 0.4199, "step": 6945 }, { "epoch": 0.32, - "learning_rate": 1.9442126482584034e-05, - "loss": 0.8075, + "learning_rate": 2.9442996895136756e-05, + "loss": 0.6787, "step": 6950 }, { "epoch": 0.32, - "learning_rate": 1.9441657681308894e-05, - "loss": 0.3048, + "learning_rate": 2.9442528825300736e-05, + "loss": 0.3151, "step": 6955 }, { "epoch": 0.32, - "learning_rate": 1.9441188880033754e-05, - "loss": 0.178, + "learning_rate": 2.9442060755464716e-05, + "loss": 0.1328, "step": 6960 }, { "epoch": 0.32, - "learning_rate": 1.9440720078758614e-05, - "loss": 0.142, + "learning_rate": 2.94415926856287e-05, + "loss": 0.1392, "step": 6965 }, { "epoch": 0.33, - "learning_rate": 1.9440251277483477e-05, - "loss": 0.1452, + "learning_rate": 2.9441124615792676e-05, + "loss": 0.1369, "step": 6970 }, { "epoch": 0.33, - "learning_rate": 1.9439782476208337e-05, - "loss": 0.2158, + "learning_rate": 2.9440656545956655e-05, + "loss": 0.2611, "step": 6975 }, { "epoch": 0.33, - "learning_rate": 1.9439313674933197e-05, - "loss": 0.2264, + "learning_rate": 2.9440188476120635e-05, + "loss": 0.2943, "step": 6980 }, { "epoch": 0.33, - "learning_rate": 1.9438844873658057e-05, - "loss": 0.3913, + "learning_rate": 2.943972040628462e-05, + "loss": 0.2458, "step": 6985 }, { "epoch": 0.33, - "learning_rate": 1.9438376072382917e-05, - "loss": 0.3618, + "learning_rate": 2.9439252336448598e-05, + "loss": 0.3252, "step": 6990 }, { "epoch": 0.33, - "learning_rate": 1.943790727110778e-05, - "loss": 0.4981, + "learning_rate": 2.9438784266612578e-05, + "loss": 0.4069, "step": 6995 }, { "epoch": 0.33, - "learning_rate": 1.943743846983264e-05, - "loss": 0.5207, + "learning_rate": 2.943831619677656e-05, + "loss": 0.6211, "step": 7000 }, { "epoch": 0.33, - "learning_rate": 1.94369696685575e-05, - "loss": 0.2584, + "learning_rate": 2.943784812694054e-05, + "loss": 0.3158, "step": 7005 }, { "epoch": 0.33, - "learning_rate": 1.943650086728236e-05, - "loss": 0.1643, + "learning_rate": 2.943738005710452e-05, + "loss": 0.1283, "step": 7010 }, { "epoch": 0.33, - "learning_rate": 1.9436032066007223e-05, - "loss": 0.162, + "learning_rate": 2.94369119872685e-05, + "loss": 0.1746, "step": 7015 }, { "epoch": 0.33, - "learning_rate": 1.9435563264732083e-05, - "loss": 0.2558, + "learning_rate": 2.9436443917432484e-05, + "loss": 0.2119, "step": 7020 }, { "epoch": 0.33, - "learning_rate": 1.9435094463456943e-05, - "loss": 0.24, + "learning_rate": 2.9435975847596464e-05, + "loss": 0.182, "step": 7025 }, { "epoch": 0.33, - "learning_rate": 1.9434625662181803e-05, - "loss": 0.2614, + "learning_rate": 2.9435507777760444e-05, + "loss": 0.2626, "step": 7030 }, { "epoch": 0.33, - "learning_rate": 1.9434156860906663e-05, - "loss": 0.2827, + "learning_rate": 2.943503970792442e-05, + "loss": 0.2755, "step": 7035 }, { "epoch": 0.33, - "learning_rate": 1.9433688059631523e-05, - "loss": 0.313, + "learning_rate": 2.9434571638088403e-05, + "loss": 0.2745, "step": 7040 }, { "epoch": 0.33, - "learning_rate": 1.9433219258356383e-05, - "loss": 0.5073, + "learning_rate": 2.9434103568252383e-05, + "loss": 0.4722, "step": 7045 }, { "epoch": 0.33, - "learning_rate": 1.9432750457081243e-05, - "loss": 0.6594, + "learning_rate": 2.9433635498416363e-05, + "loss": 0.7391, "step": 7050 }, { "epoch": 0.33, - "learning_rate": 1.9432281655806103e-05, - "loss": 0.2966, + "learning_rate": 2.9433167428580346e-05, + "loss": 0.3168, "step": 7055 }, { "epoch": 0.33, - "learning_rate": 1.9431812854530966e-05, - "loss": 0.1413, + "learning_rate": 2.9432699358744326e-05, + "loss": 0.1312, "step": 7060 }, { "epoch": 0.33, - "learning_rate": 1.9431344053255826e-05, - "loss": 0.1812, + "learning_rate": 2.9432231288908306e-05, + "loss": 0.156, "step": 7065 }, { "epoch": 0.33, - "learning_rate": 1.9430875251980686e-05, - "loss": 0.1497, + "learning_rate": 2.9431763219072285e-05, + "loss": 0.185, "step": 7070 }, { "epoch": 0.33, - "learning_rate": 1.943040645070555e-05, - "loss": 0.1938, + "learning_rate": 2.943129514923627e-05, + "loss": 0.2077, "step": 7075 }, { "epoch": 0.33, - "learning_rate": 1.942993764943041e-05, - "loss": 0.3134, + "learning_rate": 2.943082707940025e-05, + "loss": 0.22, "step": 7080 }, { "epoch": 0.33, - "learning_rate": 1.942946884815527e-05, - "loss": 0.2142, + "learning_rate": 2.9430359009564228e-05, + "loss": 0.357, "step": 7085 }, { "epoch": 0.33, - "learning_rate": 1.942900004688013e-05, - "loss": 0.4456, + "learning_rate": 2.9429890939728208e-05, + "loss": 0.3937, "step": 7090 }, { "epoch": 0.33, - "learning_rate": 1.942853124560499e-05, - "loss": 0.3444, + "learning_rate": 2.942942286989219e-05, + "loss": 0.3502, "step": 7095 }, { "epoch": 0.33, - "learning_rate": 1.942806244432985e-05, - "loss": 0.8402, + "learning_rate": 2.9428954800056168e-05, + "loss": 0.6648, "step": 7100 }, { "epoch": 0.33, - "learning_rate": 1.942759364305471e-05, - "loss": 0.3128, + "learning_rate": 2.9428486730220148e-05, + "loss": 0.3005, "step": 7105 }, { "epoch": 0.33, - "learning_rate": 1.9427124841779572e-05, - "loss": 0.1323, + "learning_rate": 2.942801866038413e-05, + "loss": 0.1217, "step": 7110 }, { "epoch": 0.33, - "learning_rate": 1.9426656040504432e-05, - "loss": 0.1779, + "learning_rate": 2.942755059054811e-05, + "loss": 0.1151, "step": 7115 }, { "epoch": 0.33, - "learning_rate": 1.9426187239229292e-05, - "loss": 0.1896, + "learning_rate": 2.942708252071209e-05, + "loss": 0.0922, "step": 7120 }, { "epoch": 0.33, - "learning_rate": 1.9425718437954152e-05, - "loss": 0.2415, + "learning_rate": 2.942661445087607e-05, + "loss": 0.1953, "step": 7125 }, { "epoch": 0.33, - "learning_rate": 1.9425249636679012e-05, - "loss": 0.2888, + "learning_rate": 2.9426146381040053e-05, + "loss": 0.2047, "step": 7130 }, { "epoch": 0.33, - "learning_rate": 1.9424780835403872e-05, - "loss": 0.2641, + "learning_rate": 2.9425678311204033e-05, + "loss": 0.2647, "step": 7135 }, { "epoch": 0.33, - "learning_rate": 1.9424312034128735e-05, - "loss": 0.3465, + "learning_rate": 2.9425210241368013e-05, + "loss": 0.314, "step": 7140 }, { "epoch": 0.33, - "learning_rate": 1.9423843232853595e-05, - "loss": 0.4948, + "learning_rate": 2.9424742171531993e-05, + "loss": 0.4359, "step": 7145 }, { "epoch": 0.33, - "learning_rate": 1.9423374431578455e-05, - "loss": 0.6909, + "learning_rate": 2.9424274101695976e-05, + "loss": 0.5848, "step": 7150 }, { "epoch": 0.33, - "learning_rate": 1.942290563030332e-05, - "loss": 0.3321, + "learning_rate": 2.9423806031859956e-05, + "loss": 0.2917, "step": 7155 }, { "epoch": 0.33, - "learning_rate": 1.942243682902818e-05, - "loss": 0.1793, + "learning_rate": 2.9423337962023932e-05, + "loss": 0.1425, "step": 7160 }, { "epoch": 0.33, - "learning_rate": 1.942196802775304e-05, - "loss": 0.1224, + "learning_rate": 2.9422869892187912e-05, + "loss": 0.0683, "step": 7165 }, { "epoch": 0.33, - "learning_rate": 1.94214992264779e-05, - "loss": 0.2602, + "learning_rate": 2.9422401822351895e-05, + "loss": 0.1217, "step": 7170 }, { "epoch": 0.33, - "learning_rate": 1.9421030425202758e-05, - "loss": 0.1669, + "learning_rate": 2.9421933752515875e-05, + "loss": 0.1867, "step": 7175 }, { "epoch": 0.34, - "learning_rate": 1.9420561623927618e-05, - "loss": 0.1957, + "learning_rate": 2.9421465682679855e-05, + "loss": 0.2212, "step": 7180 }, { "epoch": 0.34, - "learning_rate": 1.9420092822652478e-05, - "loss": 0.3104, + "learning_rate": 2.9420997612843838e-05, + "loss": 0.2069, "step": 7185 }, { "epoch": 0.34, - "learning_rate": 1.9419624021377338e-05, - "loss": 0.2715, + "learning_rate": 2.9420529543007818e-05, + "loss": 0.3088, "step": 7190 }, { "epoch": 0.34, - "learning_rate": 1.9419155220102198e-05, - "loss": 0.3714, + "learning_rate": 2.9420061473171798e-05, + "loss": 0.396, "step": 7195 }, { "epoch": 0.34, - "learning_rate": 1.941868641882706e-05, - "loss": 0.6902, + "learning_rate": 2.9419593403335778e-05, + "loss": 0.648, "step": 7200 }, { "epoch": 0.34, - "learning_rate": 1.941821761755192e-05, - "loss": 0.2193, + "learning_rate": 2.941912533349976e-05, + "loss": 0.2814, "step": 7205 }, { "epoch": 0.34, - "learning_rate": 1.941774881627678e-05, - "loss": 0.1142, + "learning_rate": 2.941865726366374e-05, + "loss": 0.1857, "step": 7210 }, { "epoch": 0.34, - "learning_rate": 1.941728001500164e-05, - "loss": 0.1894, + "learning_rate": 2.941818919382772e-05, + "loss": 0.2256, "step": 7215 }, { "epoch": 0.34, - "learning_rate": 1.9416811213726504e-05, - "loss": 0.2033, + "learning_rate": 2.94177211239917e-05, + "loss": 0.1774, "step": 7220 }, { "epoch": 0.34, - "learning_rate": 1.9416342412451364e-05, - "loss": 0.251, + "learning_rate": 2.941725305415568e-05, + "loss": 0.2119, "step": 7225 }, { "epoch": 0.34, - "learning_rate": 1.9415873611176224e-05, - "loss": 0.3118, + "learning_rate": 2.941678498431966e-05, + "loss": 0.2456, "step": 7230 }, { "epoch": 0.34, - "learning_rate": 1.9415404809901084e-05, - "loss": 0.29, + "learning_rate": 2.941631691448364e-05, + "loss": 0.3269, "step": 7235 }, { "epoch": 0.34, - "learning_rate": 1.9414936008625944e-05, - "loss": 0.331, + "learning_rate": 2.9415848844647623e-05, + "loss": 0.312, "step": 7240 }, { "epoch": 0.34, - "learning_rate": 1.9414467207350804e-05, - "loss": 0.441, + "learning_rate": 2.9415380774811603e-05, + "loss": 0.4873, "step": 7245 }, { "epoch": 0.34, - "learning_rate": 1.9413998406075667e-05, - "loss": 0.6936, + "learning_rate": 2.9414912704975583e-05, + "loss": 0.5289, "step": 7250 }, { "epoch": 0.34, - "learning_rate": 1.9413529604800527e-05, - "loss": 0.2578, + "learning_rate": 2.9414444635139562e-05, + "loss": 0.2919, "step": 7255 }, { "epoch": 0.34, - "learning_rate": 1.9413060803525387e-05, - "loss": 0.1108, + "learning_rate": 2.9413976565303546e-05, + "loss": 0.1547, "step": 7260 }, { "epoch": 0.34, - "learning_rate": 1.9412592002250247e-05, - "loss": 0.1718, + "learning_rate": 2.9413508495467525e-05, + "loss": 0.1351, "step": 7265 }, { "epoch": 0.34, - "learning_rate": 1.9412123200975107e-05, - "loss": 0.2078, + "learning_rate": 2.9413040425631505e-05, + "loss": 0.1993, "step": 7270 }, { "epoch": 0.34, - "learning_rate": 1.9411654399699967e-05, - "loss": 0.237, + "learning_rate": 2.9412572355795485e-05, + "loss": 0.2175, "step": 7275 }, { "epoch": 0.34, - "learning_rate": 1.9411185598424827e-05, - "loss": 0.2431, + "learning_rate": 2.9412104285959468e-05, + "loss": 0.2232, "step": 7280 }, { "epoch": 0.34, - "learning_rate": 1.941071679714969e-05, - "loss": 0.2729, + "learning_rate": 2.9411636216123448e-05, + "loss": 0.236, "step": 7285 }, { "epoch": 0.34, - "learning_rate": 1.941024799587455e-05, - "loss": 0.3672, + "learning_rate": 2.9411168146287425e-05, + "loss": 0.315, "step": 7290 }, { "epoch": 0.34, - "learning_rate": 1.940977919459941e-05, - "loss": 0.5002, + "learning_rate": 2.9410700076451408e-05, + "loss": 0.4131, "step": 7295 }, { "epoch": 0.34, - "learning_rate": 1.9409310393324274e-05, - "loss": 0.7313, + "learning_rate": 2.9410232006615388e-05, + "loss": 0.6558, "step": 7300 }, { "epoch": 0.34, - "learning_rate": 1.9408841592049134e-05, - "loss": 0.328, + "learning_rate": 2.9409763936779367e-05, + "loss": 0.2861, "step": 7305 }, { "epoch": 0.34, - "learning_rate": 1.9408372790773993e-05, - "loss": 0.2048, + "learning_rate": 2.9409295866943347e-05, + "loss": 0.0903, "step": 7310 }, { "epoch": 0.34, - "learning_rate": 1.9407903989498853e-05, - "loss": 0.2032, + "learning_rate": 2.940882779710733e-05, + "loss": 0.1395, "step": 7315 }, { "epoch": 0.34, - "learning_rate": 1.9407435188223713e-05, - "loss": 0.2235, + "learning_rate": 2.940835972727131e-05, + "loss": 0.1971, "step": 7320 }, { "epoch": 0.34, - "learning_rate": 1.9406966386948573e-05, - "loss": 0.198, + "learning_rate": 2.940789165743529e-05, + "loss": 0.2326, "step": 7325 }, { "epoch": 0.34, - "learning_rate": 1.9406497585673433e-05, - "loss": 0.2556, + "learning_rate": 2.940742358759927e-05, + "loss": 0.2499, "step": 7330 }, { "epoch": 0.34, - "learning_rate": 1.9406028784398293e-05, - "loss": 0.358, + "learning_rate": 2.9406955517763253e-05, + "loss": 0.2246, "step": 7335 }, { "epoch": 0.34, - "learning_rate": 1.9405559983123156e-05, - "loss": 0.4997, + "learning_rate": 2.9406487447927233e-05, + "loss": 0.2401, "step": 7340 }, { "epoch": 0.34, - "learning_rate": 1.9405091181848016e-05, - "loss": 0.4498, + "learning_rate": 2.9406019378091213e-05, + "loss": 0.3713, "step": 7345 }, { "epoch": 0.34, - "learning_rate": 1.9404622380572876e-05, - "loss": 0.5937, + "learning_rate": 2.940555130825519e-05, + "loss": 0.4214, "step": 7350 }, { "epoch": 0.34, - "learning_rate": 1.9404153579297736e-05, - "loss": 0.2874, + "learning_rate": 2.9405083238419172e-05, + "loss": 0.232, "step": 7355 }, { "epoch": 0.34, - "learning_rate": 1.94036847780226e-05, - "loss": 0.1371, + "learning_rate": 2.9404615168583152e-05, + "loss": 0.1076, "step": 7360 }, { "epoch": 0.34, - "learning_rate": 1.940321597674746e-05, - "loss": 0.1512, + "learning_rate": 2.9404147098747132e-05, + "loss": 0.2181, "step": 7365 }, { "epoch": 0.34, - "learning_rate": 1.940274717547232e-05, - "loss": 0.1584, + "learning_rate": 2.9403679028911115e-05, + "loss": 0.2323, "step": 7370 }, { "epoch": 0.34, - "learning_rate": 1.940227837419718e-05, - "loss": 0.2193, + "learning_rate": 2.9403210959075095e-05, + "loss": 0.2025, "step": 7375 }, { "epoch": 0.34, - "learning_rate": 1.940180957292204e-05, - "loss": 0.2825, + "learning_rate": 2.9402742889239075e-05, + "loss": 0.3211, "step": 7380 }, { "epoch": 0.34, - "learning_rate": 1.9401340771646903e-05, - "loss": 0.3116, + "learning_rate": 2.9402274819403055e-05, + "loss": 0.2685, "step": 7385 }, { "epoch": 0.34, - "learning_rate": 1.9400871970371763e-05, - "loss": 0.3652, + "learning_rate": 2.9401806749567038e-05, + "loss": 0.3723, "step": 7390 }, { "epoch": 0.35, - "learning_rate": 1.9400403169096622e-05, - "loss": 0.4405, + "learning_rate": 2.9401338679731018e-05, + "loss": 0.5123, "step": 7395 }, { "epoch": 0.35, - "learning_rate": 1.9399934367821482e-05, - "loss": 0.6158, + "learning_rate": 2.9400870609894997e-05, + "loss": 0.6605, "step": 7400 }, { "epoch": 0.35, - "learning_rate": 1.9399465566546342e-05, - "loss": 0.3158, + "learning_rate": 2.9400402540058977e-05, + "loss": 0.3077, "step": 7405 }, { "epoch": 0.35, - "learning_rate": 1.9398996765271202e-05, - "loss": 0.0907, + "learning_rate": 2.939993447022296e-05, + "loss": 0.1301, "step": 7410 }, { "epoch": 0.35, - "learning_rate": 1.9398527963996062e-05, - "loss": 0.3189, + "learning_rate": 2.9399466400386937e-05, + "loss": 0.1506, "step": 7415 }, { "epoch": 0.35, - "learning_rate": 1.9398059162720922e-05, - "loss": 0.17, + "learning_rate": 2.9398998330550917e-05, + "loss": 0.1235, "step": 7420 }, { "epoch": 0.35, - "learning_rate": 1.9397590361445785e-05, - "loss": 0.1948, + "learning_rate": 2.93985302607149e-05, + "loss": 0.2549, "step": 7425 }, { "epoch": 0.35, - "learning_rate": 1.9397121560170645e-05, - "loss": 0.2213, + "learning_rate": 2.939806219087888e-05, + "loss": 0.1672, "step": 7430 }, { "epoch": 0.35, - "learning_rate": 1.9396652758895505e-05, - "loss": 0.2708, + "learning_rate": 2.939759412104286e-05, + "loss": 0.174, "step": 7435 }, { "epoch": 0.35, - "learning_rate": 1.939618395762037e-05, - "loss": 0.3346, + "learning_rate": 2.939712605120684e-05, + "loss": 0.1782, "step": 7440 }, { "epoch": 0.35, - "learning_rate": 1.939571515634523e-05, - "loss": 0.3433, + "learning_rate": 2.9396657981370823e-05, + "loss": 0.4178, "step": 7445 }, { "epoch": 0.35, - "learning_rate": 1.939524635507009e-05, - "loss": 0.6586, + "learning_rate": 2.9396189911534802e-05, + "loss": 0.6901, "step": 7450 }, { "epoch": 0.35, - "learning_rate": 1.939477755379495e-05, - "loss": 0.2602, + "learning_rate": 2.9395721841698782e-05, + "loss": 0.2847, "step": 7455 }, { "epoch": 0.35, - "learning_rate": 1.939430875251981e-05, - "loss": 0.1247, + "learning_rate": 2.9395253771862762e-05, + "loss": 0.0785, "step": 7460 }, { "epoch": 0.35, - "learning_rate": 1.939383995124467e-05, - "loss": 0.1999, + "learning_rate": 2.9394785702026745e-05, + "loss": 0.2106, "step": 7465 }, { "epoch": 0.35, - "learning_rate": 1.9393371149969528e-05, - "loss": 0.2082, + "learning_rate": 2.9394317632190725e-05, + "loss": 0.1722, "step": 7470 }, { "epoch": 0.35, - "learning_rate": 1.9392902348694388e-05, - "loss": 0.1804, + "learning_rate": 2.9393849562354705e-05, + "loss": 0.1965, "step": 7475 }, { "epoch": 0.35, - "learning_rate": 1.939243354741925e-05, - "loss": 0.3072, + "learning_rate": 2.9393381492518685e-05, + "loss": 0.2786, "step": 7480 }, { "epoch": 0.35, - "learning_rate": 1.939196474614411e-05, - "loss": 0.2737, + "learning_rate": 2.9392913422682665e-05, + "loss": 0.3831, "step": 7485 }, { "epoch": 0.35, - "learning_rate": 1.939149594486897e-05, - "loss": 0.2939, + "learning_rate": 2.9392445352846644e-05, + "loss": 0.3972, "step": 7490 }, { "epoch": 0.35, - "learning_rate": 1.939102714359383e-05, - "loss": 0.3612, + "learning_rate": 2.9391977283010624e-05, + "loss": 0.3598, "step": 7495 }, { "epoch": 0.35, - "learning_rate": 1.939055834231869e-05, - "loss": 0.591, + "learning_rate": 2.9391509213174607e-05, + "loss": 0.7829, "step": 7500 }, { "epoch": 0.35, - "learning_rate": 1.9390089541043555e-05, - "loss": 0.2915, + "learning_rate": 2.9391041143338587e-05, + "loss": 0.3237, "step": 7505 }, { "epoch": 0.35, - "learning_rate": 1.9389620739768415e-05, - "loss": 0.1465, + "learning_rate": 2.9390573073502567e-05, + "loss": 0.1234, "step": 7510 }, { "epoch": 0.35, - "learning_rate": 1.9389151938493274e-05, - "loss": 0.1969, + "learning_rate": 2.9390105003666547e-05, + "loss": 0.0924, "step": 7515 }, { "epoch": 0.35, - "learning_rate": 1.9388683137218134e-05, - "loss": 0.203, + "learning_rate": 2.938963693383053e-05, + "loss": 0.1817, "step": 7520 }, { "epoch": 0.35, - "learning_rate": 1.9388214335942998e-05, - "loss": 0.1947, + "learning_rate": 2.938916886399451e-05, + "loss": 0.1511, "step": 7525 }, { "epoch": 0.35, - "learning_rate": 1.9387745534667858e-05, - "loss": 0.2782, + "learning_rate": 2.938870079415849e-05, + "loss": 0.1858, "step": 7530 }, { "epoch": 0.35, - "learning_rate": 1.9387276733392718e-05, - "loss": 0.2536, + "learning_rate": 2.938823272432247e-05, + "loss": 0.319, "step": 7535 }, { "epoch": 0.35, - "learning_rate": 1.9386807932117578e-05, - "loss": 0.2968, + "learning_rate": 2.938776465448645e-05, + "loss": 0.1937, "step": 7540 }, { "epoch": 0.35, - "learning_rate": 1.9386339130842437e-05, - "loss": 0.4721, + "learning_rate": 2.938729658465043e-05, + "loss": 0.3744, "step": 7545 }, { "epoch": 0.35, - "learning_rate": 1.9385870329567297e-05, - "loss": 0.6284, + "learning_rate": 2.938682851481441e-05, + "loss": 0.6982, "step": 7550 }, { "epoch": 0.35, - "learning_rate": 1.9385401528292157e-05, - "loss": 0.2729, + "learning_rate": 2.9386360444978392e-05, + "loss": 0.2617, "step": 7555 }, { "epoch": 0.35, - "learning_rate": 1.9384932727017017e-05, - "loss": 0.1275, + "learning_rate": 2.9385892375142372e-05, + "loss": 0.1418, "step": 7560 }, { "epoch": 0.35, - "learning_rate": 1.9384463925741877e-05, - "loss": 0.2018, + "learning_rate": 2.9385424305306352e-05, + "loss": 0.2089, "step": 7565 }, { "epoch": 0.35, - "learning_rate": 1.938399512446674e-05, - "loss": 0.1743, + "learning_rate": 2.938495623547033e-05, + "loss": 0.1712, "step": 7570 }, { "epoch": 0.35, - "learning_rate": 1.93835263231916e-05, - "loss": 0.354, + "learning_rate": 2.9384488165634315e-05, + "loss": 0.2747, "step": 7575 }, { "epoch": 0.35, - "learning_rate": 1.938305752191646e-05, - "loss": 0.1568, + "learning_rate": 2.9384020095798295e-05, + "loss": 0.3313, "step": 7580 }, { "epoch": 0.35, - "learning_rate": 1.9382588720641324e-05, - "loss": 0.2309, + "learning_rate": 2.9383552025962274e-05, + "loss": 0.2143, "step": 7585 }, { "epoch": 0.35, - "learning_rate": 1.9382119919366184e-05, - "loss": 0.3072, + "learning_rate": 2.9383083956126254e-05, + "loss": 0.2866, "step": 7590 }, { "epoch": 0.35, - "learning_rate": 1.9381651118091044e-05, - "loss": 0.3901, + "learning_rate": 2.9382615886290237e-05, + "loss": 0.4264, "step": 7595 }, { "epoch": 0.35, - "learning_rate": 1.9381182316815903e-05, - "loss": 0.6689, + "learning_rate": 2.9382147816454217e-05, + "loss": 0.7221, "step": 7600 }, { "epoch": 0.35, - "learning_rate": 1.9380713515540763e-05, - "loss": 0.3356, + "learning_rate": 2.9381679746618194e-05, + "loss": 0.334, "step": 7605 }, { "epoch": 0.36, - "learning_rate": 1.9380244714265623e-05, - "loss": 0.0993, + "learning_rate": 2.9381211676782177e-05, + "loss": 0.1238, "step": 7610 }, { "epoch": 0.36, - "learning_rate": 1.9379775912990483e-05, - "loss": 0.1436, + "learning_rate": 2.9380743606946157e-05, + "loss": 0.1912, "step": 7615 }, { "epoch": 0.36, - "learning_rate": 1.9379307111715347e-05, - "loss": 0.1633, + "learning_rate": 2.9380275537110137e-05, + "loss": 0.1523, "step": 7620 }, { "epoch": 0.36, - "learning_rate": 1.9378838310440207e-05, - "loss": 0.2219, + "learning_rate": 2.9379807467274116e-05, + "loss": 0.2195, "step": 7625 }, { "epoch": 0.36, - "learning_rate": 1.9378369509165066e-05, - "loss": 0.1863, + "learning_rate": 2.93793393974381e-05, + "loss": 0.2804, "step": 7630 }, { "epoch": 0.36, - "learning_rate": 1.9377900707889926e-05, - "loss": 0.382, + "learning_rate": 2.937887132760208e-05, + "loss": 0.2835, "step": 7635 }, { "epoch": 0.36, - "learning_rate": 1.9377431906614786e-05, - "loss": 0.3374, + "learning_rate": 2.937840325776606e-05, + "loss": 0.3109, "step": 7640 }, { "epoch": 0.36, - "learning_rate": 1.9376963105339646e-05, - "loss": 0.3538, + "learning_rate": 2.937793518793004e-05, + "loss": 0.459, "step": 7645 }, { "epoch": 0.36, - "learning_rate": 1.937649430406451e-05, - "loss": 0.5143, + "learning_rate": 2.9377467118094022e-05, + "loss": 0.7011, "step": 7650 }, { "epoch": 0.36, - "learning_rate": 1.937602550278937e-05, - "loss": 0.3543, + "learning_rate": 2.9376999048258002e-05, + "loss": 0.2537, "step": 7655 }, { "epoch": 0.36, - "learning_rate": 1.937555670151423e-05, - "loss": 0.1521, + "learning_rate": 2.9376530978421982e-05, + "loss": 0.1177, "step": 7660 }, { "epoch": 0.36, - "learning_rate": 1.9375087900239093e-05, - "loss": 0.2513, + "learning_rate": 2.937606290858596e-05, + "loss": 0.1364, "step": 7665 }, { "epoch": 0.36, - "learning_rate": 1.9374619098963953e-05, - "loss": 0.1515, + "learning_rate": 2.937559483874994e-05, + "loss": 0.1555, "step": 7670 }, { "epoch": 0.36, - "learning_rate": 1.9374150297688813e-05, - "loss": 0.2925, + "learning_rate": 2.937512676891392e-05, + "loss": 0.1555, "step": 7675 }, { "epoch": 0.36, - "learning_rate": 1.9373681496413673e-05, - "loss": 0.3807, + "learning_rate": 2.93746586990779e-05, + "loss": 0.2303, "step": 7680 }, { "epoch": 0.36, - "learning_rate": 1.9373212695138533e-05, - "loss": 0.381, + "learning_rate": 2.9374190629241884e-05, + "loss": 0.267, "step": 7685 }, { "epoch": 0.36, - "learning_rate": 1.9372743893863392e-05, - "loss": 0.2791, + "learning_rate": 2.9373722559405864e-05, + "loss": 0.389, "step": 7690 }, { "epoch": 0.36, - "learning_rate": 1.9372275092588252e-05, - "loss": 0.4091, + "learning_rate": 2.9373254489569844e-05, + "loss": 0.3087, "step": 7695 }, { "epoch": 0.36, - "learning_rate": 1.9371806291313112e-05, - "loss": 0.7086, + "learning_rate": 2.9372786419733824e-05, + "loss": 0.5691, "step": 7700 }, { "epoch": 0.36, - "learning_rate": 1.9371337490037972e-05, - "loss": 0.2184, + "learning_rate": 2.9372318349897807e-05, + "loss": 0.2561, "step": 7705 }, { "epoch": 0.36, - "learning_rate": 1.9370868688762836e-05, - "loss": 0.1839, + "learning_rate": 2.9371850280061787e-05, + "loss": 0.1445, "step": 7710 }, { "epoch": 0.36, - "learning_rate": 1.9370399887487696e-05, - "loss": 0.1395, + "learning_rate": 2.9371382210225767e-05, + "loss": 0.1372, "step": 7715 }, { "epoch": 0.36, - "learning_rate": 1.9369931086212555e-05, - "loss": 0.1088, + "learning_rate": 2.937091414038975e-05, + "loss": 0.2023, "step": 7720 }, { "epoch": 0.36, - "learning_rate": 1.9369462284937415e-05, - "loss": 0.1739, + "learning_rate": 2.937044607055373e-05, + "loss": 0.2601, "step": 7725 }, { "epoch": 0.36, - "learning_rate": 1.936899348366228e-05, - "loss": 0.2314, + "learning_rate": 2.9369978000717706e-05, + "loss": 0.3441, "step": 7730 }, { "epoch": 0.36, - "learning_rate": 1.936852468238714e-05, - "loss": 0.3432, + "learning_rate": 2.9369509930881686e-05, + "loss": 0.2752, "step": 7735 }, { "epoch": 0.36, - "learning_rate": 1.9368055881112e-05, - "loss": 0.3118, + "learning_rate": 2.936904186104567e-05, + "loss": 0.5722, "step": 7740 }, { "epoch": 0.36, - "learning_rate": 1.936758707983686e-05, - "loss": 0.4382, + "learning_rate": 2.936857379120965e-05, + "loss": 0.4032, "step": 7745 }, { "epoch": 0.36, - "learning_rate": 1.936711827856172e-05, - "loss": 0.4952, + "learning_rate": 2.936810572137363e-05, + "loss": 0.69, "step": 7750 }, { "epoch": 0.36, - "learning_rate": 1.936664947728658e-05, - "loss": 0.3685, + "learning_rate": 2.936763765153761e-05, + "loss": 0.2623, "step": 7755 }, { "epoch": 0.36, - "learning_rate": 1.936618067601144e-05, - "loss": 0.13, + "learning_rate": 2.9367169581701592e-05, + "loss": 0.1181, "step": 7760 }, { "epoch": 0.36, - "learning_rate": 1.93657118747363e-05, - "loss": 0.1563, + "learning_rate": 2.936670151186557e-05, + "loss": 0.1879, "step": 7765 }, { "epoch": 0.36, - "learning_rate": 1.936524307346116e-05, - "loss": 0.2089, + "learning_rate": 2.936623344202955e-05, + "loss": 0.1422, "step": 7770 }, { "epoch": 0.36, - "learning_rate": 1.936477427218602e-05, - "loss": 0.2659, + "learning_rate": 2.936576537219353e-05, + "loss": 0.2175, "step": 7775 }, { "epoch": 0.36, - "learning_rate": 1.936430547091088e-05, - "loss": 0.1983, + "learning_rate": 2.9365297302357514e-05, + "loss": 0.2454, "step": 7780 }, { "epoch": 0.36, - "learning_rate": 1.936383666963574e-05, - "loss": 0.2818, + "learning_rate": 2.9364829232521494e-05, + "loss": 0.231, "step": 7785 }, { "epoch": 0.36, - "learning_rate": 1.9363367868360605e-05, - "loss": 0.418, + "learning_rate": 2.9364361162685474e-05, + "loss": 0.3173, "step": 7790 }, { "epoch": 0.36, - "learning_rate": 1.9362899067085465e-05, - "loss": 0.4263, + "learning_rate": 2.9363893092849454e-05, + "loss": 0.3939, "step": 7795 }, { "epoch": 0.36, - "learning_rate": 1.9362430265810325e-05, - "loss": 0.6848, + "learning_rate": 2.9363425023013434e-05, + "loss": 0.5128, "step": 7800 }, { "epoch": 0.36, - "learning_rate": 1.9361961464535184e-05, - "loss": 0.3445, + "learning_rate": 2.9362956953177414e-05, + "loss": 0.3327, "step": 7805 }, { "epoch": 0.36, - "learning_rate": 1.9361492663260048e-05, - "loss": 0.1231, + "learning_rate": 2.9362488883341393e-05, + "loss": 0.1581, "step": 7810 }, { "epoch": 0.36, - "learning_rate": 1.9361023861984908e-05, - "loss": 0.1814, + "learning_rate": 2.9362020813505377e-05, + "loss": 0.1292, "step": 7815 }, { "epoch": 0.36, - "learning_rate": 1.9360555060709768e-05, - "loss": 0.2129, + "learning_rate": 2.9361552743669356e-05, + "loss": 0.2084, "step": 7820 }, { "epoch": 0.37, - "learning_rate": 1.9360086259434628e-05, - "loss": 0.185, + "learning_rate": 2.9361084673833336e-05, + "loss": 0.2552, "step": 7825 }, { "epoch": 0.37, - "learning_rate": 1.9359617458159488e-05, - "loss": 0.3591, + "learning_rate": 2.9360616603997316e-05, + "loss": 0.2811, "step": 7830 }, { "epoch": 0.37, - "learning_rate": 1.9359148656884347e-05, - "loss": 0.4372, + "learning_rate": 2.93601485341613e-05, + "loss": 0.2133, "step": 7835 }, { "epoch": 0.37, - "learning_rate": 1.9358679855609207e-05, - "loss": 0.3435, + "learning_rate": 2.935968046432528e-05, + "loss": 0.3318, "step": 7840 }, { "epoch": 0.37, - "learning_rate": 1.9358211054334067e-05, - "loss": 0.3974, + "learning_rate": 2.935921239448926e-05, + "loss": 0.4631, "step": 7845 }, { "epoch": 0.37, - "learning_rate": 1.935774225305893e-05, - "loss": 0.6837, + "learning_rate": 2.9358744324653242e-05, + "loss": 0.5992, "step": 7850 }, { "epoch": 0.37, - "learning_rate": 1.935727345178379e-05, - "loss": 0.3493, + "learning_rate": 2.935827625481722e-05, + "loss": 0.3598, "step": 7855 }, { "epoch": 0.37, - "learning_rate": 1.935680465050865e-05, - "loss": 0.1234, + "learning_rate": 2.9357808184981198e-05, + "loss": 0.1455, "step": 7860 }, { "epoch": 0.37, - "learning_rate": 1.935633584923351e-05, - "loss": 0.1565, + "learning_rate": 2.9357340115145178e-05, + "loss": 0.1451, "step": 7865 }, { "epoch": 0.37, - "learning_rate": 1.9355867047958374e-05, - "loss": 0.1613, + "learning_rate": 2.935687204530916e-05, + "loss": 0.1478, "step": 7870 }, { "epoch": 0.37, - "learning_rate": 1.9355398246683234e-05, - "loss": 0.2633, + "learning_rate": 2.935640397547314e-05, + "loss": 0.1385, "step": 7875 }, { "epoch": 0.37, - "learning_rate": 1.9354929445408094e-05, - "loss": 0.1702, + "learning_rate": 2.935593590563712e-05, + "loss": 0.2764, "step": 7880 }, { "epoch": 0.37, - "learning_rate": 1.9354460644132954e-05, - "loss": 0.3533, + "learning_rate": 2.93554678358011e-05, + "loss": 0.2472, "step": 7885 }, { "epoch": 0.37, - "learning_rate": 1.9353991842857814e-05, - "loss": 0.2938, + "learning_rate": 2.9354999765965084e-05, + "loss": 0.2704, "step": 7890 }, { "epoch": 0.37, - "learning_rate": 1.9353523041582673e-05, - "loss": 0.3936, + "learning_rate": 2.9354531696129064e-05, + "loss": 0.2523, "step": 7895 }, { "epoch": 0.37, - "learning_rate": 1.9353054240307537e-05, - "loss": 0.5828, + "learning_rate": 2.9354063626293044e-05, + "loss": 0.8368, "step": 7900 }, { "epoch": 0.37, - "learning_rate": 1.9352585439032397e-05, - "loss": 0.2816, + "learning_rate": 2.9353595556457027e-05, + "loss": 0.2962, "step": 7905 }, { "epoch": 0.37, - "learning_rate": 1.9352116637757257e-05, - "loss": 0.1281, + "learning_rate": 2.9353127486621007e-05, + "loss": 0.1155, "step": 7910 }, { "epoch": 0.37, - "learning_rate": 1.9351647836482117e-05, - "loss": 0.1108, + "learning_rate": 2.9352659416784986e-05, + "loss": 0.0999, "step": 7915 }, { "epoch": 0.37, - "learning_rate": 1.9351179035206977e-05, - "loss": 0.1505, + "learning_rate": 2.9352191346948963e-05, + "loss": 0.1697, "step": 7920 }, { "epoch": 0.37, - "learning_rate": 1.9350710233931836e-05, - "loss": 0.2456, + "learning_rate": 2.9351723277112946e-05, + "loss": 0.2106, "step": 7925 }, { "epoch": 0.37, - "learning_rate": 1.9350241432656696e-05, - "loss": 0.2419, + "learning_rate": 2.9351255207276926e-05, + "loss": 0.227, "step": 7930 }, { "epoch": 0.37, - "learning_rate": 1.934977263138156e-05, - "loss": 0.2355, + "learning_rate": 2.9350787137440906e-05, + "loss": 0.2881, "step": 7935 }, { "epoch": 0.37, - "learning_rate": 1.934930383010642e-05, - "loss": 0.3848, + "learning_rate": 2.9350319067604886e-05, + "loss": 0.4414, "step": 7940 }, { "epoch": 0.37, - "learning_rate": 1.934883502883128e-05, - "loss": 0.508, + "learning_rate": 2.934985099776887e-05, + "loss": 0.3858, "step": 7945 }, { "epoch": 0.37, - "learning_rate": 1.9348366227556143e-05, - "loss": 0.688, + "learning_rate": 2.934938292793285e-05, + "loss": 0.7588, "step": 7950 }, { "epoch": 0.37, - "learning_rate": 1.9347897426281003e-05, - "loss": 0.2389, + "learning_rate": 2.934891485809683e-05, + "loss": 0.245, "step": 7955 }, { "epoch": 0.37, - "learning_rate": 1.9347428625005863e-05, - "loss": 0.097, + "learning_rate": 2.9348446788260808e-05, + "loss": 0.1279, "step": 7960 }, { "epoch": 0.37, - "learning_rate": 1.9346959823730723e-05, - "loss": 0.1886, + "learning_rate": 2.934797871842479e-05, + "loss": 0.1447, "step": 7965 }, { "epoch": 0.37, - "learning_rate": 1.9346491022455583e-05, - "loss": 0.2037, + "learning_rate": 2.934751064858877e-05, + "loss": 0.2412, "step": 7970 }, { "epoch": 0.37, - "learning_rate": 1.9346022221180443e-05, - "loss": 0.1939, + "learning_rate": 2.934704257875275e-05, + "loss": 0.2052, "step": 7975 }, { "epoch": 0.37, - "learning_rate": 1.9345553419905302e-05, - "loss": 0.2666, + "learning_rate": 2.934657450891673e-05, + "loss": 0.2586, "step": 7980 }, { "epoch": 0.37, - "learning_rate": 1.9345084618630162e-05, - "loss": 0.2793, + "learning_rate": 2.934610643908071e-05, + "loss": 0.2186, "step": 7985 }, { "epoch": 0.37, - "learning_rate": 1.9344615817355026e-05, - "loss": 0.3058, + "learning_rate": 2.934563836924469e-05, + "loss": 0.3543, "step": 7990 }, { "epoch": 0.37, - "learning_rate": 1.9344147016079886e-05, - "loss": 0.3803, + "learning_rate": 2.934517029940867e-05, + "loss": 0.4752, "step": 7995 }, { "epoch": 0.37, - "learning_rate": 1.9343678214804746e-05, - "loss": 0.7639, + "learning_rate": 2.9344702229572654e-05, + "loss": 0.6421, "step": 8000 }, { "epoch": 0.37, - "learning_rate": 1.9343209413529606e-05, - "loss": 0.3895, + "learning_rate": 2.9344234159736633e-05, + "loss": 0.1856, "step": 8005 }, { "epoch": 0.37, - "learning_rate": 1.9342740612254465e-05, - "loss": 0.075, + "learning_rate": 2.9343766089900613e-05, + "loss": 0.0823, "step": 8010 }, { "epoch": 0.37, - "learning_rate": 1.934227181097933e-05, - "loss": 0.1631, + "learning_rate": 2.9343298020064593e-05, + "loss": 0.1514, "step": 8015 }, { "epoch": 0.37, - "learning_rate": 1.934180300970419e-05, - "loss": 0.2211, + "learning_rate": 2.9342829950228576e-05, + "loss": 0.2636, "step": 8020 }, { "epoch": 0.37, - "learning_rate": 1.934133420842905e-05, - "loss": 0.1265, + "learning_rate": 2.9342361880392556e-05, + "loss": 0.2093, "step": 8025 }, { "epoch": 0.37, - "learning_rate": 1.934086540715391e-05, - "loss": 0.2052, + "learning_rate": 2.9341893810556536e-05, + "loss": 0.1602, "step": 8030 }, { "epoch": 0.37, - "learning_rate": 1.9340396605878772e-05, - "loss": 0.2435, + "learning_rate": 2.934142574072052e-05, + "loss": 0.4032, "step": 8035 }, { "epoch": 0.38, - "learning_rate": 1.9339927804603632e-05, - "loss": 0.3957, + "learning_rate": 2.93409576708845e-05, + "loss": 0.3007, "step": 8040 }, { "epoch": 0.38, - "learning_rate": 1.9339459003328492e-05, - "loss": 0.3356, + "learning_rate": 2.9340489601048475e-05, + "loss": 0.4056, "step": 8045 }, { "epoch": 0.38, - "learning_rate": 1.9338990202053352e-05, - "loss": 0.7917, + "learning_rate": 2.9340021531212455e-05, + "loss": 0.7518, "step": 8050 }, { "epoch": 0.38, - "learning_rate": 1.933852140077821e-05, - "loss": 0.3205, + "learning_rate": 2.9339553461376438e-05, + "loss": 0.2775, "step": 8055 }, { "epoch": 0.38, - "learning_rate": 1.933805259950307e-05, - "loss": 0.1124, + "learning_rate": 2.9339085391540418e-05, + "loss": 0.1476, "step": 8060 }, { "epoch": 0.38, - "learning_rate": 1.933758379822793e-05, - "loss": 0.1538, + "learning_rate": 2.9338617321704398e-05, + "loss": 0.1085, "step": 8065 }, { "epoch": 0.38, - "learning_rate": 1.933711499695279e-05, - "loss": 0.2043, + "learning_rate": 2.9338149251868378e-05, + "loss": 0.2244, "step": 8070 }, { "epoch": 0.38, - "learning_rate": 1.933664619567765e-05, - "loss": 0.1682, + "learning_rate": 2.933768118203236e-05, + "loss": 0.1919, "step": 8075 }, { "epoch": 0.38, - "learning_rate": 1.9336177394402515e-05, - "loss": 0.2458, + "learning_rate": 2.933721311219634e-05, + "loss": 0.3301, "step": 8080 }, { "epoch": 0.38, - "learning_rate": 1.9335708593127375e-05, - "loss": 0.402, + "learning_rate": 2.933674504236032e-05, + "loss": 0.2832, "step": 8085 }, { "epoch": 0.38, - "learning_rate": 1.9335239791852235e-05, - "loss": 0.4126, + "learning_rate": 2.9336276972524304e-05, + "loss": 0.3272, "step": 8090 }, { "epoch": 0.38, - "learning_rate": 1.9334770990577098e-05, - "loss": 0.4594, + "learning_rate": 2.9335808902688284e-05, + "loss": 0.4913, "step": 8095 }, { "epoch": 0.38, - "learning_rate": 1.9334302189301958e-05, - "loss": 0.6692, + "learning_rate": 2.9335340832852263e-05, + "loss": 0.6859, "step": 8100 }, { "epoch": 0.38, - "learning_rate": 1.9333833388026818e-05, - "loss": 0.3356, + "learning_rate": 2.9334872763016243e-05, + "loss": 0.2242, "step": 8105 }, { "epoch": 0.38, - "learning_rate": 1.9333364586751678e-05, - "loss": 0.1183, + "learning_rate": 2.9334404693180223e-05, + "loss": 0.1239, "step": 8110 }, { "epoch": 0.38, - "learning_rate": 1.9332895785476538e-05, - "loss": 0.22, + "learning_rate": 2.9333936623344203e-05, + "loss": 0.1162, "step": 8115 }, { "epoch": 0.38, - "learning_rate": 1.9332426984201398e-05, - "loss": 0.1996, + "learning_rate": 2.9333468553508183e-05, + "loss": 0.1759, "step": 8120 }, { "epoch": 0.38, - "learning_rate": 1.9331958182926257e-05, - "loss": 0.273, + "learning_rate": 2.9333000483672163e-05, + "loss": 0.2751, "step": 8125 }, { "epoch": 0.38, - "learning_rate": 1.933148938165112e-05, - "loss": 0.2092, + "learning_rate": 2.9332532413836146e-05, + "loss": 0.3035, "step": 8130 }, { "epoch": 0.38, - "learning_rate": 1.933102058037598e-05, - "loss": 0.2448, + "learning_rate": 2.9332064344000126e-05, + "loss": 0.2707, "step": 8135 }, { "epoch": 0.38, - "learning_rate": 1.933055177910084e-05, - "loss": 0.2191, + "learning_rate": 2.9331596274164105e-05, + "loss": 0.3818, "step": 8140 }, { "epoch": 0.38, - "learning_rate": 1.93300829778257e-05, - "loss": 0.4742, + "learning_rate": 2.9331128204328085e-05, + "loss": 0.5258, "step": 8145 }, { "epoch": 0.38, - "learning_rate": 1.932961417655056e-05, - "loss": 0.8307, + "learning_rate": 2.933066013449207e-05, + "loss": 0.4677, "step": 8150 }, { "epoch": 0.38, - "learning_rate": 1.932914537527542e-05, - "loss": 0.2751, + "learning_rate": 2.9330192064656048e-05, + "loss": 0.3592, "step": 8155 }, { "epoch": 0.38, - "learning_rate": 1.9328676574000284e-05, - "loss": 0.176, + "learning_rate": 2.9329723994820028e-05, + "loss": 0.13, "step": 8160 }, { "epoch": 0.38, - "learning_rate": 1.9328207772725144e-05, - "loss": 0.1733, + "learning_rate": 2.932925592498401e-05, + "loss": 0.1378, "step": 8165 }, { "epoch": 0.38, - "learning_rate": 1.9327738971450004e-05, - "loss": 0.2006, + "learning_rate": 2.9328787855147988e-05, + "loss": 0.2196, "step": 8170 }, { "epoch": 0.38, - "learning_rate": 1.9327270170174867e-05, - "loss": 0.2388, + "learning_rate": 2.9328319785311967e-05, + "loss": 0.2147, "step": 8175 }, { "epoch": 0.38, - "learning_rate": 1.9326801368899727e-05, - "loss": 0.3035, + "learning_rate": 2.9327851715475947e-05, + "loss": 0.1999, "step": 8180 }, { "epoch": 0.38, - "learning_rate": 1.9326332567624587e-05, - "loss": 0.2985, + "learning_rate": 2.932738364563993e-05, + "loss": 0.3967, "step": 8185 }, { "epoch": 0.38, - "learning_rate": 1.9325863766349447e-05, - "loss": 0.3295, + "learning_rate": 2.932691557580391e-05, + "loss": 0.4154, "step": 8190 }, { "epoch": 0.38, - "learning_rate": 1.9325394965074307e-05, - "loss": 0.4557, + "learning_rate": 2.932644750596789e-05, + "loss": 0.3932, "step": 8195 }, { "epoch": 0.38, - "learning_rate": 1.9324926163799167e-05, - "loss": 0.5938, + "learning_rate": 2.932597943613187e-05, + "loss": 0.5204, "step": 8200 }, { "epoch": 0.38, - "learning_rate": 1.9324457362524027e-05, - "loss": 0.3562, + "learning_rate": 2.9325511366295853e-05, + "loss": 0.2393, "step": 8205 }, { "epoch": 0.38, - "learning_rate": 1.9323988561248887e-05, - "loss": 0.1529, + "learning_rate": 2.9325043296459833e-05, + "loss": 0.0981, "step": 8210 }, { "epoch": 0.38, - "learning_rate": 1.9323519759973746e-05, - "loss": 0.1482, + "learning_rate": 2.9324575226623813e-05, + "loss": 0.0835, "step": 8215 }, { "epoch": 0.38, - "learning_rate": 1.932305095869861e-05, - "loss": 0.2249, + "learning_rate": 2.9324107156787796e-05, + "loss": 0.1849, "step": 8220 }, { "epoch": 0.38, - "learning_rate": 1.932258215742347e-05, - "loss": 0.2227, + "learning_rate": 2.9323639086951776e-05, + "loss": 0.2397, "step": 8225 }, { "epoch": 0.38, - "learning_rate": 1.932211335614833e-05, - "loss": 0.2618, + "learning_rate": 2.9323171017115756e-05, + "loss": 0.1983, "step": 8230 }, { "epoch": 0.38, - "learning_rate": 1.932164455487319e-05, - "loss": 0.2611, + "learning_rate": 2.9322702947279732e-05, + "loss": 0.2821, "step": 8235 }, { "epoch": 0.38, - "learning_rate": 1.9321175753598053e-05, - "loss": 0.2406, + "learning_rate": 2.9322234877443715e-05, + "loss": 0.2998, "step": 8240 }, { "epoch": 0.38, - "learning_rate": 1.9320706952322913e-05, - "loss": 0.3629, + "learning_rate": 2.9321766807607695e-05, + "loss": 0.3615, "step": 8245 }, { "epoch": 0.38, - "learning_rate": 1.9320238151047773e-05, - "loss": 0.4992, + "learning_rate": 2.9321298737771675e-05, + "loss": 0.442, "step": 8250 }, { "epoch": 0.39, - "learning_rate": 1.9319769349772633e-05, - "loss": 0.3151, + "learning_rate": 2.9320830667935655e-05, + "loss": 0.3459, "step": 8255 }, { "epoch": 0.39, - "learning_rate": 1.9319300548497493e-05, - "loss": 0.1345, + "learning_rate": 2.9320362598099638e-05, + "loss": 0.0708, "step": 8260 }, { "epoch": 0.39, - "learning_rate": 1.9318831747222353e-05, - "loss": 0.1521, + "learning_rate": 2.9319894528263618e-05, + "loss": 0.128, "step": 8265 }, { "epoch": 0.39, - "learning_rate": 1.9318362945947216e-05, - "loss": 0.1733, + "learning_rate": 2.9319426458427598e-05, + "loss": 0.1367, "step": 8270 }, { "epoch": 0.39, - "learning_rate": 1.9317894144672076e-05, - "loss": 0.2129, + "learning_rate": 2.931895838859158e-05, + "loss": 0.1983, "step": 8275 }, { "epoch": 0.39, - "learning_rate": 1.9317425343396936e-05, - "loss": 0.2378, + "learning_rate": 2.931849031875556e-05, + "loss": 0.262, "step": 8280 }, { "epoch": 0.39, - "learning_rate": 1.9316956542121796e-05, - "loss": 0.2658, + "learning_rate": 2.931802224891954e-05, + "loss": 0.1638, "step": 8285 }, { "epoch": 0.39, - "learning_rate": 1.9316487740846656e-05, - "loss": 0.287, + "learning_rate": 2.931755417908352e-05, + "loss": 0.2613, "step": 8290 }, { "epoch": 0.39, - "learning_rate": 1.9316018939571516e-05, - "loss": 0.3836, + "learning_rate": 2.9317086109247503e-05, + "loss": 0.3276, "step": 8295 }, { "epoch": 0.39, - "learning_rate": 1.931555013829638e-05, - "loss": 0.6966, + "learning_rate": 2.931661803941148e-05, + "loss": 0.5973, "step": 8300 }, { "epoch": 0.39, - "learning_rate": 1.931508133702124e-05, - "loss": 0.3483, + "learning_rate": 2.931614996957546e-05, + "loss": 0.3419, "step": 8305 }, { "epoch": 0.39, - "learning_rate": 1.93146125357461e-05, - "loss": 0.1619, + "learning_rate": 2.931568189973944e-05, + "loss": 0.0987, "step": 8310 }, { "epoch": 0.39, - "learning_rate": 1.931414373447096e-05, - "loss": 0.1387, + "learning_rate": 2.9315213829903423e-05, + "loss": 0.1584, "step": 8315 }, { "epoch": 0.39, - "learning_rate": 1.9313674933195822e-05, - "loss": 0.1587, + "learning_rate": 2.9314745760067403e-05, + "loss": 0.1905, "step": 8320 }, { "epoch": 0.39, - "learning_rate": 1.9313206131920682e-05, - "loss": 0.1447, + "learning_rate": 2.9314277690231382e-05, + "loss": 0.1759, "step": 8325 }, { "epoch": 0.39, - "learning_rate": 1.9312737330645542e-05, - "loss": 0.2997, + "learning_rate": 2.9313809620395366e-05, + "loss": 0.2104, "step": 8330 }, { "epoch": 0.39, - "learning_rate": 1.9312268529370402e-05, - "loss": 0.2873, + "learning_rate": 2.9313341550559345e-05, + "loss": 0.2476, "step": 8335 }, { "epoch": 0.39, - "learning_rate": 1.9311799728095262e-05, - "loss": 0.3927, + "learning_rate": 2.9312873480723325e-05, + "loss": 0.2675, "step": 8340 }, { "epoch": 0.39, - "learning_rate": 1.931133092682012e-05, - "loss": 0.3392, + "learning_rate": 2.9312405410887305e-05, + "loss": 0.2632, "step": 8345 }, { "epoch": 0.39, - "learning_rate": 1.931086212554498e-05, - "loss": 0.3994, + "learning_rate": 2.9311937341051288e-05, + "loss": 0.5837, "step": 8350 }, { "epoch": 0.39, - "learning_rate": 1.931039332426984e-05, - "loss": 0.2752, + "learning_rate": 2.9311469271215268e-05, + "loss": 0.2755, "step": 8355 }, { "epoch": 0.39, - "learning_rate": 1.9309924522994705e-05, - "loss": 0.1263, + "learning_rate": 2.9311001201379244e-05, + "loss": 0.0873, "step": 8360 }, { "epoch": 0.39, - "learning_rate": 1.9309455721719565e-05, - "loss": 0.1493, + "learning_rate": 2.9310533131543224e-05, + "loss": 0.1184, "step": 8365 }, { "epoch": 0.39, - "learning_rate": 1.9308986920444425e-05, - "loss": 0.19, + "learning_rate": 2.9310065061707207e-05, + "loss": 0.161, "step": 8370 }, { "epoch": 0.39, - "learning_rate": 1.9308518119169285e-05, - "loss": 0.2094, + "learning_rate": 2.9309596991871187e-05, + "loss": 0.1826, "step": 8375 }, { "epoch": 0.39, - "learning_rate": 1.9308049317894148e-05, - "loss": 0.3174, + "learning_rate": 2.9309128922035167e-05, + "loss": 0.2666, "step": 8380 }, { "epoch": 0.39, - "learning_rate": 1.9307580516619008e-05, - "loss": 0.2557, + "learning_rate": 2.9308660852199147e-05, + "loss": 0.2212, "step": 8385 }, { "epoch": 0.39, - "learning_rate": 1.9307111715343868e-05, - "loss": 0.4019, + "learning_rate": 2.930819278236313e-05, + "loss": 0.3762, "step": 8390 }, { "epoch": 0.39, - "learning_rate": 1.9306642914068728e-05, - "loss": 0.3886, + "learning_rate": 2.930772471252711e-05, + "loss": 0.4746, "step": 8395 }, { "epoch": 0.39, - "learning_rate": 1.9306174112793588e-05, - "loss": 0.5243, + "learning_rate": 2.930725664269109e-05, + "loss": 0.8668, "step": 8400 }, { "epoch": 0.39, - "learning_rate": 1.9305705311518448e-05, - "loss": 0.2447, + "learning_rate": 2.9306788572855073e-05, + "loss": 0.3019, "step": 8405 }, { "epoch": 0.39, - "learning_rate": 1.930523651024331e-05, - "loss": 0.1114, + "learning_rate": 2.9306320503019053e-05, + "loss": 0.096, "step": 8410 }, { "epoch": 0.39, - "learning_rate": 1.930476770896817e-05, - "loss": 0.1295, + "learning_rate": 2.9305852433183033e-05, + "loss": 0.1645, "step": 8415 }, { "epoch": 0.39, - "learning_rate": 1.930429890769303e-05, - "loss": 0.2123, + "learning_rate": 2.9305384363347012e-05, + "loss": 0.181, "step": 8420 }, { "epoch": 0.39, - "learning_rate": 1.930383010641789e-05, - "loss": 0.2388, + "learning_rate": 2.9304916293510992e-05, + "loss": 0.1734, "step": 8425 }, { "epoch": 0.39, - "learning_rate": 1.930336130514275e-05, - "loss": 0.194, + "learning_rate": 2.9304448223674972e-05, + "loss": 0.2393, "step": 8430 }, { "epoch": 0.39, - "learning_rate": 1.930289250386761e-05, - "loss": 0.2023, + "learning_rate": 2.9303980153838952e-05, + "loss": 0.292, "step": 8435 }, { "epoch": 0.39, - "learning_rate": 1.930242370259247e-05, - "loss": 0.3366, + "learning_rate": 2.930351208400293e-05, + "loss": 0.2921, "step": 8440 }, { "epoch": 0.39, - "learning_rate": 1.9301954901317334e-05, - "loss": 0.4688, + "learning_rate": 2.9303044014166915e-05, + "loss": 0.5353, "step": 8445 }, { "epoch": 0.39, - "learning_rate": 1.9301486100042194e-05, - "loss": 0.5275, + "learning_rate": 2.9302575944330895e-05, + "loss": 0.707, "step": 8450 }, { "epoch": 0.39, - "learning_rate": 1.9301017298767054e-05, - "loss": 0.198, + "learning_rate": 2.9302107874494875e-05, + "loss": 0.2303, "step": 8455 }, { "epoch": 0.39, - "learning_rate": 1.9300548497491917e-05, - "loss": 0.1555, + "learning_rate": 2.9301639804658858e-05, + "loss": 0.1254, "step": 8460 }, { "epoch": 0.39, - "learning_rate": 1.9300079696216777e-05, - "loss": 0.1445, + "learning_rate": 2.9301171734822838e-05, + "loss": 0.1365, "step": 8465 }, { "epoch": 0.4, - "learning_rate": 1.9299610894941637e-05, - "loss": 0.1536, + "learning_rate": 2.9300703664986817e-05, + "loss": 0.0969, "step": 8470 }, { "epoch": 0.4, - "learning_rate": 1.9299142093666497e-05, - "loss": 0.2828, + "learning_rate": 2.9300235595150797e-05, + "loss": 0.1487, "step": 8475 }, { "epoch": 0.4, - "learning_rate": 1.9298673292391357e-05, - "loss": 0.2771, + "learning_rate": 2.929976752531478e-05, + "loss": 0.2461, "step": 8480 }, { "epoch": 0.4, - "learning_rate": 1.9298204491116217e-05, - "loss": 0.2566, + "learning_rate": 2.929929945547876e-05, + "loss": 0.3552, "step": 8485 }, { "epoch": 0.4, - "learning_rate": 1.9297735689841077e-05, - "loss": 0.2707, + "learning_rate": 2.9298831385642737e-05, + "loss": 0.3688, "step": 8490 }, { "epoch": 0.4, - "learning_rate": 1.9297266888565937e-05, - "loss": 0.3241, + "learning_rate": 2.9298363315806716e-05, + "loss": 0.3737, "step": 8495 }, { "epoch": 0.4, - "learning_rate": 1.92967980872908e-05, - "loss": 0.7655, + "learning_rate": 2.92978952459707e-05, + "loss": 0.4919, "step": 8500 }, { "epoch": 0.4, - "learning_rate": 1.929632928601566e-05, - "loss": 0.2513, + "learning_rate": 2.929742717613468e-05, + "loss": 0.24, "step": 8505 }, { "epoch": 0.4, - "learning_rate": 1.929586048474052e-05, - "loss": 0.1115, + "learning_rate": 2.929695910629866e-05, + "loss": 0.1171, "step": 8510 }, { "epoch": 0.4, - "learning_rate": 1.929539168346538e-05, - "loss": 0.1718, + "learning_rate": 2.9296491036462643e-05, + "loss": 0.0792, "step": 8515 }, { "epoch": 0.4, - "learning_rate": 1.929492288219024e-05, - "loss": 0.1965, + "learning_rate": 2.9296022966626622e-05, + "loss": 0.1682, "step": 8520 }, { "epoch": 0.4, - "learning_rate": 1.9294454080915103e-05, - "loss": 0.2988, + "learning_rate": 2.9295554896790602e-05, + "loss": 0.1375, "step": 8525 }, { "epoch": 0.4, - "learning_rate": 1.9293985279639963e-05, - "loss": 0.243, + "learning_rate": 2.9295086826954582e-05, + "loss": 0.2226, "step": 8530 }, { "epoch": 0.4, - "learning_rate": 1.9293516478364823e-05, - "loss": 0.2191, + "learning_rate": 2.9294618757118565e-05, + "loss": 0.2332, "step": 8535 }, { "epoch": 0.4, - "learning_rate": 1.9293047677089683e-05, - "loss": 0.3439, + "learning_rate": 2.9294150687282545e-05, + "loss": 0.2816, "step": 8540 }, { "epoch": 0.4, - "learning_rate": 1.9292578875814543e-05, - "loss": 0.422, + "learning_rate": 2.9293682617446525e-05, + "loss": 0.4733, "step": 8545 }, { "epoch": 0.4, - "learning_rate": 1.9292110074539406e-05, - "loss": 0.6352, + "learning_rate": 2.92932145476105e-05, + "loss": 0.6968, "step": 8550 }, { "epoch": 0.4, - "learning_rate": 1.9291641273264266e-05, - "loss": 0.2785, + "learning_rate": 2.9292746477774484e-05, + "loss": 0.3567, "step": 8555 }, { "epoch": 0.4, - "learning_rate": 1.9291172471989126e-05, - "loss": 0.1242, + "learning_rate": 2.9292278407938464e-05, + "loss": 0.1067, "step": 8560 }, { "epoch": 0.4, - "learning_rate": 1.9290703670713986e-05, - "loss": 0.1312, + "learning_rate": 2.9291810338102444e-05, + "loss": 0.0944, "step": 8565 }, { "epoch": 0.4, - "learning_rate": 1.9290234869438846e-05, - "loss": 0.1535, + "learning_rate": 2.9291342268266424e-05, + "loss": 0.1015, "step": 8570 }, { "epoch": 0.4, - "learning_rate": 1.9289766068163706e-05, - "loss": 0.2025, + "learning_rate": 2.9290874198430407e-05, + "loss": 0.172, "step": 8575 }, { "epoch": 0.4, - "learning_rate": 1.9289297266888566e-05, - "loss": 0.2068, + "learning_rate": 2.9290406128594387e-05, + "loss": 0.2308, "step": 8580 }, { "epoch": 0.4, - "learning_rate": 1.9288828465613426e-05, - "loss": 0.344, + "learning_rate": 2.9289938058758367e-05, + "loss": 0.2653, "step": 8585 }, { "epoch": 0.4, - "learning_rate": 1.928835966433829e-05, - "loss": 0.2893, + "learning_rate": 2.928946998892235e-05, + "loss": 0.2944, "step": 8590 }, { "epoch": 0.4, - "learning_rate": 1.928789086306315e-05, - "loss": 0.5662, + "learning_rate": 2.928900191908633e-05, + "loss": 0.5232, "step": 8595 }, { "epoch": 0.4, - "learning_rate": 1.928742206178801e-05, - "loss": 0.5428, + "learning_rate": 2.928853384925031e-05, + "loss": 0.8223, "step": 8600 }, { "epoch": 0.4, - "learning_rate": 1.9286953260512872e-05, - "loss": 0.227, + "learning_rate": 2.928806577941429e-05, + "loss": 0.3007, "step": 8605 }, { "epoch": 0.4, - "learning_rate": 1.9286484459237732e-05, - "loss": 0.1013, + "learning_rate": 2.9287597709578273e-05, + "loss": 0.1118, "step": 8610 }, { "epoch": 0.4, - "learning_rate": 1.9286015657962592e-05, - "loss": 0.1645, + "learning_rate": 2.928712963974225e-05, + "loss": 0.2052, "step": 8615 }, { "epoch": 0.4, - "learning_rate": 1.9285546856687452e-05, - "loss": 0.1979, + "learning_rate": 2.928666156990623e-05, + "loss": 0.2462, "step": 8620 }, { "epoch": 0.4, - "learning_rate": 1.9285078055412312e-05, - "loss": 0.2604, + "learning_rate": 2.928619350007021e-05, + "loss": 0.254, "step": 8625 }, { "epoch": 0.4, - "learning_rate": 1.9284609254137172e-05, - "loss": 0.2365, + "learning_rate": 2.9285725430234192e-05, + "loss": 0.1789, "step": 8630 }, { "epoch": 0.4, - "learning_rate": 1.9284140452862032e-05, - "loss": 0.2302, + "learning_rate": 2.928525736039817e-05, + "loss": 0.2189, "step": 8635 }, { "epoch": 0.4, - "learning_rate": 1.9283671651586895e-05, - "loss": 0.2653, + "learning_rate": 2.928478929056215e-05, + "loss": 0.3401, "step": 8640 }, { "epoch": 0.4, - "learning_rate": 1.9283202850311755e-05, - "loss": 0.3308, + "learning_rate": 2.9284321220726135e-05, + "loss": 0.2369, "step": 8645 }, { "epoch": 0.4, - "learning_rate": 1.9282734049036615e-05, - "loss": 0.5864, + "learning_rate": 2.9283853150890115e-05, + "loss": 0.6526, "step": 8650 }, { "epoch": 0.4, - "learning_rate": 1.9282265247761475e-05, - "loss": 0.3617, + "learning_rate": 2.9283385081054094e-05, + "loss": 0.2264, "step": 8655 }, { "epoch": 0.4, - "learning_rate": 1.9281796446486335e-05, - "loss": 0.1467, + "learning_rate": 2.9282917011218074e-05, + "loss": 0.098, "step": 8660 }, { "epoch": 0.4, - "learning_rate": 1.9281327645211195e-05, - "loss": 0.1168, + "learning_rate": 2.9282448941382057e-05, + "loss": 0.1507, "step": 8665 }, { "epoch": 0.4, - "learning_rate": 1.9280858843936058e-05, - "loss": 0.2285, + "learning_rate": 2.9281980871546037e-05, + "loss": 0.1024, "step": 8670 }, { "epoch": 0.4, - "learning_rate": 1.9280390042660918e-05, - "loss": 0.2232, + "learning_rate": 2.9281512801710017e-05, + "loss": 0.1547, "step": 8675 }, { "epoch": 0.41, - "learning_rate": 1.9279921241385778e-05, - "loss": 0.1808, + "learning_rate": 2.9281044731873993e-05, + "loss": 0.2488, "step": 8680 }, { "epoch": 0.41, - "learning_rate": 1.927945244011064e-05, - "loss": 0.2971, + "learning_rate": 2.9280576662037977e-05, + "loss": 0.3293, "step": 8685 }, { "epoch": 0.41, - "learning_rate": 1.92789836388355e-05, - "loss": 0.2418, + "learning_rate": 2.9280108592201956e-05, + "loss": 0.2928, "step": 8690 }, { "epoch": 0.41, - "learning_rate": 1.927851483756036e-05, - "loss": 0.3725, + "learning_rate": 2.9279640522365936e-05, + "loss": 0.4896, "step": 8695 }, { "epoch": 0.41, - "learning_rate": 1.927804603628522e-05, - "loss": 0.6266, + "learning_rate": 2.927917245252992e-05, + "loss": 0.451, "step": 8700 }, { "epoch": 0.41, - "learning_rate": 1.927757723501008e-05, - "loss": 0.3189, + "learning_rate": 2.92787043826939e-05, + "loss": 0.2863, "step": 8705 }, { "epoch": 0.41, - "learning_rate": 1.927710843373494e-05, - "loss": 0.0855, + "learning_rate": 2.927823631285788e-05, + "loss": 0.1178, "step": 8710 }, { "epoch": 0.41, - "learning_rate": 1.92766396324598e-05, - "loss": 0.1166, + "learning_rate": 2.927776824302186e-05, + "loss": 0.1274, "step": 8715 }, { "epoch": 0.41, - "learning_rate": 1.927617083118466e-05, - "loss": 0.2014, + "learning_rate": 2.9277300173185842e-05, + "loss": 0.1832, "step": 8720 }, { "epoch": 0.41, - "learning_rate": 1.927570202990952e-05, - "loss": 0.257, + "learning_rate": 2.9276832103349822e-05, + "loss": 0.2102, "step": 8725 }, { "epoch": 0.41, - "learning_rate": 1.9275233228634384e-05, - "loss": 0.3014, + "learning_rate": 2.9276364033513802e-05, + "loss": 0.2072, "step": 8730 }, { "epoch": 0.41, - "learning_rate": 1.9274764427359244e-05, - "loss": 0.2917, + "learning_rate": 2.927589596367778e-05, + "loss": 0.4095, "step": 8735 }, { "epoch": 0.41, - "learning_rate": 1.9274295626084104e-05, - "loss": 0.3215, + "learning_rate": 2.927542789384176e-05, + "loss": 0.2432, "step": 8740 }, { "epoch": 0.41, - "learning_rate": 1.9273826824808964e-05, - "loss": 0.3349, + "learning_rate": 2.927495982400574e-05, + "loss": 0.3224, "step": 8745 }, { "epoch": 0.41, - "learning_rate": 1.9273358023533827e-05, - "loss": 0.5432, + "learning_rate": 2.927449175416972e-05, + "loss": 0.5451, "step": 8750 }, { "epoch": 0.41, - "learning_rate": 1.9272889222258687e-05, - "loss": 0.3159, + "learning_rate": 2.92740236843337e-05, + "loss": 0.175, "step": 8755 }, { "epoch": 0.41, - "learning_rate": 1.9272420420983547e-05, - "loss": 0.1048, + "learning_rate": 2.9273555614497684e-05, + "loss": 0.1485, "step": 8760 }, { "epoch": 0.41, - "learning_rate": 1.9271951619708407e-05, - "loss": 0.1627, + "learning_rate": 2.9273087544661664e-05, + "loss": 0.157, "step": 8765 }, { "epoch": 0.41, - "learning_rate": 1.9271482818433267e-05, - "loss": 0.1638, + "learning_rate": 2.9272619474825644e-05, + "loss": 0.174, "step": 8770 }, { "epoch": 0.41, - "learning_rate": 1.9271014017158127e-05, - "loss": 0.1727, + "learning_rate": 2.9272151404989627e-05, + "loss": 0.1748, "step": 8775 }, { "epoch": 0.41, - "learning_rate": 1.927054521588299e-05, - "loss": 0.1882, + "learning_rate": 2.9271683335153607e-05, + "loss": 0.2344, "step": 8780 }, { "epoch": 0.41, - "learning_rate": 1.927007641460785e-05, - "loss": 0.214, + "learning_rate": 2.9271215265317587e-05, + "loss": 0.2776, "step": 8785 }, { "epoch": 0.41, - "learning_rate": 1.926960761333271e-05, - "loss": 0.2846, + "learning_rate": 2.9270747195481566e-05, + "loss": 0.3259, "step": 8790 }, { "epoch": 0.41, - "learning_rate": 1.926913881205757e-05, - "loss": 0.4149, + "learning_rate": 2.927027912564555e-05, + "loss": 0.3422, "step": 8795 }, { "epoch": 0.41, - "learning_rate": 1.926867001078243e-05, - "loss": 0.5301, + "learning_rate": 2.926981105580953e-05, + "loss": 0.5633, "step": 8800 }, { "epoch": 0.41, - "learning_rate": 1.926820120950729e-05, - "loss": 0.2375, + "learning_rate": 2.9269342985973506e-05, + "loss": 0.2588, "step": 8805 }, { "epoch": 0.41, - "learning_rate": 1.9267732408232153e-05, - "loss": 0.1747, + "learning_rate": 2.9268874916137486e-05, + "loss": 0.172, "step": 8810 }, { "epoch": 0.41, - "learning_rate": 1.9267263606957013e-05, - "loss": 0.1291, + "learning_rate": 2.926840684630147e-05, + "loss": 0.1366, "step": 8815 }, { "epoch": 0.41, - "learning_rate": 1.9266794805681873e-05, - "loss": 0.1782, + "learning_rate": 2.926793877646545e-05, + "loss": 0.2007, "step": 8820 }, { "epoch": 0.41, - "learning_rate": 1.9266326004406736e-05, - "loss": 0.1866, + "learning_rate": 2.926747070662943e-05, + "loss": 0.2308, "step": 8825 }, { "epoch": 0.41, - "learning_rate": 1.9265857203131596e-05, - "loss": 0.2627, + "learning_rate": 2.926700263679341e-05, + "loss": 0.2118, "step": 8830 }, { "epoch": 0.41, - "learning_rate": 1.9265388401856456e-05, - "loss": 0.3373, + "learning_rate": 2.926653456695739e-05, + "loss": 0.3322, "step": 8835 }, { "epoch": 0.41, - "learning_rate": 1.9264919600581316e-05, - "loss": 0.3501, + "learning_rate": 2.926606649712137e-05, + "loss": 0.3329, "step": 8840 }, { "epoch": 0.41, - "learning_rate": 1.9264450799306176e-05, - "loss": 0.3043, + "learning_rate": 2.926559842728535e-05, + "loss": 0.346, "step": 8845 }, { "epoch": 0.41, - "learning_rate": 1.9263981998031036e-05, - "loss": 0.5005, + "learning_rate": 2.9265130357449334e-05, + "loss": 0.7412, "step": 8850 }, { "epoch": 0.41, - "learning_rate": 1.9263513196755896e-05, - "loss": 0.2497, + "learning_rate": 2.9264662287613314e-05, + "loss": 0.2103, "step": 8855 }, { "epoch": 0.41, - "learning_rate": 1.9263044395480756e-05, - "loss": 0.1362, + "learning_rate": 2.9264194217777294e-05, + "loss": 0.1274, "step": 8860 }, { "epoch": 0.41, - "learning_rate": 1.9262575594205616e-05, - "loss": 0.1199, + "learning_rate": 2.9263726147941274e-05, + "loss": 0.1788, "step": 8865 }, { "epoch": 0.41, - "learning_rate": 1.9262106792930476e-05, - "loss": 0.1285, + "learning_rate": 2.9263258078105254e-05, + "loss": 0.1556, "step": 8870 }, { "epoch": 0.41, - "learning_rate": 1.926163799165534e-05, - "loss": 0.1813, + "learning_rate": 2.9262790008269233e-05, + "loss": 0.18, "step": 8875 }, { "epoch": 0.41, - "learning_rate": 1.92611691903802e-05, - "loss": 0.1687, + "learning_rate": 2.9262321938433213e-05, + "loss": 0.1629, "step": 8880 }, { "epoch": 0.41, - "learning_rate": 1.926070038910506e-05, - "loss": 0.2707, + "learning_rate": 2.9261853868597196e-05, + "loss": 0.2814, "step": 8885 }, { "epoch": 0.41, - "learning_rate": 1.9260231587829922e-05, - "loss": 0.3641, + "learning_rate": 2.9261385798761176e-05, + "loss": 0.4083, "step": 8890 }, { "epoch": 0.42, - "learning_rate": 1.9259762786554782e-05, - "loss": 0.4102, + "learning_rate": 2.9260917728925156e-05, + "loss": 0.2613, "step": 8895 }, { "epoch": 0.42, - "learning_rate": 1.9259293985279642e-05, - "loss": 0.5151, + "learning_rate": 2.9260449659089136e-05, + "loss": 0.4846, "step": 8900 }, { "epoch": 0.42, - "learning_rate": 1.9258825184004502e-05, - "loss": 0.3642, + "learning_rate": 2.925998158925312e-05, + "loss": 0.2465, "step": 8905 }, { "epoch": 0.42, - "learning_rate": 1.9258356382729362e-05, - "loss": 0.102, + "learning_rate": 2.92595135194171e-05, + "loss": 0.1036, "step": 8910 }, { "epoch": 0.42, - "learning_rate": 1.9257887581454222e-05, - "loss": 0.1298, + "learning_rate": 2.925904544958108e-05, + "loss": 0.1551, "step": 8915 }, { "epoch": 0.42, - "learning_rate": 1.9257418780179085e-05, - "loss": 0.1646, + "learning_rate": 2.925857737974506e-05, + "loss": 0.1265, "step": 8920 }, { "epoch": 0.42, - "learning_rate": 1.9256949978903945e-05, - "loss": 0.2703, + "learning_rate": 2.9258109309909042e-05, + "loss": 0.2116, "step": 8925 }, { "epoch": 0.42, - "learning_rate": 1.9256481177628805e-05, - "loss": 0.2339, + "learning_rate": 2.9257641240073018e-05, + "loss": 0.3118, "step": 8930 }, { "epoch": 0.42, - "learning_rate": 1.9256012376353665e-05, - "loss": 0.2508, + "learning_rate": 2.9257173170236998e-05, + "loss": 0.4265, "step": 8935 }, { "epoch": 0.42, - "learning_rate": 1.9255543575078525e-05, - "loss": 0.2826, + "learning_rate": 2.925670510040098e-05, + "loss": 0.3197, "step": 8940 }, { "epoch": 0.42, - "learning_rate": 1.9255074773803385e-05, - "loss": 0.4002, + "learning_rate": 2.925623703056496e-05, + "loss": 0.2795, "step": 8945 }, { "epoch": 0.42, - "learning_rate": 1.9254605972528245e-05, - "loss": 0.5675, + "learning_rate": 2.925576896072894e-05, + "loss": 0.5113, "step": 8950 }, { "epoch": 0.42, - "learning_rate": 1.9254137171253108e-05, - "loss": 0.2835, + "learning_rate": 2.925530089089292e-05, + "loss": 0.3283, "step": 8955 }, { "epoch": 0.42, - "learning_rate": 1.9253668369977968e-05, - "loss": 0.0916, + "learning_rate": 2.9254832821056904e-05, + "loss": 0.0701, "step": 8960 }, { "epoch": 0.42, - "learning_rate": 1.9253199568702828e-05, - "loss": 0.1194, + "learning_rate": 2.9254364751220884e-05, + "loss": 0.1698, "step": 8965 }, { "epoch": 0.42, - "learning_rate": 1.925273076742769e-05, - "loss": 0.1408, + "learning_rate": 2.9253896681384864e-05, + "loss": 0.1525, "step": 8970 }, { "epoch": 0.42, - "learning_rate": 1.925226196615255e-05, - "loss": 0.1439, + "learning_rate": 2.9253428611548843e-05, + "loss": 0.1762, "step": 8975 }, { "epoch": 0.42, - "learning_rate": 1.925179316487741e-05, - "loss": 0.2504, + "learning_rate": 2.9252960541712827e-05, + "loss": 0.2388, "step": 8980 }, { "epoch": 0.42, - "learning_rate": 1.925132436360227e-05, - "loss": 0.3225, + "learning_rate": 2.9252492471876806e-05, + "loss": 0.3278, "step": 8985 }, { "epoch": 0.42, - "learning_rate": 1.925085556232713e-05, - "loss": 0.3019, + "learning_rate": 2.9252024402040786e-05, + "loss": 0.3177, "step": 8990 }, { "epoch": 0.42, - "learning_rate": 1.925038676105199e-05, - "loss": 0.5128, + "learning_rate": 2.9251556332204763e-05, + "loss": 0.4042, "step": 8995 }, { "epoch": 0.42, - "learning_rate": 1.924991795977685e-05, - "loss": 0.744, + "learning_rate": 2.9251088262368746e-05, + "loss": 0.6997, "step": 9000 }, { "epoch": 0.42, - "learning_rate": 1.924944915850171e-05, - "loss": 0.3016, + "learning_rate": 2.9250620192532726e-05, + "loss": 0.2166, "step": 9005 }, { "epoch": 0.42, - "learning_rate": 1.9248980357226574e-05, - "loss": 0.1325, + "learning_rate": 2.9250152122696705e-05, + "loss": 0.087, "step": 9010 }, { "epoch": 0.42, - "learning_rate": 1.9248511555951434e-05, - "loss": 0.125, + "learning_rate": 2.924968405286069e-05, + "loss": 0.1007, "step": 9015 }, { "epoch": 0.42, - "learning_rate": 1.9248042754676294e-05, - "loss": 0.1749, + "learning_rate": 2.924921598302467e-05, + "loss": 0.1181, "step": 9020 }, { "epoch": 0.42, - "learning_rate": 1.9247573953401154e-05, - "loss": 0.2229, + "learning_rate": 2.9248747913188648e-05, + "loss": 0.2034, "step": 9025 }, { "epoch": 0.42, - "learning_rate": 1.9247105152126014e-05, - "loss": 0.1848, + "learning_rate": 2.9248279843352628e-05, + "loss": 0.1965, "step": 9030 }, { "epoch": 0.42, - "learning_rate": 1.9246636350850877e-05, - "loss": 0.2923, + "learning_rate": 2.924781177351661e-05, + "loss": 0.2305, "step": 9035 }, { "epoch": 0.42, - "learning_rate": 1.9246167549575737e-05, - "loss": 0.3329, + "learning_rate": 2.924734370368059e-05, + "loss": 0.3461, "step": 9040 }, { "epoch": 0.42, - "learning_rate": 1.9245698748300597e-05, - "loss": 0.3743, + "learning_rate": 2.924687563384457e-05, + "loss": 0.4492, "step": 9045 }, { "epoch": 0.42, - "learning_rate": 1.9245229947025457e-05, - "loss": 0.6587, + "learning_rate": 2.924640756400855e-05, + "loss": 0.5317, "step": 9050 }, { "epoch": 0.42, - "learning_rate": 1.9244761145750317e-05, - "loss": 0.2159, + "learning_rate": 2.924593949417253e-05, + "loss": 0.2327, "step": 9055 }, { "epoch": 0.42, - "learning_rate": 1.924429234447518e-05, - "loss": 0.1817, + "learning_rate": 2.924547142433651e-05, + "loss": 0.1141, "step": 9060 }, { "epoch": 0.42, - "learning_rate": 1.924382354320004e-05, - "loss": 0.1316, + "learning_rate": 2.924500335450049e-05, + "loss": 0.2636, "step": 9065 }, { "epoch": 0.42, - "learning_rate": 1.92433547419249e-05, - "loss": 0.1602, + "learning_rate": 2.9244535284664473e-05, + "loss": 0.1519, "step": 9070 }, { "epoch": 0.42, - "learning_rate": 1.924288594064976e-05, - "loss": 0.1972, + "learning_rate": 2.9244067214828453e-05, + "loss": 0.2387, "step": 9075 }, { "epoch": 0.42, - "learning_rate": 1.924241713937462e-05, - "loss": 0.2425, + "learning_rate": 2.9243599144992433e-05, + "loss": 0.3108, "step": 9080 }, { "epoch": 0.42, - "learning_rate": 1.924194833809948e-05, - "loss": 0.3043, + "learning_rate": 2.9243131075156413e-05, + "loss": 0.341, "step": 9085 }, { "epoch": 0.42, - "learning_rate": 1.924147953682434e-05, - "loss": 0.3386, + "learning_rate": 2.9242663005320396e-05, + "loss": 0.3498, "step": 9090 }, { "epoch": 0.42, - "learning_rate": 1.92410107355492e-05, - "loss": 0.5037, + "learning_rate": 2.9242194935484376e-05, + "loss": 0.3984, "step": 9095 }, { "epoch": 0.42, - "learning_rate": 1.9240541934274063e-05, - "loss": 0.8734, + "learning_rate": 2.9241726865648356e-05, + "loss": 0.4773, "step": 9100 }, { "epoch": 0.42, - "learning_rate": 1.9240073132998923e-05, - "loss": 0.34, + "learning_rate": 2.9241258795812336e-05, + "loss": 0.2201, "step": 9105 }, { "epoch": 0.43, - "learning_rate": 1.9239604331723783e-05, - "loss": 0.0871, + "learning_rate": 2.924079072597632e-05, + "loss": 0.1281, "step": 9110 }, { "epoch": 0.43, - "learning_rate": 1.9239135530448646e-05, - "loss": 0.1388, + "learning_rate": 2.92403226561403e-05, + "loss": 0.1381, "step": 9115 }, { "epoch": 0.43, - "learning_rate": 1.9238666729173506e-05, - "loss": 0.1693, + "learning_rate": 2.9239854586304275e-05, + "loss": 0.113, "step": 9120 }, { "epoch": 0.43, - "learning_rate": 1.9238197927898366e-05, - "loss": 0.2853, + "learning_rate": 2.9239386516468258e-05, + "loss": 0.1503, "step": 9125 }, { "epoch": 0.43, - "learning_rate": 1.9237729126623226e-05, - "loss": 0.313, + "learning_rate": 2.9238918446632238e-05, + "loss": 0.2912, "step": 9130 }, { "epoch": 0.43, - "learning_rate": 1.9237260325348086e-05, - "loss": 0.2605, + "learning_rate": 2.9238450376796218e-05, + "loss": 0.2596, "step": 9135 }, { "epoch": 0.43, - "learning_rate": 1.9236791524072946e-05, - "loss": 0.2986, + "learning_rate": 2.9237982306960198e-05, + "loss": 0.3625, "step": 9140 }, { "epoch": 0.43, - "learning_rate": 1.9236322722797806e-05, - "loss": 0.4476, + "learning_rate": 2.923751423712418e-05, + "loss": 0.2824, "step": 9145 }, { "epoch": 0.43, - "learning_rate": 1.923585392152267e-05, - "loss": 0.6561, + "learning_rate": 2.923704616728816e-05, + "loss": 0.8002, "step": 9150 }, { "epoch": 0.43, - "learning_rate": 1.923538512024753e-05, - "loss": 0.3252, + "learning_rate": 2.923657809745214e-05, + "loss": 0.2374, "step": 9155 }, { "epoch": 0.43, - "learning_rate": 1.923491631897239e-05, - "loss": 0.0688, + "learning_rate": 2.923611002761612e-05, + "loss": 0.1549, "step": 9160 }, { "epoch": 0.43, - "learning_rate": 1.923444751769725e-05, - "loss": 0.1332, + "learning_rate": 2.9235641957780104e-05, + "loss": 0.0864, "step": 9165 }, { "epoch": 0.43, - "learning_rate": 1.923397871642211e-05, - "loss": 0.1325, + "learning_rate": 2.9235173887944083e-05, + "loss": 0.1367, "step": 9170 }, { "epoch": 0.43, - "learning_rate": 1.9233509915146972e-05, - "loss": 0.127, + "learning_rate": 2.9234705818108063e-05, + "loss": 0.1108, "step": 9175 }, { "epoch": 0.43, - "learning_rate": 1.9233041113871832e-05, - "loss": 0.1931, + "learning_rate": 2.9234237748272043e-05, + "loss": 0.1935, "step": 9180 }, { "epoch": 0.43, - "learning_rate": 1.9232572312596692e-05, - "loss": 0.1826, + "learning_rate": 2.9233769678436023e-05, + "loss": 0.1717, "step": 9185 }, { "epoch": 0.43, - "learning_rate": 1.9232103511321552e-05, - "loss": 0.3144, + "learning_rate": 2.9233301608600003e-05, + "loss": 0.2344, "step": 9190 }, { "epoch": 0.43, - "learning_rate": 1.9231634710046412e-05, - "loss": 0.3538, + "learning_rate": 2.9232833538763982e-05, + "loss": 0.2817, "step": 9195 }, { "epoch": 0.43, - "learning_rate": 1.9231165908771275e-05, - "loss": 0.5988, + "learning_rate": 2.9232365468927966e-05, + "loss": 0.6679, "step": 9200 }, { "epoch": 0.43, - "learning_rate": 1.9230697107496135e-05, - "loss": 0.3227, + "learning_rate": 2.9231897399091945e-05, + "loss": 0.2419, "step": 9205 }, { "epoch": 0.43, - "learning_rate": 1.9230228306220995e-05, - "loss": 0.1147, + "learning_rate": 2.9231429329255925e-05, + "loss": 0.1268, "step": 9210 }, { "epoch": 0.43, - "learning_rate": 1.9229759504945855e-05, - "loss": 0.133, + "learning_rate": 2.9230961259419905e-05, + "loss": 0.1094, "step": 9215 }, { "epoch": 0.43, - "learning_rate": 1.9229290703670715e-05, - "loss": 0.2695, + "learning_rate": 2.9230493189583888e-05, + "loss": 0.1455, "step": 9220 }, { "epoch": 0.43, - "learning_rate": 1.9228821902395575e-05, - "loss": 0.2266, + "learning_rate": 2.9230025119747868e-05, + "loss": 0.2385, "step": 9225 }, { "epoch": 0.43, - "learning_rate": 1.9228353101120435e-05, - "loss": 0.216, + "learning_rate": 2.9229557049911848e-05, + "loss": 0.1942, "step": 9230 }, { "epoch": 0.43, - "learning_rate": 1.9227884299845295e-05, - "loss": 0.3179, + "learning_rate": 2.9229088980075828e-05, + "loss": 0.2294, "step": 9235 }, { "epoch": 0.43, - "learning_rate": 1.9227415498570158e-05, - "loss": 0.2371, + "learning_rate": 2.922862091023981e-05, + "loss": 0.2644, "step": 9240 }, { "epoch": 0.43, - "learning_rate": 1.9226946697295018e-05, - "loss": 0.4098, + "learning_rate": 2.9228152840403787e-05, + "loss": 0.2645, "step": 9245 }, { "epoch": 0.43, - "learning_rate": 1.9226477896019878e-05, - "loss": 0.533, + "learning_rate": 2.9227684770567767e-05, + "loss": 0.503, "step": 9250 }, { "epoch": 0.43, - "learning_rate": 1.922600909474474e-05, - "loss": 0.2309, + "learning_rate": 2.922721670073175e-05, + "loss": 0.3093, "step": 9255 }, { "epoch": 0.43, - "learning_rate": 1.92255402934696e-05, - "loss": 0.1296, + "learning_rate": 2.922674863089573e-05, + "loss": 0.0982, "step": 9260 }, { "epoch": 0.43, - "learning_rate": 1.922507149219446e-05, - "loss": 0.1396, + "learning_rate": 2.922628056105971e-05, + "loss": 0.0931, "step": 9265 }, { "epoch": 0.43, - "learning_rate": 1.922460269091932e-05, - "loss": 0.2446, + "learning_rate": 2.922581249122369e-05, + "loss": 0.2207, "step": 9270 }, { "epoch": 0.43, - "learning_rate": 1.922413388964418e-05, - "loss": 0.1899, + "learning_rate": 2.9225344421387673e-05, + "loss": 0.2538, "step": 9275 }, { "epoch": 0.43, - "learning_rate": 1.922366508836904e-05, - "loss": 0.285, + "learning_rate": 2.9224876351551653e-05, + "loss": 0.2333, "step": 9280 }, { "epoch": 0.43, - "learning_rate": 1.92231962870939e-05, - "loss": 0.28, + "learning_rate": 2.9224408281715633e-05, + "loss": 0.1845, "step": 9285 }, { "epoch": 0.43, - "learning_rate": 1.9222727485818764e-05, - "loss": 0.2611, + "learning_rate": 2.9223940211879612e-05, + "loss": 0.2484, "step": 9290 }, { "epoch": 0.43, - "learning_rate": 1.9222258684543624e-05, - "loss": 0.424, + "learning_rate": 2.9223472142043596e-05, + "loss": 0.3841, "step": 9295 }, { "epoch": 0.43, - "learning_rate": 1.9221789883268484e-05, - "loss": 0.615, + "learning_rate": 2.9223004072207576e-05, + "loss": 0.5719, "step": 9300 }, { "epoch": 0.43, - "learning_rate": 1.9221321081993344e-05, - "loss": 0.2705, + "learning_rate": 2.9222536002371555e-05, + "loss": 0.2552, "step": 9305 }, { "epoch": 0.43, - "learning_rate": 1.9220852280718204e-05, - "loss": 0.0816, + "learning_rate": 2.9222067932535535e-05, + "loss": 0.0922, "step": 9310 }, { "epoch": 0.43, - "learning_rate": 1.9220383479443064e-05, - "loss": 0.1157, + "learning_rate": 2.9221599862699515e-05, + "loss": 0.1027, "step": 9315 }, { "epoch": 0.43, - "learning_rate": 1.9219914678167927e-05, - "loss": 0.1803, + "learning_rate": 2.9221131792863495e-05, + "loss": 0.2423, "step": 9320 }, { "epoch": 0.44, - "learning_rate": 1.9219445876892787e-05, - "loss": 0.1273, + "learning_rate": 2.9220663723027475e-05, + "loss": 0.2209, "step": 9325 }, { "epoch": 0.44, - "learning_rate": 1.9218977075617647e-05, - "loss": 0.269, + "learning_rate": 2.9220195653191458e-05, + "loss": 0.1603, "step": 9330 }, { "epoch": 0.44, - "learning_rate": 1.921850827434251e-05, - "loss": 0.1694, + "learning_rate": 2.9219727583355438e-05, + "loss": 0.2639, "step": 9335 }, { "epoch": 0.44, - "learning_rate": 1.921803947306737e-05, - "loss": 0.303, + "learning_rate": 2.9219259513519417e-05, + "loss": 0.3166, "step": 9340 }, { "epoch": 0.44, - "learning_rate": 1.921757067179223e-05, - "loss": 0.4144, + "learning_rate": 2.9218791443683397e-05, + "loss": 0.3233, "step": 9345 }, { "epoch": 0.44, - "learning_rate": 1.921710187051709e-05, - "loss": 0.4884, + "learning_rate": 2.921832337384738e-05, + "loss": 0.6509, "step": 9350 }, { "epoch": 0.44, - "learning_rate": 1.921663306924195e-05, - "loss": 0.2585, + "learning_rate": 2.921785530401136e-05, + "loss": 0.2402, "step": 9355 }, { "epoch": 0.44, - "learning_rate": 1.921616426796681e-05, - "loss": 0.0678, + "learning_rate": 2.921738723417534e-05, + "loss": 0.0998, "step": 9360 }, { "epoch": 0.44, - "learning_rate": 1.921569546669167e-05, - "loss": 0.1916, + "learning_rate": 2.921691916433932e-05, + "loss": 0.121, "step": 9365 }, { "epoch": 0.44, - "learning_rate": 1.921522666541653e-05, - "loss": 0.1597, + "learning_rate": 2.92164510945033e-05, + "loss": 0.1643, "step": 9370 }, { "epoch": 0.44, - "learning_rate": 1.921475786414139e-05, - "loss": 0.2067, + "learning_rate": 2.921598302466728e-05, + "loss": 0.1153, "step": 9375 }, { "epoch": 0.44, - "learning_rate": 1.921428906286625e-05, - "loss": 0.1931, + "learning_rate": 2.921551495483126e-05, + "loss": 0.2076, "step": 9380 }, { "epoch": 0.44, - "learning_rate": 1.9213820261591113e-05, - "loss": 0.427, + "learning_rate": 2.9215046884995243e-05, + "loss": 0.2147, "step": 9385 }, { "epoch": 0.44, - "learning_rate": 1.9213351460315973e-05, - "loss": 0.3432, + "learning_rate": 2.9214578815159222e-05, + "loss": 0.3021, "step": 9390 }, { "epoch": 0.44, - "learning_rate": 1.9212882659040833e-05, - "loss": 0.3622, + "learning_rate": 2.9214110745323202e-05, + "loss": 0.4945, "step": 9395 }, { "epoch": 0.44, - "learning_rate": 1.9212413857765696e-05, - "loss": 0.4917, + "learning_rate": 2.9213642675487182e-05, + "loss": 0.7701, "step": 9400 }, { "epoch": 0.44, - "learning_rate": 1.9211945056490556e-05, - "loss": 0.2831, + "learning_rate": 2.9213174605651165e-05, + "loss": 0.2791, "step": 9405 }, { "epoch": 0.44, - "learning_rate": 1.9211476255215416e-05, - "loss": 0.1006, + "learning_rate": 2.9212706535815145e-05, + "loss": 0.1013, "step": 9410 }, { "epoch": 0.44, - "learning_rate": 1.9211007453940276e-05, - "loss": 0.1673, + "learning_rate": 2.9212238465979125e-05, + "loss": 0.1797, "step": 9415 }, { "epoch": 0.44, - "learning_rate": 1.9210538652665136e-05, - "loss": 0.2262, + "learning_rate": 2.9211770396143105e-05, + "loss": 0.196, "step": 9420 }, { "epoch": 0.44, - "learning_rate": 1.9210069851389996e-05, - "loss": 0.156, + "learning_rate": 2.9211302326307088e-05, + "loss": 0.1942, "step": 9425 }, { "epoch": 0.44, - "learning_rate": 1.920960105011486e-05, - "loss": 0.1828, + "learning_rate": 2.9210834256471068e-05, + "loss": 0.2213, "step": 9430 }, { "epoch": 0.44, - "learning_rate": 1.920913224883972e-05, - "loss": 0.1871, + "learning_rate": 2.9210366186635044e-05, + "loss": 0.2232, "step": 9435 }, { "epoch": 0.44, - "learning_rate": 1.920866344756458e-05, - "loss": 0.3073, + "learning_rate": 2.9209898116799027e-05, + "loss": 0.2296, "step": 9440 }, { "epoch": 0.44, - "learning_rate": 1.920819464628944e-05, - "loss": 0.4646, + "learning_rate": 2.9209430046963007e-05, + "loss": 0.3679, "step": 9445 }, { "epoch": 0.44, - "learning_rate": 1.92077258450143e-05, - "loss": 0.5954, + "learning_rate": 2.9208961977126987e-05, + "loss": 0.5784, "step": 9450 }, { "epoch": 0.44, - "learning_rate": 1.920725704373916e-05, - "loss": 0.2833, + "learning_rate": 2.9208493907290967e-05, + "loss": 0.3411, "step": 9455 }, { "epoch": 0.44, - "learning_rate": 1.920678824246402e-05, - "loss": 0.1231, + "learning_rate": 2.920802583745495e-05, + "loss": 0.0989, "step": 9460 }, { "epoch": 0.44, - "learning_rate": 1.9206319441188882e-05, - "loss": 0.1253, + "learning_rate": 2.920755776761893e-05, + "loss": 0.0827, "step": 9465 }, { "epoch": 0.44, - "learning_rate": 1.9205850639913742e-05, - "loss": 0.1852, + "learning_rate": 2.920708969778291e-05, + "loss": 0.1644, "step": 9470 }, { "epoch": 0.44, - "learning_rate": 1.9205381838638602e-05, - "loss": 0.2212, + "learning_rate": 2.920662162794689e-05, + "loss": 0.1767, "step": 9475 }, { "epoch": 0.44, - "learning_rate": 1.9204913037363466e-05, - "loss": 0.1587, + "learning_rate": 2.9206153558110873e-05, + "loss": 0.2319, "step": 9480 }, { "epoch": 0.44, - "learning_rate": 1.9204444236088325e-05, - "loss": 0.3125, + "learning_rate": 2.9205685488274852e-05, + "loss": 0.2015, "step": 9485 }, { "epoch": 0.44, - "learning_rate": 1.9203975434813185e-05, - "loss": 0.3626, + "learning_rate": 2.9205217418438832e-05, + "loss": 0.3777, "step": 9490 }, { "epoch": 0.44, - "learning_rate": 1.9203506633538045e-05, - "loss": 0.4767, + "learning_rate": 2.9204749348602816e-05, + "loss": 0.3779, "step": 9495 }, { "epoch": 0.44, - "learning_rate": 1.9203037832262905e-05, - "loss": 0.6986, + "learning_rate": 2.9204281278766792e-05, + "loss": 0.5985, "step": 9500 }, { "epoch": 0.44, - "learning_rate": 1.9202569030987765e-05, - "loss": 0.2779, + "learning_rate": 2.9203813208930772e-05, + "loss": 0.2136, "step": 9505 }, { "epoch": 0.44, - "learning_rate": 1.9202100229712625e-05, - "loss": 0.095, + "learning_rate": 2.920334513909475e-05, + "loss": 0.1769, "step": 9510 }, { "epoch": 0.44, - "learning_rate": 1.9201631428437485e-05, - "loss": 0.1261, + "learning_rate": 2.9202877069258735e-05, + "loss": 0.2131, "step": 9515 }, { "epoch": 0.44, - "learning_rate": 1.9201162627162345e-05, - "loss": 0.1483, + "learning_rate": 2.9202408999422715e-05, + "loss": 0.2437, "step": 9520 }, { "epoch": 0.44, - "learning_rate": 1.920069382588721e-05, - "loss": 0.1876, + "learning_rate": 2.9201940929586694e-05, + "loss": 0.1986, "step": 9525 }, { "epoch": 0.44, - "learning_rate": 1.9200225024612068e-05, - "loss": 0.236, + "learning_rate": 2.9201472859750674e-05, + "loss": 0.1832, "step": 9530 }, { "epoch": 0.44, - "learning_rate": 1.9199756223336928e-05, - "loss": 0.2127, + "learning_rate": 2.9201004789914657e-05, + "loss": 0.3835, "step": 9535 }, { "epoch": 0.45, - "learning_rate": 1.9199287422061788e-05, - "loss": 0.2471, + "learning_rate": 2.9200536720078637e-05, + "loss": 0.3604, "step": 9540 }, { "epoch": 0.45, - "learning_rate": 1.919881862078665e-05, - "loss": 0.4438, + "learning_rate": 2.9200068650242617e-05, + "loss": 0.4509, "step": 9545 }, { "epoch": 0.45, - "learning_rate": 1.919834981951151e-05, - "loss": 0.5967, + "learning_rate": 2.9199600580406597e-05, + "loss": 0.4497, "step": 9550 }, { "epoch": 0.45, - "learning_rate": 1.919788101823637e-05, - "loss": 0.3444, + "learning_rate": 2.919913251057058e-05, + "loss": 0.265, "step": 9555 }, { "epoch": 0.45, - "learning_rate": 1.919741221696123e-05, - "loss": 0.142, + "learning_rate": 2.9198664440734557e-05, + "loss": 0.1075, "step": 9560 }, { "epoch": 0.45, - "learning_rate": 1.919694341568609e-05, - "loss": 0.0761, + "learning_rate": 2.9198196370898536e-05, + "loss": 0.246, "step": 9565 }, { "epoch": 0.45, - "learning_rate": 1.9196474614410955e-05, - "loss": 0.2179, + "learning_rate": 2.919772830106252e-05, + "loss": 0.1737, "step": 9570 }, { "epoch": 0.45, - "learning_rate": 1.9196005813135814e-05, - "loss": 0.1785, + "learning_rate": 2.91972602312265e-05, + "loss": 0.1635, "step": 9575 }, { "epoch": 0.45, - "learning_rate": 1.9195537011860674e-05, - "loss": 0.2536, + "learning_rate": 2.919679216139048e-05, + "loss": 0.1547, "step": 9580 }, { "epoch": 0.45, - "learning_rate": 1.9195068210585534e-05, - "loss": 0.1959, + "learning_rate": 2.919632409155446e-05, + "loss": 0.2552, "step": 9585 }, { "epoch": 0.45, - "learning_rate": 1.9194599409310394e-05, - "loss": 0.2637, + "learning_rate": 2.9195856021718442e-05, + "loss": 0.363, "step": 9590 }, { "epoch": 0.45, - "learning_rate": 1.9194130608035254e-05, - "loss": 0.4235, + "learning_rate": 2.9195387951882422e-05, + "loss": 0.2709, "step": 9595 }, { "epoch": 0.45, - "learning_rate": 1.9193661806760114e-05, - "loss": 0.3954, + "learning_rate": 2.9194919882046402e-05, + "loss": 0.5096, "step": 9600 }, { "epoch": 0.45, - "learning_rate": 1.9193193005484977e-05, - "loss": 0.2325, + "learning_rate": 2.919445181221038e-05, + "loss": 0.2245, "step": 9605 }, { "epoch": 0.45, - "learning_rate": 1.9192724204209837e-05, - "loss": 0.1367, + "learning_rate": 2.9193983742374365e-05, + "loss": 0.0959, "step": 9610 }, { "epoch": 0.45, - "learning_rate": 1.9192255402934697e-05, - "loss": 0.1429, + "learning_rate": 2.9193515672538345e-05, + "loss": 0.1363, "step": 9615 }, { "epoch": 0.45, - "learning_rate": 1.9191786601659557e-05, - "loss": 0.1687, + "learning_rate": 2.9193047602702325e-05, + "loss": 0.1798, "step": 9620 }, { "epoch": 0.45, - "learning_rate": 1.919131780038442e-05, - "loss": 0.263, + "learning_rate": 2.9192579532866304e-05, + "loss": 0.1677, "step": 9625 }, { "epoch": 0.45, - "learning_rate": 1.919084899910928e-05, - "loss": 0.2288, + "learning_rate": 2.9192111463030284e-05, + "loss": 0.1836, "step": 9630 }, { "epoch": 0.45, - "learning_rate": 1.919038019783414e-05, - "loss": 0.2932, + "learning_rate": 2.9191643393194264e-05, + "loss": 0.2168, "step": 9635 }, { "epoch": 0.45, - "learning_rate": 1.9189911396559e-05, - "loss": 0.5055, + "learning_rate": 2.9191175323358244e-05, + "loss": 0.3292, "step": 9640 }, { "epoch": 0.45, - "learning_rate": 1.918944259528386e-05, - "loss": 0.5668, + "learning_rate": 2.9190707253522227e-05, + "loss": 0.3587, "step": 9645 }, { "epoch": 0.45, - "learning_rate": 1.918897379400872e-05, - "loss": 0.5284, + "learning_rate": 2.9190239183686207e-05, + "loss": 0.3841, "step": 9650 }, { "epoch": 0.45, - "learning_rate": 1.918850499273358e-05, - "loss": 0.3792, + "learning_rate": 2.9189771113850187e-05, + "loss": 0.2487, "step": 9655 }, { "epoch": 0.45, - "learning_rate": 1.9188036191458443e-05, - "loss": 0.1393, + "learning_rate": 2.9189303044014166e-05, + "loss": 0.0936, "step": 9660 }, { "epoch": 0.45, - "learning_rate": 1.9187567390183303e-05, - "loss": 0.147, + "learning_rate": 2.918883497417815e-05, + "loss": 0.137, "step": 9665 }, { "epoch": 0.45, - "learning_rate": 1.9187098588908163e-05, - "loss": 0.1659, + "learning_rate": 2.918836690434213e-05, + "loss": 0.1499, "step": 9670 }, { "epoch": 0.45, - "learning_rate": 1.9186629787633023e-05, - "loss": 0.1626, + "learning_rate": 2.918789883450611e-05, + "loss": 0.1184, "step": 9675 }, { "epoch": 0.45, - "learning_rate": 1.9186160986357883e-05, - "loss": 0.1592, + "learning_rate": 2.9187430764670092e-05, + "loss": 0.2235, "step": 9680 }, { "epoch": 0.45, - "learning_rate": 1.9185692185082747e-05, - "loss": 0.2789, + "learning_rate": 2.9186962694834072e-05, + "loss": 0.2345, "step": 9685 }, { "epoch": 0.45, - "learning_rate": 1.9185223383807606e-05, - "loss": 0.2428, + "learning_rate": 2.918649462499805e-05, + "loss": 0.2049, "step": 9690 }, { "epoch": 0.45, - "learning_rate": 1.9184754582532466e-05, - "loss": 0.3408, + "learning_rate": 2.918602655516203e-05, + "loss": 0.314, "step": 9695 }, { "epoch": 0.45, - "learning_rate": 1.9184285781257326e-05, - "loss": 0.6366, + "learning_rate": 2.9185558485326012e-05, + "loss": 0.6398, "step": 9700 }, { "epoch": 0.45, - "learning_rate": 1.9183816979982186e-05, - "loss": 0.2249, + "learning_rate": 2.918509041548999e-05, + "loss": 0.2686, "step": 9705 }, { "epoch": 0.45, - "learning_rate": 1.918334817870705e-05, - "loss": 0.1138, + "learning_rate": 2.918462234565397e-05, + "loss": 0.0707, "step": 9710 }, { "epoch": 0.45, - "learning_rate": 1.918287937743191e-05, - "loss": 0.1484, + "learning_rate": 2.918415427581795e-05, + "loss": 0.1081, "step": 9715 }, { "epoch": 0.45, - "learning_rate": 1.918241057615677e-05, - "loss": 0.1745, + "learning_rate": 2.9183686205981934e-05, + "loss": 0.1142, "step": 9720 }, { "epoch": 0.45, - "learning_rate": 1.918194177488163e-05, - "loss": 0.1442, + "learning_rate": 2.9183218136145914e-05, + "loss": 0.2019, "step": 9725 }, { "epoch": 0.45, - "learning_rate": 1.918147297360649e-05, - "loss": 0.2449, + "learning_rate": 2.9182750066309894e-05, + "loss": 0.1826, "step": 9730 }, { "epoch": 0.45, - "learning_rate": 1.918100417233135e-05, - "loss": 0.2956, + "learning_rate": 2.9182281996473877e-05, + "loss": 0.2352, "step": 9735 }, { "epoch": 0.45, - "learning_rate": 1.918053537105621e-05, - "loss": 0.3274, + "learning_rate": 2.9181813926637857e-05, + "loss": 0.3715, "step": 9740 }, { "epoch": 0.45, - "learning_rate": 1.918006656978107e-05, - "loss": 0.3507, + "learning_rate": 2.9181345856801837e-05, + "loss": 0.4708, "step": 9745 }, { "epoch": 0.45, - "learning_rate": 1.9179597768505932e-05, - "loss": 0.6847, + "learning_rate": 2.9180877786965813e-05, + "loss": 0.5184, "step": 9750 }, { "epoch": 0.46, - "learning_rate": 1.9179128967230792e-05, - "loss": 0.2816, + "learning_rate": 2.9180409717129797e-05, + "loss": 0.2204, "step": 9755 }, { "epoch": 0.46, - "learning_rate": 1.9178660165955652e-05, - "loss": 0.1874, + "learning_rate": 2.9179941647293776e-05, + "loss": 0.1351, "step": 9760 }, { "epoch": 0.46, - "learning_rate": 1.9178191364680516e-05, - "loss": 0.1852, + "learning_rate": 2.9179473577457756e-05, + "loss": 0.1712, "step": 9765 }, { "epoch": 0.46, - "learning_rate": 1.9177722563405376e-05, - "loss": 0.1936, + "learning_rate": 2.9179005507621736e-05, + "loss": 0.1685, "step": 9770 }, { "epoch": 0.46, - "learning_rate": 1.9177253762130236e-05, - "loss": 0.148, + "learning_rate": 2.917853743778572e-05, + "loss": 0.1494, "step": 9775 }, { "epoch": 0.46, - "learning_rate": 1.9176784960855095e-05, - "loss": 0.2847, + "learning_rate": 2.91780693679497e-05, + "loss": 0.2308, "step": 9780 }, { "epoch": 0.46, - "learning_rate": 1.9176316159579955e-05, - "loss": 0.3086, + "learning_rate": 2.917760129811368e-05, + "loss": 0.3646, "step": 9785 }, { "epoch": 0.46, - "learning_rate": 1.9175847358304815e-05, - "loss": 0.2333, + "learning_rate": 2.917713322827766e-05, + "loss": 0.2776, "step": 9790 }, { "epoch": 0.46, - "learning_rate": 1.9175378557029675e-05, - "loss": 0.4765, + "learning_rate": 2.9176665158441642e-05, + "loss": 0.317, "step": 9795 }, { "epoch": 0.46, - "learning_rate": 1.917490975575454e-05, - "loss": 0.5674, + "learning_rate": 2.917619708860562e-05, + "loss": 0.4983, "step": 9800 }, { "epoch": 0.46, - "learning_rate": 1.91744409544794e-05, - "loss": 0.2181, + "learning_rate": 2.91757290187696e-05, + "loss": 0.2496, "step": 9805 }, { "epoch": 0.46, - "learning_rate": 1.917397215320426e-05, - "loss": 0.065, + "learning_rate": 2.9175260948933585e-05, + "loss": 0.0691, "step": 9810 }, { "epoch": 0.46, - "learning_rate": 1.917350335192912e-05, - "loss": 0.1668, + "learning_rate": 2.917479287909756e-05, + "loss": 0.157, "step": 9815 }, { "epoch": 0.46, - "learning_rate": 1.9173034550653978e-05, - "loss": 0.1574, + "learning_rate": 2.917432480926154e-05, + "loss": 0.1621, "step": 9820 }, { "epoch": 0.46, - "learning_rate": 1.9172565749378838e-05, - "loss": 0.1636, + "learning_rate": 2.917385673942552e-05, + "loss": 0.1488, "step": 9825 }, { "epoch": 0.46, - "learning_rate": 1.91720969481037e-05, - "loss": 0.2269, + "learning_rate": 2.9173388669589504e-05, + "loss": 0.2045, "step": 9830 }, { "epoch": 0.46, - "learning_rate": 1.917162814682856e-05, - "loss": 0.2636, + "learning_rate": 2.9172920599753484e-05, + "loss": 0.2649, "step": 9835 }, { "epoch": 0.46, - "learning_rate": 1.917115934555342e-05, - "loss": 0.1976, + "learning_rate": 2.9172452529917464e-05, + "loss": 0.2739, "step": 9840 }, { "epoch": 0.46, - "learning_rate": 1.917069054427828e-05, - "loss": 0.4929, + "learning_rate": 2.9171984460081443e-05, + "loss": 0.4539, "step": 9845 }, { "epoch": 0.46, - "learning_rate": 1.9170221743003145e-05, - "loss": 0.6642, + "learning_rate": 2.9171516390245427e-05, + "loss": 0.458, "step": 9850 }, { "epoch": 0.46, - "learning_rate": 1.9169752941728005e-05, - "loss": 0.2061, + "learning_rate": 2.9171048320409406e-05, + "loss": 0.2403, "step": 9855 }, { "epoch": 0.46, - "learning_rate": 1.9169284140452865e-05, - "loss": 0.106, + "learning_rate": 2.9170580250573386e-05, + "loss": 0.0825, "step": 9860 }, { "epoch": 0.46, - "learning_rate": 1.9168815339177724e-05, - "loss": 0.1755, + "learning_rate": 2.917011218073737e-05, + "loss": 0.1144, "step": 9865 }, { "epoch": 0.46, - "learning_rate": 1.9168346537902584e-05, - "loss": 0.1511, + "learning_rate": 2.916964411090135e-05, + "loss": 0.1379, "step": 9870 }, { "epoch": 0.46, - "learning_rate": 1.9167877736627444e-05, - "loss": 0.2493, + "learning_rate": 2.916917604106533e-05, + "loss": 0.1643, "step": 9875 }, { "epoch": 0.46, - "learning_rate": 1.9167408935352304e-05, - "loss": 0.309, + "learning_rate": 2.9168707971229306e-05, + "loss": 0.1821, "step": 9880 }, { "epoch": 0.46, - "learning_rate": 1.9166940134077164e-05, - "loss": 0.2733, + "learning_rate": 2.916823990139329e-05, + "loss": 0.2061, "step": 9885 }, { "epoch": 0.46, - "learning_rate": 1.9166471332802024e-05, - "loss": 0.288, + "learning_rate": 2.916777183155727e-05, + "loss": 0.2136, "step": 9890 }, { "epoch": 0.46, - "learning_rate": 1.9166002531526887e-05, - "loss": 0.3824, + "learning_rate": 2.916730376172125e-05, + "loss": 0.2821, "step": 9895 }, { "epoch": 0.46, - "learning_rate": 1.9165533730251747e-05, - "loss": 0.6902, + "learning_rate": 2.9166835691885228e-05, + "loss": 0.6509, "step": 9900 }, { "epoch": 0.46, - "learning_rate": 1.9165064928976607e-05, - "loss": 0.2577, + "learning_rate": 2.916636762204921e-05, + "loss": 0.241, "step": 9905 }, { "epoch": 0.46, - "learning_rate": 1.916459612770147e-05, - "loss": 0.1099, + "learning_rate": 2.916589955221319e-05, + "loss": 0.1254, "step": 9910 }, { "epoch": 0.46, - "learning_rate": 1.916412732642633e-05, - "loss": 0.1004, + "learning_rate": 2.916543148237717e-05, + "loss": 0.1624, "step": 9915 }, { "epoch": 0.46, - "learning_rate": 1.916365852515119e-05, - "loss": 0.1233, + "learning_rate": 2.9164963412541154e-05, + "loss": 0.1871, "step": 9920 }, { "epoch": 0.46, - "learning_rate": 1.916318972387605e-05, - "loss": 0.1185, + "learning_rate": 2.9164495342705134e-05, + "loss": 0.1965, "step": 9925 }, { "epoch": 0.46, - "learning_rate": 1.916272092260091e-05, - "loss": 0.2188, + "learning_rate": 2.9164027272869114e-05, + "loss": 0.2924, "step": 9930 }, { "epoch": 0.46, - "learning_rate": 1.916225212132577e-05, - "loss": 0.2539, + "learning_rate": 2.9163559203033094e-05, + "loss": 0.2327, "step": 9935 }, { "epoch": 0.46, - "learning_rate": 1.9161783320050634e-05, - "loss": 0.2565, + "learning_rate": 2.9163091133197073e-05, + "loss": 0.3731, "step": 9940 }, { "epoch": 0.46, - "learning_rate": 1.9161314518775494e-05, - "loss": 0.4717, + "learning_rate": 2.9162623063361053e-05, + "loss": 0.411, "step": 9945 }, { "epoch": 0.46, - "learning_rate": 1.9160845717500354e-05, - "loss": 0.493, + "learning_rate": 2.9162154993525033e-05, + "loss": 0.4525, "step": 9950 }, { "epoch": 0.46, - "learning_rate": 1.9160376916225213e-05, - "loss": 0.2629, + "learning_rate": 2.9161686923689013e-05, + "loss": 0.1787, "step": 9955 }, { "epoch": 0.46, - "learning_rate": 1.9159908114950073e-05, - "loss": 0.07, + "learning_rate": 2.9161218853852996e-05, + "loss": 0.1399, "step": 9960 }, { "epoch": 0.46, - "learning_rate": 1.9159439313674933e-05, - "loss": 0.1622, + "learning_rate": 2.9160750784016976e-05, + "loss": 0.1591, "step": 9965 }, { "epoch": 0.47, - "learning_rate": 1.9158970512399793e-05, - "loss": 0.2093, + "learning_rate": 2.9160282714180956e-05, + "loss": 0.2047, "step": 9970 }, { "epoch": 0.47, - "learning_rate": 1.9158501711124657e-05, - "loss": 0.1833, + "learning_rate": 2.9159814644344936e-05, + "loss": 0.1774, "step": 9975 }, { "epoch": 0.47, - "learning_rate": 1.9158032909849516e-05, - "loss": 0.2507, + "learning_rate": 2.915934657450892e-05, + "loss": 0.2215, "step": 9980 }, { "epoch": 0.47, - "learning_rate": 1.9157564108574376e-05, - "loss": 0.3487, + "learning_rate": 2.91588785046729e-05, + "loss": 0.2193, "step": 9985 }, { "epoch": 0.47, - "learning_rate": 1.915709530729924e-05, - "loss": 0.3129, + "learning_rate": 2.915841043483688e-05, + "loss": 0.2754, "step": 9990 }, { "epoch": 0.47, - "learning_rate": 1.91566265060241e-05, - "loss": 0.4036, + "learning_rate": 2.915794236500086e-05, + "loss": 0.3376, "step": 9995 }, { "epoch": 0.47, - "learning_rate": 1.915615770474896e-05, - "loss": 0.5754, + "learning_rate": 2.915747429516484e-05, + "loss": 0.5115, "step": 10000 }, { "epoch": 0.47, - "learning_rate": 1.915568890347382e-05, - "loss": 0.2006, + "learning_rate": 2.9157006225328818e-05, + "loss": 0.2713, "step": 10005 }, { "epoch": 0.47, - "learning_rate": 1.915522010219868e-05, - "loss": 0.1141, + "learning_rate": 2.9156538155492798e-05, + "loss": 0.1161, "step": 10010 }, { "epoch": 0.47, - "learning_rate": 1.915475130092354e-05, - "loss": 0.1682, + "learning_rate": 2.915607008565678e-05, + "loss": 0.1312, "step": 10015 }, { "epoch": 0.47, - "learning_rate": 1.91542824996484e-05, - "loss": 0.1504, + "learning_rate": 2.915560201582076e-05, + "loss": 0.1382, "step": 10020 }, { "epoch": 0.47, - "learning_rate": 1.915381369837326e-05, - "loss": 0.2555, + "learning_rate": 2.915513394598474e-05, + "loss": 0.2032, "step": 10025 }, { "epoch": 0.47, - "learning_rate": 1.915334489709812e-05, - "loss": 0.2481, + "learning_rate": 2.915466587614872e-05, + "loss": 0.2508, "step": 10030 }, { "epoch": 0.47, - "learning_rate": 1.9152876095822983e-05, - "loss": 0.1897, + "learning_rate": 2.9154197806312704e-05, + "loss": 0.3753, "step": 10035 }, { "epoch": 0.47, - "learning_rate": 1.9152407294547842e-05, - "loss": 0.2689, + "learning_rate": 2.9153729736476683e-05, + "loss": 0.3759, "step": 10040 }, { "epoch": 0.47, - "learning_rate": 1.9151938493272702e-05, - "loss": 0.4531, + "learning_rate": 2.9153261666640663e-05, + "loss": 0.3609, "step": 10045 }, { "epoch": 0.47, - "learning_rate": 1.9151469691997562e-05, - "loss": 0.6491, + "learning_rate": 2.9152793596804646e-05, + "loss": 0.5963, "step": 10050 }, { "epoch": 0.47, - "learning_rate": 1.9151000890722426e-05, - "loss": 0.2868, + "learning_rate": 2.9152325526968626e-05, + "loss": 0.2251, "step": 10055 }, { "epoch": 0.47, - "learning_rate": 1.9150532089447286e-05, - "loss": 0.0885, + "learning_rate": 2.9151857457132606e-05, + "loss": 0.119, "step": 10060 }, { "epoch": 0.47, - "learning_rate": 1.9150063288172146e-05, - "loss": 0.1029, + "learning_rate": 2.9151389387296586e-05, + "loss": 0.0838, "step": 10065 }, { "epoch": 0.47, - "learning_rate": 1.9149594486897005e-05, - "loss": 0.1705, + "learning_rate": 2.9150921317460566e-05, + "loss": 0.0799, "step": 10070 }, { "epoch": 0.47, - "learning_rate": 1.9149125685621865e-05, - "loss": 0.2066, + "learning_rate": 2.9150453247624546e-05, + "loss": 0.2083, "step": 10075 }, { "epoch": 0.47, - "learning_rate": 1.914865688434673e-05, - "loss": 0.2518, + "learning_rate": 2.9149985177788525e-05, + "loss": 0.258, "step": 10080 }, { "epoch": 0.47, - "learning_rate": 1.914818808307159e-05, - "loss": 0.1692, + "learning_rate": 2.9149517107952505e-05, + "loss": 0.2772, "step": 10085 }, { "epoch": 0.47, - "learning_rate": 1.914771928179645e-05, - "loss": 0.3126, + "learning_rate": 2.914904903811649e-05, + "loss": 0.2897, "step": 10090 }, { "epoch": 0.47, - "learning_rate": 1.914725048052131e-05, - "loss": 0.5543, + "learning_rate": 2.9148580968280468e-05, + "loss": 0.4434, "step": 10095 }, { "epoch": 0.47, - "learning_rate": 1.914678167924617e-05, - "loss": 0.8418, + "learning_rate": 2.9148112898444448e-05, + "loss": 0.5291, "step": 10100 }, { "epoch": 0.47, - "learning_rate": 1.914631287797103e-05, - "loss": 0.2604, + "learning_rate": 2.914764482860843e-05, + "loss": 0.295, "step": 10105 }, { "epoch": 0.47, - "learning_rate": 1.914584407669589e-05, - "loss": 0.0586, + "learning_rate": 2.914717675877241e-05, + "loss": 0.1425, "step": 10110 }, { "epoch": 0.47, - "learning_rate": 1.914537527542075e-05, - "loss": 0.1234, + "learning_rate": 2.914670868893639e-05, + "loss": 0.1711, "step": 10115 }, { "epoch": 0.47, - "learning_rate": 1.914490647414561e-05, - "loss": 0.1425, + "learning_rate": 2.914624061910037e-05, + "loss": 0.136, "step": 10120 }, { "epoch": 0.47, - "learning_rate": 1.914443767287047e-05, - "loss": 0.2744, + "learning_rate": 2.9145772549264354e-05, + "loss": 0.1828, "step": 10125 }, { "epoch": 0.47, - "learning_rate": 1.914396887159533e-05, - "loss": 0.2341, + "learning_rate": 2.914530447942833e-05, + "loss": 0.2204, "step": 10130 }, { "epoch": 0.47, - "learning_rate": 1.9143500070320195e-05, - "loss": 0.3112, + "learning_rate": 2.914483640959231e-05, + "loss": 0.1504, "step": 10135 }, { "epoch": 0.47, - "learning_rate": 1.9143031269045055e-05, - "loss": 0.301, + "learning_rate": 2.914436833975629e-05, + "loss": 0.1957, "step": 10140 }, { "epoch": 0.47, - "learning_rate": 1.9142562467769915e-05, - "loss": 0.4034, + "learning_rate": 2.9143900269920273e-05, + "loss": 0.3153, "step": 10145 }, { "epoch": 0.47, - "learning_rate": 1.9142093666494775e-05, - "loss": 0.4897, + "learning_rate": 2.9143432200084253e-05, + "loss": 0.4669, "step": 10150 }, { "epoch": 0.47, - "learning_rate": 1.9141624865219635e-05, - "loss": 0.2841, + "learning_rate": 2.9142964130248233e-05, + "loss": 0.2938, "step": 10155 }, { "epoch": 0.47, - "learning_rate": 1.9141156063944494e-05, - "loss": 0.1272, + "learning_rate": 2.9142496060412213e-05, + "loss": 0.0995, "step": 10160 }, { "epoch": 0.47, - "learning_rate": 1.9140687262669354e-05, - "loss": 0.095, + "learning_rate": 2.9142027990576196e-05, + "loss": 0.1582, "step": 10165 }, { "epoch": 0.47, - "learning_rate": 1.9140218461394214e-05, - "loss": 0.2431, + "learning_rate": 2.9141559920740176e-05, + "loss": 0.1777, "step": 10170 }, { "epoch": 0.47, - "learning_rate": 1.9139749660119078e-05, - "loss": 0.185, + "learning_rate": 2.9141091850904155e-05, + "loss": 0.0941, "step": 10175 }, { "epoch": 0.48, - "learning_rate": 1.9139280858843938e-05, - "loss": 0.1765, + "learning_rate": 2.914062378106814e-05, + "loss": 0.2079, "step": 10180 }, { "epoch": 0.48, - "learning_rate": 1.9138812057568797e-05, - "loss": 0.1876, + "learning_rate": 2.914015571123212e-05, + "loss": 0.3759, "step": 10185 }, { "epoch": 0.48, - "learning_rate": 1.9138343256293657e-05, - "loss": 0.205, + "learning_rate": 2.9139687641396098e-05, + "loss": 0.2368, "step": 10190 }, { "epoch": 0.48, - "learning_rate": 1.913787445501852e-05, - "loss": 0.2962, + "learning_rate": 2.9139219571560075e-05, + "loss": 0.3776, "step": 10195 }, { "epoch": 0.48, - "learning_rate": 1.913740565374338e-05, - "loss": 0.6509, + "learning_rate": 2.9138751501724058e-05, + "loss": 0.6228, "step": 10200 }, { "epoch": 0.48, - "learning_rate": 1.913693685246824e-05, - "loss": 0.2717, + "learning_rate": 2.9138283431888038e-05, + "loss": 0.1931, "step": 10205 }, { "epoch": 0.48, - "learning_rate": 1.91364680511931e-05, - "loss": 0.1069, + "learning_rate": 2.9137815362052018e-05, + "loss": 0.0513, "step": 10210 }, { "epoch": 0.48, - "learning_rate": 1.913599924991796e-05, - "loss": 0.2321, + "learning_rate": 2.9137347292215997e-05, + "loss": 0.1184, "step": 10215 }, { "epoch": 0.48, - "learning_rate": 1.9135530448642824e-05, - "loss": 0.1316, + "learning_rate": 2.913687922237998e-05, + "loss": 0.1752, "step": 10220 }, { "epoch": 0.48, - "learning_rate": 1.9135061647367684e-05, - "loss": 0.1441, + "learning_rate": 2.913641115254396e-05, + "loss": 0.1899, "step": 10225 }, { "epoch": 0.48, - "learning_rate": 1.9134592846092544e-05, - "loss": 0.2482, + "learning_rate": 2.913594308270794e-05, + "loss": 0.2375, "step": 10230 }, { "epoch": 0.48, - "learning_rate": 1.9134124044817404e-05, - "loss": 0.3006, + "learning_rate": 2.9135475012871923e-05, + "loss": 0.2201, "step": 10235 }, { "epoch": 0.48, - "learning_rate": 1.9133655243542264e-05, - "loss": 0.4242, + "learning_rate": 2.9135006943035903e-05, + "loss": 0.3693, "step": 10240 }, { "epoch": 0.48, - "learning_rate": 1.9133186442267123e-05, - "loss": 0.3968, + "learning_rate": 2.9134538873199883e-05, + "loss": 0.3074, "step": 10245 }, { "epoch": 0.48, - "learning_rate": 1.9132717640991983e-05, - "loss": 0.5792, + "learning_rate": 2.9134070803363863e-05, + "loss": 0.5954, "step": 10250 }, { "epoch": 0.48, - "learning_rate": 1.9132248839716843e-05, - "loss": 0.3714, + "learning_rate": 2.9133602733527843e-05, + "loss": 0.2433, "step": 10255 }, { "epoch": 0.48, - "learning_rate": 1.9131780038441707e-05, - "loss": 0.1612, + "learning_rate": 2.9133134663691822e-05, + "loss": 0.0825, "step": 10260 }, { "epoch": 0.48, - "learning_rate": 1.9131311237166567e-05, - "loss": 0.1666, + "learning_rate": 2.9132666593855802e-05, + "loss": 0.1623, "step": 10265 }, { "epoch": 0.48, - "learning_rate": 1.9130842435891427e-05, - "loss": 0.2056, + "learning_rate": 2.9132198524019782e-05, + "loss": 0.1625, "step": 10270 }, { "epoch": 0.48, - "learning_rate": 1.913037363461629e-05, - "loss": 0.2004, + "learning_rate": 2.9131730454183765e-05, + "loss": 0.2033, "step": 10275 }, { "epoch": 0.48, - "learning_rate": 1.912990483334115e-05, - "loss": 0.2485, + "learning_rate": 2.9131262384347745e-05, + "loss": 0.303, "step": 10280 }, { "epoch": 0.48, - "learning_rate": 1.912943603206601e-05, - "loss": 0.2626, + "learning_rate": 2.9130794314511725e-05, + "loss": 0.4226, "step": 10285 }, { "epoch": 0.48, - "learning_rate": 1.912896723079087e-05, - "loss": 0.3242, + "learning_rate": 2.9130326244675708e-05, + "loss": 0.2968, "step": 10290 }, { "epoch": 0.48, - "learning_rate": 1.912849842951573e-05, - "loss": 0.3341, + "learning_rate": 2.9129858174839688e-05, + "loss": 0.4019, "step": 10295 }, { "epoch": 0.48, - "learning_rate": 1.912802962824059e-05, - "loss": 0.5659, + "learning_rate": 2.9129390105003668e-05, + "loss": 0.5179, "step": 10300 }, { "epoch": 0.48, - "learning_rate": 1.912756082696545e-05, - "loss": 0.2671, + "learning_rate": 2.9128922035167648e-05, + "loss": 0.3199, "step": 10305 }, { "epoch": 0.48, - "learning_rate": 1.9127092025690313e-05, - "loss": 0.1295, + "learning_rate": 2.912845396533163e-05, + "loss": 0.1018, "step": 10310 }, { "epoch": 0.48, - "learning_rate": 1.9126623224415173e-05, - "loss": 0.1669, + "learning_rate": 2.912798589549561e-05, + "loss": 0.0952, "step": 10315 }, { "epoch": 0.48, - "learning_rate": 1.9126154423140033e-05, - "loss": 0.1559, + "learning_rate": 2.9127517825659587e-05, + "loss": 0.0911, "step": 10320 }, { "epoch": 0.48, - "learning_rate": 1.9125685621864893e-05, - "loss": 0.1648, + "learning_rate": 2.9127049755823567e-05, + "loss": 0.1483, "step": 10325 }, { "epoch": 0.48, - "learning_rate": 1.9125216820589753e-05, - "loss": 0.2274, + "learning_rate": 2.912658168598755e-05, + "loss": 0.1974, "step": 10330 }, { "epoch": 0.48, - "learning_rate": 1.9124748019314612e-05, - "loss": 0.3051, + "learning_rate": 2.912611361615153e-05, + "loss": 0.2161, "step": 10335 }, { "epoch": 0.48, - "learning_rate": 1.9124279218039476e-05, - "loss": 0.3135, + "learning_rate": 2.912564554631551e-05, + "loss": 0.3492, "step": 10340 }, { "epoch": 0.48, - "learning_rate": 1.9123810416764336e-05, - "loss": 0.4214, + "learning_rate": 2.9125177476479493e-05, + "loss": 0.4528, "step": 10345 }, { "epoch": 0.48, - "learning_rate": 1.9123341615489196e-05, - "loss": 0.4696, + "learning_rate": 2.9124709406643473e-05, + "loss": 0.6735, "step": 10350 }, { "epoch": 0.48, - "learning_rate": 1.9122872814214056e-05, - "loss": 0.2314, + "learning_rate": 2.9124241336807453e-05, + "loss": 0.2464, "step": 10355 }, { "epoch": 0.48, - "learning_rate": 1.912240401293892e-05, - "loss": 0.0987, + "learning_rate": 2.9123773266971432e-05, + "loss": 0.1387, "step": 10360 }, { "epoch": 0.48, - "learning_rate": 1.912193521166378e-05, - "loss": 0.103, + "learning_rate": 2.9123305197135416e-05, + "loss": 0.1129, "step": 10365 }, { "epoch": 0.48, - "learning_rate": 1.912146641038864e-05, - "loss": 0.1721, + "learning_rate": 2.9122837127299395e-05, + "loss": 0.149, "step": 10370 }, { "epoch": 0.48, - "learning_rate": 1.91209976091135e-05, - "loss": 0.1067, + "learning_rate": 2.9122369057463375e-05, + "loss": 0.1488, "step": 10375 }, { "epoch": 0.48, - "learning_rate": 1.912052880783836e-05, - "loss": 0.2847, + "learning_rate": 2.9121900987627355e-05, + "loss": 0.2723, "step": 10380 }, { "epoch": 0.48, - "learning_rate": 1.912006000656322e-05, - "loss": 0.2213, + "learning_rate": 2.9121432917791335e-05, + "loss": 0.2941, "step": 10385 }, { "epoch": 0.48, - "learning_rate": 1.911959120528808e-05, - "loss": 0.2882, + "learning_rate": 2.9120964847955315e-05, + "loss": 0.289, "step": 10390 }, { "epoch": 0.49, - "learning_rate": 1.911912240401294e-05, - "loss": 0.4464, + "learning_rate": 2.9120496778119295e-05, + "loss": 0.2986, "step": 10395 }, { "epoch": 0.49, - "learning_rate": 1.91186536027378e-05, - "loss": 0.6176, + "learning_rate": 2.9120028708283274e-05, + "loss": 0.4768, "step": 10400 }, { "epoch": 0.49, - "learning_rate": 1.911818480146266e-05, - "loss": 0.294, + "learning_rate": 2.9119560638447258e-05, + "loss": 0.2318, "step": 10405 }, { "epoch": 0.49, - "learning_rate": 1.911771600018752e-05, - "loss": 0.1381, + "learning_rate": 2.9119092568611237e-05, + "loss": 0.087, "step": 10410 }, { "epoch": 0.49, - "learning_rate": 1.911724719891238e-05, - "loss": 0.1054, + "learning_rate": 2.9118624498775217e-05, + "loss": 0.1399, "step": 10415 }, { "epoch": 0.49, - "learning_rate": 1.9116778397637245e-05, - "loss": 0.1755, + "learning_rate": 2.91181564289392e-05, + "loss": 0.1293, "step": 10420 }, { "epoch": 0.49, - "learning_rate": 1.9116309596362105e-05, - "loss": 0.13, + "learning_rate": 2.911768835910318e-05, + "loss": 0.1353, "step": 10425 }, { "epoch": 0.49, - "learning_rate": 1.9115840795086965e-05, - "loss": 0.1956, + "learning_rate": 2.911722028926716e-05, + "loss": 0.2022, "step": 10430 }, { "epoch": 0.49, - "learning_rate": 1.9115371993811825e-05, - "loss": 0.272, + "learning_rate": 2.911675221943114e-05, + "loss": 0.289, "step": 10435 }, { "epoch": 0.49, - "learning_rate": 1.9114903192536685e-05, - "loss": 0.325, + "learning_rate": 2.9116284149595123e-05, + "loss": 0.5087, "step": 10440 }, { "epoch": 0.49, - "learning_rate": 1.9114434391261545e-05, - "loss": 0.4834, + "learning_rate": 2.91158160797591e-05, + "loss": 0.3206, "step": 10445 }, { "epoch": 0.49, - "learning_rate": 1.9113965589986408e-05, - "loss": 0.4972, + "learning_rate": 2.911534800992308e-05, + "loss": 0.597, "step": 10450 }, { "epoch": 0.49, - "learning_rate": 1.9113496788711268e-05, - "loss": 0.31, + "learning_rate": 2.911487994008706e-05, + "loss": 0.3211, "step": 10455 }, { "epoch": 0.49, - "learning_rate": 1.9113027987436128e-05, - "loss": 0.0929, + "learning_rate": 2.9114411870251042e-05, + "loss": 0.1773, "step": 10460 }, { "epoch": 0.49, - "learning_rate": 1.9112559186160988e-05, - "loss": 0.1527, + "learning_rate": 2.9113943800415022e-05, + "loss": 0.1689, "step": 10465 }, { "epoch": 0.49, - "learning_rate": 1.9112090384885848e-05, - "loss": 0.2049, + "learning_rate": 2.9113475730579002e-05, + "loss": 0.1645, "step": 10470 }, { "epoch": 0.49, - "learning_rate": 1.9111621583610708e-05, - "loss": 0.2407, + "learning_rate": 2.9113007660742985e-05, + "loss": 0.2574, "step": 10475 }, { "epoch": 0.49, - "learning_rate": 1.9111152782335567e-05, - "loss": 0.2558, + "learning_rate": 2.9112539590906965e-05, + "loss": 0.2319, "step": 10480 }, { "epoch": 0.49, - "learning_rate": 1.911068398106043e-05, - "loss": 0.2731, + "learning_rate": 2.9112071521070945e-05, + "loss": 0.2182, "step": 10485 }, { "epoch": 0.49, - "learning_rate": 1.911021517978529e-05, - "loss": 0.3283, + "learning_rate": 2.9111603451234925e-05, + "loss": 0.2716, "step": 10490 }, { "epoch": 0.49, - "learning_rate": 1.910974637851015e-05, - "loss": 0.4684, + "learning_rate": 2.9111135381398908e-05, + "loss": 0.2271, "step": 10495 }, { "epoch": 0.49, - "learning_rate": 1.9109277577235014e-05, - "loss": 0.4627, + "learning_rate": 2.9110667311562888e-05, + "loss": 0.6028, "step": 10500 }, { "epoch": 0.49, - "learning_rate": 1.9108808775959874e-05, - "loss": 0.2122, + "learning_rate": 2.9110199241726867e-05, + "loss": 0.2078, "step": 10505 }, { "epoch": 0.49, - "learning_rate": 1.9108339974684734e-05, - "loss": 0.1003, + "learning_rate": 2.9109731171890844e-05, + "loss": 0.1231, "step": 10510 }, { "epoch": 0.49, - "learning_rate": 1.9107871173409594e-05, - "loss": 0.1256, + "learning_rate": 2.9109263102054827e-05, + "loss": 0.1728, "step": 10515 }, { "epoch": 0.49, - "learning_rate": 1.9107402372134454e-05, - "loss": 0.0787, + "learning_rate": 2.9108795032218807e-05, + "loss": 0.1528, "step": 10520 }, { "epoch": 0.49, - "learning_rate": 1.9106933570859314e-05, - "loss": 0.1799, + "learning_rate": 2.9108326962382787e-05, + "loss": 0.2504, "step": 10525 }, { "epoch": 0.49, - "learning_rate": 1.9106464769584174e-05, - "loss": 0.2717, + "learning_rate": 2.910785889254677e-05, + "loss": 0.2285, "step": 10530 }, { "epoch": 0.49, - "learning_rate": 1.9105995968309034e-05, - "loss": 0.2664, + "learning_rate": 2.910739082271075e-05, + "loss": 0.1583, "step": 10535 }, { "epoch": 0.49, - "learning_rate": 1.9105527167033893e-05, - "loss": 0.3668, + "learning_rate": 2.910692275287473e-05, + "loss": 0.1986, "step": 10540 }, { "epoch": 0.49, - "learning_rate": 1.9105058365758757e-05, - "loss": 0.3952, + "learning_rate": 2.910645468303871e-05, + "loss": 0.3468, "step": 10545 }, { "epoch": 0.49, - "learning_rate": 1.9104589564483617e-05, - "loss": 0.5244, + "learning_rate": 2.9105986613202693e-05, + "loss": 0.3821, "step": 10550 }, { "epoch": 0.49, - "learning_rate": 1.9104120763208477e-05, - "loss": 0.257, + "learning_rate": 2.9105518543366672e-05, + "loss": 0.2888, "step": 10555 }, { "epoch": 0.49, - "learning_rate": 1.910365196193334e-05, - "loss": 0.1311, + "learning_rate": 2.9105050473530652e-05, + "loss": 0.1296, "step": 10560 }, { "epoch": 0.49, - "learning_rate": 1.91031831606582e-05, - "loss": 0.1379, + "learning_rate": 2.9104582403694632e-05, + "loss": 0.149, "step": 10565 }, { "epoch": 0.49, - "learning_rate": 1.910271435938306e-05, - "loss": 0.2639, + "learning_rate": 2.9104114333858615e-05, + "loss": 0.1573, "step": 10570 }, { "epoch": 0.49, - "learning_rate": 1.910224555810792e-05, - "loss": 0.1975, + "learning_rate": 2.910364626402259e-05, + "loss": 0.1666, "step": 10575 }, { "epoch": 0.49, - "learning_rate": 1.910177675683278e-05, - "loss": 0.1877, + "learning_rate": 2.910317819418657e-05, + "loss": 0.2131, "step": 10580 }, { "epoch": 0.49, - "learning_rate": 1.910130795555764e-05, - "loss": 0.1974, + "learning_rate": 2.910271012435055e-05, + "loss": 0.3106, "step": 10585 }, { "epoch": 0.49, - "learning_rate": 1.9100839154282503e-05, - "loss": 0.356, + "learning_rate": 2.9102242054514534e-05, + "loss": 0.2393, "step": 10590 }, { "epoch": 0.49, - "learning_rate": 1.9100370353007363e-05, - "loss": 0.3649, + "learning_rate": 2.9101773984678514e-05, + "loss": 0.3747, "step": 10595 }, { "epoch": 0.49, - "learning_rate": 1.9099901551732223e-05, - "loss": 0.6513, + "learning_rate": 2.9101305914842494e-05, + "loss": 0.5276, "step": 10600 }, { "epoch": 0.49, - "learning_rate": 1.9099432750457083e-05, - "loss": 0.219, + "learning_rate": 2.9100837845006477e-05, + "loss": 0.2109, "step": 10605 }, { "epoch": 0.5, - "learning_rate": 1.9098963949181943e-05, - "loss": 0.1712, + "learning_rate": 2.9100369775170457e-05, + "loss": 0.1697, "step": 10610 }, { "epoch": 0.5, - "learning_rate": 1.9098495147906803e-05, - "loss": 0.1444, + "learning_rate": 2.9099901705334437e-05, + "loss": 0.1781, "step": 10615 }, { "epoch": 0.5, - "learning_rate": 1.9098026346631663e-05, - "loss": 0.1519, + "learning_rate": 2.9099433635498417e-05, + "loss": 0.1481, "step": 10620 }, { "epoch": 0.5, - "learning_rate": 1.9097557545356526e-05, - "loss": 0.1516, + "learning_rate": 2.90989655656624e-05, + "loss": 0.1875, "step": 10625 }, { "epoch": 0.5, - "learning_rate": 1.9097088744081386e-05, - "loss": 0.2533, + "learning_rate": 2.909849749582638e-05, + "loss": 0.23, "step": 10630 }, { "epoch": 0.5, - "learning_rate": 1.9096619942806246e-05, - "loss": 0.2046, + "learning_rate": 2.9098029425990356e-05, + "loss": 0.2837, "step": 10635 }, { "epoch": 0.5, - "learning_rate": 1.909615114153111e-05, - "loss": 0.2561, + "learning_rate": 2.9097561356154336e-05, + "loss": 0.3611, "step": 10640 }, { "epoch": 0.5, - "learning_rate": 1.909568234025597e-05, - "loss": 0.4102, + "learning_rate": 2.909709328631832e-05, + "loss": 0.3145, "step": 10645 }, { "epoch": 0.5, - "learning_rate": 1.909521353898083e-05, - "loss": 0.4769, + "learning_rate": 2.90966252164823e-05, + "loss": 0.7858, "step": 10650 }, { "epoch": 0.5, - "learning_rate": 1.909474473770569e-05, - "loss": 0.3088, + "learning_rate": 2.909615714664628e-05, + "loss": 0.2585, "step": 10655 }, { "epoch": 0.5, - "learning_rate": 1.909427593643055e-05, - "loss": 0.1282, + "learning_rate": 2.9095689076810262e-05, + "loss": 0.1072, "step": 10660 }, { "epoch": 0.5, - "learning_rate": 1.909380713515541e-05, - "loss": 0.1414, + "learning_rate": 2.9095221006974242e-05, + "loss": 0.0863, "step": 10665 }, { "epoch": 0.5, - "learning_rate": 1.909333833388027e-05, - "loss": 0.1356, + "learning_rate": 2.9094752937138222e-05, + "loss": 0.2089, "step": 10670 }, { "epoch": 0.5, - "learning_rate": 1.909286953260513e-05, - "loss": 0.1508, + "learning_rate": 2.90942848673022e-05, + "loss": 0.1946, "step": 10675 }, { "epoch": 0.5, - "learning_rate": 1.909240073132999e-05, - "loss": 0.2121, + "learning_rate": 2.9093816797466185e-05, + "loss": 0.1804, "step": 10680 }, { "epoch": 0.5, - "learning_rate": 1.9091931930054852e-05, - "loss": 0.2012, + "learning_rate": 2.9093348727630165e-05, + "loss": 0.2488, "step": 10685 }, { "epoch": 0.5, - "learning_rate": 1.9091463128779712e-05, - "loss": 0.4462, + "learning_rate": 2.9092880657794144e-05, + "loss": 0.3446, "step": 10690 }, { "epoch": 0.5, - "learning_rate": 1.9090994327504572e-05, - "loss": 0.3469, + "learning_rate": 2.9092412587958124e-05, + "loss": 0.4518, "step": 10695 }, { "epoch": 0.5, - "learning_rate": 1.909052552622943e-05, - "loss": 0.5109, + "learning_rate": 2.9091944518122104e-05, + "loss": 0.5246, "step": 10700 }, { "epoch": 0.5, - "learning_rate": 1.9090056724954295e-05, - "loss": 0.2304, + "learning_rate": 2.9091476448286084e-05, + "loss": 0.2917, "step": 10705 }, { "epoch": 0.5, - "learning_rate": 1.9089587923679155e-05, - "loss": 0.1118, + "learning_rate": 2.9091008378450064e-05, + "loss": 0.1728, "step": 10710 }, { "epoch": 0.5, - "learning_rate": 1.9089119122404015e-05, - "loss": 0.1731, + "learning_rate": 2.9090540308614047e-05, + "loss": 0.1577, "step": 10715 }, { "epoch": 0.5, - "learning_rate": 1.9088650321128875e-05, - "loss": 0.1422, + "learning_rate": 2.9090072238778027e-05, + "loss": 0.0824, "step": 10720 }, { "epoch": 0.5, - "learning_rate": 1.9088181519853735e-05, - "loss": 0.2179, + "learning_rate": 2.9089604168942007e-05, + "loss": 0.1226, "step": 10725 }, { "epoch": 0.5, - "learning_rate": 1.9087712718578598e-05, - "loss": 0.2427, + "learning_rate": 2.9089136099105986e-05, + "loss": 0.2118, "step": 10730 }, { "epoch": 0.5, - "learning_rate": 1.9087243917303458e-05, - "loss": 0.2411, + "learning_rate": 2.908866802926997e-05, + "loss": 0.162, "step": 10735 }, { "epoch": 0.5, - "learning_rate": 1.9086775116028318e-05, - "loss": 0.2579, + "learning_rate": 2.908819995943395e-05, + "loss": 0.3048, "step": 10740 }, { "epoch": 0.5, - "learning_rate": 1.9086306314753178e-05, - "loss": 0.3481, + "learning_rate": 2.908773188959793e-05, + "loss": 0.2798, "step": 10745 }, { "epoch": 0.5, - "learning_rate": 1.9085837513478038e-05, - "loss": 0.5099, + "learning_rate": 2.908726381976191e-05, + "loss": 0.4306, "step": 10750 }, { "epoch": 0.5, - "learning_rate": 1.9085368712202898e-05, - "loss": 0.2352, + "learning_rate": 2.9086795749925892e-05, + "loss": 0.2259, "step": 10755 }, { "epoch": 0.5, - "learning_rate": 1.9084899910927758e-05, - "loss": 0.1045, + "learning_rate": 2.908632768008987e-05, + "loss": 0.116, "step": 10760 }, { "epoch": 0.5, - "learning_rate": 1.9084431109652618e-05, - "loss": 0.2044, + "learning_rate": 2.908585961025385e-05, + "loss": 0.108, "step": 10765 }, { "epoch": 0.5, - "learning_rate": 1.908396230837748e-05, - "loss": 0.1178, + "learning_rate": 2.9085391540417828e-05, + "loss": 0.1246, "step": 10770 }, { "epoch": 0.5, - "learning_rate": 1.908349350710234e-05, - "loss": 0.2247, + "learning_rate": 2.908492347058181e-05, + "loss": 0.1518, "step": 10775 }, { "epoch": 0.5, - "learning_rate": 1.90830247058272e-05, - "loss": 0.1583, + "learning_rate": 2.908445540074579e-05, + "loss": 0.1694, "step": 10780 }, { "epoch": 0.5, - "learning_rate": 1.9082555904552064e-05, - "loss": 0.2375, + "learning_rate": 2.908398733090977e-05, + "loss": 0.3221, "step": 10785 }, { "epoch": 0.5, - "learning_rate": 1.9082087103276924e-05, - "loss": 0.3239, + "learning_rate": 2.9083519261073754e-05, + "loss": 0.2946, "step": 10790 }, { "epoch": 0.5, - "learning_rate": 1.9081618302001784e-05, - "loss": 0.3176, + "learning_rate": 2.9083051191237734e-05, + "loss": 0.4544, "step": 10795 }, { "epoch": 0.5, - "learning_rate": 1.9081149500726644e-05, - "loss": 0.5501, + "learning_rate": 2.9082583121401714e-05, + "loss": 0.6625, "step": 10800 }, { "epoch": 0.5, - "learning_rate": 1.9080680699451504e-05, - "loss": 0.2226, + "learning_rate": 2.9082115051565694e-05, + "loss": 0.3402, "step": 10805 }, { "epoch": 0.5, - "learning_rate": 1.9080211898176364e-05, - "loss": 0.085, + "learning_rate": 2.9081646981729677e-05, + "loss": 0.1606, "step": 10810 }, { "epoch": 0.5, - "learning_rate": 1.9079743096901224e-05, - "loss": 0.1627, + "learning_rate": 2.9081178911893657e-05, + "loss": 0.1431, "step": 10815 }, { "epoch": 0.5, - "learning_rate": 1.9079274295626084e-05, - "loss": 0.0776, + "learning_rate": 2.9080710842057637e-05, + "loss": 0.1159, "step": 10820 }, { "epoch": 0.51, - "learning_rate": 1.9078805494350947e-05, - "loss": 0.2069, + "learning_rate": 2.9080242772221613e-05, + "loss": 0.171, "step": 10825 }, { "epoch": 0.51, - "learning_rate": 1.9078336693075807e-05, - "loss": 0.2966, + "learning_rate": 2.9079774702385596e-05, + "loss": 0.143, "step": 10830 }, { "epoch": 0.51, - "learning_rate": 1.9077867891800667e-05, - "loss": 0.3082, + "learning_rate": 2.9079306632549576e-05, + "loss": 0.3442, "step": 10835 }, { "epoch": 0.51, - "learning_rate": 1.9077399090525527e-05, - "loss": 0.2768, + "learning_rate": 2.9078838562713556e-05, + "loss": 0.3745, "step": 10840 }, { "epoch": 0.51, - "learning_rate": 1.9076930289250387e-05, - "loss": 0.3287, + "learning_rate": 2.907837049287754e-05, + "loss": 0.2238, "step": 10845 }, { "epoch": 0.51, - "learning_rate": 1.907646148797525e-05, - "loss": 0.6915, + "learning_rate": 2.907790242304152e-05, + "loss": 0.558, "step": 10850 }, { "epoch": 0.51, - "learning_rate": 1.907599268670011e-05, - "loss": 0.2431, + "learning_rate": 2.90774343532055e-05, + "loss": 0.1763, "step": 10855 }, { "epoch": 0.51, - "learning_rate": 1.907552388542497e-05, - "loss": 0.1133, + "learning_rate": 2.907696628336948e-05, + "loss": 0.0905, "step": 10860 }, { "epoch": 0.51, - "learning_rate": 1.907505508414983e-05, - "loss": 0.1717, + "learning_rate": 2.9076498213533462e-05, + "loss": 0.1248, "step": 10865 }, { "epoch": 0.51, - "learning_rate": 1.9074586282874693e-05, - "loss": 0.1157, + "learning_rate": 2.907603014369744e-05, + "loss": 0.0964, "step": 10870 }, { "epoch": 0.51, - "learning_rate": 1.9074117481599553e-05, - "loss": 0.2148, + "learning_rate": 2.907556207386142e-05, + "loss": 0.1588, "step": 10875 }, { "epoch": 0.51, - "learning_rate": 1.9073648680324413e-05, - "loss": 0.3085, + "learning_rate": 2.90750940040254e-05, + "loss": 0.2716, "step": 10880 }, { "epoch": 0.51, - "learning_rate": 1.9073179879049273e-05, - "loss": 0.2192, + "learning_rate": 2.9074625934189384e-05, + "loss": 0.1772, "step": 10885 }, { "epoch": 0.51, - "learning_rate": 1.9072711077774133e-05, - "loss": 0.2851, + "learning_rate": 2.907415786435336e-05, + "loss": 0.3076, "step": 10890 }, { "epoch": 0.51, - "learning_rate": 1.9072242276498993e-05, - "loss": 0.2993, + "learning_rate": 2.907368979451734e-05, + "loss": 0.3516, "step": 10895 }, { "epoch": 0.51, - "learning_rate": 1.9071773475223853e-05, - "loss": 0.3583, + "learning_rate": 2.9073221724681324e-05, + "loss": 0.651, "step": 10900 }, { "epoch": 0.51, - "learning_rate": 1.9071304673948713e-05, - "loss": 0.3118, + "learning_rate": 2.9072753654845304e-05, + "loss": 0.377, "step": 10905 }, { "epoch": 0.51, - "learning_rate": 1.9070835872673576e-05, - "loss": 0.0893, + "learning_rate": 2.9072285585009283e-05, + "loss": 0.1039, "step": 10910 }, { "epoch": 0.51, - "learning_rate": 1.9070367071398436e-05, - "loss": 0.0958, + "learning_rate": 2.9071817515173263e-05, + "loss": 0.158, "step": 10915 }, { "epoch": 0.51, - "learning_rate": 1.9069898270123296e-05, - "loss": 0.1406, + "learning_rate": 2.9071349445337247e-05, + "loss": 0.1531, "step": 10920 }, { "epoch": 0.51, - "learning_rate": 1.9069429468848156e-05, - "loss": 0.1843, + "learning_rate": 2.9070881375501226e-05, + "loss": 0.1671, "step": 10925 }, { "epoch": 0.51, - "learning_rate": 1.906896066757302e-05, - "loss": 0.2012, + "learning_rate": 2.9070413305665206e-05, + "loss": 0.164, "step": 10930 }, { "epoch": 0.51, - "learning_rate": 1.906849186629788e-05, - "loss": 0.2509, + "learning_rate": 2.9069945235829186e-05, + "loss": 0.2341, "step": 10935 }, { "epoch": 0.51, - "learning_rate": 1.906802306502274e-05, - "loss": 0.3919, + "learning_rate": 2.906947716599317e-05, + "loss": 0.3567, "step": 10940 }, { "epoch": 0.51, - "learning_rate": 1.90675542637476e-05, - "loss": 0.3983, + "learning_rate": 2.906900909615715e-05, + "loss": 0.4844, "step": 10945 }, { "epoch": 0.51, - "learning_rate": 1.906708546247246e-05, - "loss": 0.5589, + "learning_rate": 2.9068541026321125e-05, + "loss": 0.5868, "step": 10950 }, { "epoch": 0.51, - "learning_rate": 1.906661666119732e-05, - "loss": 0.2583, + "learning_rate": 2.9068072956485105e-05, + "loss": 0.2721, "step": 10955 }, { "epoch": 0.51, - "learning_rate": 1.9066147859922182e-05, - "loss": 0.1028, + "learning_rate": 2.906760488664909e-05, + "loss": 0.0986, "step": 10960 }, { "epoch": 0.51, - "learning_rate": 1.9065679058647042e-05, - "loss": 0.1177, + "learning_rate": 2.9067136816813068e-05, + "loss": 0.128, "step": 10965 }, { "epoch": 0.51, - "learning_rate": 1.9065210257371902e-05, - "loss": 0.2102, + "learning_rate": 2.9066668746977048e-05, + "loss": 0.2164, "step": 10970 }, { "epoch": 0.51, - "learning_rate": 1.9064741456096762e-05, - "loss": 0.1336, + "learning_rate": 2.906620067714103e-05, + "loss": 0.1356, "step": 10975 }, { "epoch": 0.51, - "learning_rate": 1.9064272654821622e-05, - "loss": 0.2197, + "learning_rate": 2.906573260730501e-05, + "loss": 0.1868, "step": 10980 }, { "epoch": 0.51, - "learning_rate": 1.9063803853546482e-05, - "loss": 0.2254, + "learning_rate": 2.906526453746899e-05, + "loss": 0.1627, "step": 10985 }, { "epoch": 0.51, - "learning_rate": 1.9063335052271345e-05, - "loss": 0.3272, + "learning_rate": 2.906479646763297e-05, + "loss": 0.2136, "step": 10990 }, { "epoch": 0.51, - "learning_rate": 1.9062866250996205e-05, - "loss": 0.3669, + "learning_rate": 2.9064328397796954e-05, + "loss": 0.3246, "step": 10995 }, { "epoch": 0.51, - "learning_rate": 1.9062397449721065e-05, - "loss": 0.5502, + "learning_rate": 2.9063860327960934e-05, + "loss": 0.6251, "step": 11000 }, { "epoch": 0.51, - "learning_rate": 1.9061928648445925e-05, - "loss": 0.2126, + "learning_rate": 2.9063392258124914e-05, + "loss": 0.2857, "step": 11005 }, { "epoch": 0.51, - "learning_rate": 1.9061459847170788e-05, - "loss": 0.1083, + "learning_rate": 2.9062924188288893e-05, + "loss": 0.0919, "step": 11010 }, { "epoch": 0.51, - "learning_rate": 1.9060991045895648e-05, - "loss": 0.1021, + "learning_rate": 2.9062456118452873e-05, + "loss": 0.1208, "step": 11015 }, { "epoch": 0.51, - "learning_rate": 1.9060522244620508e-05, - "loss": 0.2316, + "learning_rate": 2.9061988048616853e-05, + "loss": 0.2048, "step": 11020 }, { "epoch": 0.51, - "learning_rate": 1.9060053443345368e-05, - "loss": 0.1626, + "learning_rate": 2.9061519978780833e-05, + "loss": 0.1597, "step": 11025 }, { "epoch": 0.51, - "learning_rate": 1.9059584642070228e-05, - "loss": 0.1842, + "learning_rate": 2.9061051908944816e-05, + "loss": 0.1806, "step": 11030 }, { "epoch": 0.51, - "learning_rate": 1.9059115840795088e-05, - "loss": 0.2737, + "learning_rate": 2.9060583839108796e-05, + "loss": 0.2313, "step": 11035 }, { "epoch": 0.52, - "learning_rate": 1.9058647039519948e-05, - "loss": 0.2535, + "learning_rate": 2.9060115769272776e-05, + "loss": 0.3312, "step": 11040 }, { "epoch": 0.52, - "learning_rate": 1.9058178238244808e-05, - "loss": 0.49, + "learning_rate": 2.9059647699436756e-05, + "loss": 0.3499, "step": 11045 }, { "epoch": 0.52, - "learning_rate": 1.9057709436969668e-05, - "loss": 0.7787, + "learning_rate": 2.905917962960074e-05, + "loss": 0.502, "step": 11050 }, { "epoch": 0.52, - "learning_rate": 1.905724063569453e-05, - "loss": 0.2668, + "learning_rate": 2.905871155976472e-05, + "loss": 0.2127, "step": 11055 }, { "epoch": 0.52, - "learning_rate": 1.905677183441939e-05, - "loss": 0.097, + "learning_rate": 2.90582434899287e-05, + "loss": 0.0773, "step": 11060 }, { "epoch": 0.52, - "learning_rate": 1.905630303314425e-05, - "loss": 0.0949, + "learning_rate": 2.9057775420092678e-05, + "loss": 0.2171, "step": 11065 }, { "epoch": 0.52, - "learning_rate": 1.9055834231869114e-05, - "loss": 0.1723, + "learning_rate": 2.905730735025666e-05, + "loss": 0.1632, "step": 11070 }, { "epoch": 0.52, - "learning_rate": 1.9055365430593974e-05, - "loss": 0.2328, + "learning_rate": 2.905683928042064e-05, + "loss": 0.1804, "step": 11075 }, { "epoch": 0.52, - "learning_rate": 1.9054896629318834e-05, - "loss": 0.2321, + "learning_rate": 2.9056371210584618e-05, + "loss": 0.1888, "step": 11080 }, { "epoch": 0.52, - "learning_rate": 1.9054427828043694e-05, - "loss": 0.3256, + "learning_rate": 2.90559031407486e-05, + "loss": 0.1782, "step": 11085 }, { "epoch": 0.52, - "learning_rate": 1.9053959026768554e-05, - "loss": 0.331, + "learning_rate": 2.905543507091258e-05, + "loss": 0.2457, "step": 11090 }, { "epoch": 0.52, - "learning_rate": 1.9053490225493414e-05, - "loss": 0.3254, + "learning_rate": 2.905496700107656e-05, + "loss": 0.3533, "step": 11095 }, { "epoch": 0.52, - "learning_rate": 1.9053021424218277e-05, - "loss": 0.6373, + "learning_rate": 2.905449893124054e-05, + "loss": 0.662, "step": 11100 }, { "epoch": 0.52, - "learning_rate": 1.9052552622943137e-05, - "loss": 0.2815, + "learning_rate": 2.9054030861404523e-05, + "loss": 0.2686, "step": 11105 }, { "epoch": 0.52, - "learning_rate": 1.9052083821667997e-05, - "loss": 0.1203, + "learning_rate": 2.9053562791568503e-05, + "loss": 0.0685, "step": 11110 }, { "epoch": 0.52, - "learning_rate": 1.9051615020392857e-05, - "loss": 0.2007, + "learning_rate": 2.9053094721732483e-05, + "loss": 0.1569, "step": 11115 }, { "epoch": 0.52, - "learning_rate": 1.9051146219117717e-05, - "loss": 0.1206, + "learning_rate": 2.9052626651896463e-05, + "loss": 0.1152, "step": 11120 }, { "epoch": 0.52, - "learning_rate": 1.9050677417842577e-05, - "loss": 0.2319, + "learning_rate": 2.9052158582060446e-05, + "loss": 0.1515, "step": 11125 }, { "epoch": 0.52, - "learning_rate": 1.9050208616567437e-05, - "loss": 0.1269, + "learning_rate": 2.9051690512224426e-05, + "loss": 0.242, "step": 11130 }, { "epoch": 0.52, - "learning_rate": 1.90497398152923e-05, - "loss": 0.2155, + "learning_rate": 2.9051222442388406e-05, + "loss": 0.2395, "step": 11135 }, { "epoch": 0.52, - "learning_rate": 1.904927101401716e-05, - "loss": 0.1693, + "learning_rate": 2.9050754372552386e-05, + "loss": 0.2851, "step": 11140 }, { "epoch": 0.52, - "learning_rate": 1.904880221274202e-05, - "loss": 0.4148, + "learning_rate": 2.9050286302716365e-05, + "loss": 0.347, "step": 11145 }, { "epoch": 0.52, - "learning_rate": 1.9048333411466883e-05, - "loss": 0.6929, + "learning_rate": 2.9049818232880345e-05, + "loss": 0.4843, "step": 11150 }, { "epoch": 0.52, - "learning_rate": 1.9047864610191743e-05, - "loss": 0.2697, + "learning_rate": 2.9049350163044325e-05, + "loss": 0.225, "step": 11155 }, { "epoch": 0.52, - "learning_rate": 1.9047395808916603e-05, - "loss": 0.115, + "learning_rate": 2.9048882093208308e-05, + "loss": 0.1013, "step": 11160 }, { "epoch": 0.52, - "learning_rate": 1.9046927007641463e-05, - "loss": 0.0827, + "learning_rate": 2.9048414023372288e-05, + "loss": 0.1425, "step": 11165 }, { "epoch": 0.52, - "learning_rate": 1.9046458206366323e-05, - "loss": 0.132, + "learning_rate": 2.9047945953536268e-05, + "loss": 0.2072, "step": 11170 }, { "epoch": 0.52, - "learning_rate": 1.9045989405091183e-05, - "loss": 0.2029, + "learning_rate": 2.9047477883700248e-05, + "loss": 0.1754, "step": 11175 }, { "epoch": 0.52, - "learning_rate": 1.9045520603816043e-05, - "loss": 0.1555, + "learning_rate": 2.904700981386423e-05, + "loss": 0.1523, "step": 11180 }, { "epoch": 0.52, - "learning_rate": 1.9045051802540903e-05, - "loss": 0.2248, + "learning_rate": 2.904654174402821e-05, + "loss": 0.2163, "step": 11185 }, { "epoch": 0.52, - "learning_rate": 1.9044583001265763e-05, - "loss": 0.3005, + "learning_rate": 2.904607367419219e-05, + "loss": 0.3456, "step": 11190 }, { "epoch": 0.52, - "learning_rate": 1.9044114199990626e-05, - "loss": 0.335, + "learning_rate": 2.904560560435617e-05, + "loss": 0.3785, "step": 11195 }, { "epoch": 0.52, - "learning_rate": 1.9043645398715486e-05, - "loss": 0.636, + "learning_rate": 2.9045137534520154e-05, + "loss": 0.571, "step": 11200 }, { "epoch": 0.52, - "learning_rate": 1.9043176597440346e-05, - "loss": 0.2538, + "learning_rate": 2.904466946468413e-05, + "loss": 0.2442, "step": 11205 }, { "epoch": 0.52, - "learning_rate": 1.9042707796165206e-05, - "loss": 0.1232, + "learning_rate": 2.904420139484811e-05, + "loss": 0.1094, "step": 11210 }, { "epoch": 0.52, - "learning_rate": 1.904223899489007e-05, - "loss": 0.0881, + "learning_rate": 2.9043733325012093e-05, + "loss": 0.1536, "step": 11215 }, { "epoch": 0.52, - "learning_rate": 1.904177019361493e-05, - "loss": 0.1117, + "learning_rate": 2.9043265255176073e-05, + "loss": 0.1979, "step": 11220 }, { "epoch": 0.52, - "learning_rate": 1.904130139233979e-05, - "loss": 0.1845, + "learning_rate": 2.9042797185340053e-05, + "loss": 0.1617, "step": 11225 }, { "epoch": 0.52, - "learning_rate": 1.904083259106465e-05, - "loss": 0.2521, + "learning_rate": 2.9042329115504032e-05, + "loss": 0.2681, "step": 11230 }, { "epoch": 0.52, - "learning_rate": 1.904036378978951e-05, - "loss": 0.2299, + "learning_rate": 2.9041861045668016e-05, + "loss": 0.2328, "step": 11235 }, { "epoch": 0.52, - "learning_rate": 1.9039894988514372e-05, - "loss": 0.2237, + "learning_rate": 2.9041392975831995e-05, + "loss": 0.2601, "step": 11240 }, { "epoch": 0.52, - "learning_rate": 1.9039426187239232e-05, - "loss": 0.4258, + "learning_rate": 2.9040924905995975e-05, + "loss": 0.2401, "step": 11245 }, { "epoch": 0.52, - "learning_rate": 1.9038957385964092e-05, - "loss": 0.6981, + "learning_rate": 2.9040456836159955e-05, + "loss": 0.5136, "step": 11250 }, { "epoch": 0.53, - "learning_rate": 1.9038488584688952e-05, - "loss": 0.2667, + "learning_rate": 2.903998876632394e-05, + "loss": 0.2174, "step": 11255 }, { "epoch": 0.53, - "learning_rate": 1.9038019783413812e-05, - "loss": 0.093, + "learning_rate": 2.9039520696487918e-05, + "loss": 0.0978, "step": 11260 }, { "epoch": 0.53, - "learning_rate": 1.9037550982138672e-05, - "loss": 0.1913, + "learning_rate": 2.9039052626651898e-05, + "loss": 0.1548, "step": 11265 }, { "epoch": 0.53, - "learning_rate": 1.9037082180863532e-05, - "loss": 0.1173, + "learning_rate": 2.9038584556815878e-05, + "loss": 0.1512, "step": 11270 }, { "epoch": 0.53, - "learning_rate": 1.9036613379588392e-05, - "loss": 0.2151, + "learning_rate": 2.9038116486979858e-05, + "loss": 0.0844, "step": 11275 }, { "epoch": 0.53, - "learning_rate": 1.9036144578313255e-05, - "loss": 0.1969, + "learning_rate": 2.9037648417143837e-05, + "loss": 0.2364, "step": 11280 }, { "epoch": 0.53, - "learning_rate": 1.9035675777038115e-05, - "loss": 0.2601, + "learning_rate": 2.9037180347307817e-05, + "loss": 0.2016, "step": 11285 }, { "epoch": 0.53, - "learning_rate": 1.9035206975762975e-05, - "loss": 0.2812, + "learning_rate": 2.90367122774718e-05, + "loss": 0.3631, "step": 11290 }, { "epoch": 0.53, - "learning_rate": 1.9034738174487838e-05, - "loss": 0.3009, + "learning_rate": 2.903624420763578e-05, + "loss": 0.3152, "step": 11295 }, { "epoch": 0.53, - "learning_rate": 1.9034269373212698e-05, - "loss": 0.5065, + "learning_rate": 2.903577613779976e-05, + "loss": 0.6023, "step": 11300 }, { "epoch": 0.53, - "learning_rate": 1.9033800571937558e-05, - "loss": 0.2912, + "learning_rate": 2.903530806796374e-05, + "loss": 0.2934, "step": 11305 }, { "epoch": 0.53, - "learning_rate": 1.9033331770662418e-05, - "loss": 0.0743, + "learning_rate": 2.9034839998127723e-05, + "loss": 0.0976, "step": 11310 }, { "epoch": 0.53, - "learning_rate": 1.9032862969387278e-05, - "loss": 0.1384, + "learning_rate": 2.9034371928291703e-05, + "loss": 0.124, "step": 11315 }, { "epoch": 0.53, - "learning_rate": 1.9032394168112138e-05, - "loss": 0.1069, + "learning_rate": 2.9033903858455683e-05, + "loss": 0.2261, "step": 11320 }, { "epoch": 0.53, - "learning_rate": 1.9031925366836998e-05, - "loss": 0.1452, + "learning_rate": 2.9033435788619666e-05, + "loss": 0.1269, "step": 11325 }, { "epoch": 0.53, - "learning_rate": 1.9031456565561858e-05, - "loss": 0.162, + "learning_rate": 2.9032967718783642e-05, + "loss": 0.2572, "step": 11330 }, { "epoch": 0.53, - "learning_rate": 1.903098776428672e-05, - "loss": 0.2265, + "learning_rate": 2.9032499648947622e-05, + "loss": 0.233, "step": 11335 }, { "epoch": 0.53, - "learning_rate": 1.903051896301158e-05, - "loss": 0.277, + "learning_rate": 2.9032031579111602e-05, + "loss": 0.3301, "step": 11340 }, { "epoch": 0.53, - "learning_rate": 1.903005016173644e-05, - "loss": 0.3428, + "learning_rate": 2.9031563509275585e-05, + "loss": 0.339, "step": 11345 }, { "epoch": 0.53, - "learning_rate": 1.90295813604613e-05, - "loss": 0.4802, + "learning_rate": 2.9031095439439565e-05, + "loss": 0.5173, "step": 11350 }, { "epoch": 0.53, - "learning_rate": 1.902911255918616e-05, - "loss": 0.2706, + "learning_rate": 2.9030627369603545e-05, + "loss": 0.2303, "step": 11355 }, { "epoch": 0.53, - "learning_rate": 1.9028643757911024e-05, - "loss": 0.087, + "learning_rate": 2.9030159299767525e-05, + "loss": 0.2254, "step": 11360 }, { "epoch": 0.53, - "learning_rate": 1.9028174956635884e-05, - "loss": 0.1064, + "learning_rate": 2.9029691229931508e-05, + "loss": 0.1155, "step": 11365 }, { "epoch": 0.53, - "learning_rate": 1.9027706155360744e-05, - "loss": 0.0903, + "learning_rate": 2.9029223160095488e-05, + "loss": 0.2024, "step": 11370 }, { "epoch": 0.53, - "learning_rate": 1.9027237354085604e-05, - "loss": 0.2627, + "learning_rate": 2.9028755090259468e-05, + "loss": 0.1095, "step": 11375 }, { "epoch": 0.53, - "learning_rate": 1.9026768552810467e-05, - "loss": 0.1548, + "learning_rate": 2.9028287020423447e-05, + "loss": 0.2487, "step": 11380 }, { "epoch": 0.53, - "learning_rate": 1.9026299751535327e-05, - "loss": 0.2691, + "learning_rate": 2.902781895058743e-05, + "loss": 0.2335, "step": 11385 }, { "epoch": 0.53, - "learning_rate": 1.9025830950260187e-05, - "loss": 0.2731, + "learning_rate": 2.902735088075141e-05, + "loss": 0.2552, "step": 11390 }, { "epoch": 0.53, - "learning_rate": 1.9025362148985047e-05, - "loss": 0.3839, + "learning_rate": 2.9026882810915387e-05, + "loss": 0.4556, "step": 11395 }, { "epoch": 0.53, - "learning_rate": 1.9024893347709907e-05, - "loss": 0.6679, + "learning_rate": 2.902641474107937e-05, + "loss": 0.4673, "step": 11400 }, { "epoch": 0.53, - "learning_rate": 1.9024424546434767e-05, - "loss": 0.3018, + "learning_rate": 2.902594667124335e-05, + "loss": 0.2411, "step": 11405 }, { "epoch": 0.53, - "learning_rate": 1.9023955745159627e-05, - "loss": 0.1024, + "learning_rate": 2.902547860140733e-05, + "loss": 0.1411, "step": 11410 }, { "epoch": 0.53, - "learning_rate": 1.9023486943884487e-05, - "loss": 0.1274, + "learning_rate": 2.902501053157131e-05, + "loss": 0.1266, "step": 11415 }, { "epoch": 0.53, - "learning_rate": 1.902301814260935e-05, - "loss": 0.1693, + "learning_rate": 2.9024542461735293e-05, + "loss": 0.1989, "step": 11420 }, { "epoch": 0.53, - "learning_rate": 1.902254934133421e-05, - "loss": 0.1957, + "learning_rate": 2.9024074391899272e-05, + "loss": 0.1909, "step": 11425 }, { "epoch": 0.53, - "learning_rate": 1.902208054005907e-05, - "loss": 0.1639, + "learning_rate": 2.9023606322063252e-05, + "loss": 0.2077, "step": 11430 }, { "epoch": 0.53, - "learning_rate": 1.902161173878393e-05, - "loss": 0.2524, + "learning_rate": 2.9023138252227232e-05, + "loss": 0.2693, "step": 11435 }, { "epoch": 0.53, - "learning_rate": 1.9021142937508793e-05, - "loss": 0.4686, + "learning_rate": 2.9022670182391215e-05, + "loss": 0.2329, "step": 11440 }, { "epoch": 0.53, - "learning_rate": 1.9020674136233653e-05, - "loss": 0.4812, + "learning_rate": 2.9022202112555195e-05, + "loss": 0.3578, "step": 11445 }, { "epoch": 0.53, - "learning_rate": 1.9020205334958513e-05, - "loss": 0.4917, + "learning_rate": 2.9021734042719175e-05, + "loss": 0.5947, "step": 11450 }, { "epoch": 0.53, - "learning_rate": 1.9019736533683373e-05, - "loss": 0.2595, + "learning_rate": 2.9021265972883155e-05, + "loss": 0.2367, "step": 11455 }, { "epoch": 0.53, - "learning_rate": 1.9019267732408233e-05, - "loss": 0.0769, + "learning_rate": 2.9020797903047135e-05, + "loss": 0.0884, "step": 11460 }, { "epoch": 0.53, - "learning_rate": 1.9018798931133093e-05, - "loss": 0.1518, + "learning_rate": 2.9020329833211114e-05, + "loss": 0.1093, "step": 11465 }, { "epoch": 0.54, - "learning_rate": 1.9018330129857953e-05, - "loss": 0.1158, + "learning_rate": 2.9019861763375094e-05, + "loss": 0.1203, "step": 11470 }, { "epoch": 0.54, - "learning_rate": 1.9017861328582816e-05, - "loss": 0.1847, + "learning_rate": 2.9019393693539077e-05, + "loss": 0.1763, "step": 11475 }, { "epoch": 0.54, - "learning_rate": 1.9017392527307676e-05, - "loss": 0.1882, + "learning_rate": 2.9018925623703057e-05, + "loss": 0.2753, "step": 11480 }, { "epoch": 0.54, - "learning_rate": 1.9016923726032536e-05, - "loss": 0.2294, + "learning_rate": 2.9018457553867037e-05, + "loss": 0.2258, "step": 11485 }, { "epoch": 0.54, - "learning_rate": 1.9016454924757396e-05, - "loss": 0.371, + "learning_rate": 2.9017989484031017e-05, + "loss": 0.2626, "step": 11490 }, { "epoch": 0.54, - "learning_rate": 1.9015986123482256e-05, - "loss": 0.3808, + "learning_rate": 2.9017521414195e-05, + "loss": 0.3519, "step": 11495 }, { "epoch": 0.54, - "learning_rate": 1.901551732220712e-05, - "loss": 0.5649, + "learning_rate": 2.901705334435898e-05, + "loss": 0.508, "step": 11500 }, { "epoch": 0.54, - "learning_rate": 1.901504852093198e-05, - "loss": 0.3712, + "learning_rate": 2.901658527452296e-05, + "loss": 0.1452, "step": 11505 }, { "epoch": 0.54, - "learning_rate": 1.901457971965684e-05, - "loss": 0.1052, + "learning_rate": 2.9016117204686943e-05, + "loss": 0.0792, "step": 11510 }, { "epoch": 0.54, - "learning_rate": 1.90141109183817e-05, - "loss": 0.1216, + "learning_rate": 2.9015649134850923e-05, + "loss": 0.1266, "step": 11515 }, { "epoch": 0.54, - "learning_rate": 1.9013642117106562e-05, - "loss": 0.1673, + "learning_rate": 2.90151810650149e-05, + "loss": 0.1531, "step": 11520 }, { "epoch": 0.54, - "learning_rate": 1.9013173315831422e-05, - "loss": 0.1775, + "learning_rate": 2.901471299517888e-05, + "loss": 0.1746, "step": 11525 }, { "epoch": 0.54, - "learning_rate": 1.9012704514556282e-05, - "loss": 0.2124, + "learning_rate": 2.9014244925342862e-05, + "loss": 0.2665, "step": 11530 }, { "epoch": 0.54, - "learning_rate": 1.9012235713281142e-05, - "loss": 0.2195, + "learning_rate": 2.9013776855506842e-05, + "loss": 0.3047, "step": 11535 }, { "epoch": 0.54, - "learning_rate": 1.9011766912006002e-05, - "loss": 0.2557, + "learning_rate": 2.9013308785670822e-05, + "loss": 0.304, "step": 11540 }, { "epoch": 0.54, - "learning_rate": 1.9011298110730862e-05, - "loss": 0.3398, + "learning_rate": 2.90128407158348e-05, + "loss": 0.3622, "step": 11545 }, { "epoch": 0.54, - "learning_rate": 1.9010829309455722e-05, - "loss": 0.6744, + "learning_rate": 2.9012372645998785e-05, + "loss": 0.56, "step": 11550 }, { "epoch": 0.54, - "learning_rate": 1.9010360508180582e-05, - "loss": 0.2358, + "learning_rate": 2.9011904576162765e-05, + "loss": 0.2347, "step": 11555 }, { "epoch": 0.54, - "learning_rate": 1.9009891706905442e-05, - "loss": 0.0978, + "learning_rate": 2.9011436506326744e-05, + "loss": 0.0708, "step": 11560 }, { "epoch": 0.54, - "learning_rate": 1.9009422905630305e-05, - "loss": 0.1722, + "learning_rate": 2.9010968436490724e-05, + "loss": 0.0918, "step": 11565 }, { "epoch": 0.54, - "learning_rate": 1.9008954104355165e-05, - "loss": 0.1497, + "learning_rate": 2.9010500366654708e-05, + "loss": 0.148, "step": 11570 }, { "epoch": 0.54, - "learning_rate": 1.9008485303080025e-05, - "loss": 0.2723, + "learning_rate": 2.9010032296818687e-05, + "loss": 0.2018, "step": 11575 }, { "epoch": 0.54, - "learning_rate": 1.900801650180489e-05, - "loss": 0.1787, + "learning_rate": 2.9009564226982667e-05, + "loss": 0.1662, "step": 11580 }, { "epoch": 0.54, - "learning_rate": 1.900754770052975e-05, - "loss": 0.3611, + "learning_rate": 2.9009096157146647e-05, + "loss": 0.2973, "step": 11585 }, { "epoch": 0.54, - "learning_rate": 1.9007078899254608e-05, - "loss": 0.2891, + "learning_rate": 2.9008628087310627e-05, + "loss": 0.2854, "step": 11590 }, { "epoch": 0.54, - "learning_rate": 1.9006610097979468e-05, - "loss": 0.3841, + "learning_rate": 2.9008160017474607e-05, + "loss": 0.3845, "step": 11595 }, { "epoch": 0.54, - "learning_rate": 1.9006141296704328e-05, - "loss": 0.6541, + "learning_rate": 2.9007691947638586e-05, + "loss": 0.5031, "step": 11600 }, { "epoch": 0.54, - "learning_rate": 1.9005672495429188e-05, - "loss": 0.234, + "learning_rate": 2.900722387780257e-05, + "loss": 0.2475, "step": 11605 }, { "epoch": 0.54, - "learning_rate": 1.900520369415405e-05, - "loss": 0.1288, + "learning_rate": 2.900675580796655e-05, + "loss": 0.1119, "step": 11610 }, { "epoch": 0.54, - "learning_rate": 1.900473489287891e-05, - "loss": 0.1206, + "learning_rate": 2.900628773813053e-05, + "loss": 0.1501, "step": 11615 }, { "epoch": 0.54, - "learning_rate": 1.900426609160377e-05, - "loss": 0.176, + "learning_rate": 2.900581966829451e-05, + "loss": 0.102, "step": 11620 }, { "epoch": 0.54, - "learning_rate": 1.900379729032863e-05, - "loss": 0.2343, + "learning_rate": 2.9005351598458492e-05, + "loss": 0.1555, "step": 11625 }, { "epoch": 0.54, - "learning_rate": 1.900332848905349e-05, - "loss": 0.2604, + "learning_rate": 2.9004883528622472e-05, + "loss": 0.2079, "step": 11630 }, { "epoch": 0.54, - "learning_rate": 1.900285968777835e-05, - "loss": 0.2572, + "learning_rate": 2.9004415458786452e-05, + "loss": 0.2517, "step": 11635 }, { "epoch": 0.54, - "learning_rate": 1.900239088650321e-05, - "loss": 0.2942, + "learning_rate": 2.9003947388950435e-05, + "loss": 0.3671, "step": 11640 }, { "epoch": 0.54, - "learning_rate": 1.9001922085228074e-05, - "loss": 0.4468, + "learning_rate": 2.900347931911441e-05, + "loss": 0.3443, "step": 11645 }, { "epoch": 0.54, - "learning_rate": 1.9001453283952934e-05, - "loss": 0.7121, + "learning_rate": 2.900301124927839e-05, + "loss": 0.4903, "step": 11650 }, { "epoch": 0.54, - "learning_rate": 1.9000984482677794e-05, - "loss": 0.2138, + "learning_rate": 2.900254317944237e-05, + "loss": 0.2725, "step": 11655 }, { "epoch": 0.54, - "learning_rate": 1.9000515681402657e-05, - "loss": 0.1269, + "learning_rate": 2.9002075109606354e-05, + "loss": 0.0895, "step": 11660 }, { "epoch": 0.54, - "learning_rate": 1.9000046880127517e-05, - "loss": 0.1191, + "learning_rate": 2.9001607039770334e-05, + "loss": 0.1666, "step": 11665 }, { "epoch": 0.54, - "learning_rate": 1.8999578078852377e-05, - "loss": 0.2002, + "learning_rate": 2.9001138969934314e-05, + "loss": 0.0981, "step": 11670 }, { "epoch": 0.54, - "learning_rate": 1.8999109277577237e-05, - "loss": 0.1856, + "learning_rate": 2.9000670900098294e-05, + "loss": 0.1788, "step": 11675 }, { "epoch": 0.55, - "learning_rate": 1.8998640476302097e-05, - "loss": 0.1776, + "learning_rate": 2.9000202830262277e-05, + "loss": 0.1948, "step": 11680 }, { "epoch": 0.55, - "learning_rate": 1.8998171675026957e-05, - "loss": 0.3287, + "learning_rate": 2.8999734760426257e-05, + "loss": 0.1784, "step": 11685 }, { "epoch": 0.55, - "learning_rate": 1.8997702873751817e-05, - "loss": 0.2521, + "learning_rate": 2.8999266690590237e-05, + "loss": 0.2079, "step": 11690 }, { "epoch": 0.55, - "learning_rate": 1.8997234072476677e-05, - "loss": 0.2936, + "learning_rate": 2.899879862075422e-05, + "loss": 0.2909, "step": 11695 }, { "epoch": 0.55, - "learning_rate": 1.8996765271201537e-05, - "loss": 0.5431, + "learning_rate": 2.89983305509182e-05, + "loss": 0.5101, "step": 11700 }, { "epoch": 0.55, - "learning_rate": 1.89962964699264e-05, - "loss": 0.2955, + "learning_rate": 2.899786248108218e-05, + "loss": 0.2351, "step": 11705 }, { "epoch": 0.55, - "learning_rate": 1.899582766865126e-05, - "loss": 0.067, + "learning_rate": 2.8997394411246156e-05, + "loss": 0.1133, "step": 11710 }, { "epoch": 0.55, - "learning_rate": 1.899535886737612e-05, - "loss": 0.1617, + "learning_rate": 2.899692634141014e-05, + "loss": 0.1062, "step": 11715 }, { "epoch": 0.55, - "learning_rate": 1.899489006610098e-05, - "loss": 0.2038, + "learning_rate": 2.899645827157412e-05, + "loss": 0.1585, "step": 11720 }, { "epoch": 0.55, - "learning_rate": 1.8994421264825843e-05, - "loss": 0.259, + "learning_rate": 2.89959902017381e-05, + "loss": 0.2545, "step": 11725 }, { "epoch": 0.55, - "learning_rate": 1.8993952463550703e-05, - "loss": 0.18, + "learning_rate": 2.899552213190208e-05, + "loss": 0.2166, "step": 11730 }, { "epoch": 0.55, - "learning_rate": 1.8993483662275563e-05, - "loss": 0.2509, + "learning_rate": 2.8995054062066062e-05, + "loss": 0.1675, "step": 11735 }, { "epoch": 0.55, - "learning_rate": 1.8993014861000423e-05, - "loss": 0.3614, + "learning_rate": 2.899458599223004e-05, + "loss": 0.3697, "step": 11740 }, { "epoch": 0.55, - "learning_rate": 1.8992546059725283e-05, - "loss": 0.4022, + "learning_rate": 2.899411792239402e-05, + "loss": 0.4677, "step": 11745 }, { "epoch": 0.55, - "learning_rate": 1.8992077258450146e-05, - "loss": 0.5694, + "learning_rate": 2.8993649852558005e-05, + "loss": 0.5517, "step": 11750 }, { "epoch": 0.55, - "learning_rate": 1.8991608457175006e-05, - "loss": 0.2532, + "learning_rate": 2.8993181782721984e-05, + "loss": 0.2434, "step": 11755 }, { "epoch": 0.55, - "learning_rate": 1.8991139655899866e-05, - "loss": 0.1058, + "learning_rate": 2.8992713712885964e-05, + "loss": 0.0643, "step": 11760 }, { "epoch": 0.55, - "learning_rate": 1.8990670854624726e-05, - "loss": 0.133, + "learning_rate": 2.8992245643049944e-05, + "loss": 0.1173, "step": 11765 }, { "epoch": 0.55, - "learning_rate": 1.8990202053349586e-05, - "loss": 0.1465, + "learning_rate": 2.8991777573213927e-05, + "loss": 0.1006, "step": 11770 }, { "epoch": 0.55, - "learning_rate": 1.8989733252074446e-05, - "loss": 0.1479, + "learning_rate": 2.8991309503377904e-05, + "loss": 0.1823, "step": 11775 }, { "epoch": 0.55, - "learning_rate": 1.8989264450799306e-05, - "loss": 0.2112, + "learning_rate": 2.8990841433541884e-05, + "loss": 0.1258, "step": 11780 }, { "epoch": 0.55, - "learning_rate": 1.8988795649524166e-05, - "loss": 0.229, + "learning_rate": 2.8990373363705863e-05, + "loss": 0.2484, "step": 11785 }, { "epoch": 0.55, - "learning_rate": 1.898832684824903e-05, - "loss": 0.2436, + "learning_rate": 2.8989905293869847e-05, + "loss": 0.2762, "step": 11790 }, { "epoch": 0.55, - "learning_rate": 1.898785804697389e-05, - "loss": 0.3649, + "learning_rate": 2.8989437224033826e-05, + "loss": 0.3407, "step": 11795 }, { "epoch": 0.55, - "learning_rate": 1.898738924569875e-05, - "loss": 0.7018, + "learning_rate": 2.8988969154197806e-05, + "loss": 0.679, "step": 11800 }, { "epoch": 0.55, - "learning_rate": 1.8986920444423613e-05, - "loss": 0.255, + "learning_rate": 2.8988501084361786e-05, + "loss": 0.1905, "step": 11805 }, { "epoch": 0.55, - "learning_rate": 1.8986451643148472e-05, - "loss": 0.13, + "learning_rate": 2.898803301452577e-05, + "loss": 0.0999, "step": 11810 }, { "epoch": 0.55, - "learning_rate": 1.8985982841873332e-05, - "loss": 0.1292, + "learning_rate": 2.898756494468975e-05, + "loss": 0.1598, "step": 11815 }, { "epoch": 0.55, - "learning_rate": 1.8985514040598192e-05, - "loss": 0.1515, + "learning_rate": 2.898709687485373e-05, + "loss": 0.0974, "step": 11820 }, { "epoch": 0.55, - "learning_rate": 1.8985045239323052e-05, - "loss": 0.1737, + "learning_rate": 2.8986628805017712e-05, + "loss": 0.0905, "step": 11825 }, { "epoch": 0.55, - "learning_rate": 1.8984576438047912e-05, - "loss": 0.1838, + "learning_rate": 2.8986160735181692e-05, + "loss": 0.1875, "step": 11830 }, { "epoch": 0.55, - "learning_rate": 1.8984107636772772e-05, - "loss": 0.2843, + "learning_rate": 2.898569266534567e-05, + "loss": 0.1852, "step": 11835 }, { "epoch": 0.55, - "learning_rate": 1.8983638835497632e-05, - "loss": 0.309, + "learning_rate": 2.8985224595509648e-05, + "loss": 0.2225, "step": 11840 }, { "epoch": 0.55, - "learning_rate": 1.8983170034222495e-05, - "loss": 0.3991, + "learning_rate": 2.898475652567363e-05, + "loss": 0.2743, "step": 11845 }, { "epoch": 0.55, - "learning_rate": 1.8982701232947355e-05, - "loss": 0.7293, + "learning_rate": 2.898428845583761e-05, + "loss": 0.5935, "step": 11850 }, { "epoch": 0.55, - "learning_rate": 1.8982232431672215e-05, - "loss": 0.2043, + "learning_rate": 2.898382038600159e-05, + "loss": 0.2495, "step": 11855 }, { "epoch": 0.55, - "learning_rate": 1.8981763630397075e-05, - "loss": 0.1122, + "learning_rate": 2.898335231616557e-05, + "loss": 0.0954, "step": 11860 }, { "epoch": 0.55, - "learning_rate": 1.8981294829121935e-05, - "loss": 0.1696, + "learning_rate": 2.8982884246329554e-05, + "loss": 0.1585, "step": 11865 }, { "epoch": 0.55, - "learning_rate": 1.89808260278468e-05, - "loss": 0.1471, + "learning_rate": 2.8982416176493534e-05, + "loss": 0.1473, "step": 11870 }, { "epoch": 0.55, - "learning_rate": 1.898035722657166e-05, - "loss": 0.2066, + "learning_rate": 2.8981948106657514e-05, + "loss": 0.1356, "step": 11875 }, { "epoch": 0.55, - "learning_rate": 1.8979888425296518e-05, - "loss": 0.2047, + "learning_rate": 2.8981480036821497e-05, + "loss": 0.1425, "step": 11880 }, { "epoch": 0.55, - "learning_rate": 1.8979419624021378e-05, - "loss": 0.2603, + "learning_rate": 2.8981011966985477e-05, + "loss": 0.2479, "step": 11885 }, { "epoch": 0.55, - "learning_rate": 1.897895082274624e-05, - "loss": 0.3069, + "learning_rate": 2.8980543897149456e-05, + "loss": 0.2751, "step": 11890 }, { "epoch": 0.56, - "learning_rate": 1.89784820214711e-05, - "loss": 0.4019, + "learning_rate": 2.8980075827313436e-05, + "loss": 0.343, "step": 11895 }, { "epoch": 0.56, - "learning_rate": 1.897801322019596e-05, - "loss": 0.5205, + "learning_rate": 2.8979607757477416e-05, + "loss": 0.7812, "step": 11900 }, { "epoch": 0.56, - "learning_rate": 1.897754441892082e-05, - "loss": 0.2621, + "learning_rate": 2.8979139687641396e-05, + "loss": 0.219, "step": 11905 }, { "epoch": 0.56, - "learning_rate": 1.897707561764568e-05, - "loss": 0.1128, + "learning_rate": 2.8978671617805376e-05, + "loss": 0.051, "step": 11910 }, { "epoch": 0.56, - "learning_rate": 1.897660681637054e-05, - "loss": 0.1462, + "learning_rate": 2.8978203547969356e-05, + "loss": 0.0918, "step": 11915 }, { "epoch": 0.56, - "learning_rate": 1.89761380150954e-05, - "loss": 0.1285, + "learning_rate": 2.897773547813334e-05, + "loss": 0.1408, "step": 11920 }, { "epoch": 0.56, - "learning_rate": 1.897566921382026e-05, - "loss": 0.1582, + "learning_rate": 2.897726740829732e-05, + "loss": 0.1209, "step": 11925 }, { "epoch": 0.56, - "learning_rate": 1.8975200412545124e-05, - "loss": 0.2148, + "learning_rate": 2.89767993384613e-05, + "loss": 0.1562, "step": 11930 }, { "epoch": 0.56, - "learning_rate": 1.8974731611269984e-05, - "loss": 0.2385, + "learning_rate": 2.897633126862528e-05, + "loss": 0.1601, "step": 11935 }, { "epoch": 0.56, - "learning_rate": 1.8974262809994844e-05, - "loss": 0.2819, + "learning_rate": 2.897586319878926e-05, + "loss": 0.3487, "step": 11940 }, { "epoch": 0.56, - "learning_rate": 1.8973794008719704e-05, - "loss": 0.2586, + "learning_rate": 2.897539512895324e-05, + "loss": 0.3111, "step": 11945 }, { "epoch": 0.56, - "learning_rate": 1.8973325207444568e-05, - "loss": 0.7001, + "learning_rate": 2.897492705911722e-05, + "loss": 0.4768, "step": 11950 }, { "epoch": 0.56, - "learning_rate": 1.8972856406169427e-05, - "loss": 0.2394, + "learning_rate": 2.8974458989281204e-05, + "loss": 0.2605, "step": 11955 }, { "epoch": 0.56, - "learning_rate": 1.8972387604894287e-05, - "loss": 0.0715, + "learning_rate": 2.897399091944518e-05, + "loss": 0.1403, "step": 11960 }, { "epoch": 0.56, - "learning_rate": 1.8971918803619147e-05, - "loss": 0.1559, + "learning_rate": 2.897352284960916e-05, + "loss": 0.0971, "step": 11965 }, { "epoch": 0.56, - "learning_rate": 1.8971450002344007e-05, - "loss": 0.164, + "learning_rate": 2.897305477977314e-05, + "loss": 0.2108, "step": 11970 }, { "epoch": 0.56, - "learning_rate": 1.8970981201068867e-05, - "loss": 0.1327, + "learning_rate": 2.8972586709937124e-05, + "loss": 0.1406, "step": 11975 }, { "epoch": 0.56, - "learning_rate": 1.8970512399793727e-05, - "loss": 0.2868, + "learning_rate": 2.8972118640101103e-05, + "loss": 0.2211, "step": 11980 }, { "epoch": 0.56, - "learning_rate": 1.897004359851859e-05, - "loss": 0.2215, + "learning_rate": 2.8971650570265083e-05, + "loss": 0.2503, "step": 11985 }, { "epoch": 0.56, - "learning_rate": 1.896957479724345e-05, - "loss": 0.3209, + "learning_rate": 2.8971182500429063e-05, + "loss": 0.2778, "step": 11990 }, { "epoch": 0.56, - "learning_rate": 1.896910599596831e-05, - "loss": 0.3683, + "learning_rate": 2.8970714430593046e-05, + "loss": 0.379, "step": 11995 }, { "epoch": 0.56, - "learning_rate": 1.896863719469317e-05, - "loss": 0.6778, + "learning_rate": 2.8970246360757026e-05, + "loss": 0.5211, "step": 12000 }, { "epoch": 0.56, - "learning_rate": 1.896816839341803e-05, - "loss": 0.2626, + "learning_rate": 2.8969778290921006e-05, + "loss": 0.1406, "step": 12005 }, { "epoch": 0.56, - "learning_rate": 1.8967699592142894e-05, - "loss": 0.1251, + "learning_rate": 2.896931022108499e-05, + "loss": 0.0509, "step": 12010 }, { "epoch": 0.56, - "learning_rate": 1.8967230790867753e-05, - "loss": 0.134, + "learning_rate": 2.896884215124897e-05, + "loss": 0.0842, "step": 12015 }, { "epoch": 0.56, - "learning_rate": 1.8966761989592613e-05, - "loss": 0.1713, + "learning_rate": 2.896837408141295e-05, + "loss": 0.2049, "step": 12020 }, { "epoch": 0.56, - "learning_rate": 1.8966293188317473e-05, - "loss": 0.1434, + "learning_rate": 2.8967906011576925e-05, + "loss": 0.1409, "step": 12025 }, { "epoch": 0.56, - "learning_rate": 1.8965824387042337e-05, - "loss": 0.2269, + "learning_rate": 2.896743794174091e-05, + "loss": 0.1995, "step": 12030 }, { "epoch": 0.56, - "learning_rate": 1.8965355585767197e-05, - "loss": 0.1525, + "learning_rate": 2.8966969871904888e-05, + "loss": 0.1705, "step": 12035 }, { "epoch": 0.56, - "learning_rate": 1.8964886784492056e-05, - "loss": 0.2387, + "learning_rate": 2.8966501802068868e-05, + "loss": 0.3089, "step": 12040 }, { "epoch": 0.56, - "learning_rate": 1.8964417983216916e-05, - "loss": 0.3101, + "learning_rate": 2.8966033732232848e-05, + "loss": 0.2439, "step": 12045 }, { "epoch": 0.56, - "learning_rate": 1.8963949181941776e-05, - "loss": 0.5783, + "learning_rate": 2.896556566239683e-05, + "loss": 0.3632, "step": 12050 }, { "epoch": 0.56, - "learning_rate": 1.8963480380666636e-05, - "loss": 0.2279, + "learning_rate": 2.896509759256081e-05, + "loss": 0.2364, "step": 12055 }, { "epoch": 0.56, - "learning_rate": 1.8963011579391496e-05, - "loss": 0.1041, + "learning_rate": 2.896462952272479e-05, + "loss": 0.1063, "step": 12060 }, { "epoch": 0.56, - "learning_rate": 1.8962542778116356e-05, - "loss": 0.1346, + "learning_rate": 2.8964161452888774e-05, + "loss": 0.1204, "step": 12065 }, { "epoch": 0.56, - "learning_rate": 1.8962073976841216e-05, - "loss": 0.1, + "learning_rate": 2.8963693383052754e-05, + "loss": 0.105, "step": 12070 }, { "epoch": 0.56, - "learning_rate": 1.896160517556608e-05, - "loss": 0.1159, + "learning_rate": 2.8963225313216733e-05, + "loss": 0.1662, "step": 12075 }, { "epoch": 0.56, - "learning_rate": 1.896113637429094e-05, - "loss": 0.2763, + "learning_rate": 2.8962757243380713e-05, + "loss": 0.238, "step": 12080 }, { "epoch": 0.56, - "learning_rate": 1.89606675730158e-05, - "loss": 0.2358, + "learning_rate": 2.8962289173544696e-05, + "loss": 0.2373, "step": 12085 }, { "epoch": 0.56, - "learning_rate": 1.8960198771740663e-05, - "loss": 0.3385, + "learning_rate": 2.8961821103708673e-05, + "loss": 0.3565, "step": 12090 }, { "epoch": 0.56, - "learning_rate": 1.8959729970465523e-05, - "loss": 0.3224, + "learning_rate": 2.8961353033872653e-05, + "loss": 0.2394, "step": 12095 }, { "epoch": 0.56, - "learning_rate": 1.8959261169190382e-05, - "loss": 0.6675, + "learning_rate": 2.8960884964036633e-05, + "loss": 0.5335, "step": 12100 }, { "epoch": 0.56, - "learning_rate": 1.8958792367915242e-05, - "loss": 0.2995, + "learning_rate": 2.8960416894200616e-05, + "loss": 0.2562, "step": 12105 }, { "epoch": 0.57, - "learning_rate": 1.8958323566640102e-05, - "loss": 0.0414, + "learning_rate": 2.8959948824364596e-05, + "loss": 0.111, "step": 12110 }, { "epoch": 0.57, - "learning_rate": 1.8957854765364962e-05, - "loss": 0.112, + "learning_rate": 2.8959480754528575e-05, + "loss": 0.1321, "step": 12115 }, { "epoch": 0.57, - "learning_rate": 1.8957385964089822e-05, - "loss": 0.1313, + "learning_rate": 2.895901268469256e-05, + "loss": 0.1541, "step": 12120 }, { "epoch": 0.57, - "learning_rate": 1.8956917162814686e-05, - "loss": 0.1316, + "learning_rate": 2.895854461485654e-05, + "loss": 0.1097, "step": 12125 }, { "epoch": 0.57, - "learning_rate": 1.8956448361539545e-05, - "loss": 0.1418, + "learning_rate": 2.8958076545020518e-05, + "loss": 0.3175, "step": 12130 }, { "epoch": 0.57, - "learning_rate": 1.8955979560264405e-05, - "loss": 0.2418, + "learning_rate": 2.8957608475184498e-05, + "loss": 0.2379, "step": 12135 }, { "epoch": 0.57, - "learning_rate": 1.8955510758989265e-05, - "loss": 0.3003, + "learning_rate": 2.895714040534848e-05, + "loss": 0.317, "step": 12140 }, { "epoch": 0.57, - "learning_rate": 1.8955041957714125e-05, - "loss": 0.4785, + "learning_rate": 2.895667233551246e-05, + "loss": 0.2476, "step": 12145 }, { "epoch": 0.57, - "learning_rate": 1.8954573156438985e-05, - "loss": 0.5624, + "learning_rate": 2.8956204265676438e-05, + "loss": 0.6227, "step": 12150 }, { "epoch": 0.57, - "learning_rate": 1.895410435516385e-05, - "loss": 0.2527, + "learning_rate": 2.8955736195840417e-05, + "loss": 0.2231, "step": 12155 }, { "epoch": 0.57, - "learning_rate": 1.895363555388871e-05, - "loss": 0.0963, + "learning_rate": 2.89552681260044e-05, + "loss": 0.0913, "step": 12160 }, { "epoch": 0.57, - "learning_rate": 1.895316675261357e-05, - "loss": 0.0813, + "learning_rate": 2.895480005616838e-05, + "loss": 0.1101, "step": 12165 }, { "epoch": 0.57, - "learning_rate": 1.8952697951338432e-05, - "loss": 0.1403, + "learning_rate": 2.895433198633236e-05, + "loss": 0.1475, "step": 12170 }, { "epoch": 0.57, - "learning_rate": 1.895222915006329e-05, - "loss": 0.1686, + "learning_rate": 2.895386391649634e-05, + "loss": 0.1494, "step": 12175 }, { "epoch": 0.57, - "learning_rate": 1.895176034878815e-05, - "loss": 0.1835, + "learning_rate": 2.8953395846660323e-05, + "loss": 0.1823, "step": 12180 }, { "epoch": 0.57, - "learning_rate": 1.895129154751301e-05, - "loss": 0.1565, + "learning_rate": 2.8952927776824303e-05, + "loss": 0.2334, "step": 12185 }, { "epoch": 0.57, - "learning_rate": 1.895082274623787e-05, - "loss": 0.383, + "learning_rate": 2.8952459706988283e-05, + "loss": 0.2612, "step": 12190 }, { "epoch": 0.57, - "learning_rate": 1.895035394496273e-05, - "loss": 0.375, + "learning_rate": 2.8951991637152266e-05, + "loss": 0.3328, "step": 12195 }, { "epoch": 0.57, - "learning_rate": 1.894988514368759e-05, - "loss": 0.8233, + "learning_rate": 2.8951523567316246e-05, + "loss": 0.5815, "step": 12200 }, { "epoch": 0.57, - "learning_rate": 1.894941634241245e-05, - "loss": 0.2411, + "learning_rate": 2.8951055497480226e-05, + "loss": 0.2459, "step": 12205 }, { "epoch": 0.57, - "learning_rate": 1.894894754113731e-05, - "loss": 0.0966, + "learning_rate": 2.8950587427644205e-05, + "loss": 0.0765, "step": 12210 }, { "epoch": 0.57, - "learning_rate": 1.8948478739862174e-05, - "loss": 0.1386, + "learning_rate": 2.8950119357808185e-05, + "loss": 0.0804, "step": 12215 }, { "epoch": 0.57, - "learning_rate": 1.8948009938587034e-05, - "loss": 0.2216, + "learning_rate": 2.8949651287972165e-05, + "loss": 0.1282, "step": 12220 }, { "epoch": 0.57, - "learning_rate": 1.8947541137311894e-05, - "loss": 0.181, + "learning_rate": 2.8949183218136145e-05, + "loss": 0.1609, "step": 12225 }, { "epoch": 0.57, - "learning_rate": 1.8947072336036754e-05, - "loss": 0.1556, + "learning_rate": 2.8948715148300125e-05, + "loss": 0.11, "step": 12230 }, { "epoch": 0.57, - "learning_rate": 1.8946603534761618e-05, - "loss": 0.2458, + "learning_rate": 2.8948247078464108e-05, + "loss": 0.2662, "step": 12235 }, { "epoch": 0.57, - "learning_rate": 1.8946134733486478e-05, - "loss": 0.2611, + "learning_rate": 2.8947779008628088e-05, + "loss": 0.3361, "step": 12240 }, { "epoch": 0.57, - "learning_rate": 1.8945665932211337e-05, - "loss": 0.305, + "learning_rate": 2.8947310938792068e-05, + "loss": 0.3896, "step": 12245 }, { "epoch": 0.57, - "learning_rate": 1.8945197130936197e-05, - "loss": 0.5079, + "learning_rate": 2.894684286895605e-05, + "loss": 0.5672, "step": 12250 }, { "epoch": 0.57, - "learning_rate": 1.8944728329661057e-05, - "loss": 0.2764, + "learning_rate": 2.894637479912003e-05, + "loss": 0.2442, "step": 12255 }, { "epoch": 0.57, - "learning_rate": 1.894425952838592e-05, - "loss": 0.0815, + "learning_rate": 2.894590672928401e-05, + "loss": 0.0852, "step": 12260 }, { "epoch": 0.57, - "learning_rate": 1.894379072711078e-05, - "loss": 0.0799, + "learning_rate": 2.894543865944799e-05, + "loss": 0.086, "step": 12265 }, { "epoch": 0.57, - "learning_rate": 1.894332192583564e-05, - "loss": 0.1726, + "learning_rate": 2.8944970589611973e-05, + "loss": 0.1488, "step": 12270 }, { "epoch": 0.57, - "learning_rate": 1.89428531245605e-05, - "loss": 0.1347, + "learning_rate": 2.8944502519775953e-05, + "loss": 0.1247, "step": 12275 }, { "epoch": 0.57, - "learning_rate": 1.894238432328536e-05, - "loss": 0.1555, + "learning_rate": 2.894403444993993e-05, + "loss": 0.1959, "step": 12280 }, { "epoch": 0.57, - "learning_rate": 1.894191552201022e-05, - "loss": 0.2513, + "learning_rate": 2.894356638010391e-05, + "loss": 0.1784, "step": 12285 }, { "epoch": 0.57, - "learning_rate": 1.894144672073508e-05, - "loss": 0.3376, + "learning_rate": 2.8943098310267893e-05, + "loss": 0.2463, "step": 12290 }, { "epoch": 0.57, - "learning_rate": 1.8940977919459944e-05, - "loss": 0.3014, + "learning_rate": 2.8942630240431873e-05, + "loss": 0.451, "step": 12295 }, { "epoch": 0.57, - "learning_rate": 1.8940509118184804e-05, - "loss": 0.5171, + "learning_rate": 2.8942162170595852e-05, + "loss": 0.4983, "step": 12300 }, { "epoch": 0.57, - "learning_rate": 1.8940040316909663e-05, - "loss": 0.2599, + "learning_rate": 2.8941694100759836e-05, + "loss": 0.2711, "step": 12305 }, { "epoch": 0.57, - "learning_rate": 1.8939571515634523e-05, - "loss": 0.0997, + "learning_rate": 2.8941226030923815e-05, + "loss": 0.1113, "step": 12310 }, { "epoch": 0.57, - "learning_rate": 1.8939102714359387e-05, - "loss": 0.1469, + "learning_rate": 2.8940757961087795e-05, + "loss": 0.1236, "step": 12315 }, { "epoch": 0.57, - "learning_rate": 1.8938633913084247e-05, - "loss": 0.1405, + "learning_rate": 2.8940289891251775e-05, + "loss": 0.0896, "step": 12320 }, { "epoch": 0.58, - "learning_rate": 1.8938165111809107e-05, - "loss": 0.2392, + "learning_rate": 2.8939821821415758e-05, + "loss": 0.1486, "step": 12325 }, { "epoch": 0.58, - "learning_rate": 1.8937696310533967e-05, - "loss": 0.1594, + "learning_rate": 2.8939353751579738e-05, + "loss": 0.173, "step": 12330 }, { "epoch": 0.58, - "learning_rate": 1.8937227509258826e-05, - "loss": 0.181, + "learning_rate": 2.8938885681743718e-05, + "loss": 0.1792, "step": 12335 }, { "epoch": 0.58, - "learning_rate": 1.8936758707983686e-05, - "loss": 0.2841, + "learning_rate": 2.8938417611907694e-05, + "loss": 0.2277, "step": 12340 }, { "epoch": 0.58, - "learning_rate": 1.8936289906708546e-05, - "loss": 0.3794, + "learning_rate": 2.8937949542071678e-05, + "loss": 0.335, "step": 12345 }, { "epoch": 0.58, - "learning_rate": 1.8935821105433406e-05, - "loss": 0.4712, + "learning_rate": 2.8937481472235657e-05, + "loss": 0.4859, "step": 12350 }, { "epoch": 0.58, - "learning_rate": 1.893535230415827e-05, - "loss": 0.2406, + "learning_rate": 2.8937013402399637e-05, + "loss": 0.2462, "step": 12355 }, { "epoch": 0.58, - "learning_rate": 1.893488350288313e-05, - "loss": 0.1092, + "learning_rate": 2.8936545332563617e-05, + "loss": 0.1036, "step": 12360 }, { "epoch": 0.58, - "learning_rate": 1.893441470160799e-05, - "loss": 0.0954, + "learning_rate": 2.89360772627276e-05, + "loss": 0.0784, "step": 12365 }, { "epoch": 0.58, - "learning_rate": 1.893394590033285e-05, - "loss": 0.1485, + "learning_rate": 2.893560919289158e-05, + "loss": 0.2005, "step": 12370 }, { "epoch": 0.58, - "learning_rate": 1.8933477099057713e-05, - "loss": 0.2261, + "learning_rate": 2.893514112305556e-05, + "loss": 0.1206, "step": 12375 }, { "epoch": 0.58, - "learning_rate": 1.8933008297782573e-05, - "loss": 0.2628, + "learning_rate": 2.8934673053219543e-05, + "loss": 0.1557, "step": 12380 }, { "epoch": 0.58, - "learning_rate": 1.8932539496507433e-05, - "loss": 0.2334, + "learning_rate": 2.8934204983383523e-05, + "loss": 0.2565, "step": 12385 }, { "epoch": 0.58, - "learning_rate": 1.8932070695232293e-05, - "loss": 0.2887, + "learning_rate": 2.8933736913547503e-05, + "loss": 0.2539, "step": 12390 }, { "epoch": 0.58, - "learning_rate": 1.8931601893957152e-05, - "loss": 0.2995, + "learning_rate": 2.8933268843711482e-05, + "loss": 0.4199, "step": 12395 }, { "epoch": 0.58, - "learning_rate": 1.8931133092682016e-05, - "loss": 0.6158, + "learning_rate": 2.8932800773875466e-05, + "loss": 0.5691, "step": 12400 }, { "epoch": 0.58, - "learning_rate": 1.8930664291406876e-05, - "loss": 0.2373, + "learning_rate": 2.8932332704039442e-05, + "loss": 0.1846, "step": 12405 }, { "epoch": 0.58, - "learning_rate": 1.8930195490131736e-05, - "loss": 0.078, + "learning_rate": 2.8931864634203422e-05, + "loss": 0.0847, "step": 12410 }, { "epoch": 0.58, - "learning_rate": 1.8929726688856596e-05, - "loss": 0.119, + "learning_rate": 2.8931396564367402e-05, + "loss": 0.0395, "step": 12415 }, { "epoch": 0.58, - "learning_rate": 1.8929257887581455e-05, - "loss": 0.0879, + "learning_rate": 2.8930928494531385e-05, + "loss": 0.1356, "step": 12420 }, { "epoch": 0.58, - "learning_rate": 1.8928789086306315e-05, - "loss": 0.1112, + "learning_rate": 2.8930460424695365e-05, + "loss": 0.1799, "step": 12425 }, { "epoch": 0.58, - "learning_rate": 1.8928320285031175e-05, - "loss": 0.1409, + "learning_rate": 2.8929992354859345e-05, + "loss": 0.2015, "step": 12430 }, { "epoch": 0.58, - "learning_rate": 1.8927851483756035e-05, - "loss": 0.2855, + "learning_rate": 2.8929524285023328e-05, + "loss": 0.1996, "step": 12435 }, { "epoch": 0.58, - "learning_rate": 1.89273826824809e-05, - "loss": 0.1745, + "learning_rate": 2.8929056215187308e-05, + "loss": 0.2419, "step": 12440 }, { "epoch": 0.58, - "learning_rate": 1.892691388120576e-05, - "loss": 0.344, + "learning_rate": 2.8928588145351287e-05, + "loss": 0.3582, "step": 12445 }, { "epoch": 0.58, - "learning_rate": 1.892644507993062e-05, - "loss": 0.4228, + "learning_rate": 2.8928120075515267e-05, + "loss": 0.4335, "step": 12450 }, { "epoch": 0.58, - "learning_rate": 1.8925976278655482e-05, - "loss": 0.2344, + "learning_rate": 2.892765200567925e-05, + "loss": 0.2001, "step": 12455 }, { "epoch": 0.58, - "learning_rate": 1.8925507477380342e-05, - "loss": 0.0847, + "learning_rate": 2.892718393584323e-05, + "loss": 0.0701, "step": 12460 }, { "epoch": 0.58, - "learning_rate": 1.89250386761052e-05, - "loss": 0.1617, + "learning_rate": 2.892671586600721e-05, + "loss": 0.0676, "step": 12465 }, { "epoch": 0.58, - "learning_rate": 1.892456987483006e-05, - "loss": 0.1262, + "learning_rate": 2.8926247796171186e-05, + "loss": 0.2049, "step": 12470 }, { "epoch": 0.58, - "learning_rate": 1.892410107355492e-05, - "loss": 0.1367, + "learning_rate": 2.892577972633517e-05, + "loss": 0.1933, "step": 12475 }, { "epoch": 0.58, - "learning_rate": 1.892363227227978e-05, - "loss": 0.2337, + "learning_rate": 2.892531165649915e-05, + "loss": 0.2288, "step": 12480 }, { "epoch": 0.58, - "learning_rate": 1.892316347100464e-05, - "loss": 0.2775, + "learning_rate": 2.892484358666313e-05, + "loss": 0.2598, "step": 12485 }, { "epoch": 0.58, - "learning_rate": 1.89226946697295e-05, - "loss": 0.3666, + "learning_rate": 2.8924375516827113e-05, + "loss": 0.2414, "step": 12490 }, { "epoch": 0.58, - "learning_rate": 1.8922225868454365e-05, - "loss": 0.3938, + "learning_rate": 2.8923907446991092e-05, + "loss": 0.2961, "step": 12495 }, { "epoch": 0.58, - "learning_rate": 1.8921757067179225e-05, - "loss": 0.6377, + "learning_rate": 2.8923439377155072e-05, + "loss": 0.4972, "step": 12500 }, { "epoch": 0.58, - "learning_rate": 1.8921288265904085e-05, - "loss": 0.2246, + "learning_rate": 2.8922971307319052e-05, + "loss": 0.2474, "step": 12505 }, { "epoch": 0.58, - "learning_rate": 1.8920819464628944e-05, - "loss": 0.074, + "learning_rate": 2.8922503237483035e-05, + "loss": 0.1236, "step": 12510 }, { "epoch": 0.58, - "learning_rate": 1.8920350663353804e-05, - "loss": 0.1685, + "learning_rate": 2.8922035167647015e-05, + "loss": 0.1226, "step": 12515 }, { "epoch": 0.58, - "learning_rate": 1.8919881862078668e-05, - "loss": 0.1246, + "learning_rate": 2.8921567097810995e-05, + "loss": 0.1345, "step": 12520 }, { "epoch": 0.58, - "learning_rate": 1.8919413060803528e-05, - "loss": 0.1052, + "learning_rate": 2.8921099027974975e-05, + "loss": 0.1681, "step": 12525 }, { "epoch": 0.58, - "learning_rate": 1.8918944259528388e-05, - "loss": 0.2717, + "learning_rate": 2.8920630958138954e-05, + "loss": 0.2256, "step": 12530 }, { "epoch": 0.58, - "learning_rate": 1.8918475458253248e-05, - "loss": 0.1854, + "learning_rate": 2.8920162888302934e-05, + "loss": 0.2, "step": 12535 }, { "epoch": 0.59, - "learning_rate": 1.891800665697811e-05, - "loss": 0.3528, + "learning_rate": 2.8919694818466914e-05, + "loss": 0.2551, "step": 12540 }, { "epoch": 0.59, - "learning_rate": 1.891753785570297e-05, - "loss": 0.1803, + "learning_rate": 2.8919226748630897e-05, + "loss": 0.2243, "step": 12545 }, { "epoch": 0.59, - "learning_rate": 1.891706905442783e-05, - "loss": 0.5755, + "learning_rate": 2.8918758678794877e-05, + "loss": 0.4802, "step": 12550 }, { "epoch": 0.59, - "learning_rate": 1.891660025315269e-05, - "loss": 0.2829, + "learning_rate": 2.8918290608958857e-05, + "loss": 0.1943, "step": 12555 }, { "epoch": 0.59, - "learning_rate": 1.891613145187755e-05, - "loss": 0.0912, + "learning_rate": 2.8917822539122837e-05, + "loss": 0.0714, "step": 12560 }, { "epoch": 0.59, - "learning_rate": 1.891566265060241e-05, - "loss": 0.1055, + "learning_rate": 2.891735446928682e-05, + "loss": 0.2009, "step": 12565 }, { "epoch": 0.59, - "learning_rate": 1.891519384932727e-05, - "loss": 0.1751, + "learning_rate": 2.89168863994508e-05, + "loss": 0.1655, "step": 12570 }, { "epoch": 0.59, - "learning_rate": 1.891472504805213e-05, - "loss": 0.1399, + "learning_rate": 2.891641832961478e-05, + "loss": 0.1689, "step": 12575 }, { "epoch": 0.59, - "learning_rate": 1.891425624677699e-05, - "loss": 0.1935, + "learning_rate": 2.891595025977876e-05, + "loss": 0.1921, "step": 12580 }, { "epoch": 0.59, - "learning_rate": 1.8913787445501854e-05, - "loss": 0.2453, + "learning_rate": 2.8915482189942743e-05, + "loss": 0.2718, "step": 12585 }, { "epoch": 0.59, - "learning_rate": 1.8913318644226714e-05, - "loss": 0.1864, + "learning_rate": 2.8915014120106722e-05, + "loss": 0.2729, "step": 12590 }, { "epoch": 0.59, - "learning_rate": 1.8912849842951574e-05, - "loss": 0.3226, + "learning_rate": 2.89145460502707e-05, + "loss": 0.3708, "step": 12595 }, { "epoch": 0.59, - "learning_rate": 1.8912381041676437e-05, - "loss": 0.3684, + "learning_rate": 2.891407798043468e-05, + "loss": 0.5397, "step": 12600 }, { "epoch": 0.59, - "learning_rate": 1.8911912240401297e-05, - "loss": 0.2648, + "learning_rate": 2.8913609910598662e-05, + "loss": 0.219, "step": 12605 }, { "epoch": 0.59, - "learning_rate": 1.8911443439126157e-05, - "loss": 0.061, + "learning_rate": 2.8913141840762642e-05, + "loss": 0.0587, "step": 12610 }, { "epoch": 0.59, - "learning_rate": 1.8910974637851017e-05, - "loss": 0.1064, + "learning_rate": 2.891267377092662e-05, + "loss": 0.0868, "step": 12615 }, { "epoch": 0.59, - "learning_rate": 1.8910505836575877e-05, - "loss": 0.1771, + "learning_rate": 2.8912205701090605e-05, + "loss": 0.1228, "step": 12620 }, { "epoch": 0.59, - "learning_rate": 1.8910037035300736e-05, - "loss": 0.1259, + "learning_rate": 2.8911737631254585e-05, + "loss": 0.2103, "step": 12625 }, { "epoch": 0.59, - "learning_rate": 1.8909568234025596e-05, - "loss": 0.1778, + "learning_rate": 2.8911269561418564e-05, + "loss": 0.1224, "step": 12630 }, { "epoch": 0.59, - "learning_rate": 1.890909943275046e-05, - "loss": 0.1552, + "learning_rate": 2.8910801491582544e-05, + "loss": 0.1422, "step": 12635 }, { "epoch": 0.59, - "learning_rate": 1.890863063147532e-05, - "loss": 0.2755, + "learning_rate": 2.8910333421746527e-05, + "loss": 0.1755, "step": 12640 }, { "epoch": 0.59, - "learning_rate": 1.890816183020018e-05, - "loss": 0.3597, + "learning_rate": 2.8909865351910507e-05, + "loss": 0.3288, "step": 12645 }, { "epoch": 0.59, - "learning_rate": 1.890769302892504e-05, - "loss": 0.6109, + "learning_rate": 2.8909397282074487e-05, + "loss": 0.4658, "step": 12650 }, { "epoch": 0.59, - "learning_rate": 1.89072242276499e-05, - "loss": 0.2731, + "learning_rate": 2.8908929212238467e-05, + "loss": 0.2296, "step": 12655 }, { "epoch": 0.59, - "learning_rate": 1.890675542637476e-05, - "loss": 0.1214, + "learning_rate": 2.8908461142402447e-05, + "loss": 0.1478, "step": 12660 }, { "epoch": 0.59, - "learning_rate": 1.8906286625099623e-05, - "loss": 0.1173, + "learning_rate": 2.8907993072566426e-05, + "loss": 0.122, "step": 12665 }, { "epoch": 0.59, - "learning_rate": 1.8905817823824483e-05, - "loss": 0.1032, + "learning_rate": 2.8907525002730406e-05, + "loss": 0.1071, "step": 12670 }, { "epoch": 0.59, - "learning_rate": 1.8905349022549343e-05, - "loss": 0.2092, + "learning_rate": 2.890705693289439e-05, + "loss": 0.1918, "step": 12675 }, { "epoch": 0.59, - "learning_rate": 1.8904880221274206e-05, - "loss": 0.2052, + "learning_rate": 2.890658886305837e-05, + "loss": 0.1592, "step": 12680 }, { "epoch": 0.59, - "learning_rate": 1.8904411419999066e-05, - "loss": 0.3043, + "learning_rate": 2.890612079322235e-05, + "loss": 0.2755, "step": 12685 }, { "epoch": 0.59, - "learning_rate": 1.8903942618723926e-05, - "loss": 0.3043, + "learning_rate": 2.890565272338633e-05, + "loss": 0.3408, "step": 12690 }, { "epoch": 0.59, - "learning_rate": 1.8903473817448786e-05, - "loss": 0.3598, + "learning_rate": 2.8905184653550312e-05, + "loss": 0.2939, "step": 12695 }, { "epoch": 0.59, - "learning_rate": 1.8903005016173646e-05, - "loss": 0.5739, + "learning_rate": 2.8904716583714292e-05, + "loss": 0.52, "step": 12700 }, { "epoch": 0.59, - "learning_rate": 1.8902536214898506e-05, - "loss": 0.1942, + "learning_rate": 2.8904248513878272e-05, + "loss": 0.2381, "step": 12705 }, { "epoch": 0.59, - "learning_rate": 1.8902067413623366e-05, - "loss": 0.0969, + "learning_rate": 2.890378044404225e-05, + "loss": 0.1097, "step": 12710 }, { "epoch": 0.59, - "learning_rate": 1.8901598612348225e-05, - "loss": 0.121, + "learning_rate": 2.8903312374206235e-05, + "loss": 0.1028, "step": 12715 }, { "epoch": 0.59, - "learning_rate": 1.8901129811073085e-05, - "loss": 0.1946, + "learning_rate": 2.890284430437021e-05, + "loss": 0.0656, "step": 12720 }, { "epoch": 0.59, - "learning_rate": 1.890066100979795e-05, - "loss": 0.1054, + "learning_rate": 2.890237623453419e-05, + "loss": 0.1366, "step": 12725 }, { "epoch": 0.59, - "learning_rate": 1.890019220852281e-05, - "loss": 0.2131, + "learning_rate": 2.8901908164698174e-05, + "loss": 0.1737, "step": 12730 }, { "epoch": 0.59, - "learning_rate": 1.889972340724767e-05, - "loss": 0.3432, + "learning_rate": 2.8901440094862154e-05, + "loss": 0.2923, "step": 12735 }, { "epoch": 0.59, - "learning_rate": 1.889925460597253e-05, - "loss": 0.3533, + "learning_rate": 2.8900972025026134e-05, + "loss": 0.2551, "step": 12740 }, { "epoch": 0.59, - "learning_rate": 1.8898785804697392e-05, - "loss": 0.2782, + "learning_rate": 2.8900503955190114e-05, + "loss": 0.2689, "step": 12745 }, { "epoch": 0.59, - "learning_rate": 1.8898317003422252e-05, - "loss": 1.0246, + "learning_rate": 2.8900035885354097e-05, + "loss": 0.4539, "step": 12750 }, { "epoch": 0.6, - "learning_rate": 1.8897848202147112e-05, - "loss": 0.2875, + "learning_rate": 2.8899567815518077e-05, + "loss": 0.2615, "step": 12755 }, { "epoch": 0.6, - "learning_rate": 1.889737940087197e-05, - "loss": 0.0689, + "learning_rate": 2.8899099745682057e-05, + "loss": 0.0602, "step": 12760 }, { "epoch": 0.6, - "learning_rate": 1.889691059959683e-05, - "loss": 0.1025, + "learning_rate": 2.8898631675846036e-05, + "loss": 0.0877, "step": 12765 }, { "epoch": 0.6, - "learning_rate": 1.889644179832169e-05, - "loss": 0.1406, + "learning_rate": 2.889816360601002e-05, + "loss": 0.075, "step": 12770 }, { "epoch": 0.6, - "learning_rate": 1.8895972997046555e-05, - "loss": 0.1504, + "learning_rate": 2.8897695536174e-05, + "loss": 0.118, "step": 12775 }, { "epoch": 0.6, - "learning_rate": 1.8895504195771415e-05, - "loss": 0.2003, + "learning_rate": 2.889722746633798e-05, + "loss": 0.1976, "step": 12780 }, { "epoch": 0.6, - "learning_rate": 1.8895035394496275e-05, - "loss": 0.2294, + "learning_rate": 2.8896759396501956e-05, + "loss": 0.2382, "step": 12785 }, { "epoch": 0.6, - "learning_rate": 1.8894566593221135e-05, - "loss": 0.2264, + "learning_rate": 2.889629132666594e-05, + "loss": 0.2397, "step": 12790 }, { "epoch": 0.6, - "learning_rate": 1.8894097791945995e-05, - "loss": 0.3517, + "learning_rate": 2.889582325682992e-05, + "loss": 0.2587, "step": 12795 }, { "epoch": 0.6, - "learning_rate": 1.8893628990670854e-05, - "loss": 0.6386, + "learning_rate": 2.88953551869939e-05, + "loss": 0.4395, "step": 12800 }, { "epoch": 0.6, - "learning_rate": 1.8893160189395718e-05, - "loss": 0.2141, + "learning_rate": 2.8894887117157882e-05, + "loss": 0.2838, "step": 12805 }, { "epoch": 0.6, - "learning_rate": 1.8892691388120578e-05, - "loss": 0.1371, + "learning_rate": 2.889441904732186e-05, + "loss": 0.1163, "step": 12810 }, { "epoch": 0.6, - "learning_rate": 1.8892222586845438e-05, - "loss": 0.119, + "learning_rate": 2.889395097748584e-05, + "loss": 0.089, "step": 12815 }, { "epoch": 0.6, - "learning_rate": 1.8891753785570298e-05, - "loss": 0.1502, + "learning_rate": 2.889348290764982e-05, + "loss": 0.1262, "step": 12820 }, { "epoch": 0.6, - "learning_rate": 1.889128498429516e-05, - "loss": 0.1665, + "learning_rate": 2.8893014837813804e-05, + "loss": 0.2655, "step": 12825 }, { "epoch": 0.6, - "learning_rate": 1.889081618302002e-05, - "loss": 0.1954, + "learning_rate": 2.8892546767977784e-05, + "loss": 0.1867, "step": 12830 }, { "epoch": 0.6, - "learning_rate": 1.889034738174488e-05, - "loss": 0.2543, + "learning_rate": 2.8892078698141764e-05, + "loss": 0.2756, "step": 12835 }, { "epoch": 0.6, - "learning_rate": 1.888987858046974e-05, - "loss": 0.2318, + "learning_rate": 2.8891610628305744e-05, + "loss": 0.2579, "step": 12840 }, { "epoch": 0.6, - "learning_rate": 1.88894097791946e-05, - "loss": 0.3885, + "learning_rate": 2.8891142558469724e-05, + "loss": 0.4167, "step": 12845 }, { "epoch": 0.6, - "learning_rate": 1.888894097791946e-05, - "loss": 0.4542, + "learning_rate": 2.8890674488633703e-05, + "loss": 0.6041, "step": 12850 }, { "epoch": 0.6, - "learning_rate": 1.888847217664432e-05, - "loss": 0.2372, + "learning_rate": 2.8890206418797683e-05, + "loss": 0.2392, "step": 12855 }, { "epoch": 0.6, - "learning_rate": 1.888800337536918e-05, - "loss": 0.1154, + "learning_rate": 2.8889738348961666e-05, + "loss": 0.1081, "step": 12860 }, { "epoch": 0.6, - "learning_rate": 1.8887534574094044e-05, - "loss": 0.0905, + "learning_rate": 2.8889270279125646e-05, + "loss": 0.0972, "step": 12865 }, { "epoch": 0.6, - "learning_rate": 1.8887065772818904e-05, - "loss": 0.1059, + "learning_rate": 2.8888802209289626e-05, + "loss": 0.1922, "step": 12870 }, { "epoch": 0.6, - "learning_rate": 1.8886596971543764e-05, - "loss": 0.2216, + "learning_rate": 2.8888334139453606e-05, + "loss": 0.1852, "step": 12875 }, { "epoch": 0.6, - "learning_rate": 1.8886128170268624e-05, - "loss": 0.1879, + "learning_rate": 2.888786606961759e-05, + "loss": 0.2465, "step": 12880 }, { "epoch": 0.6, - "learning_rate": 1.8885659368993487e-05, - "loss": 0.2195, + "learning_rate": 2.888739799978157e-05, + "loss": 0.2227, "step": 12885 }, { "epoch": 0.6, - "learning_rate": 1.8885190567718347e-05, - "loss": 0.3195, + "learning_rate": 2.888692992994555e-05, + "loss": 0.3551, "step": 12890 }, { "epoch": 0.6, - "learning_rate": 1.8884721766443207e-05, - "loss": 0.2832, + "learning_rate": 2.888646186010953e-05, + "loss": 0.3309, "step": 12895 }, { "epoch": 0.6, - "learning_rate": 1.8884252965168067e-05, - "loss": 0.4605, + "learning_rate": 2.8885993790273512e-05, + "loss": 0.645, "step": 12900 }, { "epoch": 0.6, - "learning_rate": 1.8883784163892927e-05, - "loss": 0.2373, + "learning_rate": 2.888552572043749e-05, + "loss": 0.2844, "step": 12905 }, { "epoch": 0.6, - "learning_rate": 1.888331536261779e-05, - "loss": 0.1402, + "learning_rate": 2.8885057650601468e-05, + "loss": 0.1013, "step": 12910 }, { "epoch": 0.6, - "learning_rate": 1.888284656134265e-05, - "loss": 0.0874, + "learning_rate": 2.888458958076545e-05, + "loss": 0.1039, "step": 12915 }, { "epoch": 0.6, - "learning_rate": 1.888237776006751e-05, - "loss": 0.1415, + "learning_rate": 2.888412151092943e-05, + "loss": 0.1857, "step": 12920 }, { "epoch": 0.6, - "learning_rate": 1.888190895879237e-05, - "loss": 0.2131, + "learning_rate": 2.888365344109341e-05, + "loss": 0.189, "step": 12925 }, { "epoch": 0.6, - "learning_rate": 1.888144015751723e-05, - "loss": 0.1925, + "learning_rate": 2.888318537125739e-05, + "loss": 0.2779, "step": 12930 }, { "epoch": 0.6, - "learning_rate": 1.888097135624209e-05, - "loss": 0.1838, + "learning_rate": 2.8882717301421374e-05, + "loss": 0.2452, "step": 12935 }, { "epoch": 0.6, - "learning_rate": 1.888050255496695e-05, - "loss": 0.2667, + "learning_rate": 2.8882249231585354e-05, + "loss": 0.2025, "step": 12940 }, { "epoch": 0.6, - "learning_rate": 1.888003375369181e-05, - "loss": 0.4606, + "learning_rate": 2.8881781161749334e-05, + "loss": 0.3729, "step": 12945 }, { "epoch": 0.6, - "learning_rate": 1.8879564952416673e-05, - "loss": 0.593, + "learning_rate": 2.8881313091913313e-05, + "loss": 0.5273, "step": 12950 }, { "epoch": 0.6, - "learning_rate": 1.8879096151141533e-05, - "loss": 0.2165, + "learning_rate": 2.8880845022077297e-05, + "loss": 0.2253, "step": 12955 }, { "epoch": 0.6, - "learning_rate": 1.8878627349866393e-05, - "loss": 0.1231, + "learning_rate": 2.8880376952241276e-05, + "loss": 0.0965, "step": 12960 }, { "epoch": 0.6, - "learning_rate": 1.8878158548591256e-05, - "loss": 0.1449, + "learning_rate": 2.8879908882405256e-05, + "loss": 0.0847, "step": 12965 }, { "epoch": 0.61, - "learning_rate": 1.8877689747316116e-05, - "loss": 0.1908, + "learning_rate": 2.8879440812569236e-05, + "loss": 0.1271, "step": 12970 }, { "epoch": 0.61, - "learning_rate": 1.8877220946040976e-05, - "loss": 0.1959, + "learning_rate": 2.8878972742733216e-05, + "loss": 0.1779, "step": 12975 }, { "epoch": 0.61, - "learning_rate": 1.8876752144765836e-05, - "loss": 0.264, + "learning_rate": 2.8878504672897196e-05, + "loss": 0.1899, "step": 12980 }, { "epoch": 0.61, - "learning_rate": 1.8876283343490696e-05, - "loss": 0.2641, + "learning_rate": 2.8878036603061175e-05, + "loss": 0.1681, "step": 12985 }, { "epoch": 0.61, - "learning_rate": 1.8875814542215556e-05, - "loss": 0.2006, + "learning_rate": 2.887756853322516e-05, + "loss": 0.2269, "step": 12990 }, { "epoch": 0.61, - "learning_rate": 1.8875345740940416e-05, - "loss": 0.3727, + "learning_rate": 2.887710046338914e-05, + "loss": 0.2698, "step": 12995 }, { "epoch": 0.61, - "learning_rate": 1.8874876939665276e-05, - "loss": 0.6469, + "learning_rate": 2.887663239355312e-05, + "loss": 0.5955, "step": 13000 }, { "epoch": 0.61, - "learning_rate": 1.887440813839014e-05, - "loss": 0.1506, + "learning_rate": 2.8876164323717098e-05, + "loss": 0.2408, "step": 13005 }, { "epoch": 0.61, - "learning_rate": 1.8873939337115e-05, - "loss": 0.1014, + "learning_rate": 2.887569625388108e-05, + "loss": 0.0575, "step": 13010 }, { "epoch": 0.61, - "learning_rate": 1.887347053583986e-05, - "loss": 0.1381, + "learning_rate": 2.887522818404506e-05, + "loss": 0.0674, "step": 13015 }, { "epoch": 0.61, - "learning_rate": 1.887300173456472e-05, - "loss": 0.1419, + "learning_rate": 2.887476011420904e-05, + "loss": 0.2003, "step": 13020 }, { "epoch": 0.61, - "learning_rate": 1.887253293328958e-05, - "loss": 0.1822, + "learning_rate": 2.887429204437302e-05, + "loss": 0.2053, "step": 13025 }, { "epoch": 0.61, - "learning_rate": 1.8872064132014442e-05, - "loss": 0.2727, + "learning_rate": 2.8873823974537004e-05, + "loss": 0.1924, "step": 13030 }, { "epoch": 0.61, - "learning_rate": 1.8871595330739302e-05, - "loss": 0.2228, + "learning_rate": 2.887335590470098e-05, + "loss": 0.2305, "step": 13035 }, { "epoch": 0.61, - "learning_rate": 1.8871126529464162e-05, - "loss": 0.2866, + "learning_rate": 2.887288783486496e-05, + "loss": 0.2027, "step": 13040 }, { "epoch": 0.61, - "learning_rate": 1.8870657728189022e-05, - "loss": 0.2962, + "learning_rate": 2.8872419765028943e-05, + "loss": 0.3431, "step": 13045 }, { "epoch": 0.61, - "learning_rate": 1.8870188926913885e-05, - "loss": 0.5063, + "learning_rate": 2.8871951695192923e-05, + "loss": 0.486, "step": 13050 }, { "epoch": 0.61, - "learning_rate": 1.8869720125638745e-05, - "loss": 0.2182, + "learning_rate": 2.8871483625356903e-05, + "loss": 0.1978, "step": 13055 }, { "epoch": 0.61, - "learning_rate": 1.8869251324363605e-05, - "loss": 0.168, + "learning_rate": 2.8871015555520883e-05, + "loss": 0.1411, "step": 13060 }, { "epoch": 0.61, - "learning_rate": 1.8868782523088465e-05, - "loss": 0.0722, + "learning_rate": 2.8870547485684866e-05, + "loss": 0.1373, "step": 13065 }, { "epoch": 0.61, - "learning_rate": 1.8868313721813325e-05, - "loss": 0.1356, + "learning_rate": 2.8870079415848846e-05, + "loss": 0.141, "step": 13070 }, { "epoch": 0.61, - "learning_rate": 1.8867844920538185e-05, - "loss": 0.2862, + "learning_rate": 2.8869611346012826e-05, + "loss": 0.1704, "step": 13075 }, { "epoch": 0.61, - "learning_rate": 1.8867376119263045e-05, - "loss": 0.1236, + "learning_rate": 2.8869143276176806e-05, + "loss": 0.2197, "step": 13080 }, { "epoch": 0.61, - "learning_rate": 1.8866907317987905e-05, - "loss": 0.2837, + "learning_rate": 2.886867520634079e-05, + "loss": 0.2301, "step": 13085 }, { "epoch": 0.61, - "learning_rate": 1.8866438516712765e-05, - "loss": 0.329, + "learning_rate": 2.886820713650477e-05, + "loss": 0.2399, "step": 13090 }, { "epoch": 0.61, - "learning_rate": 1.8865969715437628e-05, - "loss": 0.3582, + "learning_rate": 2.886773906666875e-05, + "loss": 0.3613, "step": 13095 }, { "epoch": 0.61, - "learning_rate": 1.8865500914162488e-05, - "loss": 0.5645, + "learning_rate": 2.8867270996832728e-05, + "loss": 0.5423, "step": 13100 }, { "epoch": 0.61, - "learning_rate": 1.8865032112887348e-05, - "loss": 0.2567, + "learning_rate": 2.8866802926996708e-05, + "loss": 0.2183, "step": 13105 }, { "epoch": 0.61, - "learning_rate": 1.886456331161221e-05, - "loss": 0.1308, + "learning_rate": 2.8866334857160688e-05, + "loss": 0.0634, "step": 13110 }, { "epoch": 0.61, - "learning_rate": 1.886409451033707e-05, - "loss": 0.1187, + "learning_rate": 2.8865866787324668e-05, + "loss": 0.0862, "step": 13115 }, { "epoch": 0.61, - "learning_rate": 1.886362570906193e-05, - "loss": 0.1349, + "learning_rate": 2.886539871748865e-05, + "loss": 0.1013, "step": 13120 }, { "epoch": 0.61, - "learning_rate": 1.886315690778679e-05, - "loss": 0.1611, + "learning_rate": 2.886493064765263e-05, + "loss": 0.1501, "step": 13125 }, { "epoch": 0.61, - "learning_rate": 1.886268810651165e-05, - "loss": 0.1943, + "learning_rate": 2.886446257781661e-05, + "loss": 0.2243, "step": 13130 }, { "epoch": 0.61, - "learning_rate": 1.886221930523651e-05, - "loss": 0.1838, + "learning_rate": 2.886399450798059e-05, + "loss": 0.2639, "step": 13135 }, { "epoch": 0.61, - "learning_rate": 1.886175050396137e-05, - "loss": 0.2555, + "learning_rate": 2.8863526438144574e-05, + "loss": 0.2846, "step": 13140 }, { "epoch": 0.61, - "learning_rate": 1.8861281702686234e-05, - "loss": 0.3234, + "learning_rate": 2.8863058368308553e-05, + "loss": 0.4036, "step": 13145 }, { "epoch": 0.61, - "learning_rate": 1.8860812901411094e-05, - "loss": 0.5682, + "learning_rate": 2.8862590298472533e-05, + "loss": 0.5067, "step": 13150 }, { "epoch": 0.61, - "learning_rate": 1.8860344100135954e-05, - "loss": 0.2446, + "learning_rate": 2.8862122228636516e-05, + "loss": 0.3654, "step": 13155 }, { "epoch": 0.61, - "learning_rate": 1.8859875298860814e-05, - "loss": 0.1032, + "learning_rate": 2.8861654158800496e-05, + "loss": 0.0709, "step": 13160 }, { "epoch": 0.61, - "learning_rate": 1.8859406497585674e-05, - "loss": 0.0889, + "learning_rate": 2.8861186088964473e-05, + "loss": 0.1663, "step": 13165 }, { "epoch": 0.61, - "learning_rate": 1.8858937696310534e-05, - "loss": 0.2059, + "learning_rate": 2.8860718019128452e-05, + "loss": 0.1669, "step": 13170 }, { "epoch": 0.61, - "learning_rate": 1.8858468895035397e-05, - "loss": 0.1573, + "learning_rate": 2.8860249949292436e-05, + "loss": 0.1563, "step": 13175 }, { "epoch": 0.61, - "learning_rate": 1.8858000093760257e-05, - "loss": 0.1783, + "learning_rate": 2.8859781879456415e-05, + "loss": 0.1988, "step": 13180 }, { "epoch": 0.62, - "learning_rate": 1.8857531292485117e-05, - "loss": 0.2177, + "learning_rate": 2.8859313809620395e-05, + "loss": 0.2369, "step": 13185 }, { "epoch": 0.62, - "learning_rate": 1.885706249120998e-05, - "loss": 0.24, + "learning_rate": 2.8858845739784375e-05, + "loss": 0.2254, "step": 13190 }, { "epoch": 0.62, - "learning_rate": 1.885659368993484e-05, - "loss": 0.362, + "learning_rate": 2.885837766994836e-05, + "loss": 0.288, "step": 13195 }, { "epoch": 0.62, - "learning_rate": 1.88561248886597e-05, - "loss": 0.5683, + "learning_rate": 2.8857909600112338e-05, + "loss": 0.5139, "step": 13200 }, { "epoch": 0.62, - "learning_rate": 1.885565608738456e-05, - "loss": 0.254, + "learning_rate": 2.8857441530276318e-05, + "loss": 0.2087, "step": 13205 }, { "epoch": 0.62, - "learning_rate": 1.885518728610942e-05, - "loss": 0.1012, + "learning_rate": 2.8856973460440298e-05, + "loss": 0.0958, "step": 13210 }, { "epoch": 0.62, - "learning_rate": 1.885471848483428e-05, - "loss": 0.1314, + "learning_rate": 2.885650539060428e-05, + "loss": 0.1304, "step": 13215 }, { "epoch": 0.62, - "learning_rate": 1.885424968355914e-05, - "loss": 0.1051, + "learning_rate": 2.885603732076826e-05, + "loss": 0.1667, "step": 13220 }, { "epoch": 0.62, - "learning_rate": 1.8853780882284e-05, - "loss": 0.1469, + "learning_rate": 2.8855569250932237e-05, + "loss": 0.2053, "step": 13225 }, { "epoch": 0.62, - "learning_rate": 1.885331208100886e-05, - "loss": 0.1632, + "learning_rate": 2.885510118109622e-05, + "loss": 0.1388, "step": 13230 }, { "epoch": 0.62, - "learning_rate": 1.8852843279733723e-05, - "loss": 0.2104, + "learning_rate": 2.88546331112602e-05, + "loss": 0.1431, "step": 13235 }, { "epoch": 0.62, - "learning_rate": 1.8852374478458583e-05, - "loss": 0.3246, + "learning_rate": 2.885416504142418e-05, + "loss": 0.2002, "step": 13240 }, { "epoch": 0.62, - "learning_rate": 1.8851905677183443e-05, - "loss": 0.213, + "learning_rate": 2.885369697158816e-05, + "loss": 0.3338, "step": 13245 }, { "epoch": 0.62, - "learning_rate": 1.8851436875908303e-05, - "loss": 0.5547, + "learning_rate": 2.8853228901752143e-05, + "loss": 0.7644, "step": 13250 }, { "epoch": 0.62, - "learning_rate": 1.8850968074633166e-05, - "loss": 0.2415, + "learning_rate": 2.8852760831916123e-05, + "loss": 0.1865, "step": 13255 }, { "epoch": 0.62, - "learning_rate": 1.8850499273358026e-05, - "loss": 0.0852, + "learning_rate": 2.8852292762080103e-05, + "loss": 0.0992, "step": 13260 }, { "epoch": 0.62, - "learning_rate": 1.8850030472082886e-05, - "loss": 0.0872, + "learning_rate": 2.8851824692244083e-05, + "loss": 0.1559, "step": 13265 }, { "epoch": 0.62, - "learning_rate": 1.8849561670807746e-05, - "loss": 0.1391, + "learning_rate": 2.8851356622408066e-05, + "loss": 0.1212, "step": 13270 }, { "epoch": 0.62, - "learning_rate": 1.8849092869532606e-05, - "loss": 0.0998, + "learning_rate": 2.8850888552572046e-05, + "loss": 0.2575, "step": 13275 }, { "epoch": 0.62, - "learning_rate": 1.8848624068257466e-05, - "loss": 0.1616, + "learning_rate": 2.8850420482736025e-05, + "loss": 0.1288, "step": 13280 }, { "epoch": 0.62, - "learning_rate": 1.884815526698233e-05, - "loss": 0.1745, + "learning_rate": 2.884995241290001e-05, + "loss": 0.2245, "step": 13285 }, { "epoch": 0.62, - "learning_rate": 1.884768646570719e-05, - "loss": 0.3633, + "learning_rate": 2.8849484343063985e-05, + "loss": 0.2046, "step": 13290 }, { "epoch": 0.62, - "learning_rate": 1.884721766443205e-05, - "loss": 0.4456, + "learning_rate": 2.8849016273227965e-05, + "loss": 0.2378, "step": 13295 }, { "epoch": 0.62, - "learning_rate": 1.884674886315691e-05, - "loss": 0.6131, + "learning_rate": 2.8848548203391945e-05, + "loss": 0.4623, "step": 13300 }, { "epoch": 0.62, - "learning_rate": 1.884628006188177e-05, - "loss": 0.2623, + "learning_rate": 2.8848080133555928e-05, + "loss": 0.2265, "step": 13305 }, { "epoch": 0.62, - "learning_rate": 1.884581126060663e-05, - "loss": 0.0944, + "learning_rate": 2.8847612063719908e-05, + "loss": 0.0554, "step": 13310 }, { "epoch": 0.62, - "learning_rate": 1.8845342459331492e-05, - "loss": 0.1549, + "learning_rate": 2.8847143993883887e-05, + "loss": 0.125, "step": 13315 }, { "epoch": 0.62, - "learning_rate": 1.8844873658056352e-05, - "loss": 0.145, + "learning_rate": 2.8846675924047867e-05, + "loss": 0.1131, "step": 13320 }, { "epoch": 0.62, - "learning_rate": 1.8844404856781212e-05, - "loss": 0.1983, + "learning_rate": 2.884620785421185e-05, + "loss": 0.1376, "step": 13325 }, { "epoch": 0.62, - "learning_rate": 1.8843936055506072e-05, - "loss": 0.263, + "learning_rate": 2.884573978437583e-05, + "loss": 0.1802, "step": 13330 }, { "epoch": 0.62, - "learning_rate": 1.8843467254230935e-05, - "loss": 0.2859, + "learning_rate": 2.884527171453981e-05, + "loss": 0.1885, "step": 13335 }, { "epoch": 0.62, - "learning_rate": 1.8842998452955795e-05, - "loss": 0.2764, + "learning_rate": 2.8844803644703793e-05, + "loss": 0.2701, "step": 13340 }, { "epoch": 0.62, - "learning_rate": 1.8842529651680655e-05, - "loss": 0.3232, + "learning_rate": 2.8844335574867773e-05, + "loss": 0.2813, "step": 13345 }, { "epoch": 0.62, - "learning_rate": 1.8842060850405515e-05, - "loss": 0.5258, + "learning_rate": 2.884386750503175e-05, + "loss": 0.5205, "step": 13350 }, { "epoch": 0.62, - "learning_rate": 1.8841592049130375e-05, - "loss": 0.2526, + "learning_rate": 2.884339943519573e-05, + "loss": 0.1893, "step": 13355 }, { "epoch": 0.62, - "learning_rate": 1.8841123247855235e-05, - "loss": 0.0839, + "learning_rate": 2.8842931365359713e-05, + "loss": 0.0986, "step": 13360 }, { "epoch": 0.62, - "learning_rate": 1.8840654446580095e-05, - "loss": 0.1898, + "learning_rate": 2.8842463295523692e-05, + "loss": 0.1085, "step": 13365 }, { "epoch": 0.62, - "learning_rate": 1.8840185645304955e-05, - "loss": 0.1795, + "learning_rate": 2.8841995225687672e-05, + "loss": 0.1641, "step": 13370 }, { "epoch": 0.62, - "learning_rate": 1.8839716844029818e-05, - "loss": 0.1649, + "learning_rate": 2.8841527155851652e-05, + "loss": 0.1011, "step": 13375 }, { "epoch": 0.62, - "learning_rate": 1.8839248042754678e-05, - "loss": 0.2422, + "learning_rate": 2.8841059086015635e-05, + "loss": 0.2169, "step": 13380 }, { "epoch": 0.62, - "learning_rate": 1.8838779241479538e-05, - "loss": 0.2588, + "learning_rate": 2.8840591016179615e-05, + "loss": 0.1129, "step": 13385 }, { "epoch": 0.62, - "learning_rate": 1.8838310440204398e-05, - "loss": 0.2336, + "learning_rate": 2.8840122946343595e-05, + "loss": 0.2633, "step": 13390 }, { "epoch": 0.63, - "learning_rate": 1.883784163892926e-05, - "loss": 0.3311, + "learning_rate": 2.8839654876507575e-05, + "loss": 0.3939, "step": 13395 }, { "epoch": 0.63, - "learning_rate": 1.883737283765412e-05, - "loss": 0.5475, + "learning_rate": 2.8839186806671558e-05, + "loss": 0.4045, "step": 13400 }, { "epoch": 0.63, - "learning_rate": 1.883690403637898e-05, - "loss": 0.3553, + "learning_rate": 2.8838718736835538e-05, + "loss": 0.2207, "step": 13405 }, { "epoch": 0.63, - "learning_rate": 1.883643523510384e-05, - "loss": 0.1288, + "learning_rate": 2.8838250666999518e-05, + "loss": 0.0585, "step": 13410 }, { "epoch": 0.63, - "learning_rate": 1.88359664338287e-05, - "loss": 0.1275, + "learning_rate": 2.8837782597163497e-05, + "loss": 0.1279, "step": 13415 }, { "epoch": 0.63, - "learning_rate": 1.883549763255356e-05, - "loss": 0.1297, + "learning_rate": 2.8837314527327477e-05, + "loss": 0.1611, "step": 13420 }, { "epoch": 0.63, - "learning_rate": 1.8835028831278424e-05, - "loss": 0.1836, + "learning_rate": 2.8836846457491457e-05, + "loss": 0.1943, "step": 13425 }, { "epoch": 0.63, - "learning_rate": 1.8834560030003284e-05, - "loss": 0.2246, + "learning_rate": 2.8836378387655437e-05, + "loss": 0.1545, "step": 13430 }, { "epoch": 0.63, - "learning_rate": 1.8834091228728144e-05, - "loss": 0.1759, + "learning_rate": 2.883591031781942e-05, + "loss": 0.1712, "step": 13435 }, { "epoch": 0.63, - "learning_rate": 1.8833622427453004e-05, - "loss": 0.2417, + "learning_rate": 2.88354422479834e-05, + "loss": 0.1363, "step": 13440 }, { "epoch": 0.63, - "learning_rate": 1.8833153626177864e-05, - "loss": 0.3051, + "learning_rate": 2.883497417814738e-05, + "loss": 0.3044, "step": 13445 }, { "epoch": 0.63, - "learning_rate": 1.8832684824902724e-05, - "loss": 0.5476, + "learning_rate": 2.883450610831136e-05, + "loss": 0.4945, "step": 13450 }, { "epoch": 0.63, - "learning_rate": 1.8832216023627584e-05, - "loss": 0.2013, + "learning_rate": 2.8834038038475343e-05, + "loss": 0.1929, "step": 13455 }, { "epoch": 0.63, - "learning_rate": 1.8831747222352447e-05, - "loss": 0.0828, + "learning_rate": 2.8833569968639323e-05, + "loss": 0.1058, "step": 13460 }, { "epoch": 0.63, - "learning_rate": 1.8831278421077307e-05, - "loss": 0.0529, + "learning_rate": 2.8833101898803302e-05, + "loss": 0.0493, "step": 13465 }, { "epoch": 0.63, - "learning_rate": 1.8830809619802167e-05, - "loss": 0.0868, + "learning_rate": 2.8832633828967286e-05, + "loss": 0.1075, "step": 13470 }, { "epoch": 0.63, - "learning_rate": 1.883034081852703e-05, - "loss": 0.1599, + "learning_rate": 2.8832165759131265e-05, + "loss": 0.1647, "step": 13475 }, { "epoch": 0.63, - "learning_rate": 1.882987201725189e-05, - "loss": 0.2012, + "learning_rate": 2.8831697689295242e-05, + "loss": 0.2417, "step": 13480 }, { "epoch": 0.63, - "learning_rate": 1.882940321597675e-05, - "loss": 0.2599, + "learning_rate": 2.883122961945922e-05, + "loss": 0.2051, "step": 13485 }, { "epoch": 0.63, - "learning_rate": 1.882893441470161e-05, - "loss": 0.1924, + "learning_rate": 2.8830761549623205e-05, + "loss": 0.2409, "step": 13490 }, { "epoch": 0.63, - "learning_rate": 1.882846561342647e-05, - "loss": 0.4321, + "learning_rate": 2.8830293479787185e-05, + "loss": 0.2244, "step": 13495 }, { "epoch": 0.63, - "learning_rate": 1.882799681215133e-05, - "loss": 0.6084, + "learning_rate": 2.8829825409951164e-05, + "loss": 0.4758, "step": 13500 }, { "epoch": 0.63, - "learning_rate": 1.882752801087619e-05, - "loss": 0.1664, + "learning_rate": 2.8829357340115144e-05, + "loss": 0.3203, "step": 13505 }, { "epoch": 0.63, - "learning_rate": 1.882705920960105e-05, - "loss": 0.1051, + "learning_rate": 2.8828889270279127e-05, + "loss": 0.0932, "step": 13510 }, { "epoch": 0.63, - "learning_rate": 1.8826590408325913e-05, - "loss": 0.1545, + "learning_rate": 2.8828421200443107e-05, + "loss": 0.148, "step": 13515 }, { "epoch": 0.63, - "learning_rate": 1.8826121607050773e-05, - "loss": 0.1605, + "learning_rate": 2.8827953130607087e-05, + "loss": 0.108, "step": 13520 }, { "epoch": 0.63, - "learning_rate": 1.8825652805775633e-05, - "loss": 0.1598, + "learning_rate": 2.882748506077107e-05, + "loss": 0.1227, "step": 13525 }, { "epoch": 0.63, - "learning_rate": 1.8825184004500493e-05, - "loss": 0.2838, + "learning_rate": 2.882701699093505e-05, + "loss": 0.2322, "step": 13530 }, { "epoch": 0.63, - "learning_rate": 1.8824715203225353e-05, - "loss": 0.1525, + "learning_rate": 2.882654892109903e-05, + "loss": 0.1955, "step": 13535 }, { "epoch": 0.63, - "learning_rate": 1.8824246401950216e-05, - "loss": 0.2664, + "learning_rate": 2.8826080851263006e-05, + "loss": 0.2372, "step": 13540 }, { "epoch": 0.63, - "learning_rate": 1.8823777600675076e-05, - "loss": 0.3345, + "learning_rate": 2.882561278142699e-05, + "loss": 0.2577, "step": 13545 }, { "epoch": 0.63, - "learning_rate": 1.8823308799399936e-05, - "loss": 0.3239, + "learning_rate": 2.882514471159097e-05, + "loss": 0.6495, "step": 13550 }, { "epoch": 0.63, - "learning_rate": 1.8822839998124796e-05, - "loss": 0.195, + "learning_rate": 2.882467664175495e-05, + "loss": 0.3233, "step": 13555 }, { "epoch": 0.63, - "learning_rate": 1.882237119684966e-05, - "loss": 0.0748, + "learning_rate": 2.882420857191893e-05, + "loss": 0.0876, "step": 13560 }, { "epoch": 0.63, - "learning_rate": 1.882190239557452e-05, - "loss": 0.1101, + "learning_rate": 2.8823740502082912e-05, + "loss": 0.0832, "step": 13565 }, { "epoch": 0.63, - "learning_rate": 1.882143359429938e-05, - "loss": 0.1743, + "learning_rate": 2.8823272432246892e-05, + "loss": 0.1516, "step": 13570 }, { "epoch": 0.63, - "learning_rate": 1.882096479302424e-05, - "loss": 0.2923, + "learning_rate": 2.8822804362410872e-05, + "loss": 0.1306, "step": 13575 }, { "epoch": 0.63, - "learning_rate": 1.88204959917491e-05, - "loss": 0.2246, + "learning_rate": 2.8822336292574852e-05, + "loss": 0.189, "step": 13580 }, { "epoch": 0.63, - "learning_rate": 1.882002719047396e-05, - "loss": 0.3307, + "learning_rate": 2.8821868222738835e-05, + "loss": 0.2558, "step": 13585 }, { "epoch": 0.63, - "learning_rate": 1.881955838919882e-05, - "loss": 0.237, + "learning_rate": 2.8821400152902815e-05, + "loss": 0.2561, "step": 13590 }, { "epoch": 0.63, - "learning_rate": 1.881908958792368e-05, - "loss": 0.3441, + "learning_rate": 2.8820932083066795e-05, + "loss": 0.2606, "step": 13595 }, { "epoch": 0.63, - "learning_rate": 1.881862078664854e-05, - "loss": 0.6305, + "learning_rate": 2.8820464013230778e-05, + "loss": 0.4794, "step": 13600 }, { "epoch": 0.63, - "learning_rate": 1.8818151985373402e-05, - "loss": 0.234, + "learning_rate": 2.8819995943394754e-05, + "loss": 0.1903, "step": 13605 }, { "epoch": 0.64, - "learning_rate": 1.8817683184098262e-05, - "loss": 0.136, + "learning_rate": 2.8819527873558734e-05, + "loss": 0.1164, "step": 13610 }, { "epoch": 0.64, - "learning_rate": 1.8817214382823122e-05, - "loss": 0.1397, + "learning_rate": 2.8819059803722714e-05, + "loss": 0.115, "step": 13615 }, { "epoch": 0.64, - "learning_rate": 1.8816745581547985e-05, - "loss": 0.1026, + "learning_rate": 2.8818591733886697e-05, + "loss": 0.0967, "step": 13620 }, { "epoch": 0.64, - "learning_rate": 1.8816276780272845e-05, - "loss": 0.2265, + "learning_rate": 2.8818123664050677e-05, + "loss": 0.1154, "step": 13625 }, { "epoch": 0.64, - "learning_rate": 1.8815807978997705e-05, - "loss": 0.2514, + "learning_rate": 2.8817655594214657e-05, + "loss": 0.247, "step": 13630 }, { "epoch": 0.64, - "learning_rate": 1.8815339177722565e-05, - "loss": 0.1731, + "learning_rate": 2.8817187524378636e-05, + "loss": 0.1664, "step": 13635 }, { "epoch": 0.64, - "learning_rate": 1.8814870376447425e-05, - "loss": 0.1958, + "learning_rate": 2.881671945454262e-05, + "loss": 0.2397, "step": 13640 }, { "epoch": 0.64, - "learning_rate": 1.8814401575172285e-05, - "loss": 0.3905, + "learning_rate": 2.88162513847066e-05, + "loss": 0.1994, "step": 13645 }, { "epoch": 0.64, - "learning_rate": 1.8813932773897145e-05, - "loss": 0.573, + "learning_rate": 2.881578331487058e-05, + "loss": 0.4377, "step": 13650 }, { "epoch": 0.64, - "learning_rate": 1.8813463972622008e-05, - "loss": 0.2444, + "learning_rate": 2.8815315245034563e-05, + "loss": 0.2532, "step": 13655 }, { "epoch": 0.64, - "learning_rate": 1.8812995171346868e-05, - "loss": 0.0534, + "learning_rate": 2.8814847175198542e-05, + "loss": 0.1245, "step": 13660 }, { "epoch": 0.64, - "learning_rate": 1.8812526370071728e-05, - "loss": 0.0949, + "learning_rate": 2.8814379105362522e-05, + "loss": 0.0604, "step": 13665 }, { "epoch": 0.64, - "learning_rate": 1.8812057568796588e-05, - "loss": 0.1562, + "learning_rate": 2.88139110355265e-05, + "loss": 0.0789, "step": 13670 }, { "epoch": 0.64, - "learning_rate": 1.8811588767521448e-05, - "loss": 0.1673, + "learning_rate": 2.8813442965690482e-05, + "loss": 0.1269, "step": 13675 }, { "epoch": 0.64, - "learning_rate": 1.8811119966246308e-05, - "loss": 0.2063, + "learning_rate": 2.881297489585446e-05, + "loss": 0.1837, "step": 13680 }, { "epoch": 0.64, - "learning_rate": 1.881065116497117e-05, - "loss": 0.1861, + "learning_rate": 2.881250682601844e-05, + "loss": 0.1308, "step": 13685 }, { "epoch": 0.64, - "learning_rate": 1.881018236369603e-05, - "loss": 0.2188, + "learning_rate": 2.881203875618242e-05, + "loss": 0.2554, "step": 13690 }, { "epoch": 0.64, - "learning_rate": 1.880971356242089e-05, - "loss": 0.3303, + "learning_rate": 2.8811570686346404e-05, + "loss": 0.2574, "step": 13695 }, { "epoch": 0.64, - "learning_rate": 1.8809244761145754e-05, - "loss": 0.6072, + "learning_rate": 2.8811102616510384e-05, + "loss": 0.4211, "step": 13700 }, { "epoch": 0.64, - "learning_rate": 1.8808775959870614e-05, - "loss": 0.2465, + "learning_rate": 2.8810634546674364e-05, + "loss": 0.2948, "step": 13705 }, { "epoch": 0.64, - "learning_rate": 1.8808307158595474e-05, - "loss": 0.0536, + "learning_rate": 2.8810166476838347e-05, + "loss": 0.0887, "step": 13710 }, { "epoch": 0.64, - "learning_rate": 1.8807838357320334e-05, - "loss": 0.0992, + "learning_rate": 2.8809698407002327e-05, + "loss": 0.0908, "step": 13715 }, { "epoch": 0.64, - "learning_rate": 1.8807369556045194e-05, - "loss": 0.1814, + "learning_rate": 2.8809230337166307e-05, + "loss": 0.1765, "step": 13720 }, { "epoch": 0.64, - "learning_rate": 1.8806900754770054e-05, - "loss": 0.1639, + "learning_rate": 2.8808762267330287e-05, + "loss": 0.2056, "step": 13725 }, { "epoch": 0.64, - "learning_rate": 1.8806431953494914e-05, - "loss": 0.174, + "learning_rate": 2.8808294197494267e-05, + "loss": 0.2093, "step": 13730 }, { "epoch": 0.64, - "learning_rate": 1.8805963152219774e-05, - "loss": 0.1975, + "learning_rate": 2.8807826127658246e-05, + "loss": 0.1898, "step": 13735 }, { "epoch": 0.64, - "learning_rate": 1.8805494350944634e-05, - "loss": 0.175, + "learning_rate": 2.8807358057822226e-05, + "loss": 0.3098, "step": 13740 }, { "epoch": 0.64, - "learning_rate": 1.8805025549669497e-05, - "loss": 0.3268, + "learning_rate": 2.8806889987986206e-05, + "loss": 0.433, "step": 13745 }, { "epoch": 0.64, - "learning_rate": 1.8804556748394357e-05, - "loss": 0.582, + "learning_rate": 2.880642191815019e-05, + "loss": 0.3335, "step": 13750 }, { "epoch": 0.64, - "learning_rate": 1.8804087947119217e-05, - "loss": 0.2182, + "learning_rate": 2.880595384831417e-05, + "loss": 0.2012, "step": 13755 }, { "epoch": 0.64, - "learning_rate": 1.880361914584408e-05, - "loss": 0.093, + "learning_rate": 2.880548577847815e-05, + "loss": 0.0612, "step": 13760 }, { "epoch": 0.64, - "learning_rate": 1.880315034456894e-05, - "loss": 0.1654, + "learning_rate": 2.8805017708642132e-05, + "loss": 0.0931, "step": 13765 }, { "epoch": 0.64, - "learning_rate": 1.88026815432938e-05, - "loss": 0.0881, + "learning_rate": 2.8804549638806112e-05, + "loss": 0.122, "step": 13770 }, { "epoch": 0.64, - "learning_rate": 1.880221274201866e-05, - "loss": 0.1605, + "learning_rate": 2.8804081568970092e-05, + "loss": 0.1118, "step": 13775 }, { "epoch": 0.64, - "learning_rate": 1.880174394074352e-05, - "loss": 0.1614, + "learning_rate": 2.880361349913407e-05, + "loss": 0.2083, "step": 13780 }, { "epoch": 0.64, - "learning_rate": 1.880127513946838e-05, - "loss": 0.1891, + "learning_rate": 2.8803145429298055e-05, + "loss": 0.1495, "step": 13785 }, { "epoch": 0.64, - "learning_rate": 1.880080633819324e-05, - "loss": 0.2944, + "learning_rate": 2.8802677359462035e-05, + "loss": 0.1985, "step": 13790 }, { "epoch": 0.64, - "learning_rate": 1.8800337536918103e-05, - "loss": 0.4841, + "learning_rate": 2.880220928962601e-05, + "loss": 0.2896, "step": 13795 }, { "epoch": 0.64, - "learning_rate": 1.8799868735642963e-05, - "loss": 0.5165, + "learning_rate": 2.880174121978999e-05, + "loss": 0.6363, "step": 13800 }, { "epoch": 0.64, - "learning_rate": 1.8799399934367823e-05, - "loss": 0.2663, + "learning_rate": 2.8801273149953974e-05, + "loss": 0.1096, "step": 13805 }, { "epoch": 0.64, - "learning_rate": 1.8798931133092683e-05, - "loss": 0.067, + "learning_rate": 2.8800805080117954e-05, + "loss": 0.0928, "step": 13810 }, { "epoch": 0.64, - "learning_rate": 1.8798462331817543e-05, - "loss": 0.1632, + "learning_rate": 2.8800337010281934e-05, + "loss": 0.1624, "step": 13815 }, { "epoch": 0.64, - "learning_rate": 1.8797993530542403e-05, - "loss": 0.0878, + "learning_rate": 2.8799868940445913e-05, + "loss": 0.1412, "step": 13820 }, { "epoch": 0.65, - "learning_rate": 1.8797524729267266e-05, - "loss": 0.1335, + "learning_rate": 2.8799400870609897e-05, + "loss": 0.1277, "step": 13825 }, { "epoch": 0.65, - "learning_rate": 1.8797055927992126e-05, - "loss": 0.2923, + "learning_rate": 2.8798932800773876e-05, + "loss": 0.2675, "step": 13830 }, { "epoch": 0.65, - "learning_rate": 1.8796587126716986e-05, - "loss": 0.2697, + "learning_rate": 2.8798464730937856e-05, + "loss": 0.3349, "step": 13835 }, { "epoch": 0.65, - "learning_rate": 1.879611832544185e-05, - "loss": 0.2409, + "learning_rate": 2.879799666110184e-05, + "loss": 0.2923, "step": 13840 }, { "epoch": 0.65, - "learning_rate": 1.879564952416671e-05, - "loss": 0.3278, + "learning_rate": 2.879752859126582e-05, + "loss": 0.3276, "step": 13845 }, { "epoch": 0.65, - "learning_rate": 1.879518072289157e-05, - "loss": 0.4615, + "learning_rate": 2.87970605214298e-05, + "loss": 0.606, "step": 13850 }, { "epoch": 0.65, - "learning_rate": 1.879471192161643e-05, - "loss": 0.2961, + "learning_rate": 2.879659245159378e-05, + "loss": 0.2451, "step": 13855 }, { "epoch": 0.65, - "learning_rate": 1.879424312034129e-05, - "loss": 0.1467, + "learning_rate": 2.879612438175776e-05, + "loss": 0.1395, "step": 13860 }, { "epoch": 0.65, - "learning_rate": 1.879377431906615e-05, - "loss": 0.0899, + "learning_rate": 2.879565631192174e-05, + "loss": 0.0936, "step": 13865 }, { "epoch": 0.65, - "learning_rate": 1.879330551779101e-05, - "loss": 0.1482, + "learning_rate": 2.879518824208572e-05, + "loss": 0.164, "step": 13870 }, { "epoch": 0.65, - "learning_rate": 1.879283671651587e-05, - "loss": 0.1441, + "learning_rate": 2.8794720172249698e-05, + "loss": 0.1364, "step": 13875 }, { "epoch": 0.65, - "learning_rate": 1.879236791524073e-05, - "loss": 0.1818, + "learning_rate": 2.879425210241368e-05, + "loss": 0.132, "step": 13880 }, { "epoch": 0.65, - "learning_rate": 1.8791899113965592e-05, - "loss": 0.2261, + "learning_rate": 2.879378403257766e-05, + "loss": 0.1665, "step": 13885 }, { "epoch": 0.65, - "learning_rate": 1.8791430312690452e-05, - "loss": 0.2463, + "learning_rate": 2.879331596274164e-05, + "loss": 0.285, "step": 13890 }, { "epoch": 0.65, - "learning_rate": 1.8790961511415312e-05, - "loss": 0.2855, + "learning_rate": 2.8792847892905624e-05, + "loss": 0.4156, "step": 13895 }, { "epoch": 0.65, - "learning_rate": 1.8790492710140172e-05, - "loss": 0.5662, + "learning_rate": 2.8792379823069604e-05, + "loss": 0.6868, "step": 13900 }, { "epoch": 0.65, - "learning_rate": 1.8790023908865035e-05, - "loss": 0.217, + "learning_rate": 2.8791911753233584e-05, + "loss": 0.1785, "step": 13905 }, { "epoch": 0.65, - "learning_rate": 1.8789555107589895e-05, - "loss": 0.1254, + "learning_rate": 2.8791443683397564e-05, + "loss": 0.0816, "step": 13910 }, { "epoch": 0.65, - "learning_rate": 1.8789086306314755e-05, - "loss": 0.1276, + "learning_rate": 2.8790975613561547e-05, + "loss": 0.0749, "step": 13915 }, { "epoch": 0.65, - "learning_rate": 1.8788617505039615e-05, - "loss": 0.1244, + "learning_rate": 2.8790507543725523e-05, + "loss": 0.1498, "step": 13920 }, { "epoch": 0.65, - "learning_rate": 1.8788148703764475e-05, - "loss": 0.1612, + "learning_rate": 2.8790039473889503e-05, + "loss": 0.1049, "step": 13925 }, { "epoch": 0.65, - "learning_rate": 1.8787679902489335e-05, - "loss": 0.2602, + "learning_rate": 2.8789571404053483e-05, + "loss": 0.1121, "step": 13930 }, { "epoch": 0.65, - "learning_rate": 1.87872111012142e-05, - "loss": 0.3629, + "learning_rate": 2.8789103334217466e-05, + "loss": 0.1455, "step": 13935 }, { "epoch": 0.65, - "learning_rate": 1.8786742299939058e-05, - "loss": 0.266, + "learning_rate": 2.8788635264381446e-05, + "loss": 0.2959, "step": 13940 }, { "epoch": 0.65, - "learning_rate": 1.8786273498663918e-05, - "loss": 0.3049, + "learning_rate": 2.8788167194545426e-05, + "loss": 0.2852, "step": 13945 }, { "epoch": 0.65, - "learning_rate": 1.8785804697388778e-05, - "loss": 0.5083, + "learning_rate": 2.878769912470941e-05, + "loss": 0.6112, "step": 13950 }, { "epoch": 0.65, - "learning_rate": 1.8785335896113638e-05, - "loss": 0.2277, + "learning_rate": 2.878723105487339e-05, + "loss": 0.324, "step": 13955 }, { "epoch": 0.65, - "learning_rate": 1.8784867094838498e-05, - "loss": 0.1174, + "learning_rate": 2.878676298503737e-05, + "loss": 0.1254, "step": 13960 }, { "epoch": 0.65, - "learning_rate": 1.8784398293563358e-05, - "loss": 0.0655, + "learning_rate": 2.878629491520135e-05, + "loss": 0.0945, "step": 13965 }, { "epoch": 0.65, - "learning_rate": 1.878392949228822e-05, - "loss": 0.1587, + "learning_rate": 2.8785826845365332e-05, + "loss": 0.1694, "step": 13970 }, { "epoch": 0.65, - "learning_rate": 1.878346069101308e-05, - "loss": 0.1778, + "learning_rate": 2.878535877552931e-05, + "loss": 0.1078, "step": 13975 }, { "epoch": 0.65, - "learning_rate": 1.878299188973794e-05, - "loss": 0.2082, + "learning_rate": 2.878489070569329e-05, + "loss": 0.2042, "step": 13980 }, { "epoch": 0.65, - "learning_rate": 1.8782523088462804e-05, - "loss": 0.1945, + "learning_rate": 2.8784422635857268e-05, + "loss": 0.2052, "step": 13985 }, { "epoch": 0.65, - "learning_rate": 1.8782054287187664e-05, - "loss": 0.2287, + "learning_rate": 2.878395456602125e-05, + "loss": 0.2638, "step": 13990 }, { "epoch": 0.65, - "learning_rate": 1.8781585485912524e-05, - "loss": 0.2369, + "learning_rate": 2.878348649618523e-05, + "loss": 0.3099, "step": 13995 }, { "epoch": 0.65, - "learning_rate": 1.8781116684637384e-05, - "loss": 0.5127, + "learning_rate": 2.878301842634921e-05, + "loss": 0.3693, "step": 14000 }, { "epoch": 0.65, - "learning_rate": 1.8780647883362244e-05, - "loss": 0.2929, + "learning_rate": 2.878255035651319e-05, + "loss": 0.1832, "step": 14005 }, { "epoch": 0.65, - "learning_rate": 1.8780179082087104e-05, - "loss": 0.1198, + "learning_rate": 2.8782082286677174e-05, + "loss": 0.0897, "step": 14010 }, { "epoch": 0.65, - "learning_rate": 1.8779710280811964e-05, - "loss": 0.0986, + "learning_rate": 2.8781614216841153e-05, + "loss": 0.1162, "step": 14015 }, { "epoch": 0.65, - "learning_rate": 1.8779241479536824e-05, - "loss": 0.1393, + "learning_rate": 2.8781146147005133e-05, + "loss": 0.0888, "step": 14020 }, { "epoch": 0.65, - "learning_rate": 1.8778772678261687e-05, - "loss": 0.1961, + "learning_rate": 2.8780678077169116e-05, + "loss": 0.0888, "step": 14025 }, { "epoch": 0.65, - "learning_rate": 1.8778303876986547e-05, - "loss": 0.2312, + "learning_rate": 2.8780210007333096e-05, + "loss": 0.1766, "step": 14030 }, { "epoch": 0.65, - "learning_rate": 1.8777835075711407e-05, - "loss": 0.2327, + "learning_rate": 2.8779741937497076e-05, + "loss": 0.1924, "step": 14035 }, { "epoch": 0.66, - "learning_rate": 1.8777366274436267e-05, - "loss": 0.258, + "learning_rate": 2.8779273867661056e-05, + "loss": 0.2636, "step": 14040 }, { "epoch": 0.66, - "learning_rate": 1.8776897473161127e-05, - "loss": 0.4388, + "learning_rate": 2.8778805797825036e-05, + "loss": 0.2952, "step": 14045 }, { "epoch": 0.66, - "learning_rate": 1.877642867188599e-05, - "loss": 0.5567, + "learning_rate": 2.8778337727989016e-05, + "loss": 0.5158, "step": 14050 }, { "epoch": 0.66, - "learning_rate": 1.877595987061085e-05, - "loss": 0.222, + "learning_rate": 2.8777869658152995e-05, + "loss": 0.223, "step": 14055 }, { "epoch": 0.66, - "learning_rate": 1.877549106933571e-05, - "loss": 0.0841, + "learning_rate": 2.8777401588316975e-05, + "loss": 0.0951, "step": 14060 }, { "epoch": 0.66, - "learning_rate": 1.877502226806057e-05, - "loss": 0.0847, + "learning_rate": 2.877693351848096e-05, + "loss": 0.0751, "step": 14065 }, { "epoch": 0.66, - "learning_rate": 1.877455346678543e-05, - "loss": 0.181, + "learning_rate": 2.8776465448644938e-05, + "loss": 0.09, "step": 14070 }, { "epoch": 0.66, - "learning_rate": 1.8774084665510293e-05, - "loss": 0.2742, + "learning_rate": 2.8775997378808918e-05, + "loss": 0.1597, "step": 14075 }, { "epoch": 0.66, - "learning_rate": 1.8773615864235153e-05, - "loss": 0.1208, + "learning_rate": 2.87755293089729e-05, + "loss": 0.0965, "step": 14080 }, { "epoch": 0.66, - "learning_rate": 1.8773147062960013e-05, - "loss": 0.2701, + "learning_rate": 2.877506123913688e-05, + "loss": 0.2149, "step": 14085 }, { "epoch": 0.66, - "learning_rate": 1.8772678261684873e-05, - "loss": 0.2529, + "learning_rate": 2.877459316930086e-05, + "loss": 0.3045, "step": 14090 }, { "epoch": 0.66, - "learning_rate": 1.8772209460409733e-05, - "loss": 0.3087, + "learning_rate": 2.877412509946484e-05, + "loss": 0.3601, "step": 14095 }, { "epoch": 0.66, - "learning_rate": 1.8771740659134593e-05, - "loss": 0.6273, + "learning_rate": 2.8773657029628824e-05, + "loss": 0.8148, "step": 14100 }, { "epoch": 0.66, - "learning_rate": 1.8771271857859453e-05, - "loss": 0.1981, + "learning_rate": 2.8773188959792804e-05, + "loss": 0.2421, "step": 14105 }, { "epoch": 0.66, - "learning_rate": 1.8770803056584316e-05, - "loss": 0.1016, + "learning_rate": 2.877272088995678e-05, + "loss": 0.0657, "step": 14110 }, { "epoch": 0.66, - "learning_rate": 1.8770334255309176e-05, - "loss": 0.115, + "learning_rate": 2.877225282012076e-05, + "loss": 0.097, "step": 14115 }, { "epoch": 0.66, - "learning_rate": 1.8769865454034036e-05, - "loss": 0.07, + "learning_rate": 2.8771784750284743e-05, + "loss": 0.1607, "step": 14120 }, { "epoch": 0.66, - "learning_rate": 1.8769396652758896e-05, - "loss": 0.1167, + "learning_rate": 2.8771316680448723e-05, + "loss": 0.1174, "step": 14125 }, { "epoch": 0.66, - "learning_rate": 1.876892785148376e-05, - "loss": 0.2139, + "learning_rate": 2.8770848610612703e-05, + "loss": 0.1596, "step": 14130 }, { "epoch": 0.66, - "learning_rate": 1.876845905020862e-05, - "loss": 0.2074, + "learning_rate": 2.8770380540776686e-05, + "loss": 0.2451, "step": 14135 }, { "epoch": 0.66, - "learning_rate": 1.876799024893348e-05, - "loss": 0.173, + "learning_rate": 2.8769912470940666e-05, + "loss": 0.2717, "step": 14140 }, { "epoch": 0.66, - "learning_rate": 1.876752144765834e-05, - "loss": 0.3318, + "learning_rate": 2.8769444401104646e-05, + "loss": 0.4318, "step": 14145 }, { "epoch": 0.66, - "learning_rate": 1.87670526463832e-05, - "loss": 0.5468, + "learning_rate": 2.8768976331268625e-05, + "loss": 0.591, "step": 14150 }, { "epoch": 0.66, - "learning_rate": 1.876658384510806e-05, - "loss": 0.242, + "learning_rate": 2.876850826143261e-05, + "loss": 0.2317, "step": 14155 }, { "epoch": 0.66, - "learning_rate": 1.876611504383292e-05, - "loss": 0.0742, + "learning_rate": 2.876804019159659e-05, + "loss": 0.0934, "step": 14160 }, { "epoch": 0.66, - "learning_rate": 1.8765646242557782e-05, - "loss": 0.0878, + "learning_rate": 2.8767572121760568e-05, + "loss": 0.0979, "step": 14165 }, { "epoch": 0.66, - "learning_rate": 1.8765177441282642e-05, - "loss": 0.157, + "learning_rate": 2.8767104051924548e-05, + "loss": 0.0935, "step": 14170 }, { "epoch": 0.66, - "learning_rate": 1.8764708640007502e-05, - "loss": 0.1693, + "learning_rate": 2.8766635982088528e-05, + "loss": 0.1455, "step": 14175 }, { "epoch": 0.66, - "learning_rate": 1.8764239838732362e-05, - "loss": 0.1899, + "learning_rate": 2.8766167912252508e-05, + "loss": 0.207, "step": 14180 }, { "epoch": 0.66, - "learning_rate": 1.8763771037457222e-05, - "loss": 0.2461, + "learning_rate": 2.8765699842416488e-05, + "loss": 0.1849, "step": 14185 }, { "epoch": 0.66, - "learning_rate": 1.8763302236182085e-05, - "loss": 0.2921, + "learning_rate": 2.8765231772580467e-05, + "loss": 0.2923, "step": 14190 }, { "epoch": 0.66, - "learning_rate": 1.8762833434906945e-05, - "loss": 0.285, + "learning_rate": 2.876476370274445e-05, + "loss": 0.2744, "step": 14195 }, { "epoch": 0.66, - "learning_rate": 1.8762364633631805e-05, - "loss": 0.4061, + "learning_rate": 2.876429563290843e-05, + "loss": 0.4343, "step": 14200 }, { "epoch": 0.66, - "learning_rate": 1.8761895832356665e-05, - "loss": 0.2432, + "learning_rate": 2.876382756307241e-05, + "loss": 0.184, "step": 14205 }, { "epoch": 0.66, - "learning_rate": 1.876142703108153e-05, - "loss": 0.1719, + "learning_rate": 2.8763359493236393e-05, + "loss": 0.0398, "step": 14210 }, { "epoch": 0.66, - "learning_rate": 1.876095822980639e-05, - "loss": 0.1674, + "learning_rate": 2.8762891423400373e-05, + "loss": 0.1524, "step": 14215 }, { "epoch": 0.66, - "learning_rate": 1.876048942853125e-05, - "loss": 0.1117, + "learning_rate": 2.8762423353564353e-05, + "loss": 0.1415, "step": 14220 }, { "epoch": 0.66, - "learning_rate": 1.876002062725611e-05, - "loss": 0.1181, + "learning_rate": 2.8761955283728333e-05, + "loss": 0.1488, "step": 14225 }, { "epoch": 0.66, - "learning_rate": 1.875955182598097e-05, - "loss": 0.2447, + "learning_rate": 2.8761487213892316e-05, + "loss": 0.1139, "step": 14230 }, { "epoch": 0.66, - "learning_rate": 1.8759083024705828e-05, - "loss": 0.2381, + "learning_rate": 2.8761019144056293e-05, + "loss": 0.2217, "step": 14235 }, { "epoch": 0.66, - "learning_rate": 1.8758614223430688e-05, - "loss": 0.2788, + "learning_rate": 2.8760551074220272e-05, + "loss": 0.27, "step": 14240 }, { "epoch": 0.66, - "learning_rate": 1.8758145422155548e-05, - "loss": 0.3321, + "learning_rate": 2.8760083004384252e-05, + "loss": 0.3859, "step": 14245 }, { "epoch": 0.66, - "learning_rate": 1.8757676620880408e-05, - "loss": 0.5291, + "learning_rate": 2.8759614934548235e-05, + "loss": 0.4615, "step": 14250 }, { "epoch": 0.67, - "learning_rate": 1.875720781960527e-05, - "loss": 0.1678, + "learning_rate": 2.8759146864712215e-05, + "loss": 0.2342, "step": 14255 }, { "epoch": 0.67, - "learning_rate": 1.875673901833013e-05, - "loss": 0.1219, + "learning_rate": 2.8758678794876195e-05, + "loss": 0.0786, "step": 14260 }, { "epoch": 0.67, - "learning_rate": 1.875627021705499e-05, - "loss": 0.0905, + "learning_rate": 2.8758210725040178e-05, + "loss": 0.1323, "step": 14265 }, { "epoch": 0.67, - "learning_rate": 1.8755801415779855e-05, - "loss": 0.2237, + "learning_rate": 2.8757742655204158e-05, + "loss": 0.1346, "step": 14270 }, { "epoch": 0.67, - "learning_rate": 1.8755332614504714e-05, - "loss": 0.1641, + "learning_rate": 2.8757274585368138e-05, + "loss": 0.115, "step": 14275 }, { "epoch": 0.67, - "learning_rate": 1.8754863813229574e-05, - "loss": 0.2348, + "learning_rate": 2.8756806515532118e-05, + "loss": 0.1903, "step": 14280 }, { "epoch": 0.67, - "learning_rate": 1.8754395011954434e-05, - "loss": 0.1618, + "learning_rate": 2.87563384456961e-05, + "loss": 0.2855, "step": 14285 }, { "epoch": 0.67, - "learning_rate": 1.8753926210679294e-05, - "loss": 0.2344, + "learning_rate": 2.875587037586008e-05, + "loss": 0.2659, "step": 14290 }, { "epoch": 0.67, - "learning_rate": 1.8753457409404154e-05, - "loss": 0.3558, + "learning_rate": 2.875540230602406e-05, + "loss": 0.5062, "step": 14295 }, { "epoch": 0.67, - "learning_rate": 1.8752988608129014e-05, - "loss": 0.456, + "learning_rate": 2.8754934236188037e-05, + "loss": 0.5901, "step": 14300 }, { "epoch": 0.67, - "learning_rate": 1.8752519806853877e-05, - "loss": 0.2746, + "learning_rate": 2.875446616635202e-05, + "loss": 0.1554, "step": 14305 }, { "epoch": 0.67, - "learning_rate": 1.8752051005578737e-05, - "loss": 0.1039, + "learning_rate": 2.8753998096516e-05, + "loss": 0.1165, "step": 14310 }, { "epoch": 0.67, - "learning_rate": 1.8751582204303597e-05, - "loss": 0.1487, + "learning_rate": 2.875353002667998e-05, + "loss": 0.0867, "step": 14315 }, { "epoch": 0.67, - "learning_rate": 1.8751113403028457e-05, - "loss": 0.0682, + "learning_rate": 2.8753061956843963e-05, + "loss": 0.1441, "step": 14320 }, { "epoch": 0.67, - "learning_rate": 1.8750644601753317e-05, - "loss": 0.2572, + "learning_rate": 2.8752593887007943e-05, + "loss": 0.1574, "step": 14325 }, { "epoch": 0.67, - "learning_rate": 1.8750175800478177e-05, - "loss": 0.2256, + "learning_rate": 2.8752125817171923e-05, + "loss": 0.1679, "step": 14330 }, { "epoch": 0.67, - "learning_rate": 1.874970699920304e-05, - "loss": 0.2427, + "learning_rate": 2.8751657747335902e-05, + "loss": 0.1935, "step": 14335 }, { "epoch": 0.67, - "learning_rate": 1.87492381979279e-05, - "loss": 0.1959, + "learning_rate": 2.8751189677499886e-05, + "loss": 0.1993, "step": 14340 }, { "epoch": 0.67, - "learning_rate": 1.874876939665276e-05, - "loss": 0.3314, + "learning_rate": 2.8750721607663865e-05, + "loss": 0.295, "step": 14345 }, { "epoch": 0.67, - "learning_rate": 1.8748300595377624e-05, - "loss": 0.7407, + "learning_rate": 2.8750253537827845e-05, + "loss": 0.6586, "step": 14350 }, { "epoch": 0.67, - "learning_rate": 1.8747831794102484e-05, - "loss": 0.1726, + "learning_rate": 2.8749785467991825e-05, + "loss": 0.1668, "step": 14355 }, { "epoch": 0.67, - "learning_rate": 1.8747362992827344e-05, - "loss": 0.1146, + "learning_rate": 2.8749317398155808e-05, + "loss": 0.0989, "step": 14360 }, { "epoch": 0.67, - "learning_rate": 1.8746894191552203e-05, - "loss": 0.0943, + "learning_rate": 2.8748849328319785e-05, + "loss": 0.0756, "step": 14365 }, { "epoch": 0.67, - "learning_rate": 1.8746425390277063e-05, - "loss": 0.143, + "learning_rate": 2.8748381258483765e-05, + "loss": 0.0983, "step": 14370 }, { "epoch": 0.67, - "learning_rate": 1.8745956589001923e-05, - "loss": 0.1744, + "learning_rate": 2.8747913188647744e-05, + "loss": 0.1155, "step": 14375 }, { "epoch": 0.67, - "learning_rate": 1.8745487787726783e-05, - "loss": 0.1772, + "learning_rate": 2.8747445118811728e-05, + "loss": 0.1271, "step": 14380 }, { "epoch": 0.67, - "learning_rate": 1.8745018986451643e-05, - "loss": 0.1932, + "learning_rate": 2.8746977048975707e-05, + "loss": 0.2332, "step": 14385 }, { "epoch": 0.67, - "learning_rate": 1.8744550185176503e-05, - "loss": 0.2547, + "learning_rate": 2.8746508979139687e-05, + "loss": 0.1807, "step": 14390 }, { "epoch": 0.67, - "learning_rate": 1.8744081383901363e-05, - "loss": 0.2008, + "learning_rate": 2.874604090930367e-05, + "loss": 0.3818, "step": 14395 }, { "epoch": 0.67, - "learning_rate": 1.8743612582626226e-05, - "loss": 0.5431, + "learning_rate": 2.874557283946765e-05, + "loss": 0.5533, "step": 14400 }, { "epoch": 0.67, - "learning_rate": 1.8743143781351086e-05, - "loss": 0.2268, + "learning_rate": 2.874510476963163e-05, + "loss": 0.2287, "step": 14405 }, { "epoch": 0.67, - "learning_rate": 1.8742674980075946e-05, - "loss": 0.0781, + "learning_rate": 2.874463669979561e-05, + "loss": 0.0701, "step": 14410 }, { "epoch": 0.67, - "learning_rate": 1.874220617880081e-05, - "loss": 0.1206, + "learning_rate": 2.8744168629959593e-05, + "loss": 0.0954, "step": 14415 }, { "epoch": 0.67, - "learning_rate": 1.874173737752567e-05, - "loss": 0.0975, + "learning_rate": 2.8743700560123573e-05, + "loss": 0.1512, "step": 14420 }, { "epoch": 0.67, - "learning_rate": 1.874126857625053e-05, - "loss": 0.193, + "learning_rate": 2.874323249028755e-05, + "loss": 0.156, "step": 14425 }, { "epoch": 0.67, - "learning_rate": 1.874079977497539e-05, - "loss": 0.1506, + "learning_rate": 2.874276442045153e-05, + "loss": 0.1778, "step": 14430 }, { "epoch": 0.67, - "learning_rate": 1.874033097370025e-05, - "loss": 0.2245, + "learning_rate": 2.8742296350615512e-05, + "loss": 0.1784, "step": 14435 }, { "epoch": 0.67, - "learning_rate": 1.873986217242511e-05, - "loss": 0.2452, + "learning_rate": 2.8741828280779492e-05, + "loss": 0.1927, "step": 14440 }, { "epoch": 0.67, - "learning_rate": 1.8739393371149973e-05, - "loss": 0.3057, + "learning_rate": 2.8741360210943472e-05, + "loss": 0.2273, "step": 14445 }, { "epoch": 0.67, - "learning_rate": 1.8738924569874833e-05, - "loss": 0.7368, + "learning_rate": 2.8740892141107455e-05, + "loss": 0.4766, "step": 14450 }, { "epoch": 0.67, - "learning_rate": 1.8738455768599692e-05, - "loss": 0.2164, + "learning_rate": 2.8740424071271435e-05, + "loss": 0.2449, "step": 14455 }, { "epoch": 0.67, - "learning_rate": 1.8737986967324552e-05, - "loss": 0.0852, + "learning_rate": 2.8739956001435415e-05, + "loss": 0.0797, "step": 14460 }, { "epoch": 0.67, - "learning_rate": 1.8737518166049412e-05, - "loss": 0.1504, + "learning_rate": 2.8739487931599395e-05, + "loss": 0.1437, "step": 14465 }, { "epoch": 0.68, - "learning_rate": 1.8737049364774272e-05, - "loss": 0.2015, + "learning_rate": 2.8739019861763378e-05, + "loss": 0.1084, "step": 14470 }, { "epoch": 0.68, - "learning_rate": 1.8736580563499132e-05, - "loss": 0.1602, + "learning_rate": 2.8738551791927358e-05, + "loss": 0.1042, "step": 14475 }, { "epoch": 0.68, - "learning_rate": 1.8736111762223995e-05, - "loss": 0.2252, + "learning_rate": 2.8738083722091337e-05, + "loss": 0.2117, "step": 14480 }, { "epoch": 0.68, - "learning_rate": 1.8735642960948855e-05, - "loss": 0.1856, + "learning_rate": 2.8737615652255317e-05, + "loss": 0.2622, "step": 14485 }, { "epoch": 0.68, - "learning_rate": 1.8735174159673715e-05, - "loss": 0.2775, + "learning_rate": 2.8737147582419297e-05, + "loss": 0.2691, "step": 14490 }, { "epoch": 0.68, - "learning_rate": 1.873470535839858e-05, - "loss": 0.2782, + "learning_rate": 2.8736679512583277e-05, + "loss": 0.3481, "step": 14495 }, { "epoch": 0.68, - "learning_rate": 1.873423655712344e-05, - "loss": 0.5278, + "learning_rate": 2.8736211442747257e-05, + "loss": 0.3504, "step": 14500 }, { "epoch": 0.68, - "learning_rate": 1.87337677558483e-05, - "loss": 0.2166, + "learning_rate": 2.873574337291124e-05, + "loss": 0.264, "step": 14505 }, { "epoch": 0.68, - "learning_rate": 1.873329895457316e-05, - "loss": 0.1283, + "learning_rate": 2.873527530307522e-05, + "loss": 0.0905, "step": 14510 }, { "epoch": 0.68, - "learning_rate": 1.873283015329802e-05, - "loss": 0.1561, + "learning_rate": 2.87348072332392e-05, + "loss": 0.07, "step": 14515 }, { "epoch": 0.68, - "learning_rate": 1.873236135202288e-05, - "loss": 0.1354, + "learning_rate": 2.873433916340318e-05, + "loss": 0.1167, "step": 14520 }, { "epoch": 0.68, - "learning_rate": 1.8731892550747738e-05, - "loss": 0.1407, + "learning_rate": 2.8733871093567163e-05, + "loss": 0.1427, "step": 14525 }, { "epoch": 0.68, - "learning_rate": 1.8731423749472598e-05, - "loss": 0.2432, + "learning_rate": 2.8733403023731142e-05, + "loss": 0.1946, "step": 14530 }, { "epoch": 0.68, - "learning_rate": 1.8730954948197458e-05, - "loss": 0.2267, + "learning_rate": 2.8732934953895122e-05, + "loss": 0.2287, "step": 14535 }, { "epoch": 0.68, - "learning_rate": 1.873048614692232e-05, - "loss": 0.2128, + "learning_rate": 2.8732466884059102e-05, + "loss": 0.2621, "step": 14540 }, { "epoch": 0.68, - "learning_rate": 1.873001734564718e-05, - "loss": 0.3205, + "learning_rate": 2.8731998814223085e-05, + "loss": 0.3953, "step": 14545 }, { "epoch": 0.68, - "learning_rate": 1.872954854437204e-05, - "loss": 0.631, + "learning_rate": 2.8731530744387065e-05, + "loss": 0.5614, "step": 14550 }, { "epoch": 0.68, - "learning_rate": 1.87290797430969e-05, - "loss": 0.2592, + "learning_rate": 2.873106267455104e-05, + "loss": 0.1823, "step": 14555 }, { "epoch": 0.68, - "learning_rate": 1.8728610941821765e-05, - "loss": 0.1638, + "learning_rate": 2.8730594604715025e-05, + "loss": 0.0456, "step": 14560 }, { "epoch": 0.68, - "learning_rate": 1.8728142140546625e-05, - "loss": 0.1215, + "learning_rate": 2.8730126534879005e-05, + "loss": 0.0692, "step": 14565 }, { "epoch": 0.68, - "learning_rate": 1.8727673339271484e-05, - "loss": 0.2601, + "learning_rate": 2.8729658465042984e-05, + "loss": 0.1398, "step": 14570 }, { "epoch": 0.68, - "learning_rate": 1.8727204537996344e-05, - "loss": 0.2216, + "learning_rate": 2.8729190395206964e-05, + "loss": 0.1686, "step": 14575 }, { "epoch": 0.68, - "learning_rate": 1.8726735736721204e-05, - "loss": 0.1664, + "learning_rate": 2.8728722325370947e-05, + "loss": 0.0955, "step": 14580 }, { "epoch": 0.68, - "learning_rate": 1.8726266935446068e-05, - "loss": 0.1903, + "learning_rate": 2.8728254255534927e-05, + "loss": 0.1732, "step": 14585 }, { "epoch": 0.68, - "learning_rate": 1.8725798134170928e-05, - "loss": 0.2214, + "learning_rate": 2.8727786185698907e-05, + "loss": 0.2574, "step": 14590 }, { "epoch": 0.68, - "learning_rate": 1.8725329332895788e-05, - "loss": 0.384, + "learning_rate": 2.8727318115862887e-05, + "loss": 0.3676, "step": 14595 }, { "epoch": 0.68, - "learning_rate": 1.8724860531620647e-05, - "loss": 0.6086, + "learning_rate": 2.872685004602687e-05, + "loss": 0.29, "step": 14600 }, { "epoch": 0.68, - "learning_rate": 1.8724391730345507e-05, - "loss": 0.1693, + "learning_rate": 2.872638197619085e-05, + "loss": 0.199, "step": 14605 }, { "epoch": 0.68, - "learning_rate": 1.8723922929070367e-05, - "loss": 0.0831, + "learning_rate": 2.872591390635483e-05, + "loss": 0.073, "step": 14610 }, { "epoch": 0.68, - "learning_rate": 1.8723454127795227e-05, - "loss": 0.1812, + "learning_rate": 2.8725445836518806e-05, + "loss": 0.0835, "step": 14615 }, { "epoch": 0.68, - "learning_rate": 1.872298532652009e-05, - "loss": 0.1165, + "learning_rate": 2.872497776668279e-05, + "loss": 0.1706, "step": 14620 }, { "epoch": 0.68, - "learning_rate": 1.872251652524495e-05, - "loss": 0.1394, + "learning_rate": 2.872450969684677e-05, + "loss": 0.1481, "step": 14625 }, { "epoch": 0.68, - "learning_rate": 1.872204772396981e-05, - "loss": 0.3595, + "learning_rate": 2.872404162701075e-05, + "loss": 0.1746, "step": 14630 }, { "epoch": 0.68, - "learning_rate": 1.872157892269467e-05, - "loss": 0.2451, + "learning_rate": 2.8723573557174732e-05, + "loss": 0.1933, "step": 14635 }, { "epoch": 0.68, - "learning_rate": 1.8721110121419534e-05, - "loss": 0.2951, + "learning_rate": 2.8723105487338712e-05, + "loss": 0.2433, "step": 14640 }, { "epoch": 0.68, - "learning_rate": 1.8720641320144394e-05, - "loss": 0.4274, + "learning_rate": 2.8722637417502692e-05, + "loss": 0.3401, "step": 14645 }, { "epoch": 0.68, - "learning_rate": 1.8720172518869254e-05, - "loss": 0.5065, + "learning_rate": 2.872216934766667e-05, + "loss": 0.3509, "step": 14650 }, { "epoch": 0.68, - "learning_rate": 1.8719703717594113e-05, - "loss": 0.2857, + "learning_rate": 2.8721701277830655e-05, + "loss": 0.2608, "step": 14655 }, { "epoch": 0.68, - "learning_rate": 1.8719234916318973e-05, - "loss": 0.0604, + "learning_rate": 2.8721233207994635e-05, + "loss": 0.0762, "step": 14660 }, { "epoch": 0.68, - "learning_rate": 1.8718766115043833e-05, - "loss": 0.1006, + "learning_rate": 2.8720765138158614e-05, + "loss": 0.1259, "step": 14665 }, { "epoch": 0.68, - "learning_rate": 1.8718297313768693e-05, - "loss": 0.1257, + "learning_rate": 2.8720297068322594e-05, + "loss": 0.1109, "step": 14670 }, { "epoch": 0.68, - "learning_rate": 1.8717828512493557e-05, - "loss": 0.2122, + "learning_rate": 2.8719828998486577e-05, + "loss": 0.0788, "step": 14675 }, { "epoch": 0.68, - "learning_rate": 1.8717359711218417e-05, - "loss": 0.2196, + "learning_rate": 2.8719360928650554e-05, + "loss": 0.2035, "step": 14680 }, { "epoch": 0.69, - "learning_rate": 1.8716890909943276e-05, - "loss": 0.2033, + "learning_rate": 2.8718892858814534e-05, + "loss": 0.2886, "step": 14685 }, { "epoch": 0.69, - "learning_rate": 1.8716422108668136e-05, - "loss": 0.2635, + "learning_rate": 2.8718424788978517e-05, + "loss": 0.1857, "step": 14690 }, { "epoch": 0.69, - "learning_rate": 1.8715953307392996e-05, - "loss": 0.3508, + "learning_rate": 2.8717956719142497e-05, + "loss": 0.3754, "step": 14695 }, { "epoch": 0.69, - "learning_rate": 1.871548450611786e-05, - "loss": 0.5574, + "learning_rate": 2.8717488649306477e-05, + "loss": 0.6902, "step": 14700 }, { "epoch": 0.69, - "learning_rate": 1.871501570484272e-05, - "loss": 0.177, + "learning_rate": 2.8717020579470456e-05, + "loss": 0.2214, "step": 14705 }, { "epoch": 0.69, - "learning_rate": 1.871454690356758e-05, - "loss": 0.0636, + "learning_rate": 2.871655250963444e-05, + "loss": 0.0739, "step": 14710 }, { "epoch": 0.69, - "learning_rate": 1.871407810229244e-05, - "loss": 0.1045, + "learning_rate": 2.871608443979842e-05, + "loss": 0.0544, "step": 14715 }, { "epoch": 0.69, - "learning_rate": 1.87136093010173e-05, - "loss": 0.2398, + "learning_rate": 2.87156163699624e-05, + "loss": 0.0543, "step": 14720 }, { "epoch": 0.69, - "learning_rate": 1.8713140499742163e-05, - "loss": 0.169, + "learning_rate": 2.871514830012638e-05, + "loss": 0.1311, "step": 14725 }, { "epoch": 0.69, - "learning_rate": 1.8712671698467023e-05, - "loss": 0.1389, + "learning_rate": 2.8714680230290362e-05, + "loss": 0.1319, "step": 14730 }, { "epoch": 0.69, - "learning_rate": 1.8712202897191883e-05, - "loss": 0.1491, + "learning_rate": 2.8714212160454342e-05, + "loss": 0.1843, "step": 14735 }, { "epoch": 0.69, - "learning_rate": 1.8711734095916743e-05, - "loss": 0.2459, + "learning_rate": 2.871374409061832e-05, + "loss": 0.3472, "step": 14740 }, { "epoch": 0.69, - "learning_rate": 1.8711265294641602e-05, - "loss": 0.3591, + "learning_rate": 2.8713276020782302e-05, + "loss": 0.2921, "step": 14745 }, { "epoch": 0.69, - "learning_rate": 1.8710796493366462e-05, - "loss": 0.639, + "learning_rate": 2.871280795094628e-05, + "loss": 0.5515, "step": 14750 }, { "epoch": 0.69, - "learning_rate": 1.8710327692091322e-05, - "loss": 0.1863, + "learning_rate": 2.871233988111026e-05, + "loss": 0.2132, "step": 14755 }, { "epoch": 0.69, - "learning_rate": 1.8709858890816182e-05, - "loss": 0.113, + "learning_rate": 2.871187181127424e-05, + "loss": 0.1453, "step": 14760 }, { "epoch": 0.69, - "learning_rate": 1.8709390089541046e-05, - "loss": 0.134, + "learning_rate": 2.8711403741438224e-05, + "loss": 0.1433, "step": 14765 }, { "epoch": 0.69, - "learning_rate": 1.8708921288265906e-05, - "loss": 0.1618, + "learning_rate": 2.8710935671602204e-05, + "loss": 0.1084, "step": 14770 }, { "epoch": 0.69, - "learning_rate": 1.8708452486990765e-05, - "loss": 0.1839, + "learning_rate": 2.8710467601766184e-05, + "loss": 0.1699, "step": 14775 }, { "epoch": 0.69, - "learning_rate": 1.870798368571563e-05, - "loss": 0.2527, + "learning_rate": 2.8709999531930164e-05, + "loss": 0.1344, "step": 14780 }, { "epoch": 0.69, - "learning_rate": 1.870751488444049e-05, - "loss": 0.2761, + "learning_rate": 2.8709531462094147e-05, + "loss": 0.1827, "step": 14785 }, { "epoch": 0.69, - "learning_rate": 1.870704608316535e-05, - "loss": 0.2639, + "learning_rate": 2.8709063392258127e-05, + "loss": 0.2047, "step": 14790 }, { "epoch": 0.69, - "learning_rate": 1.870657728189021e-05, - "loss": 0.3858, + "learning_rate": 2.8708595322422107e-05, + "loss": 0.4195, "step": 14795 }, { "epoch": 0.69, - "learning_rate": 1.870610848061507e-05, - "loss": 0.4675, + "learning_rate": 2.8708127252586086e-05, + "loss": 0.6423, "step": 14800 }, { "epoch": 0.69, - "learning_rate": 1.870563967933993e-05, - "loss": 0.2161, + "learning_rate": 2.8707659182750066e-05, + "loss": 0.2532, "step": 14805 }, { "epoch": 0.69, - "learning_rate": 1.870517087806479e-05, - "loss": 0.0642, + "learning_rate": 2.8707191112914046e-05, + "loss": 0.0983, "step": 14810 }, { "epoch": 0.69, - "learning_rate": 1.8704702076789652e-05, - "loss": 0.1011, + "learning_rate": 2.8706723043078026e-05, + "loss": 0.1019, "step": 14815 }, { "epoch": 0.69, - "learning_rate": 1.870423327551451e-05, - "loss": 0.1166, + "learning_rate": 2.870625497324201e-05, + "loss": 0.0828, "step": 14820 }, { "epoch": 0.69, - "learning_rate": 1.870376447423937e-05, - "loss": 0.1504, + "learning_rate": 2.870578690340599e-05, + "loss": 0.1249, "step": 14825 }, { "epoch": 0.69, - "learning_rate": 1.870329567296423e-05, - "loss": 0.2044, + "learning_rate": 2.870531883356997e-05, + "loss": 0.1412, "step": 14830 }, { "epoch": 0.69, - "learning_rate": 1.870282687168909e-05, - "loss": 0.1632, + "learning_rate": 2.870485076373395e-05, + "loss": 0.3927, "step": 14835 }, { "epoch": 0.69, - "learning_rate": 1.870235807041395e-05, - "loss": 0.3202, + "learning_rate": 2.8704382693897932e-05, + "loss": 0.2275, "step": 14840 }, { "epoch": 0.69, - "learning_rate": 1.8701889269138815e-05, - "loss": 0.3161, + "learning_rate": 2.870391462406191e-05, + "loss": 0.2967, "step": 14845 }, { "epoch": 0.69, - "learning_rate": 1.8701420467863675e-05, - "loss": 0.5072, + "learning_rate": 2.870344655422589e-05, + "loss": 0.5893, "step": 14850 }, { "epoch": 0.69, - "learning_rate": 1.8700951666588535e-05, - "loss": 0.1994, + "learning_rate": 2.870297848438987e-05, + "loss": 0.3111, "step": 14855 }, { "epoch": 0.69, - "learning_rate": 1.8700482865313394e-05, - "loss": 0.1046, + "learning_rate": 2.8702510414553854e-05, + "loss": 0.0802, "step": 14860 }, { "epoch": 0.69, - "learning_rate": 1.8700014064038258e-05, - "loss": 0.1125, + "learning_rate": 2.8702042344717834e-05, + "loss": 0.0921, "step": 14865 }, { "epoch": 0.69, - "learning_rate": 1.8699545262763118e-05, - "loss": 0.1324, + "learning_rate": 2.870157427488181e-05, + "loss": 0.0865, "step": 14870 }, { "epoch": 0.69, - "learning_rate": 1.8699076461487978e-05, - "loss": 0.1554, + "learning_rate": 2.8701106205045794e-05, + "loss": 0.2425, "step": 14875 }, { "epoch": 0.69, - "learning_rate": 1.8698607660212838e-05, - "loss": 0.1562, + "learning_rate": 2.8700638135209774e-05, + "loss": 0.1841, "step": 14880 }, { "epoch": 0.69, - "learning_rate": 1.8698138858937698e-05, - "loss": 0.1357, + "learning_rate": 2.8700170065373754e-05, + "loss": 0.1941, "step": 14885 }, { "epoch": 0.69, - "learning_rate": 1.8697670057662557e-05, - "loss": 0.1963, + "learning_rate": 2.8699701995537733e-05, + "loss": 0.2457, "step": 14890 }, { "epoch": 0.7, - "learning_rate": 1.8697201256387417e-05, - "loss": 0.2944, + "learning_rate": 2.8699233925701717e-05, + "loss": 0.278, "step": 14895 }, { "epoch": 0.7, - "learning_rate": 1.8696732455112277e-05, - "loss": 0.5406, + "learning_rate": 2.8698765855865696e-05, + "loss": 0.4692, "step": 14900 }, { "epoch": 0.7, - "learning_rate": 1.8696263653837137e-05, - "loss": 0.225, + "learning_rate": 2.8698297786029676e-05, + "loss": 0.2133, "step": 14905 }, { "epoch": 0.7, - "learning_rate": 1.8695794852562e-05, - "loss": 0.115, + "learning_rate": 2.8697829716193656e-05, + "loss": 0.065, "step": 14910 }, { "epoch": 0.7, - "learning_rate": 1.869532605128686e-05, - "loss": 0.0827, + "learning_rate": 2.869736164635764e-05, + "loss": 0.105, "step": 14915 }, { "epoch": 0.7, - "learning_rate": 1.869485725001172e-05, - "loss": 0.1133, + "learning_rate": 2.869689357652162e-05, + "loss": 0.0833, "step": 14920 }, { "epoch": 0.7, - "learning_rate": 1.8694388448736584e-05, - "loss": 0.1307, + "learning_rate": 2.86964255066856e-05, + "loss": 0.1978, "step": 14925 }, { "epoch": 0.7, - "learning_rate": 1.8693919647461444e-05, - "loss": 0.1646, + "learning_rate": 2.869595743684958e-05, + "loss": 0.1104, "step": 14930 }, { "epoch": 0.7, - "learning_rate": 1.8693450846186304e-05, - "loss": 0.1663, + "learning_rate": 2.869548936701356e-05, + "loss": 0.1582, "step": 14935 }, { "epoch": 0.7, - "learning_rate": 1.8692982044911164e-05, - "loss": 0.208, + "learning_rate": 2.8695021297177538e-05, + "loss": 0.2012, "step": 14940 }, { "epoch": 0.7, - "learning_rate": 1.8692513243636024e-05, - "loss": 0.3248, + "learning_rate": 2.8694553227341518e-05, + "loss": 0.3984, "step": 14945 }, { "epoch": 0.7, - "learning_rate": 1.8692044442360883e-05, - "loss": 0.539, + "learning_rate": 2.86940851575055e-05, + "loss": 0.7813, "step": 14950 }, { "epoch": 0.7, - "learning_rate": 1.8691575641085747e-05, - "loss": 0.2796, + "learning_rate": 2.869361708766948e-05, + "loss": 0.1963, "step": 14955 }, { "epoch": 0.7, - "learning_rate": 1.8691106839810607e-05, - "loss": 0.0703, + "learning_rate": 2.869314901783346e-05, + "loss": 0.0924, "step": 14960 }, { "epoch": 0.7, - "learning_rate": 1.8690638038535467e-05, - "loss": 0.1278, + "learning_rate": 2.869268094799744e-05, + "loss": 0.1491, "step": 14965 }, { "epoch": 0.7, - "learning_rate": 1.8690169237260327e-05, - "loss": 0.067, + "learning_rate": 2.8692212878161424e-05, + "loss": 0.0754, "step": 14970 }, { "epoch": 0.7, - "learning_rate": 1.8689700435985187e-05, - "loss": 0.192, + "learning_rate": 2.8691744808325404e-05, + "loss": 0.1767, "step": 14975 }, { "epoch": 0.7, - "learning_rate": 1.8689231634710046e-05, - "loss": 0.2147, + "learning_rate": 2.8691276738489384e-05, + "loss": 0.2417, "step": 14980 }, { "epoch": 0.7, - "learning_rate": 1.8688762833434906e-05, - "loss": 0.2016, + "learning_rate": 2.8690808668653363e-05, + "loss": 0.1983, "step": 14985 }, { "epoch": 0.7, - "learning_rate": 1.868829403215977e-05, - "loss": 0.2057, + "learning_rate": 2.8690340598817347e-05, + "loss": 0.2171, "step": 14990 }, { "epoch": 0.7, - "learning_rate": 1.868782523088463e-05, - "loss": 0.2402, + "learning_rate": 2.8689872528981323e-05, + "loss": 0.3203, "step": 14995 }, { "epoch": 0.7, - "learning_rate": 1.868735642960949e-05, - "loss": 0.5123, + "learning_rate": 2.8689404459145303e-05, + "loss": 0.6601, "step": 15000 }, { "epoch": 0.7, - "learning_rate": 1.8686887628334353e-05, - "loss": 0.2725, + "learning_rate": 2.8688936389309286e-05, + "loss": 0.3103, "step": 15005 }, { "epoch": 0.7, - "learning_rate": 1.8686418827059213e-05, - "loss": 0.0648, + "learning_rate": 2.8688468319473266e-05, + "loss": 0.0684, "step": 15010 }, { "epoch": 0.7, - "learning_rate": 1.8685950025784073e-05, - "loss": 0.1405, + "learning_rate": 2.8688000249637246e-05, + "loss": 0.157, "step": 15015 }, { "epoch": 0.7, - "learning_rate": 1.8685481224508933e-05, - "loss": 0.0819, + "learning_rate": 2.8687532179801226e-05, + "loss": 0.1252, "step": 15020 }, { "epoch": 0.7, - "learning_rate": 1.8685012423233793e-05, - "loss": 0.1102, + "learning_rate": 2.868706410996521e-05, + "loss": 0.1809, "step": 15025 }, { "epoch": 0.7, - "learning_rate": 1.8684543621958653e-05, - "loss": 0.2164, + "learning_rate": 2.868659604012919e-05, + "loss": 0.1075, "step": 15030 }, { "epoch": 0.7, - "learning_rate": 1.8684074820683512e-05, - "loss": 0.2653, + "learning_rate": 2.868612797029317e-05, + "loss": 0.1389, "step": 15035 }, { "epoch": 0.7, - "learning_rate": 1.8683606019408372e-05, - "loss": 0.338, + "learning_rate": 2.8685659900457148e-05, + "loss": 0.1951, "step": 15040 }, { "epoch": 0.7, - "learning_rate": 1.8683137218133232e-05, - "loss": 0.3184, + "learning_rate": 2.868519183062113e-05, + "loss": 0.2842, "step": 15045 }, { "epoch": 0.7, - "learning_rate": 1.8682668416858096e-05, - "loss": 0.4224, + "learning_rate": 2.868472376078511e-05, + "loss": 0.51, "step": 15050 }, { "epoch": 0.7, - "learning_rate": 1.8682199615582956e-05, - "loss": 0.2217, + "learning_rate": 2.868425569094909e-05, + "loss": 0.2096, "step": 15055 }, { "epoch": 0.7, - "learning_rate": 1.8681730814307816e-05, - "loss": 0.1586, + "learning_rate": 2.868378762111307e-05, + "loss": 0.1046, "step": 15060 }, { "epoch": 0.7, - "learning_rate": 1.8681262013032675e-05, - "loss": 0.0931, + "learning_rate": 2.868331955127705e-05, + "loss": 0.0639, "step": 15065 }, { "epoch": 0.7, - "learning_rate": 1.868079321175754e-05, - "loss": 0.187, + "learning_rate": 2.868285148144103e-05, + "loss": 0.1285, "step": 15070 }, { "epoch": 0.7, - "learning_rate": 1.86803244104824e-05, - "loss": 0.1737, + "learning_rate": 2.868238341160501e-05, + "loss": 0.1548, "step": 15075 }, { "epoch": 0.7, - "learning_rate": 1.867985560920726e-05, - "loss": 0.2213, + "learning_rate": 2.8681915341768994e-05, + "loss": 0.2482, "step": 15080 }, { "epoch": 0.7, - "learning_rate": 1.867938680793212e-05, - "loss": 0.1752, + "learning_rate": 2.8681447271932973e-05, + "loss": 0.1444, "step": 15085 }, { "epoch": 0.7, - "learning_rate": 1.867891800665698e-05, - "loss": 0.2712, + "learning_rate": 2.8680979202096953e-05, + "loss": 0.2597, "step": 15090 }, { "epoch": 0.7, - "learning_rate": 1.8678449205381842e-05, - "loss": 0.4204, + "learning_rate": 2.8680511132260933e-05, + "loss": 0.2534, "step": 15095 }, { "epoch": 0.7, - "learning_rate": 1.8677980404106702e-05, - "loss": 0.5598, + "learning_rate": 2.8680043062424916e-05, + "loss": 0.4651, "step": 15100 }, { "epoch": 0.7, - "learning_rate": 1.8677511602831562e-05, - "loss": 0.2795, + "learning_rate": 2.8679574992588896e-05, + "loss": 0.2288, "step": 15105 }, { "epoch": 0.71, - "learning_rate": 1.867704280155642e-05, - "loss": 0.0626, + "learning_rate": 2.8679106922752876e-05, + "loss": 0.0982, "step": 15110 }, { "epoch": 0.71, - "learning_rate": 1.867657400028128e-05, - "loss": 0.062, + "learning_rate": 2.867863885291686e-05, + "loss": 0.0942, "step": 15115 }, { "epoch": 0.71, - "learning_rate": 1.867610519900614e-05, - "loss": 0.0722, + "learning_rate": 2.8678170783080835e-05, + "loss": 0.1433, "step": 15120 }, { "epoch": 0.71, - "learning_rate": 1.8675636397731e-05, - "loss": 0.1323, + "learning_rate": 2.8677702713244815e-05, + "loss": 0.1679, "step": 15125 }, { "epoch": 0.71, - "learning_rate": 1.8675167596455865e-05, - "loss": 0.2438, + "learning_rate": 2.8677234643408795e-05, + "loss": 0.1222, "step": 15130 }, { "epoch": 0.71, - "learning_rate": 1.8674698795180725e-05, - "loss": 0.1662, + "learning_rate": 2.8676766573572778e-05, + "loss": 0.2887, "step": 15135 }, { "epoch": 0.71, - "learning_rate": 1.8674229993905585e-05, - "loss": 0.3017, + "learning_rate": 2.8676298503736758e-05, + "loss": 0.2913, "step": 15140 }, { "epoch": 0.71, - "learning_rate": 1.8673761192630448e-05, - "loss": 0.2221, + "learning_rate": 2.8675830433900738e-05, + "loss": 0.4461, "step": 15145 }, { "epoch": 0.71, - "learning_rate": 1.8673292391355308e-05, - "loss": 0.4873, + "learning_rate": 2.8675362364064718e-05, + "loss": 0.5593, "step": 15150 }, { "epoch": 0.71, - "learning_rate": 1.8672823590080168e-05, - "loss": 0.2781, + "learning_rate": 2.86748942942287e-05, + "loss": 0.2748, "step": 15155 }, { "epoch": 0.71, - "learning_rate": 1.8672354788805028e-05, - "loss": 0.0843, + "learning_rate": 2.867442622439268e-05, + "loss": 0.0439, "step": 15160 }, { "epoch": 0.71, - "learning_rate": 1.8671885987529888e-05, - "loss": 0.1082, + "learning_rate": 2.867395815455666e-05, + "loss": 0.08, "step": 15165 }, { "epoch": 0.71, - "learning_rate": 1.8671417186254748e-05, - "loss": 0.1206, + "learning_rate": 2.8673490084720644e-05, + "loss": 0.1478, "step": 15170 }, { "epoch": 0.71, - "learning_rate": 1.8670948384979608e-05, - "loss": 0.1604, + "learning_rate": 2.8673022014884624e-05, + "loss": 0.1772, "step": 15175 }, { "epoch": 0.71, - "learning_rate": 1.8670479583704468e-05, - "loss": 0.1387, + "learning_rate": 2.8672553945048603e-05, + "loss": 0.2313, "step": 15180 }, { "epoch": 0.71, - "learning_rate": 1.8670010782429327e-05, - "loss": 0.256, + "learning_rate": 2.867208587521258e-05, + "loss": 0.1585, "step": 15185 }, { "epoch": 0.71, - "learning_rate": 1.866954198115419e-05, - "loss": 0.2167, + "learning_rate": 2.8671617805376563e-05, + "loss": 0.2704, "step": 15190 }, { "epoch": 0.71, - "learning_rate": 1.866907317987905e-05, - "loss": 0.3426, + "learning_rate": 2.8671149735540543e-05, + "loss": 0.3115, "step": 15195 }, { "epoch": 0.71, - "learning_rate": 1.866860437860391e-05, - "loss": 0.5178, + "learning_rate": 2.8670681665704523e-05, + "loss": 0.5487, "step": 15200 }, { "epoch": 0.71, - "learning_rate": 1.866813557732877e-05, - "loss": 0.2076, + "learning_rate": 2.8670213595868503e-05, + "loss": 0.2093, "step": 15205 }, { "epoch": 0.71, - "learning_rate": 1.8667666776053634e-05, - "loss": 0.0497, + "learning_rate": 2.8669745526032486e-05, + "loss": 0.0895, "step": 15210 }, { "epoch": 0.71, - "learning_rate": 1.8667197974778494e-05, - "loss": 0.1064, + "learning_rate": 2.8669277456196466e-05, + "loss": 0.0847, "step": 15215 }, { "epoch": 0.71, - "learning_rate": 1.8666729173503354e-05, - "loss": 0.2208, + "learning_rate": 2.8668809386360445e-05, + "loss": 0.0671, "step": 15220 }, { "epoch": 0.71, - "learning_rate": 1.8666260372228214e-05, - "loss": 0.1425, + "learning_rate": 2.8668341316524425e-05, + "loss": 0.1134, "step": 15225 }, { "epoch": 0.71, - "learning_rate": 1.8665791570953074e-05, - "loss": 0.1764, + "learning_rate": 2.866787324668841e-05, + "loss": 0.191, "step": 15230 }, { "epoch": 0.71, - "learning_rate": 1.8665322769677937e-05, - "loss": 0.1408, + "learning_rate": 2.8667405176852388e-05, + "loss": 0.2068, "step": 15235 }, { "epoch": 0.71, - "learning_rate": 1.8664853968402797e-05, - "loss": 0.1988, + "learning_rate": 2.8666937107016368e-05, + "loss": 0.2486, "step": 15240 }, { "epoch": 0.71, - "learning_rate": 1.8664385167127657e-05, - "loss": 0.3279, + "learning_rate": 2.8666469037180348e-05, + "loss": 0.3443, "step": 15245 }, { "epoch": 0.71, - "learning_rate": 1.8663916365852517e-05, - "loss": 0.4128, + "learning_rate": 2.8666000967344328e-05, + "loss": 0.4892, "step": 15250 }, { "epoch": 0.71, - "learning_rate": 1.8663447564577377e-05, - "loss": 0.2538, + "learning_rate": 2.8665532897508307e-05, + "loss": 0.2362, "step": 15255 }, { "epoch": 0.71, - "learning_rate": 1.8662978763302237e-05, - "loss": 0.101, + "learning_rate": 2.8665064827672287e-05, + "loss": 0.1597, "step": 15260 }, { "epoch": 0.71, - "learning_rate": 1.8662509962027097e-05, - "loss": 0.1287, + "learning_rate": 2.866459675783627e-05, + "loss": 0.061, "step": 15265 }, { "epoch": 0.71, - "learning_rate": 1.8662041160751956e-05, - "loss": 0.1088, + "learning_rate": 2.866412868800025e-05, + "loss": 0.0928, "step": 15270 }, { "epoch": 0.71, - "learning_rate": 1.866157235947682e-05, - "loss": 0.1514, + "learning_rate": 2.866366061816423e-05, + "loss": 0.1497, "step": 15275 }, { "epoch": 0.71, - "learning_rate": 1.866110355820168e-05, - "loss": 0.2464, + "learning_rate": 2.866319254832821e-05, + "loss": 0.3184, "step": 15280 }, { "epoch": 0.71, - "learning_rate": 1.866063475692654e-05, - "loss": 0.2969, + "learning_rate": 2.8662724478492193e-05, + "loss": 0.2595, "step": 15285 }, { "epoch": 0.71, - "learning_rate": 1.8660165955651403e-05, - "loss": 0.2988, + "learning_rate": 2.8662256408656173e-05, + "loss": 0.3638, "step": 15290 }, { "epoch": 0.71, - "learning_rate": 1.8659697154376263e-05, - "loss": 0.3855, + "learning_rate": 2.8661788338820153e-05, + "loss": 0.2282, "step": 15295 }, { "epoch": 0.71, - "learning_rate": 1.8659228353101123e-05, - "loss": 0.6949, + "learning_rate": 2.8661320268984136e-05, + "loss": 0.3601, "step": 15300 }, { "epoch": 0.71, - "learning_rate": 1.8658759551825983e-05, - "loss": 0.2056, + "learning_rate": 2.8660852199148116e-05, + "loss": 0.256, "step": 15305 }, { "epoch": 0.71, - "learning_rate": 1.8658290750550843e-05, - "loss": 0.0848, + "learning_rate": 2.8660384129312092e-05, + "loss": 0.0989, "step": 15310 }, { "epoch": 0.71, - "learning_rate": 1.8657821949275703e-05, - "loss": 0.1188, + "learning_rate": 2.8659916059476072e-05, + "loss": 0.0888, "step": 15315 }, { "epoch": 0.71, - "learning_rate": 1.8657353148000563e-05, - "loss": 0.0881, + "learning_rate": 2.8659447989640055e-05, + "loss": 0.093, "step": 15320 }, { "epoch": 0.72, - "learning_rate": 1.8656884346725426e-05, - "loss": 0.1742, + "learning_rate": 2.8658979919804035e-05, + "loss": 0.1509, "step": 15325 }, { "epoch": 0.72, - "learning_rate": 1.8656415545450286e-05, - "loss": 0.1222, + "learning_rate": 2.8658511849968015e-05, + "loss": 0.192, "step": 15330 }, { "epoch": 0.72, - "learning_rate": 1.8655946744175146e-05, - "loss": 0.1678, + "learning_rate": 2.8658043780131995e-05, + "loss": 0.1538, "step": 15335 }, { "epoch": 0.72, - "learning_rate": 1.8655477942900006e-05, - "loss": 0.1992, + "learning_rate": 2.8657575710295978e-05, + "loss": 0.28, "step": 15340 }, { "epoch": 0.72, - "learning_rate": 1.8655009141624866e-05, - "loss": 0.3601, + "learning_rate": 2.8657107640459958e-05, + "loss": 0.3267, "step": 15345 }, { "epoch": 0.72, - "learning_rate": 1.8654540340349726e-05, - "loss": 0.5375, + "learning_rate": 2.8656639570623938e-05, + "loss": 0.3868, "step": 15350 }, { "epoch": 0.72, - "learning_rate": 1.865407153907459e-05, - "loss": 0.2414, + "learning_rate": 2.865617150078792e-05, + "loss": 0.1887, "step": 15355 }, { "epoch": 0.72, - "learning_rate": 1.865360273779945e-05, - "loss": 0.0649, + "learning_rate": 2.86557034309519e-05, + "loss": 0.0915, "step": 15360 }, { "epoch": 0.72, - "learning_rate": 1.865313393652431e-05, - "loss": 0.0756, + "learning_rate": 2.865523536111588e-05, + "loss": 0.1361, "step": 15365 }, { "epoch": 0.72, - "learning_rate": 1.865266513524917e-05, - "loss": 0.1562, + "learning_rate": 2.865476729127986e-05, + "loss": 0.1772, "step": 15370 }, { "epoch": 0.72, - "learning_rate": 1.8652196333974032e-05, - "loss": 0.1215, + "learning_rate": 2.865429922144384e-05, + "loss": 0.0874, "step": 15375 }, { "epoch": 0.72, - "learning_rate": 1.8651727532698892e-05, - "loss": 0.1505, + "learning_rate": 2.865383115160782e-05, + "loss": 0.1635, "step": 15380 }, { "epoch": 0.72, - "learning_rate": 1.8651258731423752e-05, - "loss": 0.197, + "learning_rate": 2.86533630817718e-05, + "loss": 0.1755, "step": 15385 }, { "epoch": 0.72, - "learning_rate": 1.8650789930148612e-05, - "loss": 0.2759, + "learning_rate": 2.865289501193578e-05, + "loss": 0.3083, "step": 15390 }, { "epoch": 0.72, - "learning_rate": 1.8650321128873472e-05, - "loss": 0.2041, + "learning_rate": 2.8652426942099763e-05, + "loss": 0.3658, "step": 15395 }, { "epoch": 0.72, - "learning_rate": 1.8649852327598332e-05, - "loss": 0.4761, + "learning_rate": 2.8651958872263743e-05, + "loss": 0.4214, "step": 15400 }, { "epoch": 0.72, - "learning_rate": 1.864938352632319e-05, - "loss": 0.1988, + "learning_rate": 2.8651490802427722e-05, + "loss": 0.1958, "step": 15405 }, { "epoch": 0.72, - "learning_rate": 1.864891472504805e-05, - "loss": 0.1376, + "learning_rate": 2.8651022732591702e-05, + "loss": 0.0651, "step": 15410 }, { "epoch": 0.72, - "learning_rate": 1.864844592377291e-05, - "loss": 0.0872, + "learning_rate": 2.8650554662755685e-05, + "loss": 0.0996, "step": 15415 }, { "epoch": 0.72, - "learning_rate": 1.8647977122497775e-05, - "loss": 0.0863, + "learning_rate": 2.8650086592919665e-05, + "loss": 0.1533, "step": 15420 }, { "epoch": 0.72, - "learning_rate": 1.8647508321222635e-05, - "loss": 0.135, + "learning_rate": 2.8649618523083645e-05, + "loss": 0.151, "step": 15425 }, { "epoch": 0.72, - "learning_rate": 1.8647039519947495e-05, - "loss": 0.0906, + "learning_rate": 2.8649150453247628e-05, + "loss": 0.2203, "step": 15430 }, { "epoch": 0.72, - "learning_rate": 1.8646570718672358e-05, - "loss": 0.2675, + "learning_rate": 2.8648682383411605e-05, + "loss": 0.242, "step": 15435 }, { "epoch": 0.72, - "learning_rate": 1.8646101917397218e-05, - "loss": 0.1789, + "learning_rate": 2.8648214313575584e-05, + "loss": 0.1803, "step": 15440 }, { "epoch": 0.72, - "learning_rate": 1.8645633116122078e-05, - "loss": 0.3858, + "learning_rate": 2.8647746243739564e-05, + "loss": 0.3412, "step": 15445 }, { "epoch": 0.72, - "learning_rate": 1.8645164314846938e-05, - "loss": 0.6604, + "learning_rate": 2.8647278173903547e-05, + "loss": 0.4736, "step": 15450 }, { "epoch": 0.72, - "learning_rate": 1.8644695513571798e-05, - "loss": 0.1742, + "learning_rate": 2.8646810104067527e-05, + "loss": 0.2068, "step": 15455 }, { "epoch": 0.72, - "learning_rate": 1.8644226712296658e-05, - "loss": 0.1072, + "learning_rate": 2.8646342034231507e-05, + "loss": 0.1948, "step": 15460 }, { "epoch": 0.72, - "learning_rate": 1.864375791102152e-05, - "loss": 0.1201, + "learning_rate": 2.8645873964395487e-05, + "loss": 0.0944, "step": 15465 }, { "epoch": 0.72, - "learning_rate": 1.864328910974638e-05, - "loss": 0.1129, + "learning_rate": 2.864540589455947e-05, + "loss": 0.1068, "step": 15470 }, { "epoch": 0.72, - "learning_rate": 1.864282030847124e-05, - "loss": 0.073, + "learning_rate": 2.864493782472345e-05, + "loss": 0.2815, "step": 15475 }, { "epoch": 0.72, - "learning_rate": 1.86423515071961e-05, - "loss": 0.2668, + "learning_rate": 2.864446975488743e-05, + "loss": 0.1155, "step": 15480 }, { "epoch": 0.72, - "learning_rate": 1.864188270592096e-05, - "loss": 0.2092, + "learning_rate": 2.8644001685051413e-05, + "loss": 0.1846, "step": 15485 }, { "epoch": 0.72, - "learning_rate": 1.864141390464582e-05, - "loss": 0.252, + "learning_rate": 2.8643533615215393e-05, + "loss": 0.2807, "step": 15490 }, { "epoch": 0.72, - "learning_rate": 1.8640945103370684e-05, - "loss": 0.2868, + "learning_rate": 2.8643065545379373e-05, + "loss": 0.3434, "step": 15495 }, { "epoch": 0.72, - "learning_rate": 1.8640476302095544e-05, - "loss": 0.6121, + "learning_rate": 2.864259747554335e-05, + "loss": 0.6789, "step": 15500 }, { "epoch": 0.72, - "learning_rate": 1.8640007500820404e-05, - "loss": 0.1869, + "learning_rate": 2.8642129405707332e-05, + "loss": 0.2136, "step": 15505 }, { "epoch": 0.72, - "learning_rate": 1.8639538699545264e-05, - "loss": 0.0788, + "learning_rate": 2.8641661335871312e-05, + "loss": 0.0901, "step": 15510 }, { "epoch": 0.72, - "learning_rate": 1.8639069898270127e-05, - "loss": 0.0961, + "learning_rate": 2.8641193266035292e-05, + "loss": 0.1024, "step": 15515 }, { "epoch": 0.72, - "learning_rate": 1.8638601096994987e-05, - "loss": 0.1343, + "learning_rate": 2.864072519619927e-05, + "loss": 0.1646, "step": 15520 }, { "epoch": 0.72, - "learning_rate": 1.8638132295719847e-05, - "loss": 0.1213, + "learning_rate": 2.8640257126363255e-05, + "loss": 0.223, "step": 15525 }, { "epoch": 0.72, - "learning_rate": 1.8637663494444707e-05, - "loss": 0.2465, + "learning_rate": 2.8639789056527235e-05, + "loss": 0.1807, "step": 15530 }, { "epoch": 0.72, - "learning_rate": 1.8637194693169567e-05, - "loss": 0.2933, + "learning_rate": 2.8639320986691215e-05, + "loss": 0.1904, "step": 15535 }, { "epoch": 0.73, - "learning_rate": 1.8636725891894427e-05, - "loss": 0.205, + "learning_rate": 2.8638852916855198e-05, + "loss": 0.236, "step": 15540 }, { "epoch": 0.73, - "learning_rate": 1.8636257090619287e-05, - "loss": 0.3036, + "learning_rate": 2.8638384847019178e-05, + "loss": 0.2811, "step": 15545 }, { "epoch": 0.73, - "learning_rate": 1.8635788289344147e-05, - "loss": 0.4355, + "learning_rate": 2.8637916777183157e-05, + "loss": 0.6167, "step": 15550 }, { "epoch": 0.73, - "learning_rate": 1.8635319488069007e-05, - "loss": 0.2686, + "learning_rate": 2.8637448707347137e-05, + "loss": 0.1889, "step": 15555 }, { "epoch": 0.73, - "learning_rate": 1.863485068679387e-05, - "loss": 0.0397, + "learning_rate": 2.863698063751112e-05, + "loss": 0.1408, "step": 15560 }, { "epoch": 0.73, - "learning_rate": 1.863438188551873e-05, - "loss": 0.2227, + "learning_rate": 2.8636512567675097e-05, + "loss": 0.0766, "step": 15565 }, { "epoch": 0.73, - "learning_rate": 1.863391308424359e-05, - "loss": 0.1172, + "learning_rate": 2.8636044497839077e-05, + "loss": 0.1584, "step": 15570 }, { "epoch": 0.73, - "learning_rate": 1.8633444282968453e-05, - "loss": 0.1019, + "learning_rate": 2.8635576428003056e-05, + "loss": 0.1382, "step": 15575 }, { "epoch": 0.73, - "learning_rate": 1.8632975481693313e-05, - "loss": 0.184, + "learning_rate": 2.863510835816704e-05, + "loss": 0.1553, "step": 15580 }, { "epoch": 0.73, - "learning_rate": 1.8632506680418173e-05, - "loss": 0.2066, + "learning_rate": 2.863464028833102e-05, + "loss": 0.1733, "step": 15585 }, { "epoch": 0.73, - "learning_rate": 1.8632037879143033e-05, - "loss": 0.1915, + "learning_rate": 2.8634172218495e-05, + "loss": 0.1479, "step": 15590 }, { "epoch": 0.73, - "learning_rate": 1.8631569077867893e-05, - "loss": 0.2898, + "learning_rate": 2.863370414865898e-05, + "loss": 0.4117, "step": 15595 }, { "epoch": 0.73, - "learning_rate": 1.8631100276592753e-05, - "loss": 0.4485, + "learning_rate": 2.8633236078822962e-05, + "loss": 0.3843, "step": 15600 }, { "epoch": 0.73, - "learning_rate": 1.8630631475317616e-05, - "loss": 0.2207, + "learning_rate": 2.8632768008986942e-05, + "loss": 0.2137, "step": 15605 }, { "epoch": 0.73, - "learning_rate": 1.8630162674042476e-05, - "loss": 0.1243, + "learning_rate": 2.8632299939150922e-05, + "loss": 0.0529, "step": 15610 }, { "epoch": 0.73, - "learning_rate": 1.8629693872767336e-05, - "loss": 0.0872, + "learning_rate": 2.8631831869314905e-05, + "loss": 0.1212, "step": 15615 }, { "epoch": 0.73, - "learning_rate": 1.8629225071492196e-05, - "loss": 0.1143, + "learning_rate": 2.8631363799478885e-05, + "loss": 0.1558, "step": 15620 }, { "epoch": 0.73, - "learning_rate": 1.8628756270217056e-05, - "loss": 0.065, + "learning_rate": 2.863089572964286e-05, + "loss": 0.1929, "step": 15625 }, { "epoch": 0.73, - "learning_rate": 1.8628287468941916e-05, - "loss": 0.2135, + "learning_rate": 2.863042765980684e-05, + "loss": 0.1506, "step": 15630 }, { "epoch": 0.73, - "learning_rate": 1.8627818667666776e-05, - "loss": 0.2308, + "learning_rate": 2.8629959589970824e-05, + "loss": 0.2172, "step": 15635 }, { "epoch": 0.73, - "learning_rate": 1.862734986639164e-05, - "loss": 0.2845, + "learning_rate": 2.8629491520134804e-05, + "loss": 0.2056, "step": 15640 }, { "epoch": 0.73, - "learning_rate": 1.86268810651165e-05, - "loss": 0.3182, + "learning_rate": 2.8629023450298784e-05, + "loss": 0.3093, "step": 15645 }, { "epoch": 0.73, - "learning_rate": 1.862641226384136e-05, - "loss": 0.6225, + "learning_rate": 2.8628555380462764e-05, + "loss": 0.4196, "step": 15650 }, { "epoch": 0.73, - "learning_rate": 1.8625943462566222e-05, - "loss": 0.2644, + "learning_rate": 2.8628087310626747e-05, + "loss": 0.2582, "step": 15655 }, { "epoch": 0.73, - "learning_rate": 1.8625474661291082e-05, - "loss": 0.086, + "learning_rate": 2.8627619240790727e-05, + "loss": 0.0894, "step": 15660 }, { "epoch": 0.73, - "learning_rate": 1.8625005860015942e-05, - "loss": 0.157, + "learning_rate": 2.8627151170954707e-05, + "loss": 0.1122, "step": 15665 }, { "epoch": 0.73, - "learning_rate": 1.8624537058740802e-05, - "loss": 0.1244, + "learning_rate": 2.862668310111869e-05, + "loss": 0.1188, "step": 15670 }, { "epoch": 0.73, - "learning_rate": 1.8624068257465662e-05, - "loss": 0.1208, + "learning_rate": 2.862621503128267e-05, + "loss": 0.1679, "step": 15675 }, { "epoch": 0.73, - "learning_rate": 1.8623599456190522e-05, - "loss": 0.1678, + "learning_rate": 2.862574696144665e-05, + "loss": 0.1441, "step": 15680 }, { "epoch": 0.73, - "learning_rate": 1.8623130654915382e-05, - "loss": 0.2024, + "learning_rate": 2.862527889161063e-05, + "loss": 0.1572, "step": 15685 }, { "epoch": 0.73, - "learning_rate": 1.8622661853640242e-05, - "loss": 0.3057, + "learning_rate": 2.862481082177461e-05, + "loss": 0.3004, "step": 15690 }, { "epoch": 0.73, - "learning_rate": 1.86221930523651e-05, - "loss": 0.2436, + "learning_rate": 2.862434275193859e-05, + "loss": 0.1968, "step": 15695 }, { "epoch": 0.73, - "learning_rate": 1.8621724251089965e-05, - "loss": 0.4505, + "learning_rate": 2.862387468210257e-05, + "loss": 0.3735, "step": 15700 }, { "epoch": 0.73, - "learning_rate": 1.8621255449814825e-05, - "loss": 0.2134, + "learning_rate": 2.862340661226655e-05, + "loss": 0.1962, "step": 15705 }, { "epoch": 0.73, - "learning_rate": 1.8620786648539685e-05, - "loss": 0.0503, + "learning_rate": 2.8622938542430532e-05, + "loss": 0.0614, "step": 15710 }, { "epoch": 0.73, - "learning_rate": 1.8620317847264545e-05, - "loss": 0.103, + "learning_rate": 2.862247047259451e-05, + "loss": 0.2023, "step": 15715 }, { "epoch": 0.73, - "learning_rate": 1.8619849045989408e-05, - "loss": 0.132, + "learning_rate": 2.862200240275849e-05, + "loss": 0.1172, "step": 15720 }, { "epoch": 0.73, - "learning_rate": 1.8619380244714268e-05, - "loss": 0.2023, + "learning_rate": 2.8621534332922475e-05, + "loss": 0.1431, "step": 15725 }, { "epoch": 0.73, - "learning_rate": 1.8618911443439128e-05, - "loss": 0.1633, + "learning_rate": 2.8621066263086455e-05, + "loss": 0.1741, "step": 15730 }, { "epoch": 0.73, - "learning_rate": 1.8618442642163988e-05, - "loss": 0.2361, + "learning_rate": 2.8620598193250434e-05, + "loss": 0.2025, "step": 15735 }, { "epoch": 0.73, - "learning_rate": 1.8617973840888848e-05, - "loss": 0.2142, + "learning_rate": 2.8620130123414414e-05, + "loss": 0.3104, "step": 15740 }, { "epoch": 0.73, - "learning_rate": 1.861750503961371e-05, - "loss": 0.2203, + "learning_rate": 2.8619662053578397e-05, + "loss": 0.2903, "step": 15745 }, { "epoch": 0.73, - "learning_rate": 1.861703623833857e-05, - "loss": 0.4832, + "learning_rate": 2.8619193983742377e-05, + "loss": 0.5286, "step": 15750 }, { "epoch": 0.74, - "learning_rate": 1.861656743706343e-05, - "loss": 0.171, + "learning_rate": 2.8618725913906354e-05, + "loss": 0.2759, "step": 15755 }, { "epoch": 0.74, - "learning_rate": 1.861609863578829e-05, - "loss": 0.0748, + "learning_rate": 2.8618257844070333e-05, + "loss": 0.0952, "step": 15760 }, { "epoch": 0.74, - "learning_rate": 1.861562983451315e-05, - "loss": 0.0726, + "learning_rate": 2.8617789774234317e-05, + "loss": 0.1035, "step": 15765 }, { "epoch": 0.74, - "learning_rate": 1.861516103323801e-05, - "loss": 0.1165, + "learning_rate": 2.8617321704398296e-05, + "loss": 0.0883, "step": 15770 }, { "epoch": 0.74, - "learning_rate": 1.861469223196287e-05, - "loss": 0.1643, + "learning_rate": 2.8616853634562276e-05, + "loss": 0.1798, "step": 15775 }, { "epoch": 0.74, - "learning_rate": 1.861422343068773e-05, - "loss": 0.2114, + "learning_rate": 2.8616385564726256e-05, + "loss": 0.2773, "step": 15780 }, { "epoch": 0.74, - "learning_rate": 1.8613754629412594e-05, - "loss": 0.1382, + "learning_rate": 2.861591749489024e-05, + "loss": 0.1251, "step": 15785 }, { "epoch": 0.74, - "learning_rate": 1.8613285828137454e-05, - "loss": 0.1504, + "learning_rate": 2.861544942505422e-05, + "loss": 0.1566, "step": 15790 }, { "epoch": 0.74, - "learning_rate": 1.8612817026862314e-05, - "loss": 0.3677, + "learning_rate": 2.86149813552182e-05, + "loss": 0.3412, "step": 15795 }, { "epoch": 0.74, - "learning_rate": 1.8612348225587177e-05, - "loss": 0.7503, + "learning_rate": 2.8614513285382182e-05, + "loss": 0.5419, "step": 15800 }, { "epoch": 0.74, - "learning_rate": 1.8611879424312037e-05, - "loss": 0.1473, + "learning_rate": 2.8614045215546162e-05, + "loss": 0.1462, "step": 15805 }, { "epoch": 0.74, - "learning_rate": 1.8611410623036897e-05, - "loss": 0.0413, + "learning_rate": 2.8613577145710142e-05, + "loss": 0.0858, "step": 15810 }, { "epoch": 0.74, - "learning_rate": 1.8610941821761757e-05, - "loss": 0.1276, + "learning_rate": 2.8613109075874118e-05, + "loss": 0.0801, "step": 15815 }, { "epoch": 0.74, - "learning_rate": 1.8610473020486617e-05, - "loss": 0.12, + "learning_rate": 2.86126410060381e-05, + "loss": 0.1397, "step": 15820 }, { "epoch": 0.74, - "learning_rate": 1.8610004219211477e-05, - "loss": 0.1935, + "learning_rate": 2.861217293620208e-05, + "loss": 0.0733, "step": 15825 }, { "epoch": 0.74, - "learning_rate": 1.8609535417936337e-05, - "loss": 0.1424, + "learning_rate": 2.861170486636606e-05, + "loss": 0.1209, "step": 15830 }, { "epoch": 0.74, - "learning_rate": 1.8609066616661197e-05, - "loss": 0.2974, + "learning_rate": 2.861123679653004e-05, + "loss": 0.3169, "step": 15835 }, { "epoch": 0.74, - "learning_rate": 1.860859781538606e-05, - "loss": 0.1485, + "learning_rate": 2.8610768726694024e-05, + "loss": 0.1338, "step": 15840 }, { "epoch": 0.74, - "learning_rate": 1.860812901411092e-05, - "loss": 0.3909, + "learning_rate": 2.8610300656858004e-05, + "loss": 0.4793, "step": 15845 }, { "epoch": 0.74, - "learning_rate": 1.860766021283578e-05, - "loss": 0.4972, + "learning_rate": 2.8609832587021984e-05, + "loss": 0.4244, "step": 15850 }, { "epoch": 0.74, - "learning_rate": 1.860719141156064e-05, - "loss": 0.2334, + "learning_rate": 2.8609364517185967e-05, + "loss": 0.1958, "step": 15855 }, { "epoch": 0.74, - "learning_rate": 1.86067226102855e-05, - "loss": 0.059, + "learning_rate": 2.8608896447349947e-05, + "loss": 0.1052, "step": 15860 }, { "epoch": 0.74, - "learning_rate": 1.8606253809010363e-05, - "loss": 0.0805, + "learning_rate": 2.8608428377513927e-05, + "loss": 0.1194, "step": 15865 }, { "epoch": 0.74, - "learning_rate": 1.8605785007735223e-05, - "loss": 0.1012, + "learning_rate": 2.8607960307677906e-05, + "loss": 0.1594, "step": 15870 }, { "epoch": 0.74, - "learning_rate": 1.8605316206460083e-05, - "loss": 0.0957, + "learning_rate": 2.860749223784189e-05, + "loss": 0.1061, "step": 15875 }, { "epoch": 0.74, - "learning_rate": 1.8604847405184943e-05, - "loss": 0.1382, + "learning_rate": 2.8607024168005866e-05, + "loss": 0.1318, "step": 15880 }, { "epoch": 0.74, - "learning_rate": 1.8604378603909806e-05, - "loss": 0.2678, + "learning_rate": 2.8606556098169846e-05, + "loss": 0.2331, "step": 15885 }, { "epoch": 0.74, - "learning_rate": 1.8603909802634666e-05, - "loss": 0.2458, + "learning_rate": 2.8606088028333826e-05, + "loss": 0.304, "step": 15890 }, { "epoch": 0.74, - "learning_rate": 1.8603441001359526e-05, - "loss": 0.3033, + "learning_rate": 2.860561995849781e-05, + "loss": 0.318, "step": 15895 }, { "epoch": 0.74, - "learning_rate": 1.8602972200084386e-05, - "loss": 0.5778, + "learning_rate": 2.860515188866179e-05, + "loss": 0.4271, "step": 15900 }, { "epoch": 0.74, - "learning_rate": 1.8602503398809246e-05, - "loss": 0.2027, + "learning_rate": 2.860468381882577e-05, + "loss": 0.2185, "step": 15905 }, { "epoch": 0.74, - "learning_rate": 1.8602034597534106e-05, - "loss": 0.0999, + "learning_rate": 2.860421574898975e-05, + "loss": 0.1382, "step": 15910 }, { "epoch": 0.74, - "learning_rate": 1.8601565796258966e-05, - "loss": 0.0957, + "learning_rate": 2.860374767915373e-05, + "loss": 0.1087, "step": 15915 }, { "epoch": 0.74, - "learning_rate": 1.8601096994983826e-05, - "loss": 0.2037, + "learning_rate": 2.860327960931771e-05, + "loss": 0.2021, "step": 15920 }, { "epoch": 0.74, - "learning_rate": 1.860062819370869e-05, - "loss": 0.1079, + "learning_rate": 2.860281153948169e-05, + "loss": 0.1516, "step": 15925 }, { "epoch": 0.74, - "learning_rate": 1.860015939243355e-05, - "loss": 0.1333, + "learning_rate": 2.8602343469645674e-05, + "loss": 0.1961, "step": 15930 }, { "epoch": 0.74, - "learning_rate": 1.859969059115841e-05, - "loss": 0.2305, + "learning_rate": 2.8601875399809654e-05, + "loss": 0.2074, "step": 15935 }, { "epoch": 0.74, - "learning_rate": 1.859922178988327e-05, - "loss": 0.2528, + "learning_rate": 2.8601407329973634e-05, + "loss": 0.2017, "step": 15940 }, { "epoch": 0.74, - "learning_rate": 1.8598752988608132e-05, - "loss": 0.3822, + "learning_rate": 2.860093926013761e-05, + "loss": 0.1713, "step": 15945 }, { "epoch": 0.74, - "learning_rate": 1.8598284187332992e-05, - "loss": 0.5118, + "learning_rate": 2.8600471190301594e-05, + "loss": 0.4773, "step": 15950 }, { "epoch": 0.74, - "learning_rate": 1.8597815386057852e-05, - "loss": 0.3077, + "learning_rate": 2.8600003120465573e-05, + "loss": 0.229, "step": 15955 }, { "epoch": 0.74, - "learning_rate": 1.8597346584782712e-05, - "loss": 0.0664, + "learning_rate": 2.8599535050629553e-05, + "loss": 0.0675, "step": 15960 }, { "epoch": 0.74, - "learning_rate": 1.8596877783507572e-05, - "loss": 0.1011, + "learning_rate": 2.8599066980793536e-05, + "loss": 0.1082, "step": 15965 }, { "epoch": 0.75, - "learning_rate": 1.8596408982232432e-05, - "loss": 0.1759, + "learning_rate": 2.8598598910957516e-05, + "loss": 0.1686, "step": 15970 }, { "epoch": 0.75, - "learning_rate": 1.8595940180957295e-05, - "loss": 0.1561, + "learning_rate": 2.8598130841121496e-05, + "loss": 0.1475, "step": 15975 }, { "epoch": 0.75, - "learning_rate": 1.8595471379682155e-05, - "loss": 0.1241, + "learning_rate": 2.8597662771285476e-05, + "loss": 0.1345, "step": 15980 }, { "epoch": 0.75, - "learning_rate": 1.8595002578407015e-05, - "loss": 0.1386, + "learning_rate": 2.859719470144946e-05, + "loss": 0.1773, "step": 15985 }, { "epoch": 0.75, - "learning_rate": 1.8594533777131875e-05, - "loss": 0.2541, + "learning_rate": 2.859672663161344e-05, + "loss": 0.1989, "step": 15990 }, { "epoch": 0.75, - "learning_rate": 1.8594064975856735e-05, - "loss": 0.2858, + "learning_rate": 2.859625856177742e-05, + "loss": 0.2659, "step": 15995 }, { "epoch": 0.75, - "learning_rate": 1.8593596174581595e-05, - "loss": 0.552, + "learning_rate": 2.85957904919414e-05, + "loss": 0.5006, "step": 16000 }, { "epoch": 0.75, - "learning_rate": 1.8593127373306458e-05, - "loss": 0.2186, + "learning_rate": 2.859532242210538e-05, + "loss": 0.2812, "step": 16005 }, { "epoch": 0.75, - "learning_rate": 1.8592658572031318e-05, - "loss": 0.0791, + "learning_rate": 2.8594854352269358e-05, + "loss": 0.0904, "step": 16010 }, { "epoch": 0.75, - "learning_rate": 1.8592189770756178e-05, - "loss": 0.1378, + "learning_rate": 2.8594386282433338e-05, + "loss": 0.1094, "step": 16015 }, { "epoch": 0.75, - "learning_rate": 1.8591720969481038e-05, - "loss": 0.202, + "learning_rate": 2.8593918212597318e-05, + "loss": 0.1808, "step": 16020 }, { "epoch": 0.75, - "learning_rate": 1.85912521682059e-05, - "loss": 0.1665, + "learning_rate": 2.85934501427613e-05, + "loss": 0.1391, "step": 16025 }, { "epoch": 0.75, - "learning_rate": 1.859078336693076e-05, - "loss": 0.2158, + "learning_rate": 2.859298207292528e-05, + "loss": 0.1167, "step": 16030 }, { "epoch": 0.75, - "learning_rate": 1.859031456565562e-05, - "loss": 0.2452, + "learning_rate": 2.859251400308926e-05, + "loss": 0.1936, "step": 16035 }, { "epoch": 0.75, - "learning_rate": 1.858984576438048e-05, - "loss": 0.1764, + "learning_rate": 2.8592045933253244e-05, + "loss": 0.2442, "step": 16040 }, { "epoch": 0.75, - "learning_rate": 1.858937696310534e-05, - "loss": 0.3921, + "learning_rate": 2.8591577863417224e-05, + "loss": 0.2471, "step": 16045 }, { "epoch": 0.75, - "learning_rate": 1.85889081618302e-05, - "loss": 0.5582, + "learning_rate": 2.8591109793581204e-05, + "loss": 0.514, "step": 16050 }, { "epoch": 0.75, - "learning_rate": 1.858843936055506e-05, - "loss": 0.1606, + "learning_rate": 2.8590641723745183e-05, + "loss": 0.2005, "step": 16055 }, { "epoch": 0.75, - "learning_rate": 1.858797055927992e-05, - "loss": 0.089, + "learning_rate": 2.8590173653909167e-05, + "loss": 0.0913, "step": 16060 }, { "epoch": 0.75, - "learning_rate": 1.858750175800478e-05, - "loss": 0.0914, + "learning_rate": 2.8589705584073146e-05, + "loss": 0.0698, "step": 16065 }, { "epoch": 0.75, - "learning_rate": 1.8587032956729644e-05, - "loss": 0.1239, + "learning_rate": 2.8589237514237123e-05, + "loss": 0.1241, "step": 16070 }, { "epoch": 0.75, - "learning_rate": 1.8586564155454504e-05, - "loss": 0.1263, + "learning_rate": 2.8588769444401103e-05, + "loss": 0.1931, "step": 16075 }, { "epoch": 0.75, - "learning_rate": 1.8586095354179364e-05, - "loss": 0.1562, + "learning_rate": 2.8588301374565086e-05, + "loss": 0.102, "step": 16080 }, { "epoch": 0.75, - "learning_rate": 1.8585626552904227e-05, - "loss": 0.3022, + "learning_rate": 2.8587833304729066e-05, + "loss": 0.1887, "step": 16085 }, { "epoch": 0.75, - "learning_rate": 1.8585157751629087e-05, - "loss": 0.2022, + "learning_rate": 2.8587365234893045e-05, + "loss": 0.3126, "step": 16090 }, { "epoch": 0.75, - "learning_rate": 1.8584688950353947e-05, - "loss": 0.3969, + "learning_rate": 2.858689716505703e-05, + "loss": 0.295, "step": 16095 }, { "epoch": 0.75, - "learning_rate": 1.8584220149078807e-05, - "loss": 0.5977, + "learning_rate": 2.858642909522101e-05, + "loss": 0.5638, "step": 16100 }, { "epoch": 0.75, - "learning_rate": 1.8583751347803667e-05, - "loss": 0.2297, + "learning_rate": 2.8585961025384988e-05, + "loss": 0.2382, "step": 16105 }, { "epoch": 0.75, - "learning_rate": 1.8583282546528527e-05, - "loss": 0.0994, + "learning_rate": 2.8585492955548968e-05, + "loss": 0.0758, "step": 16110 }, { "epoch": 0.75, - "learning_rate": 1.858281374525339e-05, - "loss": 0.0737, + "learning_rate": 2.858502488571295e-05, + "loss": 0.1077, "step": 16115 }, { "epoch": 0.75, - "learning_rate": 1.858234494397825e-05, - "loss": 0.097, + "learning_rate": 2.858455681587693e-05, + "loss": 0.1388, "step": 16120 }, { "epoch": 0.75, - "learning_rate": 1.858187614270311e-05, - "loss": 0.1379, + "learning_rate": 2.858408874604091e-05, + "loss": 0.1855, "step": 16125 }, { "epoch": 0.75, - "learning_rate": 1.858140734142797e-05, - "loss": 0.1508, + "learning_rate": 2.8583620676204887e-05, + "loss": 0.1516, "step": 16130 }, { "epoch": 0.75, - "learning_rate": 1.858093854015283e-05, - "loss": 0.2133, + "learning_rate": 2.858315260636887e-05, + "loss": 0.1732, "step": 16135 }, { "epoch": 0.75, - "learning_rate": 1.858046973887769e-05, - "loss": 0.2874, + "learning_rate": 2.858268453653285e-05, + "loss": 0.2748, "step": 16140 }, { "epoch": 0.75, - "learning_rate": 1.858000093760255e-05, - "loss": 0.2308, + "learning_rate": 2.858221646669683e-05, + "loss": 0.4037, "step": 16145 }, { "epoch": 0.75, - "learning_rate": 1.8579532136327413e-05, - "loss": 0.5109, + "learning_rate": 2.8581748396860813e-05, + "loss": 0.5582, "step": 16150 }, { "epoch": 0.75, - "learning_rate": 1.8579063335052273e-05, - "loss": 0.2218, + "learning_rate": 2.8581280327024793e-05, + "loss": 0.1921, "step": 16155 }, { "epoch": 0.75, - "learning_rate": 1.8578594533777133e-05, - "loss": 0.1128, + "learning_rate": 2.8580812257188773e-05, + "loss": 0.1022, "step": 16160 }, { "epoch": 0.75, - "learning_rate": 1.8578125732501996e-05, - "loss": 0.1386, + "learning_rate": 2.8580344187352753e-05, + "loss": 0.0983, "step": 16165 }, { "epoch": 0.75, - "learning_rate": 1.8577656931226856e-05, - "loss": 0.1299, + "learning_rate": 2.8579876117516736e-05, + "loss": 0.1711, "step": 16170 }, { "epoch": 0.75, - "learning_rate": 1.8577188129951716e-05, - "loss": 0.1277, + "learning_rate": 2.8579408047680716e-05, + "loss": 0.1264, "step": 16175 }, { "epoch": 0.75, - "learning_rate": 1.8576719328676576e-05, - "loss": 0.201, + "learning_rate": 2.8578939977844696e-05, + "loss": 0.129, "step": 16180 }, { "epoch": 0.76, - "learning_rate": 1.8576250527401436e-05, - "loss": 0.2323, + "learning_rate": 2.8578471908008676e-05, + "loss": 0.227, "step": 16185 }, { "epoch": 0.76, - "learning_rate": 1.8575781726126296e-05, - "loss": 0.2772, + "learning_rate": 2.857800383817266e-05, + "loss": 0.2669, "step": 16190 }, { "epoch": 0.76, - "learning_rate": 1.8575312924851156e-05, - "loss": 0.3258, + "learning_rate": 2.8577535768336635e-05, + "loss": 0.3455, "step": 16195 }, { "epoch": 0.76, - "learning_rate": 1.8574844123576016e-05, - "loss": 0.3383, + "learning_rate": 2.8577067698500615e-05, + "loss": 0.4634, "step": 16200 }, { "epoch": 0.76, - "learning_rate": 1.8574375322300876e-05, - "loss": 0.2396, + "learning_rate": 2.8576599628664595e-05, + "loss": 0.2328, "step": 16205 }, { "epoch": 0.76, - "learning_rate": 1.857390652102574e-05, - "loss": 0.0677, + "learning_rate": 2.8576131558828578e-05, + "loss": 0.0769, "step": 16210 }, { "epoch": 0.76, - "learning_rate": 1.85734377197506e-05, - "loss": 0.105, + "learning_rate": 2.8575663488992558e-05, + "loss": 0.074, "step": 16215 }, { "epoch": 0.76, - "learning_rate": 1.857296891847546e-05, - "loss": 0.1804, + "learning_rate": 2.8575195419156538e-05, + "loss": 0.0922, "step": 16220 }, { "epoch": 0.76, - "learning_rate": 1.857250011720032e-05, - "loss": 0.1835, + "learning_rate": 2.857472734932052e-05, + "loss": 0.1451, "step": 16225 }, { "epoch": 0.76, - "learning_rate": 1.8572031315925182e-05, - "loss": 0.1532, + "learning_rate": 2.85742592794845e-05, + "loss": 0.2113, "step": 16230 }, { "epoch": 0.76, - "learning_rate": 1.8571562514650042e-05, - "loss": 0.2916, + "learning_rate": 2.857379120964848e-05, + "loss": 0.2035, "step": 16235 }, { "epoch": 0.76, - "learning_rate": 1.8571093713374902e-05, - "loss": 0.3538, + "learning_rate": 2.857332313981246e-05, + "loss": 0.2921, "step": 16240 }, { "epoch": 0.76, - "learning_rate": 1.8570624912099762e-05, - "loss": 0.3102, + "learning_rate": 2.8572855069976444e-05, + "loss": 0.313, "step": 16245 }, { "epoch": 0.76, - "learning_rate": 1.8570156110824622e-05, - "loss": 0.5525, + "learning_rate": 2.8572387000140423e-05, + "loss": 0.4806, "step": 16250 }, { "epoch": 0.76, - "learning_rate": 1.8569687309549485e-05, - "loss": 0.2395, + "learning_rate": 2.8571918930304403e-05, + "loss": 0.3072, "step": 16255 }, { "epoch": 0.76, - "learning_rate": 1.8569218508274345e-05, - "loss": 0.0512, + "learning_rate": 2.857145086046838e-05, + "loss": 0.1016, "step": 16260 }, { "epoch": 0.76, - "learning_rate": 1.8568749706999205e-05, - "loss": 0.0791, + "learning_rate": 2.8570982790632363e-05, + "loss": 0.0676, "step": 16265 }, { "epoch": 0.76, - "learning_rate": 1.8568280905724065e-05, - "loss": 0.139, + "learning_rate": 2.8570514720796343e-05, + "loss": 0.1592, "step": 16270 }, { "epoch": 0.76, - "learning_rate": 1.8567812104448925e-05, - "loss": 0.1549, + "learning_rate": 2.8570046650960322e-05, + "loss": 0.1448, "step": 16275 }, { "epoch": 0.76, - "learning_rate": 1.8567343303173785e-05, - "loss": 0.2231, + "learning_rate": 2.8569578581124306e-05, + "loss": 0.1645, "step": 16280 }, { "epoch": 0.76, - "learning_rate": 1.8566874501898645e-05, - "loss": 0.2823, + "learning_rate": 2.8569110511288285e-05, + "loss": 0.1477, "step": 16285 }, { "epoch": 0.76, - "learning_rate": 1.8566405700623505e-05, - "loss": 0.2111, + "learning_rate": 2.8568642441452265e-05, + "loss": 0.2008, "step": 16290 }, { "epoch": 0.76, - "learning_rate": 1.8565936899348368e-05, - "loss": 0.3525, + "learning_rate": 2.8568174371616245e-05, + "loss": 0.3682, "step": 16295 }, { "epoch": 0.76, - "learning_rate": 1.8565468098073228e-05, - "loss": 0.4608, + "learning_rate": 2.8567706301780228e-05, + "loss": 0.5006, "step": 16300 }, { "epoch": 0.76, - "learning_rate": 1.8564999296798088e-05, - "loss": 0.1706, + "learning_rate": 2.8567238231944208e-05, + "loss": 0.182, "step": 16305 }, { "epoch": 0.76, - "learning_rate": 1.856453049552295e-05, - "loss": 0.1103, + "learning_rate": 2.8566770162108188e-05, + "loss": 0.0546, "step": 16310 }, { "epoch": 0.76, - "learning_rate": 1.856406169424781e-05, - "loss": 0.0674, + "learning_rate": 2.8566302092272168e-05, + "loss": 0.1098, "step": 16315 }, { "epoch": 0.76, - "learning_rate": 1.856359289297267e-05, - "loss": 0.0725, + "learning_rate": 2.8565834022436148e-05, + "loss": 0.109, "step": 16320 }, { "epoch": 0.76, - "learning_rate": 1.856312409169753e-05, - "loss": 0.0853, + "learning_rate": 2.8565365952600127e-05, + "loss": 0.1537, "step": 16325 }, { "epoch": 0.76, - "learning_rate": 1.856265529042239e-05, - "loss": 0.2292, + "learning_rate": 2.8564897882764107e-05, + "loss": 0.1976, "step": 16330 }, { "epoch": 0.76, - "learning_rate": 1.856218648914725e-05, - "loss": 0.2606, + "learning_rate": 2.856442981292809e-05, + "loss": 0.2869, "step": 16335 }, { "epoch": 0.76, - "learning_rate": 1.856171768787211e-05, - "loss": 0.2461, + "learning_rate": 2.856396174309207e-05, + "loss": 0.2232, "step": 16340 }, { "epoch": 0.76, - "learning_rate": 1.856124888659697e-05, - "loss": 0.3852, + "learning_rate": 2.856349367325605e-05, + "loss": 0.2059, "step": 16345 }, { "epoch": 0.76, - "learning_rate": 1.8560780085321834e-05, - "loss": 0.5247, + "learning_rate": 2.856302560342003e-05, + "loss": 0.5648, "step": 16350 }, { "epoch": 0.76, - "learning_rate": 1.8560311284046694e-05, - "loss": 0.2266, + "learning_rate": 2.8562557533584013e-05, + "loss": 0.1993, "step": 16355 }, { "epoch": 0.76, - "learning_rate": 1.8559842482771554e-05, - "loss": 0.0598, + "learning_rate": 2.8562089463747993e-05, + "loss": 0.0903, "step": 16360 }, { "epoch": 0.76, - "learning_rate": 1.8559373681496414e-05, - "loss": 0.1304, + "learning_rate": 2.8561621393911973e-05, + "loss": 0.087, "step": 16365 }, { "epoch": 0.76, - "learning_rate": 1.8558904880221274e-05, - "loss": 0.106, + "learning_rate": 2.8561153324075953e-05, + "loss": 0.1336, "step": 16370 }, { "epoch": 0.76, - "learning_rate": 1.8558436078946137e-05, - "loss": 0.1039, + "learning_rate": 2.8560685254239936e-05, + "loss": 0.1008, "step": 16375 }, { "epoch": 0.76, - "learning_rate": 1.8557967277670997e-05, - "loss": 0.1404, + "learning_rate": 2.8560217184403916e-05, + "loss": 0.15, "step": 16380 }, { "epoch": 0.76, - "learning_rate": 1.8557498476395857e-05, - "loss": 0.2072, + "learning_rate": 2.8559749114567892e-05, + "loss": 0.142, "step": 16385 }, { "epoch": 0.76, - "learning_rate": 1.8557029675120717e-05, - "loss": 0.2344, + "learning_rate": 2.8559281044731872e-05, + "loss": 0.2956, "step": 16390 }, { "epoch": 0.77, - "learning_rate": 1.855656087384558e-05, - "loss": 0.3281, + "learning_rate": 2.8558812974895855e-05, + "loss": 0.3568, "step": 16395 }, { "epoch": 0.77, - "learning_rate": 1.855609207257044e-05, - "loss": 0.5027, + "learning_rate": 2.8558344905059835e-05, + "loss": 0.3688, "step": 16400 }, { "epoch": 0.77, - "learning_rate": 1.85556232712953e-05, - "loss": 0.1777, + "learning_rate": 2.8557876835223815e-05, + "loss": 0.1329, "step": 16405 }, { "epoch": 0.77, - "learning_rate": 1.855515447002016e-05, - "loss": 0.0762, + "learning_rate": 2.8557408765387798e-05, + "loss": 0.0728, "step": 16410 }, { "epoch": 0.77, - "learning_rate": 1.855468566874502e-05, - "loss": 0.0743, + "learning_rate": 2.8556940695551778e-05, + "loss": 0.0862, "step": 16415 }, { "epoch": 0.77, - "learning_rate": 1.855421686746988e-05, - "loss": 0.0928, + "learning_rate": 2.8556472625715757e-05, + "loss": 0.1549, "step": 16420 }, { "epoch": 0.77, - "learning_rate": 1.855374806619474e-05, - "loss": 0.1213, + "learning_rate": 2.8556004555879737e-05, + "loss": 0.1165, "step": 16425 }, { "epoch": 0.77, - "learning_rate": 1.85532792649196e-05, - "loss": 0.1962, + "learning_rate": 2.855553648604372e-05, + "loss": 0.1744, "step": 16430 }, { "epoch": 0.77, - "learning_rate": 1.8552810463644463e-05, - "loss": 0.2512, + "learning_rate": 2.85550684162077e-05, + "loss": 0.2404, "step": 16435 }, { "epoch": 0.77, - "learning_rate": 1.8552341662369323e-05, - "loss": 0.2477, + "learning_rate": 2.855460034637168e-05, + "loss": 0.2093, "step": 16440 }, { "epoch": 0.77, - "learning_rate": 1.8551872861094183e-05, - "loss": 0.2721, + "learning_rate": 2.855413227653566e-05, + "loss": 0.2035, "step": 16445 }, { "epoch": 0.77, - "learning_rate": 1.8551404059819043e-05, - "loss": 0.4693, + "learning_rate": 2.855366420669964e-05, + "loss": 0.509, "step": 16450 }, { "epoch": 0.77, - "learning_rate": 1.8550935258543906e-05, - "loss": 0.2471, + "learning_rate": 2.855319613686362e-05, + "loss": 0.2632, "step": 16455 }, { "epoch": 0.77, - "learning_rate": 1.8550466457268766e-05, - "loss": 0.0744, + "learning_rate": 2.85527280670276e-05, + "loss": 0.1014, "step": 16460 }, { "epoch": 0.77, - "learning_rate": 1.8549997655993626e-05, - "loss": 0.0537, + "learning_rate": 2.8552259997191583e-05, + "loss": 0.1104, "step": 16465 }, { "epoch": 0.77, - "learning_rate": 1.8549528854718486e-05, - "loss": 0.1404, + "learning_rate": 2.8551791927355562e-05, + "loss": 0.1301, "step": 16470 }, { "epoch": 0.77, - "learning_rate": 1.8549060053443346e-05, - "loss": 0.0935, + "learning_rate": 2.8551323857519542e-05, + "loss": 0.1182, "step": 16475 }, { "epoch": 0.77, - "learning_rate": 1.8548591252168206e-05, - "loss": 0.1698, + "learning_rate": 2.8550855787683522e-05, + "loss": 0.1053, "step": 16480 }, { "epoch": 0.77, - "learning_rate": 1.8548122450893066e-05, - "loss": 0.2063, + "learning_rate": 2.8550387717847505e-05, + "loss": 0.169, "step": 16485 }, { "epoch": 0.77, - "learning_rate": 1.854765364961793e-05, - "loss": 0.2454, + "learning_rate": 2.8549919648011485e-05, + "loss": 0.1742, "step": 16490 }, { "epoch": 0.77, - "learning_rate": 1.854718484834279e-05, - "loss": 0.3434, + "learning_rate": 2.8549451578175465e-05, + "loss": 0.2934, "step": 16495 }, { "epoch": 0.77, - "learning_rate": 1.854671604706765e-05, - "loss": 0.3791, + "learning_rate": 2.8548983508339445e-05, + "loss": 0.4648, "step": 16500 }, { "epoch": 0.77, - "learning_rate": 1.854624724579251e-05, - "loss": 0.2235, + "learning_rate": 2.8548515438503428e-05, + "loss": 0.2497, "step": 16505 }, { "epoch": 0.77, - "learning_rate": 1.854577844451737e-05, - "loss": 0.0541, + "learning_rate": 2.8548047368667404e-05, + "loss": 0.0621, "step": 16510 }, { "epoch": 0.77, - "learning_rate": 1.8545309643242232e-05, - "loss": 0.0699, + "learning_rate": 2.8547579298831384e-05, + "loss": 0.0941, "step": 16515 }, { "epoch": 0.77, - "learning_rate": 1.8544840841967092e-05, - "loss": 0.0983, + "learning_rate": 2.8547111228995367e-05, + "loss": 0.1163, "step": 16520 }, { "epoch": 0.77, - "learning_rate": 1.8544372040691952e-05, - "loss": 0.1524, + "learning_rate": 2.8546643159159347e-05, + "loss": 0.1363, "step": 16525 }, { "epoch": 0.77, - "learning_rate": 1.8543903239416812e-05, - "loss": 0.2209, + "learning_rate": 2.8546175089323327e-05, + "loss": 0.1571, "step": 16530 }, { "epoch": 0.77, - "learning_rate": 1.8543434438141676e-05, - "loss": 0.2845, + "learning_rate": 2.8545707019487307e-05, + "loss": 0.2624, "step": 16535 }, { "epoch": 0.77, - "learning_rate": 1.8542965636866535e-05, - "loss": 0.1833, + "learning_rate": 2.854523894965129e-05, + "loss": 0.178, "step": 16540 }, { "epoch": 0.77, - "learning_rate": 1.8542496835591395e-05, - "loss": 0.2518, + "learning_rate": 2.854477087981527e-05, + "loss": 0.3534, "step": 16545 }, { "epoch": 0.77, - "learning_rate": 1.8542028034316255e-05, - "loss": 0.4805, + "learning_rate": 2.854430280997925e-05, + "loss": 0.5826, "step": 16550 }, { "epoch": 0.77, - "learning_rate": 1.8541559233041115e-05, - "loss": 0.2084, + "learning_rate": 2.854383474014323e-05, + "loss": 0.1824, "step": 16555 }, { "epoch": 0.77, - "learning_rate": 1.8541090431765975e-05, - "loss": 0.0975, + "learning_rate": 2.8543366670307213e-05, + "loss": 0.0982, "step": 16560 }, { "epoch": 0.77, - "learning_rate": 1.8540621630490835e-05, - "loss": 0.0754, + "learning_rate": 2.8542898600471192e-05, + "loss": 0.0746, "step": 16565 }, { "epoch": 0.77, - "learning_rate": 1.8540152829215695e-05, - "loss": 0.1748, + "learning_rate": 2.8542430530635172e-05, + "loss": 0.1469, "step": 16570 }, { "epoch": 0.77, - "learning_rate": 1.8539684027940555e-05, - "loss": 0.1479, + "learning_rate": 2.8541962460799152e-05, + "loss": 0.1967, "step": 16575 }, { "epoch": 0.77, - "learning_rate": 1.853921522666542e-05, - "loss": 0.1724, + "learning_rate": 2.8541494390963132e-05, + "loss": 0.1434, "step": 16580 }, { "epoch": 0.77, - "learning_rate": 1.8538746425390278e-05, - "loss": 0.1805, + "learning_rate": 2.8541026321127112e-05, + "loss": 0.296, "step": 16585 }, { "epoch": 0.77, - "learning_rate": 1.8538277624115138e-05, - "loss": 0.233, + "learning_rate": 2.854055825129109e-05, + "loss": 0.2408, "step": 16590 }, { "epoch": 0.77, - "learning_rate": 1.853780882284e-05, - "loss": 0.2265, + "learning_rate": 2.8540090181455075e-05, + "loss": 0.1945, "step": 16595 }, { "epoch": 0.77, - "learning_rate": 1.853734002156486e-05, - "loss": 0.4862, + "learning_rate": 2.8539622111619055e-05, + "loss": 0.4325, "step": 16600 }, { "epoch": 0.77, - "learning_rate": 1.853687122028972e-05, - "loss": 0.2111, + "learning_rate": 2.8539154041783034e-05, + "loss": 0.2406, "step": 16605 }, { "epoch": 0.78, - "learning_rate": 1.853640241901458e-05, - "loss": 0.0882, + "learning_rate": 2.8538685971947014e-05, + "loss": 0.0676, "step": 16610 }, { "epoch": 0.78, - "learning_rate": 1.853593361773944e-05, - "loss": 0.1286, + "learning_rate": 2.8538217902110997e-05, + "loss": 0.0867, "step": 16615 }, { "epoch": 0.78, - "learning_rate": 1.85354648164643e-05, - "loss": 0.0689, + "learning_rate": 2.8537749832274977e-05, + "loss": 0.1002, "step": 16620 }, { "epoch": 0.78, - "learning_rate": 1.8534996015189165e-05, - "loss": 0.0597, + "learning_rate": 2.8537281762438957e-05, + "loss": 0.1743, "step": 16625 }, { "epoch": 0.78, - "learning_rate": 1.8534527213914024e-05, - "loss": 0.1602, + "learning_rate": 2.8536813692602937e-05, + "loss": 0.2309, "step": 16630 }, { "epoch": 0.78, - "learning_rate": 1.8534058412638884e-05, - "loss": 0.2968, + "learning_rate": 2.8536345622766917e-05, + "loss": 0.2388, "step": 16635 }, { "epoch": 0.78, - "learning_rate": 1.8533589611363744e-05, - "loss": 0.3255, + "learning_rate": 2.8535877552930897e-05, + "loss": 0.1545, "step": 16640 }, { "epoch": 0.78, - "learning_rate": 1.8533120810088604e-05, - "loss": 0.3148, + "learning_rate": 2.8535409483094876e-05, + "loss": 0.287, "step": 16645 }, { "epoch": 0.78, - "learning_rate": 1.8532652008813464e-05, - "loss": 0.4054, + "learning_rate": 2.853494141325886e-05, + "loss": 0.5251, "step": 16650 }, { "epoch": 0.78, - "learning_rate": 1.8532183207538324e-05, - "loss": 0.2527, + "learning_rate": 2.853447334342284e-05, + "loss": 0.2706, "step": 16655 }, { "epoch": 0.78, - "learning_rate": 1.8531714406263187e-05, - "loss": 0.0541, + "learning_rate": 2.853400527358682e-05, + "loss": 0.0825, "step": 16660 }, { "epoch": 0.78, - "learning_rate": 1.8531245604988047e-05, - "loss": 0.1128, + "learning_rate": 2.85335372037508e-05, + "loss": 0.0998, "step": 16665 }, { "epoch": 0.78, - "learning_rate": 1.8530776803712907e-05, - "loss": 0.1116, + "learning_rate": 2.8533069133914782e-05, + "loss": 0.1043, "step": 16670 }, { "epoch": 0.78, - "learning_rate": 1.853030800243777e-05, - "loss": 0.1365, + "learning_rate": 2.8532601064078762e-05, + "loss": 0.1212, "step": 16675 }, { "epoch": 0.78, - "learning_rate": 1.852983920116263e-05, - "loss": 0.1894, + "learning_rate": 2.8532132994242742e-05, + "loss": 0.1515, "step": 16680 }, { "epoch": 0.78, - "learning_rate": 1.852937039988749e-05, - "loss": 0.3003, + "learning_rate": 2.853166492440672e-05, + "loss": 0.1651, "step": 16685 }, { "epoch": 0.78, - "learning_rate": 1.852890159861235e-05, - "loss": 0.1914, + "learning_rate": 2.8531196854570705e-05, + "loss": 0.1988, "step": 16690 }, { "epoch": 0.78, - "learning_rate": 1.852843279733721e-05, - "loss": 0.3021, + "learning_rate": 2.8530728784734685e-05, + "loss": 0.2775, "step": 16695 }, { "epoch": 0.78, - "learning_rate": 1.852796399606207e-05, - "loss": 0.3965, + "learning_rate": 2.853026071489866e-05, + "loss": 0.3739, "step": 16700 }, { "epoch": 0.78, - "learning_rate": 1.852749519478693e-05, - "loss": 0.2011, + "learning_rate": 2.8529792645062644e-05, + "loss": 0.2159, "step": 16705 }, { "epoch": 0.78, - "learning_rate": 1.852702639351179e-05, - "loss": 0.0675, + "learning_rate": 2.8529324575226624e-05, + "loss": 0.0993, "step": 16710 }, { "epoch": 0.78, - "learning_rate": 1.852655759223665e-05, - "loss": 0.1059, + "learning_rate": 2.8528856505390604e-05, + "loss": 0.0733, "step": 16715 }, { "epoch": 0.78, - "learning_rate": 1.8526088790961513e-05, - "loss": 0.1146, + "learning_rate": 2.8528388435554584e-05, + "loss": 0.1262, "step": 16720 }, { "epoch": 0.78, - "learning_rate": 1.8525619989686373e-05, - "loss": 0.1235, + "learning_rate": 2.8527920365718567e-05, + "loss": 0.1416, "step": 16725 }, { "epoch": 0.78, - "learning_rate": 1.8525151188411233e-05, - "loss": 0.1721, + "learning_rate": 2.8527452295882547e-05, + "loss": 0.1235, "step": 16730 }, { "epoch": 0.78, - "learning_rate": 1.8524682387136093e-05, - "loss": 0.1825, + "learning_rate": 2.8526984226046527e-05, + "loss": 0.178, "step": 16735 }, { "epoch": 0.78, - "learning_rate": 1.8524213585860957e-05, - "loss": 0.2841, + "learning_rate": 2.8526516156210506e-05, + "loss": 0.2262, "step": 16740 }, { "epoch": 0.78, - "learning_rate": 1.8523744784585816e-05, - "loss": 0.3064, + "learning_rate": 2.852604808637449e-05, + "loss": 0.389, "step": 16745 }, { "epoch": 0.78, - "learning_rate": 1.8523275983310676e-05, - "loss": 0.4229, + "learning_rate": 2.852558001653847e-05, + "loss": 0.4825, "step": 16750 }, { "epoch": 0.78, - "learning_rate": 1.8522807182035536e-05, - "loss": 0.2078, + "learning_rate": 2.852511194670245e-05, + "loss": 0.1922, "step": 16755 }, { "epoch": 0.78, - "learning_rate": 1.8522338380760396e-05, - "loss": 0.0681, + "learning_rate": 2.8524643876866432e-05, + "loss": 0.0468, "step": 16760 }, { "epoch": 0.78, - "learning_rate": 1.852186957948526e-05, - "loss": 0.1168, + "learning_rate": 2.852417580703041e-05, + "loss": 0.1222, "step": 16765 }, { "epoch": 0.78, - "learning_rate": 1.852140077821012e-05, - "loss": 0.0938, + "learning_rate": 2.852370773719439e-05, + "loss": 0.1168, "step": 16770 }, { "epoch": 0.78, - "learning_rate": 1.852093197693498e-05, - "loss": 0.1566, + "learning_rate": 2.852323966735837e-05, + "loss": 0.1204, "step": 16775 }, { "epoch": 0.78, - "learning_rate": 1.852046317565984e-05, - "loss": 0.1815, + "learning_rate": 2.8522771597522352e-05, + "loss": 0.2345, "step": 16780 }, { "epoch": 0.78, - "learning_rate": 1.85199943743847e-05, - "loss": 0.2747, + "learning_rate": 2.852230352768633e-05, + "loss": 0.1697, "step": 16785 }, { "epoch": 0.78, - "learning_rate": 1.851952557310956e-05, - "loss": 0.1572, + "learning_rate": 2.852183545785031e-05, + "loss": 0.2511, "step": 16790 }, { "epoch": 0.78, - "learning_rate": 1.851905677183442e-05, - "loss": 0.4039, + "learning_rate": 2.852136738801429e-05, + "loss": 0.3124, "step": 16795 }, { "epoch": 0.78, - "learning_rate": 1.851858797055928e-05, - "loss": 0.5064, + "learning_rate": 2.8520899318178274e-05, + "loss": 0.4245, "step": 16800 }, { "epoch": 0.78, - "learning_rate": 1.8518119169284142e-05, - "loss": 0.2327, + "learning_rate": 2.8520431248342254e-05, + "loss": 0.172, "step": 16805 }, { "epoch": 0.78, - "learning_rate": 1.8517650368009002e-05, - "loss": 0.055, + "learning_rate": 2.8519963178506234e-05, + "loss": 0.0788, "step": 16810 }, { "epoch": 0.78, - "learning_rate": 1.8517181566733862e-05, - "loss": 0.0713, + "learning_rate": 2.8519495108670214e-05, + "loss": 0.083, "step": 16815 }, { "epoch": 0.78, - "learning_rate": 1.8516712765458726e-05, - "loss": 0.1341, + "learning_rate": 2.8519027038834197e-05, + "loss": 0.1405, "step": 16820 }, { "epoch": 0.79, - "learning_rate": 1.8516243964183586e-05, - "loss": 0.2755, + "learning_rate": 2.8518558968998174e-05, + "loss": 0.1144, "step": 16825 }, { "epoch": 0.79, - "learning_rate": 1.8515775162908446e-05, - "loss": 0.1865, + "learning_rate": 2.8518090899162153e-05, + "loss": 0.1346, "step": 16830 }, { "epoch": 0.79, - "learning_rate": 1.8515306361633305e-05, - "loss": 0.2744, + "learning_rate": 2.8517622829326137e-05, + "loss": 0.1765, "step": 16835 }, { "epoch": 0.79, - "learning_rate": 1.8514837560358165e-05, - "loss": 0.1419, + "learning_rate": 2.8517154759490116e-05, + "loss": 0.3672, "step": 16840 }, { "epoch": 0.79, - "learning_rate": 1.8514368759083025e-05, - "loss": 0.3494, + "learning_rate": 2.8516686689654096e-05, + "loss": 0.2694, "step": 16845 }, { "epoch": 0.79, - "learning_rate": 1.8513899957807885e-05, - "loss": 0.4996, + "learning_rate": 2.8516218619818076e-05, + "loss": 0.6112, "step": 16850 }, { "epoch": 0.79, - "learning_rate": 1.8513431156532745e-05, - "loss": 0.1462, + "learning_rate": 2.851575054998206e-05, + "loss": 0.2137, "step": 16855 }, { "epoch": 0.79, - "learning_rate": 1.851296235525761e-05, - "loss": 0.1193, + "learning_rate": 2.851528248014604e-05, + "loss": 0.0569, "step": 16860 }, { "epoch": 0.79, - "learning_rate": 1.851249355398247e-05, - "loss": 0.1351, + "learning_rate": 2.851481441031002e-05, + "loss": 0.1371, "step": 16865 }, { "epoch": 0.79, - "learning_rate": 1.851202475270733e-05, - "loss": 0.0935, + "learning_rate": 2.8514346340474e-05, + "loss": 0.0837, "step": 16870 }, { "epoch": 0.79, - "learning_rate": 1.851155595143219e-05, - "loss": 0.1988, + "learning_rate": 2.8513878270637982e-05, + "loss": 0.1527, "step": 16875 }, { "epoch": 0.79, - "learning_rate": 1.8511087150157048e-05, - "loss": 0.1656, + "learning_rate": 2.851341020080196e-05, + "loss": 0.1685, "step": 16880 }, { "epoch": 0.79, - "learning_rate": 1.851061834888191e-05, - "loss": 0.2114, + "learning_rate": 2.851294213096594e-05, + "loss": 0.2899, "step": 16885 }, { "epoch": 0.79, - "learning_rate": 1.851014954760677e-05, - "loss": 0.2165, + "learning_rate": 2.851247406112992e-05, + "loss": 0.2131, "step": 16890 }, { "epoch": 0.79, - "learning_rate": 1.850968074633163e-05, - "loss": 0.2662, + "learning_rate": 2.85120059912939e-05, + "loss": 0.2218, "step": 16895 }, { "epoch": 0.79, - "learning_rate": 1.850921194505649e-05, - "loss": 0.694, + "learning_rate": 2.851153792145788e-05, + "loss": 0.6848, "step": 16900 }, { "epoch": 0.79, - "learning_rate": 1.8508743143781355e-05, - "loss": 0.2216, + "learning_rate": 2.851106985162186e-05, + "loss": 0.111, "step": 16905 }, { "epoch": 0.79, - "learning_rate": 1.8508274342506215e-05, - "loss": 0.0824, + "learning_rate": 2.8510601781785844e-05, + "loss": 0.11, "step": 16910 }, { "epoch": 0.79, - "learning_rate": 1.8507805541231075e-05, - "loss": 0.0962, + "learning_rate": 2.8510133711949824e-05, + "loss": 0.0791, "step": 16915 }, { "epoch": 0.79, - "learning_rate": 1.8507336739955934e-05, - "loss": 0.1082, + "learning_rate": 2.8509665642113804e-05, + "loss": 0.0977, "step": 16920 }, { "epoch": 0.79, - "learning_rate": 1.8506867938680794e-05, - "loss": 0.1578, + "learning_rate": 2.8509197572277783e-05, + "loss": 0.1874, "step": 16925 }, { "epoch": 0.79, - "learning_rate": 1.8506399137405654e-05, - "loss": 0.1898, + "learning_rate": 2.8508729502441767e-05, + "loss": 0.1482, "step": 16930 }, { "epoch": 0.79, - "learning_rate": 1.8505930336130514e-05, - "loss": 0.1983, + "learning_rate": 2.8508261432605746e-05, + "loss": 0.2308, "step": 16935 }, { "epoch": 0.79, - "learning_rate": 1.8505461534855374e-05, - "loss": 0.2282, + "learning_rate": 2.8507793362769726e-05, + "loss": 0.2865, "step": 16940 }, { "epoch": 0.79, - "learning_rate": 1.8504992733580238e-05, - "loss": 0.2615, + "learning_rate": 2.850732529293371e-05, + "loss": 0.3884, "step": 16945 }, { "epoch": 0.79, - "learning_rate": 1.8504523932305097e-05, - "loss": 0.4631, + "learning_rate": 2.850685722309769e-05, + "loss": 0.5079, "step": 16950 }, { "epoch": 0.79, - "learning_rate": 1.8504055131029957e-05, - "loss": 0.2476, + "learning_rate": 2.8506389153261666e-05, + "loss": 0.2105, "step": 16955 }, { "epoch": 0.79, - "learning_rate": 1.850358632975482e-05, - "loss": 0.0629, + "learning_rate": 2.8505921083425646e-05, + "loss": 0.0665, "step": 16960 }, { "epoch": 0.79, - "learning_rate": 1.850311752847968e-05, - "loss": 0.0986, + "learning_rate": 2.850545301358963e-05, + "loss": 0.0669, "step": 16965 }, { "epoch": 0.79, - "learning_rate": 1.850264872720454e-05, - "loss": 0.1212, + "learning_rate": 2.850498494375361e-05, + "loss": 0.0971, "step": 16970 }, { "epoch": 0.79, - "learning_rate": 1.85021799259294e-05, - "loss": 0.127, + "learning_rate": 2.850451687391759e-05, + "loss": 0.0863, "step": 16975 }, { "epoch": 0.79, - "learning_rate": 1.850171112465426e-05, - "loss": 0.2048, + "learning_rate": 2.8504048804081568e-05, + "loss": 0.2074, "step": 16980 }, { "epoch": 0.79, - "learning_rate": 1.850124232337912e-05, - "loss": 0.2524, + "learning_rate": 2.850358073424555e-05, + "loss": 0.2256, "step": 16985 }, { "epoch": 0.79, - "learning_rate": 1.850077352210398e-05, - "loss": 0.2069, + "learning_rate": 2.850311266440953e-05, + "loss": 0.1474, "step": 16990 }, { "epoch": 0.79, - "learning_rate": 1.850030472082884e-05, - "loss": 0.2817, + "learning_rate": 2.850264459457351e-05, + "loss": 0.3712, "step": 16995 }, { "epoch": 0.79, - "learning_rate": 1.8499835919553704e-05, - "loss": 0.5592, + "learning_rate": 2.850217652473749e-05, + "loss": 0.4077, "step": 17000 }, { "epoch": 0.79, - "learning_rate": 1.8499367118278564e-05, - "loss": 0.2087, + "learning_rate": 2.8501708454901474e-05, + "loss": 0.2453, "step": 17005 }, { "epoch": 0.79, - "learning_rate": 1.8498898317003423e-05, - "loss": 0.1038, + "learning_rate": 2.8501240385065454e-05, + "loss": 0.0503, "step": 17010 }, { "epoch": 0.79, - "learning_rate": 1.8498429515728283e-05, - "loss": 0.1234, + "learning_rate": 2.850077231522943e-05, + "loss": 0.0805, "step": 17015 }, { "epoch": 0.79, - "learning_rate": 1.8497960714453143e-05, - "loss": 0.1361, + "learning_rate": 2.8500304245393414e-05, + "loss": 0.1867, "step": 17020 }, { "epoch": 0.79, - "learning_rate": 1.8497491913178007e-05, - "loss": 0.1025, + "learning_rate": 2.8499836175557393e-05, + "loss": 0.0975, "step": 17025 }, { "epoch": 0.79, - "learning_rate": 1.8497023111902867e-05, - "loss": 0.1794, + "learning_rate": 2.8499368105721373e-05, + "loss": 0.1772, "step": 17030 }, { "epoch": 0.79, - "learning_rate": 1.8496554310627727e-05, - "loss": 0.2281, + "learning_rate": 2.8498900035885353e-05, + "loss": 0.1767, "step": 17035 }, { "epoch": 0.8, - "learning_rate": 1.8496085509352586e-05, - "loss": 0.2468, + "learning_rate": 2.8498431966049336e-05, + "loss": 0.2192, "step": 17040 }, { "epoch": 0.8, - "learning_rate": 1.849561670807745e-05, - "loss": 0.3502, + "learning_rate": 2.8497963896213316e-05, + "loss": 0.3021, "step": 17045 }, { "epoch": 0.8, - "learning_rate": 1.849514790680231e-05, - "loss": 0.5949, + "learning_rate": 2.8497495826377296e-05, + "loss": 0.6028, "step": 17050 }, { "epoch": 0.8, - "learning_rate": 1.849467910552717e-05, - "loss": 0.2099, + "learning_rate": 2.8497027756541276e-05, + "loss": 0.2188, "step": 17055 }, { "epoch": 0.8, - "learning_rate": 1.849421030425203e-05, - "loss": 0.0767, + "learning_rate": 2.849655968670526e-05, + "loss": 0.1277, "step": 17060 }, { "epoch": 0.8, - "learning_rate": 1.849374150297689e-05, - "loss": 0.142, + "learning_rate": 2.849609161686924e-05, + "loss": 0.0907, "step": 17065 }, { "epoch": 0.8, - "learning_rate": 1.849327270170175e-05, - "loss": 0.1833, + "learning_rate": 2.849562354703322e-05, + "loss": 0.1664, "step": 17070 }, { "epoch": 0.8, - "learning_rate": 1.849280390042661e-05, - "loss": 0.0711, + "learning_rate": 2.84951554771972e-05, + "loss": 0.1506, "step": 17075 }, { "epoch": 0.8, - "learning_rate": 1.849233509915147e-05, - "loss": 0.1719, + "learning_rate": 2.8494687407361178e-05, + "loss": 0.2175, "step": 17080 }, { "epoch": 0.8, - "learning_rate": 1.849186629787633e-05, - "loss": 0.25, + "learning_rate": 2.8494219337525158e-05, + "loss": 0.1628, "step": 17085 }, { "epoch": 0.8, - "learning_rate": 1.8491397496601193e-05, - "loss": 0.1838, + "learning_rate": 2.8493751267689138e-05, + "loss": 0.2535, "step": 17090 }, { "epoch": 0.8, - "learning_rate": 1.8490928695326052e-05, - "loss": 0.3021, + "learning_rate": 2.849328319785312e-05, + "loss": 0.3497, "step": 17095 }, { "epoch": 0.8, - "learning_rate": 1.8490459894050912e-05, - "loss": 0.5295, + "learning_rate": 2.84928151280171e-05, + "loss": 0.5314, "step": 17100 }, { "epoch": 0.8, - "learning_rate": 1.8489991092775776e-05, - "loss": 0.2923, + "learning_rate": 2.849234705818108e-05, + "loss": 0.2614, "step": 17105 }, { "epoch": 0.8, - "learning_rate": 1.8489522291500636e-05, - "loss": 0.0491, + "learning_rate": 2.849187898834506e-05, + "loss": 0.0965, "step": 17110 }, { "epoch": 0.8, - "learning_rate": 1.8489053490225496e-05, - "loss": 0.0952, + "learning_rate": 2.8491410918509044e-05, + "loss": 0.1758, "step": 17115 }, { "epoch": 0.8, - "learning_rate": 1.8488584688950356e-05, - "loss": 0.0852, + "learning_rate": 2.8490942848673023e-05, + "loss": 0.1704, "step": 17120 }, { "epoch": 0.8, - "learning_rate": 1.8488115887675215e-05, - "loss": 0.1166, + "learning_rate": 2.8490474778837003e-05, + "loss": 0.1588, "step": 17125 }, { "epoch": 0.8, - "learning_rate": 1.8487647086400075e-05, - "loss": 0.1835, + "learning_rate": 2.8490006709000986e-05, + "loss": 0.1558, "step": 17130 }, { "epoch": 0.8, - "learning_rate": 1.8487178285124935e-05, - "loss": 0.2004, + "learning_rate": 2.8489538639164966e-05, + "loss": 0.1528, "step": 17135 }, { "epoch": 0.8, - "learning_rate": 1.84867094838498e-05, - "loss": 0.2276, + "learning_rate": 2.8489070569328946e-05, + "loss": 0.2789, "step": 17140 }, { "epoch": 0.8, - "learning_rate": 1.848624068257466e-05, - "loss": 0.3723, + "learning_rate": 2.8488602499492922e-05, + "loss": 0.2574, "step": 17145 }, { "epoch": 0.8, - "learning_rate": 1.848577188129952e-05, - "loss": 0.4231, + "learning_rate": 2.8488134429656906e-05, + "loss": 0.4975, "step": 17150 }, { "epoch": 0.8, - "learning_rate": 1.848530308002438e-05, - "loss": 0.2236, + "learning_rate": 2.8487666359820886e-05, + "loss": 0.1706, "step": 17155 }, { "epoch": 0.8, - "learning_rate": 1.848483427874924e-05, - "loss": 0.0599, + "learning_rate": 2.8487198289984865e-05, + "loss": 0.0641, "step": 17160 }, { "epoch": 0.8, - "learning_rate": 1.84843654774741e-05, - "loss": 0.1565, + "learning_rate": 2.8486730220148845e-05, + "loss": 0.0686, "step": 17165 }, { "epoch": 0.8, - "learning_rate": 1.848389667619896e-05, - "loss": 0.1829, + "learning_rate": 2.848626215031283e-05, + "loss": 0.1254, "step": 17170 }, { "epoch": 0.8, - "learning_rate": 1.848342787492382e-05, - "loss": 0.111, + "learning_rate": 2.8485794080476808e-05, + "loss": 0.1117, "step": 17175 }, { "epoch": 0.8, - "learning_rate": 1.848295907364868e-05, - "loss": 0.1442, + "learning_rate": 2.8485326010640788e-05, + "loss": 0.1488, "step": 17180 }, { "epoch": 0.8, - "learning_rate": 1.8482490272373545e-05, - "loss": 0.2144, + "learning_rate": 2.8484857940804768e-05, + "loss": 0.2334, "step": 17185 }, { "epoch": 0.8, - "learning_rate": 1.8482021471098405e-05, - "loss": 0.2541, + "learning_rate": 2.848438987096875e-05, + "loss": 0.2169, "step": 17190 }, { "epoch": 0.8, - "learning_rate": 1.8481552669823265e-05, - "loss": 0.2623, + "learning_rate": 2.848392180113273e-05, + "loss": 0.2871, "step": 17195 }, { "epoch": 0.8, - "learning_rate": 1.8481083868548125e-05, - "loss": 0.4617, + "learning_rate": 2.848345373129671e-05, + "loss": 0.557, "step": 17200 }, { "epoch": 0.8, - "learning_rate": 1.8480615067272985e-05, - "loss": 0.2354, + "learning_rate": 2.848298566146069e-05, + "loss": 0.2358, "step": 17205 }, { "epoch": 0.8, - "learning_rate": 1.8480146265997845e-05, - "loss": 0.1225, + "learning_rate": 2.848251759162467e-05, + "loss": 0.0385, "step": 17210 }, { "epoch": 0.8, - "learning_rate": 1.8479677464722704e-05, - "loss": 0.0805, + "learning_rate": 2.848204952178865e-05, + "loss": 0.1046, "step": 17215 }, { "epoch": 0.8, - "learning_rate": 1.8479208663447564e-05, - "loss": 0.1008, + "learning_rate": 2.848158145195263e-05, + "loss": 0.1113, "step": 17220 }, { "epoch": 0.8, - "learning_rate": 1.8478739862172424e-05, - "loss": 0.1341, + "learning_rate": 2.8481113382116613e-05, + "loss": 0.1022, "step": 17225 }, { "epoch": 0.8, - "learning_rate": 1.8478271060897288e-05, - "loss": 0.1171, + "learning_rate": 2.8480645312280593e-05, + "loss": 0.1602, "step": 17230 }, { "epoch": 0.8, - "learning_rate": 1.8477802259622148e-05, - "loss": 0.2465, + "learning_rate": 2.8480177242444573e-05, + "loss": 0.2675, "step": 17235 }, { "epoch": 0.8, - "learning_rate": 1.8477333458347008e-05, - "loss": 0.3368, + "learning_rate": 2.8479709172608553e-05, + "loss": 0.2947, "step": 17240 }, { "epoch": 0.8, - "learning_rate": 1.8476864657071867e-05, - "loss": 0.4202, + "learning_rate": 2.8479241102772536e-05, + "loss": 0.3125, "step": 17245 }, { "epoch": 0.8, - "learning_rate": 1.847639585579673e-05, - "loss": 0.4027, + "learning_rate": 2.8478773032936516e-05, + "loss": 0.6016, "step": 17250 }, { "epoch": 0.81, - "learning_rate": 1.847592705452159e-05, - "loss": 0.2201, + "learning_rate": 2.8478304963100495e-05, + "loss": 0.2517, "step": 17255 }, { "epoch": 0.81, - "learning_rate": 1.847545825324645e-05, - "loss": 0.0969, + "learning_rate": 2.847783689326448e-05, + "loss": 0.1022, "step": 17260 }, { "epoch": 0.81, - "learning_rate": 1.847498945197131e-05, - "loss": 0.0765, + "learning_rate": 2.847736882342846e-05, + "loss": 0.074, "step": 17265 }, { "epoch": 0.81, - "learning_rate": 1.847452065069617e-05, - "loss": 0.198, + "learning_rate": 2.8476900753592435e-05, + "loss": 0.0836, "step": 17270 }, { "epoch": 0.81, - "learning_rate": 1.8474051849421034e-05, - "loss": 0.153, + "learning_rate": 2.8476432683756415e-05, + "loss": 0.2013, "step": 17275 }, { "epoch": 0.81, - "learning_rate": 1.8473583048145894e-05, - "loss": 0.1368, + "learning_rate": 2.8475964613920398e-05, + "loss": 0.1588, "step": 17280 }, { "epoch": 0.81, - "learning_rate": 1.8473114246870754e-05, - "loss": 0.1888, + "learning_rate": 2.8475496544084378e-05, + "loss": 0.3324, "step": 17285 }, { "epoch": 0.81, - "learning_rate": 1.8472645445595614e-05, - "loss": 0.1688, + "learning_rate": 2.8475028474248358e-05, + "loss": 0.2227, "step": 17290 }, { "epoch": 0.81, - "learning_rate": 1.8472176644320474e-05, - "loss": 0.2878, + "learning_rate": 2.8474560404412337e-05, + "loss": 0.3555, "step": 17295 }, { "epoch": 0.81, - "learning_rate": 1.8471707843045333e-05, - "loss": 0.5695, + "learning_rate": 2.847409233457632e-05, + "loss": 0.4057, "step": 17300 }, { "epoch": 0.81, - "learning_rate": 1.8471239041770193e-05, - "loss": 0.2027, + "learning_rate": 2.84736242647403e-05, + "loss": 0.2916, "step": 17305 }, { "epoch": 0.81, - "learning_rate": 1.8470770240495057e-05, - "loss": 0.0891, + "learning_rate": 2.847315619490428e-05, + "loss": 0.1287, "step": 17310 }, { "epoch": 0.81, - "learning_rate": 1.8470301439219917e-05, - "loss": 0.109, + "learning_rate": 2.8472688125068263e-05, + "loss": 0.1233, "step": 17315 }, { "epoch": 0.81, - "learning_rate": 1.8469832637944777e-05, - "loss": 0.1004, + "learning_rate": 2.8472220055232243e-05, + "loss": 0.0903, "step": 17320 }, { "epoch": 0.81, - "learning_rate": 1.8469363836669637e-05, - "loss": 0.0984, + "learning_rate": 2.8471751985396223e-05, + "loss": 0.1557, "step": 17325 }, { "epoch": 0.81, - "learning_rate": 1.84688950353945e-05, - "loss": 0.19, + "learning_rate": 2.84712839155602e-05, + "loss": 0.1055, "step": 17330 }, { "epoch": 0.81, - "learning_rate": 1.846842623411936e-05, - "loss": 0.1577, + "learning_rate": 2.8470815845724183e-05, + "loss": 0.2877, "step": 17335 }, { "epoch": 0.81, - "learning_rate": 1.846795743284422e-05, - "loss": 0.3391, + "learning_rate": 2.8470347775888162e-05, + "loss": 0.1888, "step": 17340 }, { "epoch": 0.81, - "learning_rate": 1.846748863156908e-05, - "loss": 0.2577, + "learning_rate": 2.8469879706052142e-05, + "loss": 0.446, "step": 17345 }, { "epoch": 0.81, - "learning_rate": 1.846701983029394e-05, - "loss": 0.4126, + "learning_rate": 2.8469411636216122e-05, + "loss": 0.5535, "step": 17350 }, { "epoch": 0.81, - "learning_rate": 1.84665510290188e-05, - "loss": 0.1848, + "learning_rate": 2.8468943566380105e-05, + "loss": 0.1888, "step": 17355 }, { "epoch": 0.81, - "learning_rate": 1.846608222774366e-05, - "loss": 0.1064, + "learning_rate": 2.8468475496544085e-05, + "loss": 0.0642, "step": 17360 }, { "epoch": 0.81, - "learning_rate": 1.846561342646852e-05, - "loss": 0.1071, + "learning_rate": 2.8468007426708065e-05, + "loss": 0.1229, "step": 17365 }, { "epoch": 0.81, - "learning_rate": 1.8465144625193383e-05, - "loss": 0.1296, + "learning_rate": 2.8467539356872048e-05, + "loss": 0.0824, "step": 17370 }, { "epoch": 0.81, - "learning_rate": 1.8464675823918243e-05, - "loss": 0.1236, + "learning_rate": 2.8467071287036028e-05, + "loss": 0.1548, "step": 17375 }, { "epoch": 0.81, - "learning_rate": 1.8464207022643103e-05, - "loss": 0.3178, + "learning_rate": 2.8466603217200008e-05, + "loss": 0.1229, "step": 17380 }, { "epoch": 0.81, - "learning_rate": 1.8463738221367963e-05, - "loss": 0.3502, + "learning_rate": 2.8466135147363988e-05, + "loss": 0.2513, "step": 17385 }, { "epoch": 0.81, - "learning_rate": 1.8463269420092826e-05, - "loss": 0.3099, + "learning_rate": 2.846566707752797e-05, + "loss": 0.3038, "step": 17390 }, { "epoch": 0.81, - "learning_rate": 1.8462800618817686e-05, - "loss": 0.2417, + "learning_rate": 2.8465199007691947e-05, + "loss": 0.289, "step": 17395 }, { "epoch": 0.81, - "learning_rate": 1.8462331817542546e-05, - "loss": 0.4998, + "learning_rate": 2.8464730937855927e-05, + "loss": 0.5119, "step": 17400 }, { "epoch": 0.81, - "learning_rate": 1.8461863016267406e-05, - "loss": 0.2151, + "learning_rate": 2.8464262868019907e-05, + "loss": 0.1253, "step": 17405 }, { "epoch": 0.81, - "learning_rate": 1.8461394214992266e-05, - "loss": 0.0575, + "learning_rate": 2.846379479818389e-05, + "loss": 0.0963, "step": 17410 }, { "epoch": 0.81, - "learning_rate": 1.846092541371713e-05, - "loss": 0.122, + "learning_rate": 2.846332672834787e-05, + "loss": 0.0999, "step": 17415 }, { "epoch": 0.81, - "learning_rate": 1.846045661244199e-05, - "loss": 0.1077, + "learning_rate": 2.846285865851185e-05, + "loss": 0.099, "step": 17420 }, { "epoch": 0.81, - "learning_rate": 1.845998781116685e-05, - "loss": 0.1329, + "learning_rate": 2.846239058867583e-05, + "loss": 0.158, "step": 17425 }, { "epoch": 0.81, - "learning_rate": 1.845951900989171e-05, - "loss": 0.1299, + "learning_rate": 2.8461922518839813e-05, + "loss": 0.189, "step": 17430 }, { "epoch": 0.81, - "learning_rate": 1.845905020861657e-05, - "loss": 0.2726, + "learning_rate": 2.8461454449003793e-05, + "loss": 0.1467, "step": 17435 }, { "epoch": 0.81, - "learning_rate": 1.845858140734143e-05, - "loss": 0.1639, + "learning_rate": 2.8460986379167772e-05, + "loss": 0.2386, "step": 17440 }, { "epoch": 0.81, - "learning_rate": 1.845811260606629e-05, - "loss": 0.4012, + "learning_rate": 2.8460518309331756e-05, + "loss": 0.2599, "step": 17445 }, { "epoch": 0.81, - "learning_rate": 1.845764380479115e-05, - "loss": 0.3916, + "learning_rate": 2.8460050239495735e-05, + "loss": 0.5057, "step": 17450 }, { "epoch": 0.81, - "learning_rate": 1.8457175003516012e-05, - "loss": 0.2054, + "learning_rate": 2.8459582169659715e-05, + "loss": 0.2334, "step": 17455 }, { "epoch": 0.81, - "learning_rate": 1.8456706202240872e-05, - "loss": 0.0669, + "learning_rate": 2.845911409982369e-05, + "loss": 0.0933, "step": 17460 }, { "epoch": 0.81, - "learning_rate": 1.845623740096573e-05, - "loss": 0.1326, + "learning_rate": 2.8458646029987675e-05, + "loss": 0.1363, "step": 17465 }, { "epoch": 0.82, - "learning_rate": 1.8455768599690595e-05, - "loss": 0.1697, + "learning_rate": 2.8458177960151655e-05, + "loss": 0.0681, "step": 17470 }, { "epoch": 0.82, - "learning_rate": 1.8455299798415455e-05, - "loss": 0.1535, + "learning_rate": 2.8457709890315635e-05, + "loss": 0.1143, "step": 17475 }, { "epoch": 0.82, - "learning_rate": 1.8454830997140315e-05, - "loss": 0.1638, + "learning_rate": 2.8457241820479614e-05, + "loss": 0.1788, "step": 17480 }, { "epoch": 0.82, - "learning_rate": 1.8454362195865175e-05, - "loss": 0.1457, + "learning_rate": 2.8456773750643598e-05, + "loss": 0.0861, "step": 17485 }, { "epoch": 0.82, - "learning_rate": 1.8453893394590035e-05, - "loss": 0.2595, + "learning_rate": 2.8456305680807577e-05, + "loss": 0.2347, "step": 17490 }, { "epoch": 0.82, - "learning_rate": 1.8453424593314895e-05, - "loss": 0.2966, + "learning_rate": 2.8455837610971557e-05, + "loss": 0.4229, "step": 17495 }, { "epoch": 0.82, - "learning_rate": 1.8452955792039755e-05, - "loss": 0.6608, + "learning_rate": 2.845536954113554e-05, + "loss": 0.4567, "step": 17500 }, { "epoch": 0.82, - "learning_rate": 1.8452486990764614e-05, - "loss": 0.1939, + "learning_rate": 2.845490147129952e-05, + "loss": 0.2445, "step": 17505 }, { "epoch": 0.82, - "learning_rate": 1.8452018189489478e-05, - "loss": 0.1323, + "learning_rate": 2.84544334014635e-05, + "loss": 0.0806, "step": 17510 }, { "epoch": 0.82, - "learning_rate": 1.8451549388214338e-05, - "loss": 0.1026, + "learning_rate": 2.845396533162748e-05, + "loss": 0.0552, "step": 17515 }, { "epoch": 0.82, - "learning_rate": 1.8451080586939198e-05, - "loss": 0.1229, + "learning_rate": 2.845349726179146e-05, + "loss": 0.1101, "step": 17520 }, { "epoch": 0.82, - "learning_rate": 1.8450611785664058e-05, - "loss": 0.0851, + "learning_rate": 2.845302919195544e-05, + "loss": 0.1413, "step": 17525 }, { "epoch": 0.82, - "learning_rate": 1.8450142984388918e-05, - "loss": 0.1832, + "learning_rate": 2.845256112211942e-05, + "loss": 0.1538, "step": 17530 }, { "epoch": 0.82, - "learning_rate": 1.844967418311378e-05, - "loss": 0.1868, + "learning_rate": 2.84520930522834e-05, + "loss": 0.2205, "step": 17535 }, { "epoch": 0.82, - "learning_rate": 1.844920538183864e-05, - "loss": 0.2952, + "learning_rate": 2.8451624982447382e-05, + "loss": 0.2771, "step": 17540 }, { "epoch": 0.82, - "learning_rate": 1.84487365805635e-05, - "loss": 0.2594, + "learning_rate": 2.8451156912611362e-05, + "loss": 0.2482, "step": 17545 }, { "epoch": 0.82, - "learning_rate": 1.844826777928836e-05, - "loss": 0.3857, + "learning_rate": 2.8450688842775342e-05, + "loss": 0.5521, "step": 17550 }, { "epoch": 0.82, - "learning_rate": 1.8447798978013224e-05, - "loss": 0.2327, + "learning_rate": 2.8450220772939325e-05, + "loss": 0.2345, "step": 17555 }, { "epoch": 0.82, - "learning_rate": 1.8447330176738084e-05, - "loss": 0.0328, + "learning_rate": 2.8449752703103305e-05, + "loss": 0.0546, "step": 17560 }, { "epoch": 0.82, - "learning_rate": 1.8446861375462944e-05, - "loss": 0.0574, + "learning_rate": 2.8449284633267285e-05, + "loss": 0.115, "step": 17565 }, { "epoch": 0.82, - "learning_rate": 1.8446392574187804e-05, - "loss": 0.1277, + "learning_rate": 2.8448816563431265e-05, + "loss": 0.1554, "step": 17570 }, { "epoch": 0.82, - "learning_rate": 1.8445923772912664e-05, - "loss": 0.2752, + "learning_rate": 2.8448348493595248e-05, + "loss": 0.1924, "step": 17575 }, { "epoch": 0.82, - "learning_rate": 1.8445454971637524e-05, - "loss": 0.1907, + "learning_rate": 2.8447880423759228e-05, + "loss": 0.1633, "step": 17580 }, { "epoch": 0.82, - "learning_rate": 1.8444986170362384e-05, - "loss": 0.2263, + "learning_rate": 2.8447412353923204e-05, + "loss": 0.2276, "step": 17585 }, { "epoch": 0.82, - "learning_rate": 1.8444517369087244e-05, - "loss": 0.2014, + "learning_rate": 2.8446944284087184e-05, + "loss": 0.2455, "step": 17590 }, { "epoch": 0.82, - "learning_rate": 1.8444048567812103e-05, - "loss": 0.3681, + "learning_rate": 2.8446476214251167e-05, + "loss": 0.3305, "step": 17595 }, { "epoch": 0.82, - "learning_rate": 1.8443579766536967e-05, - "loss": 0.53, + "learning_rate": 2.8446008144415147e-05, + "loss": 0.4453, "step": 17600 }, { "epoch": 0.82, - "learning_rate": 1.8443110965261827e-05, - "loss": 0.1759, + "learning_rate": 2.8445540074579127e-05, + "loss": 0.1797, "step": 17605 }, { "epoch": 0.82, - "learning_rate": 1.8442642163986687e-05, - "loss": 0.106, + "learning_rate": 2.8445072004743107e-05, + "loss": 0.0646, "step": 17610 }, { "epoch": 0.82, - "learning_rate": 1.844217336271155e-05, - "loss": 0.0909, + "learning_rate": 2.844460393490709e-05, + "loss": 0.0597, "step": 17615 }, { "epoch": 0.82, - "learning_rate": 1.844170456143641e-05, - "loss": 0.0902, + "learning_rate": 2.844413586507107e-05, + "loss": 0.1145, "step": 17620 }, { "epoch": 0.82, - "learning_rate": 1.844123576016127e-05, - "loss": 0.1365, + "learning_rate": 2.844366779523505e-05, + "loss": 0.1437, "step": 17625 }, { "epoch": 0.82, - "learning_rate": 1.844076695888613e-05, - "loss": 0.1976, + "learning_rate": 2.8443199725399033e-05, + "loss": 0.1537, "step": 17630 }, { "epoch": 0.82, - "learning_rate": 1.844029815761099e-05, - "loss": 0.2058, + "learning_rate": 2.8442731655563012e-05, + "loss": 0.1481, "step": 17635 }, { "epoch": 0.82, - "learning_rate": 1.843982935633585e-05, - "loss": 0.1611, + "learning_rate": 2.8442263585726992e-05, + "loss": 0.3613, "step": 17640 }, { "epoch": 0.82, - "learning_rate": 1.843936055506071e-05, - "loss": 0.2566, + "learning_rate": 2.8441795515890972e-05, + "loss": 0.2935, "step": 17645 }, { "epoch": 0.82, - "learning_rate": 1.8438891753785573e-05, - "loss": 0.5291, + "learning_rate": 2.8441327446054952e-05, + "loss": 0.4195, "step": 17650 }, { "epoch": 0.82, - "learning_rate": 1.8438422952510433e-05, - "loss": 0.1834, + "learning_rate": 2.844085937621893e-05, + "loss": 0.2121, "step": 17655 }, { "epoch": 0.82, - "learning_rate": 1.8437954151235293e-05, - "loss": 0.1088, + "learning_rate": 2.844039130638291e-05, + "loss": 0.1253, "step": 17660 }, { "epoch": 0.82, - "learning_rate": 1.8437485349960153e-05, - "loss": 0.0704, + "learning_rate": 2.843992323654689e-05, + "loss": 0.0747, "step": 17665 }, { "epoch": 0.82, - "learning_rate": 1.8437016548685013e-05, - "loss": 0.1562, + "learning_rate": 2.8439455166710875e-05, + "loss": 0.095, "step": 17670 }, { "epoch": 0.82, - "learning_rate": 1.8436547747409873e-05, - "loss": 0.1251, + "learning_rate": 2.8438987096874854e-05, + "loss": 0.193, "step": 17675 }, { "epoch": 0.82, - "learning_rate": 1.8436078946134736e-05, - "loss": 0.11, + "learning_rate": 2.8438519027038834e-05, + "loss": 0.2413, "step": 17680 }, { "epoch": 0.83, - "learning_rate": 1.8435610144859596e-05, - "loss": 0.1469, + "learning_rate": 2.8438050957202817e-05, + "loss": 0.2313, "step": 17685 }, { "epoch": 0.83, - "learning_rate": 1.8435141343584456e-05, - "loss": 0.5929, + "learning_rate": 2.8437582887366797e-05, + "loss": 0.2927, "step": 17690 }, { "epoch": 0.83, - "learning_rate": 1.843467254230932e-05, - "loss": 0.312, + "learning_rate": 2.8437114817530777e-05, + "loss": 0.2861, "step": 17695 }, { "epoch": 0.83, - "learning_rate": 1.843420374103418e-05, - "loss": 0.627, + "learning_rate": 2.8436646747694757e-05, + "loss": 0.3195, "step": 17700 }, { "epoch": 0.83, - "learning_rate": 1.843373493975904e-05, - "loss": 0.2812, + "learning_rate": 2.843617867785874e-05, + "loss": 0.1636, "step": 17705 }, { "epoch": 0.83, - "learning_rate": 1.84332661384839e-05, - "loss": 0.05, + "learning_rate": 2.8435710608022716e-05, + "loss": 0.0918, "step": 17710 }, { "epoch": 0.83, - "learning_rate": 1.843279733720876e-05, - "loss": 0.0554, + "learning_rate": 2.8435242538186696e-05, + "loss": 0.0742, "step": 17715 }, { "epoch": 0.83, - "learning_rate": 1.843232853593362e-05, - "loss": 0.1176, + "learning_rate": 2.8434774468350676e-05, + "loss": 0.1572, "step": 17720 }, { "epoch": 0.83, - "learning_rate": 1.843185973465848e-05, - "loss": 0.1333, + "learning_rate": 2.843430639851466e-05, + "loss": 0.1237, "step": 17725 }, { "epoch": 0.83, - "learning_rate": 1.843139093338334e-05, - "loss": 0.1865, + "learning_rate": 2.843383832867864e-05, + "loss": 0.1644, "step": 17730 }, { "epoch": 0.83, - "learning_rate": 1.84309221321082e-05, - "loss": 0.2019, + "learning_rate": 2.843337025884262e-05, + "loss": 0.1167, "step": 17735 }, { "epoch": 0.83, - "learning_rate": 1.8430453330833062e-05, - "loss": 0.1893, + "learning_rate": 2.8432902189006602e-05, + "loss": 0.157, "step": 17740 }, { "epoch": 0.83, - "learning_rate": 1.8429984529557922e-05, - "loss": 0.1795, + "learning_rate": 2.8432434119170582e-05, + "loss": 0.3453, "step": 17745 }, { "epoch": 0.83, - "learning_rate": 1.8429515728282782e-05, - "loss": 0.4059, + "learning_rate": 2.8431966049334562e-05, + "loss": 0.46, "step": 17750 }, { "epoch": 0.83, - "learning_rate": 1.842904692700764e-05, - "loss": 0.2472, + "learning_rate": 2.843149797949854e-05, + "loss": 0.2216, "step": 17755 }, { "epoch": 0.83, - "learning_rate": 1.8428578125732505e-05, - "loss": 0.1022, + "learning_rate": 2.8431029909662525e-05, + "loss": 0.0829, "step": 17760 }, { "epoch": 0.83, - "learning_rate": 1.8428109324457365e-05, - "loss": 0.1209, + "learning_rate": 2.8430561839826505e-05, + "loss": 0.0597, "step": 17765 }, { "epoch": 0.83, - "learning_rate": 1.8427640523182225e-05, - "loss": 0.13, + "learning_rate": 2.8430093769990484e-05, + "loss": 0.1267, "step": 17770 }, { "epoch": 0.83, - "learning_rate": 1.8427171721907085e-05, - "loss": 0.1884, + "learning_rate": 2.842962570015446e-05, + "loss": 0.2343, "step": 17775 }, { "epoch": 0.83, - "learning_rate": 1.8426702920631945e-05, - "loss": 0.2003, + "learning_rate": 2.8429157630318444e-05, + "loss": 0.1144, "step": 17780 }, { "epoch": 0.83, - "learning_rate": 1.8426234119356805e-05, - "loss": 0.1951, + "learning_rate": 2.8428689560482424e-05, + "loss": 0.1421, "step": 17785 }, { "epoch": 0.83, - "learning_rate": 1.8425765318081668e-05, - "loss": 0.1778, + "learning_rate": 2.8428221490646404e-05, + "loss": 0.1425, "step": 17790 }, { "epoch": 0.83, - "learning_rate": 1.8425296516806528e-05, - "loss": 0.3655, + "learning_rate": 2.8427753420810383e-05, + "loss": 0.2553, "step": 17795 }, { "epoch": 0.83, - "learning_rate": 1.8424827715531388e-05, - "loss": 0.4681, + "learning_rate": 2.8427285350974367e-05, + "loss": 0.4929, "step": 17800 }, { "epoch": 0.83, - "learning_rate": 1.8424358914256248e-05, - "loss": 0.1807, + "learning_rate": 2.8426817281138347e-05, + "loss": 0.1698, "step": 17805 }, { "epoch": 0.83, - "learning_rate": 1.8423890112981108e-05, - "loss": 0.041, + "learning_rate": 2.8426349211302326e-05, + "loss": 0.0677, "step": 17810 }, { "epoch": 0.83, - "learning_rate": 1.8423421311705968e-05, - "loss": 0.0738, + "learning_rate": 2.842588114146631e-05, + "loss": 0.0714, "step": 17815 }, { "epoch": 0.83, - "learning_rate": 1.842295251043083e-05, - "loss": 0.1138, + "learning_rate": 2.842541307163029e-05, + "loss": 0.1241, "step": 17820 }, { "epoch": 0.83, - "learning_rate": 1.842248370915569e-05, - "loss": 0.1336, + "learning_rate": 2.842494500179427e-05, + "loss": 0.0877, "step": 17825 }, { "epoch": 0.83, - "learning_rate": 1.842201490788055e-05, - "loss": 0.2036, + "learning_rate": 2.842447693195825e-05, + "loss": 0.2265, "step": 17830 }, { "epoch": 0.83, - "learning_rate": 1.842154610660541e-05, - "loss": 0.166, + "learning_rate": 2.842400886212223e-05, + "loss": 0.1535, "step": 17835 }, { "epoch": 0.83, - "learning_rate": 1.8421077305330274e-05, - "loss": 0.1972, + "learning_rate": 2.842354079228621e-05, + "loss": 0.3066, "step": 17840 }, { "epoch": 0.83, - "learning_rate": 1.8420608504055134e-05, - "loss": 0.2675, + "learning_rate": 2.842307272245019e-05, + "loss": 0.247, "step": 17845 }, { "epoch": 0.83, - "learning_rate": 1.8420139702779994e-05, - "loss": 0.431, + "learning_rate": 2.8422604652614168e-05, + "loss": 0.7187, "step": 17850 }, { "epoch": 0.83, - "learning_rate": 1.8419670901504854e-05, - "loss": 0.2293, + "learning_rate": 2.842213658277815e-05, + "loss": 0.1971, "step": 17855 }, { "epoch": 0.83, - "learning_rate": 1.8419202100229714e-05, - "loss": 0.0781, + "learning_rate": 2.842166851294213e-05, + "loss": 0.1076, "step": 17860 }, { "epoch": 0.83, - "learning_rate": 1.8418733298954574e-05, - "loss": 0.0761, + "learning_rate": 2.842120044310611e-05, + "loss": 0.1249, "step": 17865 }, { "epoch": 0.83, - "learning_rate": 1.8418264497679434e-05, - "loss": 0.1226, + "learning_rate": 2.8420732373270094e-05, + "loss": 0.1041, "step": 17870 }, { "epoch": 0.83, - "learning_rate": 1.8417795696404294e-05, - "loss": 0.219, + "learning_rate": 2.8420264303434074e-05, + "loss": 0.1278, "step": 17875 }, { "epoch": 0.83, - "learning_rate": 1.8417326895129157e-05, - "loss": 0.2012, + "learning_rate": 2.8419796233598054e-05, + "loss": 0.1483, "step": 17880 }, { "epoch": 0.83, - "learning_rate": 1.8416858093854017e-05, - "loss": 0.159, + "learning_rate": 2.8419328163762034e-05, + "loss": 0.2494, "step": 17885 }, { "epoch": 0.83, - "learning_rate": 1.8416389292578877e-05, - "loss": 0.2031, + "learning_rate": 2.8418860093926017e-05, + "loss": 0.1923, "step": 17890 }, { "epoch": 0.84, - "learning_rate": 1.8415920491303737e-05, - "loss": 0.3211, + "learning_rate": 2.8418392024089997e-05, + "loss": 0.3253, "step": 17895 }, { "epoch": 0.84, - "learning_rate": 1.84154516900286e-05, - "loss": 0.5196, + "learning_rate": 2.8417923954253973e-05, + "loss": 0.4543, "step": 17900 }, { "epoch": 0.84, - "learning_rate": 1.841498288875346e-05, - "loss": 0.2361, + "learning_rate": 2.8417455884417953e-05, + "loss": 0.158, "step": 17905 }, { "epoch": 0.84, - "learning_rate": 1.841451408747832e-05, - "loss": 0.2127, + "learning_rate": 2.8416987814581936e-05, + "loss": 0.1329, "step": 17910 }, { "epoch": 0.84, - "learning_rate": 1.841404528620318e-05, - "loss": 0.1421, + "learning_rate": 2.8416519744745916e-05, + "loss": 0.1538, "step": 17915 }, { "epoch": 0.84, - "learning_rate": 1.841357648492804e-05, - "loss": 0.1517, + "learning_rate": 2.8416051674909896e-05, + "loss": 0.1384, "step": 17920 }, { "epoch": 0.84, - "learning_rate": 1.8413107683652903e-05, - "loss": 0.138, + "learning_rate": 2.841558360507388e-05, + "loss": 0.0706, "step": 17925 }, { "epoch": 0.84, - "learning_rate": 1.8412638882377763e-05, - "loss": 0.2045, + "learning_rate": 2.841511553523786e-05, + "loss": 0.1279, "step": 17930 }, { "epoch": 0.84, - "learning_rate": 1.8412170081102623e-05, - "loss": 0.2729, + "learning_rate": 2.841464746540184e-05, + "loss": 0.2132, "step": 17935 }, { "epoch": 0.84, - "learning_rate": 1.8411701279827483e-05, - "loss": 0.2523, + "learning_rate": 2.841417939556582e-05, + "loss": 0.2513, "step": 17940 }, { "epoch": 0.84, - "learning_rate": 1.8411232478552343e-05, - "loss": 0.3385, + "learning_rate": 2.8413711325729802e-05, + "loss": 0.2697, "step": 17945 }, { "epoch": 0.84, - "learning_rate": 1.8410763677277203e-05, - "loss": 0.4665, + "learning_rate": 2.841324325589378e-05, + "loss": 0.3846, "step": 17950 }, { "epoch": 0.84, - "learning_rate": 1.8410294876002063e-05, - "loss": 0.2603, + "learning_rate": 2.841277518605776e-05, + "loss": 0.204, "step": 17955 }, { "epoch": 0.84, - "learning_rate": 1.8409826074726923e-05, - "loss": 0.0925, + "learning_rate": 2.841230711622174e-05, + "loss": 0.0675, "step": 17960 }, { "epoch": 0.84, - "learning_rate": 1.8409357273451786e-05, - "loss": 0.1406, + "learning_rate": 2.841183904638572e-05, + "loss": 0.1023, "step": 17965 }, { "epoch": 0.84, - "learning_rate": 1.8408888472176646e-05, - "loss": 0.1379, + "learning_rate": 2.84113709765497e-05, + "loss": 0.1322, "step": 17970 }, { "epoch": 0.84, - "learning_rate": 1.8408419670901506e-05, - "loss": 0.0881, + "learning_rate": 2.841090290671368e-05, + "loss": 0.0832, "step": 17975 }, { "epoch": 0.84, - "learning_rate": 1.840795086962637e-05, - "loss": 0.158, + "learning_rate": 2.8410434836877664e-05, + "loss": 0.2682, "step": 17980 }, { "epoch": 0.84, - "learning_rate": 1.840748206835123e-05, - "loss": 0.2092, + "learning_rate": 2.8409966767041644e-05, + "loss": 0.252, "step": 17985 }, { "epoch": 0.84, - "learning_rate": 1.840701326707609e-05, - "loss": 0.349, + "learning_rate": 2.8409498697205623e-05, + "loss": 0.3592, "step": 17990 }, { "epoch": 0.84, - "learning_rate": 1.840654446580095e-05, - "loss": 0.4693, + "learning_rate": 2.8409030627369603e-05, + "loss": 0.3841, "step": 17995 }, { "epoch": 0.84, - "learning_rate": 1.840607566452581e-05, - "loss": 0.4295, + "learning_rate": 2.8408562557533587e-05, + "loss": 0.4768, "step": 18000 }, { "epoch": 0.84, - "learning_rate": 1.840560686325067e-05, - "loss": 0.225, + "learning_rate": 2.8408094487697566e-05, + "loss": 0.1898, "step": 18005 }, { "epoch": 0.84, - "learning_rate": 1.840513806197553e-05, - "loss": 0.1049, + "learning_rate": 2.8407626417861546e-05, + "loss": 0.0701, "step": 18010 }, { "epoch": 0.84, - "learning_rate": 1.840466926070039e-05, - "loss": 0.1065, + "learning_rate": 2.8407158348025526e-05, + "loss": 0.0812, "step": 18015 }, { "epoch": 0.84, - "learning_rate": 1.8404200459425252e-05, - "loss": 0.1185, + "learning_rate": 2.840669027818951e-05, + "loss": 0.0757, "step": 18020 }, { "epoch": 0.84, - "learning_rate": 1.8403731658150112e-05, - "loss": 0.2128, + "learning_rate": 2.8406222208353486e-05, + "loss": 0.125, "step": 18025 }, { "epoch": 0.84, - "learning_rate": 1.8403262856874972e-05, - "loss": 0.1386, + "learning_rate": 2.8405754138517465e-05, + "loss": 0.1982, "step": 18030 }, { "epoch": 0.84, - "learning_rate": 1.8402794055599832e-05, - "loss": 0.2085, + "learning_rate": 2.8405286068681445e-05, + "loss": 0.1307, "step": 18035 }, { "epoch": 0.84, - "learning_rate": 1.8402325254324692e-05, - "loss": 0.2312, + "learning_rate": 2.840481799884543e-05, + "loss": 0.3637, "step": 18040 }, { "epoch": 0.84, - "learning_rate": 1.8401856453049555e-05, - "loss": 0.3012, + "learning_rate": 2.8404349929009408e-05, + "loss": 0.2655, "step": 18045 }, { "epoch": 0.84, - "learning_rate": 1.8401387651774415e-05, - "loss": 0.5115, + "learning_rate": 2.8403881859173388e-05, + "loss": 0.3529, "step": 18050 }, { "epoch": 0.84, - "learning_rate": 1.8400918850499275e-05, - "loss": 0.2569, + "learning_rate": 2.840341378933737e-05, + "loss": 0.2616, "step": 18055 }, { "epoch": 0.84, - "learning_rate": 1.8400450049224135e-05, - "loss": 0.1007, + "learning_rate": 2.840294571950135e-05, + "loss": 0.0918, "step": 18060 }, { "epoch": 0.84, - "learning_rate": 1.8399981247948998e-05, - "loss": 0.1214, + "learning_rate": 2.840247764966533e-05, + "loss": 0.1122, "step": 18065 }, { "epoch": 0.84, - "learning_rate": 1.8399512446673858e-05, - "loss": 0.1432, + "learning_rate": 2.840200957982931e-05, + "loss": 0.1235, "step": 18070 }, { "epoch": 0.84, - "learning_rate": 1.8399043645398718e-05, - "loss": 0.1994, + "learning_rate": 2.8401541509993294e-05, + "loss": 0.1412, "step": 18075 }, { "epoch": 0.84, - "learning_rate": 1.8398574844123578e-05, - "loss": 0.1827, + "learning_rate": 2.8401073440157274e-05, + "loss": 0.1196, "step": 18080 }, { "epoch": 0.84, - "learning_rate": 1.8398106042848438e-05, - "loss": 0.2193, + "learning_rate": 2.8400605370321254e-05, + "loss": 0.231, "step": 18085 }, { "epoch": 0.84, - "learning_rate": 1.8397637241573298e-05, - "loss": 0.2757, + "learning_rate": 2.840013730048523e-05, + "loss": 0.216, "step": 18090 }, { "epoch": 0.84, - "learning_rate": 1.8397168440298158e-05, - "loss": 0.2565, + "learning_rate": 2.8399669230649213e-05, + "loss": 0.3107, "step": 18095 }, { "epoch": 0.84, - "learning_rate": 1.8396699639023018e-05, - "loss": 0.4674, + "learning_rate": 2.8399201160813193e-05, + "loss": 0.3715, "step": 18100 }, { "epoch": 0.84, - "learning_rate": 1.8396230837747878e-05, - "loss": 0.198, + "learning_rate": 2.8398733090977173e-05, + "loss": 0.1537, "step": 18105 }, { "epoch": 0.85, - "learning_rate": 1.839576203647274e-05, - "loss": 0.0999, + "learning_rate": 2.8398265021141156e-05, + "loss": 0.09, "step": 18110 }, { "epoch": 0.85, - "learning_rate": 1.83952932351976e-05, - "loss": 0.0993, + "learning_rate": 2.8397796951305136e-05, + "loss": 0.0968, "step": 18115 }, { "epoch": 0.85, - "learning_rate": 1.839482443392246e-05, - "loss": 0.1571, + "learning_rate": 2.8397328881469116e-05, + "loss": 0.1648, "step": 18120 }, { "epoch": 0.85, - "learning_rate": 1.8394355632647324e-05, - "loss": 0.0981, + "learning_rate": 2.8396860811633096e-05, + "loss": 0.1818, "step": 18125 }, { "epoch": 0.85, - "learning_rate": 1.8393886831372184e-05, - "loss": 0.1901, + "learning_rate": 2.839639274179708e-05, + "loss": 0.2729, "step": 18130 }, { "epoch": 0.85, - "learning_rate": 1.8393418030097044e-05, - "loss": 0.248, + "learning_rate": 2.839592467196106e-05, + "loss": 0.2332, "step": 18135 }, { "epoch": 0.85, - "learning_rate": 1.8392949228821904e-05, - "loss": 0.2168, + "learning_rate": 2.839545660212504e-05, + "loss": 0.2418, "step": 18140 }, { "epoch": 0.85, - "learning_rate": 1.8392480427546764e-05, - "loss": 0.2339, + "learning_rate": 2.8394988532289018e-05, + "loss": 0.2807, "step": 18145 }, { "epoch": 0.85, - "learning_rate": 1.8392011626271624e-05, - "loss": 0.5106, + "learning_rate": 2.8394520462453e-05, + "loss": 0.3192, "step": 18150 }, { "epoch": 0.85, - "learning_rate": 1.8391542824996484e-05, - "loss": 0.2561, + "learning_rate": 2.8394052392616978e-05, + "loss": 0.1612, "step": 18155 }, { "epoch": 0.85, - "learning_rate": 1.8391074023721347e-05, - "loss": 0.1187, + "learning_rate": 2.8393584322780958e-05, + "loss": 0.0834, "step": 18160 }, { "epoch": 0.85, - "learning_rate": 1.8390605222446207e-05, - "loss": 0.0982, + "learning_rate": 2.839311625294494e-05, + "loss": 0.1163, "step": 18165 }, { "epoch": 0.85, - "learning_rate": 1.8390136421171067e-05, - "loss": 0.0904, + "learning_rate": 2.839264818310892e-05, + "loss": 0.1613, "step": 18170 }, { "epoch": 0.85, - "learning_rate": 1.8389667619895927e-05, - "loss": 0.1419, + "learning_rate": 2.83921801132729e-05, + "loss": 0.0896, "step": 18175 }, { "epoch": 0.85, - "learning_rate": 1.8389198818620787e-05, - "loss": 0.1005, + "learning_rate": 2.839171204343688e-05, + "loss": 0.136, "step": 18180 }, { "epoch": 0.85, - "learning_rate": 1.8388730017345647e-05, - "loss": 0.1154, + "learning_rate": 2.8391243973600863e-05, + "loss": 0.2184, "step": 18185 }, { "epoch": 0.85, - "learning_rate": 1.838826121607051e-05, - "loss": 0.2446, + "learning_rate": 2.8390775903764843e-05, + "loss": 0.1982, "step": 18190 }, { "epoch": 0.85, - "learning_rate": 1.838779241479537e-05, - "loss": 0.3814, + "learning_rate": 2.8390307833928823e-05, + "loss": 0.3135, "step": 18195 }, { "epoch": 0.85, - "learning_rate": 1.838732361352023e-05, - "loss": 0.4632, + "learning_rate": 2.8389839764092803e-05, + "loss": 0.4263, "step": 18200 }, { "epoch": 0.85, - "learning_rate": 1.8386854812245093e-05, - "loss": 0.1842, + "learning_rate": 2.8389371694256786e-05, + "loss": 0.192, "step": 18205 }, { "epoch": 0.85, - "learning_rate": 1.8386386010969953e-05, - "loss": 0.1311, + "learning_rate": 2.8388903624420766e-05, + "loss": 0.0525, "step": 18210 }, { "epoch": 0.85, - "learning_rate": 1.8385917209694813e-05, - "loss": 0.1317, + "learning_rate": 2.8388435554584742e-05, + "loss": 0.076, "step": 18215 }, { "epoch": 0.85, - "learning_rate": 1.8385448408419673e-05, - "loss": 0.1859, + "learning_rate": 2.8387967484748722e-05, + "loss": 0.1511, "step": 18220 }, { "epoch": 0.85, - "learning_rate": 1.8384979607144533e-05, - "loss": 0.0955, + "learning_rate": 2.8387499414912705e-05, + "loss": 0.1484, "step": 18225 }, { "epoch": 0.85, - "learning_rate": 1.8384510805869393e-05, - "loss": 0.1593, + "learning_rate": 2.8387031345076685e-05, + "loss": 0.2002, "step": 18230 }, { "epoch": 0.85, - "learning_rate": 1.8384042004594253e-05, - "loss": 0.2589, + "learning_rate": 2.8386563275240665e-05, + "loss": 0.1409, "step": 18235 }, { "epoch": 0.85, - "learning_rate": 1.8383573203319113e-05, - "loss": 0.3052, + "learning_rate": 2.8386095205404648e-05, + "loss": 0.1672, "step": 18240 }, { "epoch": 0.85, - "learning_rate": 1.8383104402043973e-05, - "loss": 0.2756, + "learning_rate": 2.8385627135568628e-05, + "loss": 0.3112, "step": 18245 }, { "epoch": 0.85, - "learning_rate": 1.8382635600768836e-05, - "loss": 0.4911, + "learning_rate": 2.8385159065732608e-05, + "loss": 0.7144, "step": 18250 }, { "epoch": 0.85, - "learning_rate": 1.8382166799493696e-05, - "loss": 0.1806, + "learning_rate": 2.8384690995896588e-05, + "loss": 0.2201, "step": 18255 }, { "epoch": 0.85, - "learning_rate": 1.8381697998218556e-05, - "loss": 0.0599, + "learning_rate": 2.838422292606057e-05, + "loss": 0.1169, "step": 18260 }, { "epoch": 0.85, - "learning_rate": 1.8381229196943416e-05, - "loss": 0.0742, + "learning_rate": 2.838375485622455e-05, + "loss": 0.0843, "step": 18265 }, { "epoch": 0.85, - "learning_rate": 1.838076039566828e-05, - "loss": 0.0986, + "learning_rate": 2.838328678638853e-05, + "loss": 0.1896, "step": 18270 }, { "epoch": 0.85, - "learning_rate": 1.838029159439314e-05, - "loss": 0.201, + "learning_rate": 2.838281871655251e-05, + "loss": 0.2242, "step": 18275 }, { "epoch": 0.85, - "learning_rate": 1.8379822793118e-05, - "loss": 0.2901, + "learning_rate": 2.838235064671649e-05, + "loss": 0.0801, "step": 18280 }, { "epoch": 0.85, - "learning_rate": 1.837935399184286e-05, - "loss": 0.1967, + "learning_rate": 2.838188257688047e-05, + "loss": 0.0778, "step": 18285 }, { "epoch": 0.85, - "learning_rate": 1.837888519056772e-05, - "loss": 0.2117, + "learning_rate": 2.838141450704445e-05, + "loss": 0.1371, "step": 18290 }, { "epoch": 0.85, - "learning_rate": 1.837841638929258e-05, - "loss": 0.2381, + "learning_rate": 2.8380946437208433e-05, + "loss": 0.2982, "step": 18295 }, { "epoch": 0.85, - "learning_rate": 1.8377947588017442e-05, - "loss": 0.473, + "learning_rate": 2.8380478367372413e-05, + "loss": 0.549, "step": 18300 }, { "epoch": 0.85, - "learning_rate": 1.8377478786742302e-05, - "loss": 0.2497, + "learning_rate": 2.8380010297536393e-05, + "loss": 0.1805, "step": 18305 }, { "epoch": 0.85, - "learning_rate": 1.8377009985467162e-05, - "loss": 0.0581, + "learning_rate": 2.8379542227700372e-05, + "loss": 0.0731, "step": 18310 }, { "epoch": 0.85, - "learning_rate": 1.8376541184192022e-05, - "loss": 0.0962, + "learning_rate": 2.8379074157864356e-05, + "loss": 0.0572, "step": 18315 }, { "epoch": 0.85, - "learning_rate": 1.8376072382916882e-05, - "loss": 0.089, + "learning_rate": 2.8378606088028336e-05, + "loss": 0.2219, "step": 18320 }, { "epoch": 0.86, - "learning_rate": 1.8375603581641742e-05, - "loss": 0.2265, + "learning_rate": 2.8378138018192315e-05, + "loss": 0.1155, "step": 18325 }, { "epoch": 0.86, - "learning_rate": 1.8375134780366605e-05, - "loss": 0.1076, + "learning_rate": 2.8377669948356295e-05, + "loss": 0.2212, "step": 18330 }, { "epoch": 0.86, - "learning_rate": 1.8374665979091465e-05, - "loss": 0.2302, + "learning_rate": 2.837720187852028e-05, + "loss": 0.1358, "step": 18335 }, { "epoch": 0.86, - "learning_rate": 1.8374197177816325e-05, - "loss": 0.2914, + "learning_rate": 2.8376733808684258e-05, + "loss": 0.3169, "step": 18340 }, { "epoch": 0.86, - "learning_rate": 1.837372837654119e-05, - "loss": 0.2649, + "learning_rate": 2.8376265738848235e-05, + "loss": 0.2658, "step": 18345 }, { "epoch": 0.86, - "learning_rate": 1.8373259575266048e-05, - "loss": 0.7229, + "learning_rate": 2.8375797669012218e-05, + "loss": 0.6011, "step": 18350 }, { "epoch": 0.86, - "learning_rate": 1.8372790773990908e-05, - "loss": 0.2282, + "learning_rate": 2.8375329599176198e-05, + "loss": 0.2064, "step": 18355 }, { "epoch": 0.86, - "learning_rate": 1.8372321972715768e-05, - "loss": 0.0604, + "learning_rate": 2.8374861529340177e-05, + "loss": 0.0559, "step": 18360 }, { "epoch": 0.86, - "learning_rate": 1.8371853171440628e-05, - "loss": 0.1012, + "learning_rate": 2.8374393459504157e-05, + "loss": 0.1031, "step": 18365 }, { "epoch": 0.86, - "learning_rate": 1.8371384370165488e-05, - "loss": 0.1669, + "learning_rate": 2.837392538966814e-05, + "loss": 0.1769, "step": 18370 }, { "epoch": 0.86, - "learning_rate": 1.8370915568890348e-05, - "loss": 0.1804, + "learning_rate": 2.837345731983212e-05, + "loss": 0.0809, "step": 18375 }, { "epoch": 0.86, - "learning_rate": 1.8370446767615208e-05, - "loss": 0.202, + "learning_rate": 2.83729892499961e-05, + "loss": 0.2196, "step": 18380 }, { "epoch": 0.86, - "learning_rate": 1.8369977966340068e-05, - "loss": 0.1953, + "learning_rate": 2.837252118016008e-05, + "loss": 0.1496, "step": 18385 }, { "epoch": 0.86, - "learning_rate": 1.836950916506493e-05, - "loss": 0.2295, + "learning_rate": 2.8372053110324063e-05, + "loss": 0.3154, "step": 18390 }, { "epoch": 0.86, - "learning_rate": 1.836904036378979e-05, - "loss": 0.2345, + "learning_rate": 2.8371585040488043e-05, + "loss": 0.2883, "step": 18395 }, { "epoch": 0.86, - "learning_rate": 1.836857156251465e-05, - "loss": 0.4057, + "learning_rate": 2.8371116970652023e-05, + "loss": 0.6507, "step": 18400 }, { "epoch": 0.86, - "learning_rate": 1.836810276123951e-05, - "loss": 0.206, + "learning_rate": 2.8370648900816e-05, + "loss": 0.2271, "step": 18405 }, { "epoch": 0.86, - "learning_rate": 1.8367633959964374e-05, - "loss": 0.0997, + "learning_rate": 2.8370180830979982e-05, + "loss": 0.0465, "step": 18410 }, { "epoch": 0.86, - "learning_rate": 1.8367165158689234e-05, - "loss": 0.0887, + "learning_rate": 2.8369712761143962e-05, + "loss": 0.09, "step": 18415 }, { "epoch": 0.86, - "learning_rate": 1.8366696357414094e-05, - "loss": 0.1298, + "learning_rate": 2.8369244691307942e-05, + "loss": 0.1439, "step": 18420 }, { "epoch": 0.86, - "learning_rate": 1.8366227556138954e-05, - "loss": 0.198, + "learning_rate": 2.8368776621471925e-05, + "loss": 0.1548, "step": 18425 }, { "epoch": 0.86, - "learning_rate": 1.8365758754863814e-05, - "loss": 0.2134, + "learning_rate": 2.8368308551635905e-05, + "loss": 0.1617, "step": 18430 }, { "epoch": 0.86, - "learning_rate": 1.8365289953588674e-05, - "loss": 0.1678, + "learning_rate": 2.8367840481799885e-05, + "loss": 0.1673, "step": 18435 }, { "epoch": 0.86, - "learning_rate": 1.8364821152313537e-05, - "loss": 0.2678, + "learning_rate": 2.8367372411963865e-05, + "loss": 0.1674, "step": 18440 }, { "epoch": 0.86, - "learning_rate": 1.8364352351038397e-05, - "loss": 0.2635, + "learning_rate": 2.8366904342127848e-05, + "loss": 0.2876, "step": 18445 }, { "epoch": 0.86, - "learning_rate": 1.8363883549763257e-05, - "loss": 0.6666, + "learning_rate": 2.8366436272291828e-05, + "loss": 0.3987, "step": 18450 }, { "epoch": 0.86, - "learning_rate": 1.8363414748488117e-05, - "loss": 0.1784, + "learning_rate": 2.8365968202455808e-05, + "loss": 0.2224, "step": 18455 }, { "epoch": 0.86, - "learning_rate": 1.8362945947212977e-05, - "loss": 0.0755, + "learning_rate": 2.8365500132619787e-05, + "loss": 0.0738, "step": 18460 }, { "epoch": 0.86, - "learning_rate": 1.8362477145937837e-05, - "loss": 0.1487, + "learning_rate": 2.836503206278377e-05, + "loss": 0.1386, "step": 18465 }, { "epoch": 0.86, - "learning_rate": 1.8362008344662697e-05, - "loss": 0.0882, + "learning_rate": 2.8364563992947747e-05, + "loss": 0.0821, "step": 18470 }, { "epoch": 0.86, - "learning_rate": 1.836153954338756e-05, - "loss": 0.1553, + "learning_rate": 2.8364095923111727e-05, + "loss": 0.2388, "step": 18475 }, { "epoch": 0.86, - "learning_rate": 1.836107074211242e-05, - "loss": 0.1412, + "learning_rate": 2.836362785327571e-05, + "loss": 0.1524, "step": 18480 }, { "epoch": 0.86, - "learning_rate": 1.836060194083728e-05, - "loss": 0.2449, + "learning_rate": 2.836315978343969e-05, + "loss": 0.1129, "step": 18485 }, { "epoch": 0.86, - "learning_rate": 1.8360133139562143e-05, - "loss": 0.217, + "learning_rate": 2.836269171360367e-05, + "loss": 0.2458, "step": 18490 }, { "epoch": 0.86, - "learning_rate": 1.8359664338287003e-05, - "loss": 0.3949, + "learning_rate": 2.836222364376765e-05, + "loss": 0.2152, "step": 18495 }, { "epoch": 0.86, - "learning_rate": 1.8359195537011863e-05, - "loss": 0.4204, + "learning_rate": 2.8361755573931633e-05, + "loss": 0.3406, "step": 18500 }, { "epoch": 0.86, - "learning_rate": 1.8358726735736723e-05, - "loss": 0.1945, + "learning_rate": 2.8361287504095612e-05, + "loss": 0.2004, "step": 18505 }, { "epoch": 0.86, - "learning_rate": 1.8358257934461583e-05, - "loss": 0.0726, + "learning_rate": 2.8360819434259592e-05, + "loss": 0.0556, "step": 18510 }, { "epoch": 0.86, - "learning_rate": 1.8357789133186443e-05, - "loss": 0.1058, + "learning_rate": 2.8360351364423572e-05, + "loss": 0.0763, "step": 18515 }, { "epoch": 0.86, - "learning_rate": 1.8357320331911303e-05, - "loss": 0.0942, + "learning_rate": 2.8359883294587555e-05, + "loss": 0.0865, "step": 18520 }, { "epoch": 0.86, - "learning_rate": 1.8356851530636163e-05, - "loss": 0.1328, + "learning_rate": 2.8359415224751535e-05, + "loss": 0.1009, "step": 18525 }, { "epoch": 0.86, - "learning_rate": 1.8356382729361026e-05, - "loss": 0.1709, + "learning_rate": 2.8358947154915515e-05, + "loss": 0.1363, "step": 18530 }, { "epoch": 0.86, - "learning_rate": 1.8355913928085886e-05, - "loss": 0.1998, + "learning_rate": 2.8358479085079495e-05, + "loss": 0.1338, "step": 18535 }, { "epoch": 0.87, - "learning_rate": 1.8355445126810746e-05, - "loss": 0.3097, + "learning_rate": 2.8358011015243475e-05, + "loss": 0.2195, "step": 18540 }, { "epoch": 0.87, - "learning_rate": 1.8354976325535606e-05, - "loss": 0.3424, + "learning_rate": 2.8357542945407454e-05, + "loss": 0.3171, "step": 18545 }, { "epoch": 0.87, - "learning_rate": 1.8354507524260466e-05, - "loss": 0.4817, + "learning_rate": 2.8357074875571434e-05, + "loss": 0.5755, "step": 18550 }, { "epoch": 0.87, - "learning_rate": 1.835403872298533e-05, - "loss": 0.1701, + "learning_rate": 2.8356606805735417e-05, + "loss": 0.2008, "step": 18555 }, { "epoch": 0.87, - "learning_rate": 1.835356992171019e-05, - "loss": 0.0983, + "learning_rate": 2.8356138735899397e-05, + "loss": 0.0802, "step": 18560 }, { "epoch": 0.87, - "learning_rate": 1.835310112043505e-05, - "loss": 0.1378, + "learning_rate": 2.8355670666063377e-05, + "loss": 0.0548, "step": 18565 }, { "epoch": 0.87, - "learning_rate": 1.835263231915991e-05, - "loss": 0.1254, + "learning_rate": 2.8355202596227357e-05, + "loss": 0.1332, "step": 18570 }, { "epoch": 0.87, - "learning_rate": 1.8352163517884772e-05, - "loss": 0.1184, + "learning_rate": 2.835473452639134e-05, + "loss": 0.083, "step": 18575 }, { "epoch": 0.87, - "learning_rate": 1.8351694716609632e-05, - "loss": 0.1757, + "learning_rate": 2.835426645655532e-05, + "loss": 0.1904, "step": 18580 }, { "epoch": 0.87, - "learning_rate": 1.8351225915334492e-05, - "loss": 0.1283, + "learning_rate": 2.83537983867193e-05, + "loss": 0.194, "step": 18585 }, { "epoch": 0.87, - "learning_rate": 1.8350757114059352e-05, - "loss": 0.259, + "learning_rate": 2.8353330316883283e-05, + "loss": 0.1367, "step": 18590 }, { "epoch": 0.87, - "learning_rate": 1.8350288312784212e-05, - "loss": 0.2993, + "learning_rate": 2.835286224704726e-05, + "loss": 0.28, "step": 18595 }, { "epoch": 0.87, - "learning_rate": 1.8349819511509072e-05, - "loss": 0.695, + "learning_rate": 2.835239417721124e-05, + "loss": 0.451, "step": 18600 }, { "epoch": 0.87, - "learning_rate": 1.8349350710233932e-05, - "loss": 0.2788, + "learning_rate": 2.835192610737522e-05, + "loss": 0.1892, "step": 18605 }, { "epoch": 0.87, - "learning_rate": 1.8348881908958792e-05, - "loss": 0.1188, + "learning_rate": 2.8351458037539202e-05, + "loss": 0.0842, "step": 18610 }, { "epoch": 0.87, - "learning_rate": 1.8348413107683652e-05, - "loss": 0.0826, + "learning_rate": 2.8350989967703182e-05, + "loss": 0.1046, "step": 18615 }, { "epoch": 0.87, - "learning_rate": 1.8347944306408515e-05, - "loss": 0.1032, + "learning_rate": 2.8350521897867162e-05, + "loss": 0.1042, "step": 18620 }, { "epoch": 0.87, - "learning_rate": 1.8347475505133375e-05, - "loss": 0.0864, + "learning_rate": 2.835005382803114e-05, + "loss": 0.107, "step": 18625 }, { "epoch": 0.87, - "learning_rate": 1.8347006703858235e-05, - "loss": 0.183, + "learning_rate": 2.8349585758195125e-05, + "loss": 0.1507, "step": 18630 }, { "epoch": 0.87, - "learning_rate": 1.83465379025831e-05, - "loss": 0.1417, + "learning_rate": 2.8349117688359105e-05, + "loss": 0.209, "step": 18635 }, { "epoch": 0.87, - "learning_rate": 1.834606910130796e-05, - "loss": 0.179, + "learning_rate": 2.8348649618523084e-05, + "loss": 0.2198, "step": 18640 }, { "epoch": 0.87, - "learning_rate": 1.8345600300032818e-05, - "loss": 0.3551, + "learning_rate": 2.8348181548687064e-05, + "loss": 0.1924, "step": 18645 }, { "epoch": 0.87, - "learning_rate": 1.8345131498757678e-05, - "loss": 0.6553, + "learning_rate": 2.8347713478851048e-05, + "loss": 0.4928, "step": 18650 }, { "epoch": 0.87, - "learning_rate": 1.8344662697482538e-05, - "loss": 0.1851, + "learning_rate": 2.8347245409015027e-05, + "loss": 0.1856, "step": 18655 }, { "epoch": 0.87, - "learning_rate": 1.8344193896207398e-05, - "loss": 0.0574, + "learning_rate": 2.8346777339179004e-05, + "loss": 0.0468, "step": 18660 }, { "epoch": 0.87, - "learning_rate": 1.8343725094932258e-05, - "loss": 0.0999, + "learning_rate": 2.8346309269342987e-05, + "loss": 0.0885, "step": 18665 }, { "epoch": 0.87, - "learning_rate": 1.834325629365712e-05, - "loss": 0.1026, + "learning_rate": 2.8345841199506967e-05, + "loss": 0.1245, "step": 18670 }, { "epoch": 0.87, - "learning_rate": 1.834278749238198e-05, - "loss": 0.1747, + "learning_rate": 2.8345373129670947e-05, + "loss": 0.1097, "step": 18675 }, { "epoch": 0.87, - "learning_rate": 1.834231869110684e-05, - "loss": 0.1283, + "learning_rate": 2.8344905059834926e-05, + "loss": 0.1906, "step": 18680 }, { "epoch": 0.87, - "learning_rate": 1.83418498898317e-05, - "loss": 0.1925, + "learning_rate": 2.834443698999891e-05, + "loss": 0.2069, "step": 18685 }, { "epoch": 0.87, - "learning_rate": 1.834138108855656e-05, - "loss": 0.2752, + "learning_rate": 2.834396892016289e-05, + "loss": 0.2415, "step": 18690 }, { "epoch": 0.87, - "learning_rate": 1.8340912287281424e-05, - "loss": 0.2445, + "learning_rate": 2.834350085032687e-05, + "loss": 0.5044, "step": 18695 }, { "epoch": 0.87, - "learning_rate": 1.8340443486006284e-05, - "loss": 0.4561, + "learning_rate": 2.834303278049085e-05, + "loss": 0.3405, "step": 18700 }, { "epoch": 0.87, - "learning_rate": 1.8339974684731144e-05, - "loss": 0.2342, + "learning_rate": 2.8342564710654832e-05, + "loss": 0.2299, "step": 18705 }, { "epoch": 0.87, - "learning_rate": 1.8339505883456004e-05, - "loss": 0.051, + "learning_rate": 2.8342096640818812e-05, + "loss": 0.0958, "step": 18710 }, { "epoch": 0.87, - "learning_rate": 1.8339037082180868e-05, - "loss": 0.1113, + "learning_rate": 2.8341628570982792e-05, + "loss": 0.0939, "step": 18715 }, { "epoch": 0.87, - "learning_rate": 1.8338568280905727e-05, - "loss": 0.1012, + "learning_rate": 2.8341160501146772e-05, + "loss": 0.1363, "step": 18720 }, { "epoch": 0.87, - "learning_rate": 1.8338099479630587e-05, - "loss": 0.1449, + "learning_rate": 2.834069243131075e-05, + "loss": 0.1064, "step": 18725 }, { "epoch": 0.87, - "learning_rate": 1.8337630678355447e-05, - "loss": 0.1451, + "learning_rate": 2.834022436147473e-05, + "loss": 0.2211, "step": 18730 }, { "epoch": 0.87, - "learning_rate": 1.8337161877080307e-05, - "loss": 0.1471, + "learning_rate": 2.833975629163871e-05, + "loss": 0.2835, "step": 18735 }, { "epoch": 0.87, - "learning_rate": 1.8336693075805167e-05, - "loss": 0.17, + "learning_rate": 2.8339288221802694e-05, + "loss": 0.1807, "step": 18740 }, { "epoch": 0.87, - "learning_rate": 1.8336224274530027e-05, - "loss": 0.1593, + "learning_rate": 2.8338820151966674e-05, + "loss": 0.4162, "step": 18745 }, { "epoch": 0.87, - "learning_rate": 1.8335755473254887e-05, - "loss": 0.4541, + "learning_rate": 2.8338352082130654e-05, + "loss": 0.5333, "step": 18750 }, { "epoch": 0.88, - "learning_rate": 1.8335286671979747e-05, - "loss": 0.2293, + "learning_rate": 2.8337884012294634e-05, + "loss": 0.2408, "step": 18755 }, { "epoch": 0.88, - "learning_rate": 1.833481787070461e-05, - "loss": 0.0878, + "learning_rate": 2.8337415942458617e-05, + "loss": 0.0875, "step": 18760 }, { "epoch": 0.88, - "learning_rate": 1.833434906942947e-05, - "loss": 0.1217, + "learning_rate": 2.8336947872622597e-05, + "loss": 0.0799, "step": 18765 }, { "epoch": 0.88, - "learning_rate": 1.833388026815433e-05, - "loss": 0.1412, + "learning_rate": 2.8336479802786577e-05, + "loss": 0.1036, "step": 18770 }, { "epoch": 0.88, - "learning_rate": 1.8333411466879193e-05, - "loss": 0.1524, + "learning_rate": 2.833601173295056e-05, + "loss": 0.1331, "step": 18775 }, { "epoch": 0.88, - "learning_rate": 1.8332942665604053e-05, - "loss": 0.1379, + "learning_rate": 2.833554366311454e-05, + "loss": 0.1381, "step": 18780 }, { "epoch": 0.88, - "learning_rate": 1.8332473864328913e-05, - "loss": 0.3588, + "learning_rate": 2.8335075593278516e-05, + "loss": 0.1336, "step": 18785 }, { "epoch": 0.88, - "learning_rate": 1.8332005063053773e-05, - "loss": 0.2402, + "learning_rate": 2.8334607523442496e-05, + "loss": 0.2163, "step": 18790 }, { "epoch": 0.88, - "learning_rate": 1.8331536261778633e-05, - "loss": 0.3659, + "learning_rate": 2.833413945360648e-05, + "loss": 0.2485, "step": 18795 }, { "epoch": 0.88, - "learning_rate": 1.8331067460503493e-05, - "loss": 0.4519, + "learning_rate": 2.833367138377046e-05, + "loss": 0.4374, "step": 18800 }, { "epoch": 0.88, - "learning_rate": 1.8330598659228353e-05, - "loss": 0.1818, + "learning_rate": 2.833320331393444e-05, + "loss": 0.2362, "step": 18805 }, { "epoch": 0.88, - "learning_rate": 1.8330129857953216e-05, - "loss": 0.0785, + "learning_rate": 2.833273524409842e-05, + "loss": 0.0922, "step": 18810 }, { "epoch": 0.88, - "learning_rate": 1.8329661056678076e-05, - "loss": 0.1177, + "learning_rate": 2.8332267174262402e-05, + "loss": 0.0773, "step": 18815 }, { "epoch": 0.88, - "learning_rate": 1.8329192255402936e-05, - "loss": 0.0967, + "learning_rate": 2.833179910442638e-05, + "loss": 0.1277, "step": 18820 }, { "epoch": 0.88, - "learning_rate": 1.8328723454127796e-05, - "loss": 0.0934, + "learning_rate": 2.833133103459036e-05, + "loss": 0.1718, "step": 18825 }, { "epoch": 0.88, - "learning_rate": 1.8328254652852656e-05, - "loss": 0.1312, + "learning_rate": 2.833086296475434e-05, + "loss": 0.2076, "step": 18830 }, { "epoch": 0.88, - "learning_rate": 1.8327785851577516e-05, - "loss": 0.3729, + "learning_rate": 2.8330394894918324e-05, + "loss": 0.2233, "step": 18835 }, { "epoch": 0.88, - "learning_rate": 1.832731705030238e-05, - "loss": 0.1916, + "learning_rate": 2.8329926825082304e-05, + "loss": 0.2237, "step": 18840 }, { "epoch": 0.88, - "learning_rate": 1.832684824902724e-05, - "loss": 0.3668, + "learning_rate": 2.8329458755246284e-05, + "loss": 0.2385, "step": 18845 }, { "epoch": 0.88, - "learning_rate": 1.83263794477521e-05, - "loss": 0.5375, + "learning_rate": 2.8328990685410264e-05, + "loss": 0.4141, "step": 18850 }, { "epoch": 0.88, - "learning_rate": 1.8325910646476963e-05, - "loss": 0.2088, + "learning_rate": 2.8328522615574244e-05, + "loss": 0.1521, "step": 18855 }, { "epoch": 0.88, - "learning_rate": 1.8325441845201823e-05, - "loss": 0.0323, + "learning_rate": 2.8328054545738224e-05, + "loss": 0.0794, "step": 18860 }, { "epoch": 0.88, - "learning_rate": 1.8324973043926682e-05, - "loss": 0.1017, + "learning_rate": 2.8327586475902203e-05, + "loss": 0.2141, "step": 18865 }, { "epoch": 0.88, - "learning_rate": 1.8324504242651542e-05, - "loss": 0.0991, + "learning_rate": 2.8327118406066187e-05, + "loss": 0.168, "step": 18870 }, { "epoch": 0.88, - "learning_rate": 1.8324035441376402e-05, - "loss": 0.1889, + "learning_rate": 2.8326650336230166e-05, + "loss": 0.1151, "step": 18875 }, { "epoch": 0.88, - "learning_rate": 1.8323566640101262e-05, - "loss": 0.0697, + "learning_rate": 2.8326182266394146e-05, + "loss": 0.3286, "step": 18880 }, { "epoch": 0.88, - "learning_rate": 1.8323097838826122e-05, - "loss": 0.2361, + "learning_rate": 2.8325714196558126e-05, + "loss": 0.173, "step": 18885 }, { "epoch": 0.88, - "learning_rate": 1.8322629037550982e-05, - "loss": 0.2399, + "learning_rate": 2.832524612672211e-05, + "loss": 0.1535, "step": 18890 }, { "epoch": 0.88, - "learning_rate": 1.8322160236275842e-05, - "loss": 0.3615, + "learning_rate": 2.832477805688609e-05, + "loss": 0.2278, "step": 18895 }, { "epoch": 0.88, - "learning_rate": 1.8321691435000705e-05, - "loss": 0.45, + "learning_rate": 2.832430998705007e-05, + "loss": 0.3436, "step": 18900 }, { "epoch": 0.88, - "learning_rate": 1.8321222633725565e-05, - "loss": 0.2541, + "learning_rate": 2.8323841917214052e-05, + "loss": 0.187, "step": 18905 }, { "epoch": 0.88, - "learning_rate": 1.8320753832450425e-05, - "loss": 0.0357, + "learning_rate": 2.832337384737803e-05, + "loss": 0.0451, "step": 18910 }, { "epoch": 0.88, - "learning_rate": 1.8320285031175285e-05, - "loss": 0.069, + "learning_rate": 2.832290577754201e-05, + "loss": 0.1106, "step": 18915 }, { "epoch": 0.88, - "learning_rate": 1.831981622990015e-05, - "loss": 0.1533, + "learning_rate": 2.8322437707705988e-05, + "loss": 0.1392, "step": 18920 }, { "epoch": 0.88, - "learning_rate": 1.831934742862501e-05, - "loss": 0.0688, + "learning_rate": 2.832196963786997e-05, + "loss": 0.1006, "step": 18925 }, { "epoch": 0.88, - "learning_rate": 1.831887862734987e-05, - "loss": 0.1047, + "learning_rate": 2.832150156803395e-05, + "loss": 0.1918, "step": 18930 }, { "epoch": 0.88, - "learning_rate": 1.8318409826074728e-05, - "loss": 0.153, + "learning_rate": 2.832103349819793e-05, + "loss": 0.1577, "step": 18935 }, { "epoch": 0.88, - "learning_rate": 1.8317941024799588e-05, - "loss": 0.2231, + "learning_rate": 2.832056542836191e-05, + "loss": 0.2796, "step": 18940 }, { "epoch": 0.88, - "learning_rate": 1.8317472223524448e-05, - "loss": 0.4165, + "learning_rate": 2.8320097358525894e-05, + "loss": 0.282, "step": 18945 }, { "epoch": 0.88, - "learning_rate": 1.831700342224931e-05, - "loss": 0.521, + "learning_rate": 2.8319629288689874e-05, + "loss": 0.5019, "step": 18950 }, { "epoch": 0.88, - "learning_rate": 1.831653462097417e-05, - "loss": 0.2484, + "learning_rate": 2.8319161218853854e-05, + "loss": 0.2091, "step": 18955 }, { "epoch": 0.88, - "learning_rate": 1.831606581969903e-05, - "loss": 0.1186, + "learning_rate": 2.8318693149017837e-05, + "loss": 0.0502, "step": 18960 }, { "epoch": 0.88, - "learning_rate": 1.831559701842389e-05, - "loss": 0.1492, + "learning_rate": 2.8318225079181817e-05, + "loss": 0.0924, "step": 18965 }, { "epoch": 0.89, - "learning_rate": 1.831512821714875e-05, - "loss": 0.1672, + "learning_rate": 2.8317757009345797e-05, + "loss": 0.1117, "step": 18970 }, { "epoch": 0.89, - "learning_rate": 1.831465941587361e-05, - "loss": 0.2043, + "learning_rate": 2.8317288939509773e-05, + "loss": 0.1084, "step": 18975 }, { "epoch": 0.89, - "learning_rate": 1.831419061459847e-05, - "loss": 0.1615, + "learning_rate": 2.8316820869673756e-05, + "loss": 0.138, "step": 18980 }, { "epoch": 0.89, - "learning_rate": 1.8313721813323334e-05, - "loss": 0.1741, + "learning_rate": 2.8316352799837736e-05, + "loss": 0.1402, "step": 18985 }, { "epoch": 0.89, - "learning_rate": 1.8313253012048194e-05, - "loss": 0.1771, + "learning_rate": 2.8315884730001716e-05, + "loss": 0.1729, "step": 18990 }, { "epoch": 0.89, - "learning_rate": 1.8312784210773054e-05, - "loss": 0.2435, + "learning_rate": 2.8315416660165696e-05, + "loss": 0.2369, "step": 18995 }, { "epoch": 0.89, - "learning_rate": 1.8312315409497918e-05, - "loss": 0.4375, + "learning_rate": 2.831494859032968e-05, + "loss": 0.414, "step": 19000 }, { "epoch": 0.89, - "learning_rate": 1.8311846608222778e-05, - "loss": 0.1995, + "learning_rate": 2.831448052049366e-05, + "loss": 0.223, "step": 19005 }, { "epoch": 0.89, - "learning_rate": 1.8311377806947637e-05, - "loss": 0.0675, + "learning_rate": 2.831401245065764e-05, + "loss": 0.1283, "step": 19010 }, { "epoch": 0.89, - "learning_rate": 1.8310909005672497e-05, - "loss": 0.1508, + "learning_rate": 2.8313544380821618e-05, + "loss": 0.1046, "step": 19015 }, { "epoch": 0.89, - "learning_rate": 1.8310440204397357e-05, - "loss": 0.1821, + "learning_rate": 2.83130763109856e-05, + "loss": 0.1075, "step": 19020 }, { "epoch": 0.89, - "learning_rate": 1.8309971403122217e-05, - "loss": 0.2436, + "learning_rate": 2.831260824114958e-05, + "loss": 0.1584, "step": 19025 }, { "epoch": 0.89, - "learning_rate": 1.8309502601847077e-05, - "loss": 0.1228, + "learning_rate": 2.831214017131356e-05, + "loss": 0.1721, "step": 19030 }, { "epoch": 0.89, - "learning_rate": 1.8309033800571937e-05, - "loss": 0.2127, + "learning_rate": 2.831167210147754e-05, + "loss": 0.1674, "step": 19035 }, { "epoch": 0.89, - "learning_rate": 1.83085649992968e-05, - "loss": 0.1937, + "learning_rate": 2.831120403164152e-05, + "loss": 0.21, "step": 19040 }, { "epoch": 0.89, - "learning_rate": 1.830809619802166e-05, - "loss": 0.254, + "learning_rate": 2.83107359618055e-05, + "loss": 0.2495, "step": 19045 }, { "epoch": 0.89, - "learning_rate": 1.830762739674652e-05, - "loss": 0.5409, + "learning_rate": 2.831026789196948e-05, + "loss": 0.6011, "step": 19050 }, { "epoch": 0.89, - "learning_rate": 1.830715859547138e-05, - "loss": 0.252, + "learning_rate": 2.8309799822133464e-05, + "loss": 0.1744, "step": 19055 }, { "epoch": 0.89, - "learning_rate": 1.830668979419624e-05, - "loss": 0.0875, + "learning_rate": 2.8309331752297443e-05, + "loss": 0.0949, "step": 19060 }, { "epoch": 0.89, - "learning_rate": 1.8306220992921104e-05, - "loss": 0.1311, + "learning_rate": 2.8308863682461423e-05, + "loss": 0.0569, "step": 19065 }, { "epoch": 0.89, - "learning_rate": 1.8305752191645963e-05, - "loss": 0.1158, + "learning_rate": 2.8308395612625403e-05, + "loss": 0.109, "step": 19070 }, { "epoch": 0.89, - "learning_rate": 1.8305283390370823e-05, - "loss": 0.1872, + "learning_rate": 2.8307927542789386e-05, + "loss": 0.1346, "step": 19075 }, { "epoch": 0.89, - "learning_rate": 1.8304814589095683e-05, - "loss": 0.2072, + "learning_rate": 2.8307459472953366e-05, + "loss": 0.1163, "step": 19080 }, { "epoch": 0.89, - "learning_rate": 1.8304345787820543e-05, - "loss": 0.2102, + "learning_rate": 2.8306991403117346e-05, + "loss": 0.1234, "step": 19085 }, { "epoch": 0.89, - "learning_rate": 1.8303876986545407e-05, - "loss": 0.2013, + "learning_rate": 2.830652333328133e-05, + "loss": 0.2103, "step": 19090 }, { "epoch": 0.89, - "learning_rate": 1.8303408185270267e-05, - "loss": 0.2112, + "learning_rate": 2.830605526344531e-05, + "loss": 0.3757, "step": 19095 }, { "epoch": 0.89, - "learning_rate": 1.8302939383995126e-05, - "loss": 0.6385, + "learning_rate": 2.8305587193609285e-05, + "loss": 0.5541, "step": 19100 }, { "epoch": 0.89, - "learning_rate": 1.8302470582719986e-05, - "loss": 0.1278, + "learning_rate": 2.8305119123773265e-05, + "loss": 0.1623, "step": 19105 }, { "epoch": 0.89, - "learning_rate": 1.8302001781444846e-05, - "loss": 0.0642, + "learning_rate": 2.830465105393725e-05, + "loss": 0.0796, "step": 19110 }, { "epoch": 0.89, - "learning_rate": 1.8301532980169706e-05, - "loss": 0.0807, + "learning_rate": 2.8304182984101228e-05, + "loss": 0.1489, "step": 19115 }, { "epoch": 0.89, - "learning_rate": 1.8301064178894566e-05, - "loss": 0.1014, + "learning_rate": 2.8303714914265208e-05, + "loss": 0.0964, "step": 19120 }, { "epoch": 0.89, - "learning_rate": 1.830059537761943e-05, - "loss": 0.15, + "learning_rate": 2.8303246844429188e-05, + "loss": 0.1179, "step": 19125 }, { "epoch": 0.89, - "learning_rate": 1.830012657634429e-05, - "loss": 0.1112, + "learning_rate": 2.830277877459317e-05, + "loss": 0.2085, "step": 19130 }, { "epoch": 0.89, - "learning_rate": 1.829965777506915e-05, - "loss": 0.2095, + "learning_rate": 2.830231070475715e-05, + "loss": 0.168, "step": 19135 }, { "epoch": 0.89, - "learning_rate": 1.829918897379401e-05, - "loss": 0.1831, + "learning_rate": 2.830184263492113e-05, + "loss": 0.2215, "step": 19140 }, { "epoch": 0.89, - "learning_rate": 1.8298720172518873e-05, - "loss": 0.3499, + "learning_rate": 2.8301374565085114e-05, + "loss": 0.2907, "step": 19145 }, { "epoch": 0.89, - "learning_rate": 1.8298251371243733e-05, - "loss": 0.5991, + "learning_rate": 2.8300906495249094e-05, + "loss": 0.6909, "step": 19150 }, { "epoch": 0.89, - "learning_rate": 1.8297782569968592e-05, - "loss": 0.1876, + "learning_rate": 2.8300438425413073e-05, + "loss": 0.1187, "step": 19155 }, { "epoch": 0.89, - "learning_rate": 1.8297313768693452e-05, - "loss": 0.133, + "learning_rate": 2.8299970355577053e-05, + "loss": 0.0872, "step": 19160 }, { "epoch": 0.89, - "learning_rate": 1.8296844967418312e-05, - "loss": 0.1359, + "learning_rate": 2.8299502285741033e-05, + "loss": 0.1098, "step": 19165 }, { "epoch": 0.89, - "learning_rate": 1.8296376166143172e-05, - "loss": 0.1115, + "learning_rate": 2.8299034215905013e-05, + "loss": 0.1312, "step": 19170 }, { "epoch": 0.89, - "learning_rate": 1.8295907364868032e-05, - "loss": 0.1205, + "learning_rate": 2.8298566146068993e-05, + "loss": 0.0822, "step": 19175 }, { "epoch": 0.89, - "learning_rate": 1.8295438563592896e-05, - "loss": 0.1723, + "learning_rate": 2.8298098076232973e-05, + "loss": 0.1598, "step": 19180 }, { "epoch": 0.9, - "learning_rate": 1.8294969762317755e-05, - "loss": 0.2365, + "learning_rate": 2.8297630006396956e-05, + "loss": 0.152, "step": 19185 }, { "epoch": 0.9, - "learning_rate": 1.8294500961042615e-05, - "loss": 0.3599, + "learning_rate": 2.8297161936560936e-05, + "loss": 0.2295, "step": 19190 }, { "epoch": 0.9, - "learning_rate": 1.8294032159767475e-05, - "loss": 0.3155, + "learning_rate": 2.8296693866724915e-05, + "loss": 0.3206, "step": 19195 }, { "epoch": 0.9, - "learning_rate": 1.8293563358492335e-05, - "loss": 0.5404, + "learning_rate": 2.8296225796888895e-05, + "loss": 0.4797, "step": 19200 }, { "epoch": 0.9, - "learning_rate": 1.82930945572172e-05, - "loss": 0.24, + "learning_rate": 2.829575772705288e-05, + "loss": 0.1981, "step": 19205 }, { "epoch": 0.9, - "learning_rate": 1.829262575594206e-05, - "loss": 0.1003, + "learning_rate": 2.8295289657216858e-05, + "loss": 0.0623, "step": 19210 }, { "epoch": 0.9, - "learning_rate": 1.829215695466692e-05, - "loss": 0.1443, + "learning_rate": 2.8294821587380838e-05, + "loss": 0.1144, "step": 19215 }, { "epoch": 0.9, - "learning_rate": 1.829168815339178e-05, - "loss": 0.1095, + "learning_rate": 2.829435351754482e-05, + "loss": 0.1894, "step": 19220 }, { "epoch": 0.9, - "learning_rate": 1.8291219352116642e-05, - "loss": 0.133, + "learning_rate": 2.8293885447708798e-05, + "loss": 0.1042, "step": 19225 }, { "epoch": 0.9, - "learning_rate": 1.82907505508415e-05, - "loss": 0.1638, + "learning_rate": 2.8293417377872778e-05, + "loss": 0.1333, "step": 19230 }, { "epoch": 0.9, - "learning_rate": 1.829028174956636e-05, - "loss": 0.2031, + "learning_rate": 2.8292949308036757e-05, + "loss": 0.1473, "step": 19235 }, { "epoch": 0.9, - "learning_rate": 1.828981294829122e-05, - "loss": 0.3121, + "learning_rate": 2.829248123820074e-05, + "loss": 0.1897, "step": 19240 }, { "epoch": 0.9, - "learning_rate": 1.828934414701608e-05, - "loss": 0.2641, + "learning_rate": 2.829201316836472e-05, + "loss": 0.4412, "step": 19245 }, { "epoch": 0.9, - "learning_rate": 1.828887534574094e-05, - "loss": 0.3234, + "learning_rate": 2.82915450985287e-05, + "loss": 0.5611, "step": 19250 }, { "epoch": 0.9, - "learning_rate": 1.82884065444658e-05, - "loss": 0.2048, + "learning_rate": 2.829107702869268e-05, + "loss": 0.2042, "step": 19255 }, { "epoch": 0.9, - "learning_rate": 1.828793774319066e-05, - "loss": 0.0689, + "learning_rate": 2.8290608958856663e-05, + "loss": 0.0395, "step": 19260 }, { "epoch": 0.9, - "learning_rate": 1.828746894191552e-05, - "loss": 0.1006, + "learning_rate": 2.8290140889020643e-05, + "loss": 0.1283, "step": 19265 }, { "epoch": 0.9, - "learning_rate": 1.8287000140640385e-05, - "loss": 0.0911, + "learning_rate": 2.8289672819184623e-05, + "loss": 0.0516, "step": 19270 }, { "epoch": 0.9, - "learning_rate": 1.8286531339365244e-05, - "loss": 0.2028, + "learning_rate": 2.8289204749348606e-05, + "loss": 0.0904, "step": 19275 }, { "epoch": 0.9, - "learning_rate": 1.8286062538090104e-05, - "loss": 0.1203, + "learning_rate": 2.8288736679512586e-05, + "loss": 0.1615, "step": 19280 }, { "epoch": 0.9, - "learning_rate": 1.8285593736814968e-05, - "loss": 0.1986, + "learning_rate": 2.8288268609676566e-05, + "loss": 0.2146, "step": 19285 }, { "epoch": 0.9, - "learning_rate": 1.8285124935539828e-05, - "loss": 0.2303, + "learning_rate": 2.8287800539840542e-05, + "loss": 0.2287, "step": 19290 }, { "epoch": 0.9, - "learning_rate": 1.8284656134264688e-05, - "loss": 0.2265, + "learning_rate": 2.8287332470004525e-05, + "loss": 0.2794, "step": 19295 }, { "epoch": 0.9, - "learning_rate": 1.8284187332989548e-05, - "loss": 0.6002, + "learning_rate": 2.8286864400168505e-05, + "loss": 0.3963, "step": 19300 }, { "epoch": 0.9, - "learning_rate": 1.8283718531714407e-05, - "loss": 0.1912, + "learning_rate": 2.8286396330332485e-05, + "loss": 0.2743, "step": 19305 }, { "epoch": 0.9, - "learning_rate": 1.8283249730439267e-05, - "loss": 0.0834, + "learning_rate": 2.8285928260496465e-05, + "loss": 0.0492, "step": 19310 }, { "epoch": 0.9, - "learning_rate": 1.8282780929164127e-05, - "loss": 0.1226, + "learning_rate": 2.8285460190660448e-05, + "loss": 0.1209, "step": 19315 }, { "epoch": 0.9, - "learning_rate": 1.828231212788899e-05, - "loss": 0.201, + "learning_rate": 2.8284992120824428e-05, + "loss": 0.091, "step": 19320 }, { "epoch": 0.9, - "learning_rate": 1.828184332661385e-05, - "loss": 0.0701, + "learning_rate": 2.8284524050988408e-05, + "loss": 0.1206, "step": 19325 }, { "epoch": 0.9, - "learning_rate": 1.828137452533871e-05, - "loss": 0.2523, + "learning_rate": 2.828405598115239e-05, + "loss": 0.1549, "step": 19330 }, { "epoch": 0.9, - "learning_rate": 1.828090572406357e-05, - "loss": 0.1423, + "learning_rate": 2.828358791131637e-05, + "loss": 0.2142, "step": 19335 }, { "epoch": 0.9, - "learning_rate": 1.828043692278843e-05, - "loss": 0.2959, + "learning_rate": 2.828311984148035e-05, + "loss": 0.1949, "step": 19340 }, { "epoch": 0.9, - "learning_rate": 1.827996812151329e-05, - "loss": 0.3732, + "learning_rate": 2.828265177164433e-05, + "loss": 0.2274, "step": 19345 }, { "epoch": 0.9, - "learning_rate": 1.8279499320238154e-05, - "loss": 0.4631, + "learning_rate": 2.8282183701808313e-05, + "loss": 0.3556, "step": 19350 }, { "epoch": 0.9, - "learning_rate": 1.8279030518963014e-05, - "loss": 0.1664, + "learning_rate": 2.828171563197229e-05, + "loss": 0.1303, "step": 19355 }, { "epoch": 0.9, - "learning_rate": 1.8278561717687873e-05, - "loss": 0.1044, + "learning_rate": 2.828124756213627e-05, + "loss": 0.1132, "step": 19360 }, { "epoch": 0.9, - "learning_rate": 1.8278092916412737e-05, - "loss": 0.1303, + "learning_rate": 2.828077949230025e-05, + "loss": 0.1327, "step": 19365 }, { "epoch": 0.9, - "learning_rate": 1.8277624115137597e-05, - "loss": 0.1096, + "learning_rate": 2.8280311422464233e-05, + "loss": 0.0916, "step": 19370 }, { "epoch": 0.9, - "learning_rate": 1.8277155313862457e-05, - "loss": 0.1389, + "learning_rate": 2.8279843352628213e-05, + "loss": 0.1513, "step": 19375 }, { "epoch": 0.9, - "learning_rate": 1.8276686512587317e-05, - "loss": 0.1583, + "learning_rate": 2.8279375282792192e-05, + "loss": 0.124, "step": 19380 }, { "epoch": 0.9, - "learning_rate": 1.8276217711312177e-05, - "loss": 0.2356, + "learning_rate": 2.8278907212956176e-05, + "loss": 0.1891, "step": 19385 }, { "epoch": 0.9, - "learning_rate": 1.8275748910037036e-05, - "loss": 0.2076, + "learning_rate": 2.8278439143120155e-05, + "loss": 0.1877, "step": 19390 }, { "epoch": 0.9, - "learning_rate": 1.8275280108761896e-05, - "loss": 0.2269, + "learning_rate": 2.8277971073284135e-05, + "loss": 0.2175, "step": 19395 }, { "epoch": 0.91, - "learning_rate": 1.8274811307486756e-05, - "loss": 0.3307, + "learning_rate": 2.8277503003448115e-05, + "loss": 0.4662, "step": 19400 }, { "epoch": 0.91, - "learning_rate": 1.8274342506211616e-05, - "loss": 0.2044, + "learning_rate": 2.8277034933612098e-05, + "loss": 0.1913, "step": 19405 }, { "epoch": 0.91, - "learning_rate": 1.8273873704936476e-05, - "loss": 0.0698, + "learning_rate": 2.8276566863776078e-05, + "loss": 0.0924, "step": 19410 }, { "epoch": 0.91, - "learning_rate": 1.827340490366134e-05, - "loss": 0.1467, + "learning_rate": 2.8276098793940054e-05, + "loss": 0.1196, "step": 19415 }, { "epoch": 0.91, - "learning_rate": 1.82729361023862e-05, - "loss": 0.1113, + "learning_rate": 2.8275630724104034e-05, + "loss": 0.151, "step": 19420 }, { "epoch": 0.91, - "learning_rate": 1.827246730111106e-05, - "loss": 0.1657, + "learning_rate": 2.8275162654268018e-05, + "loss": 0.0961, "step": 19425 }, { "epoch": 0.91, - "learning_rate": 1.8271998499835923e-05, - "loss": 0.1277, + "learning_rate": 2.8274694584431997e-05, + "loss": 0.1317, "step": 19430 }, { "epoch": 0.91, - "learning_rate": 1.8271529698560783e-05, - "loss": 0.1894, + "learning_rate": 2.8274226514595977e-05, + "loss": 0.1752, "step": 19435 }, { "epoch": 0.91, - "learning_rate": 1.8271060897285643e-05, - "loss": 0.1985, + "learning_rate": 2.8273758444759957e-05, + "loss": 0.2372, "step": 19440 }, { "epoch": 0.91, - "learning_rate": 1.8270592096010503e-05, - "loss": 0.3031, + "learning_rate": 2.827329037492394e-05, + "loss": 0.2799, "step": 19445 }, { "epoch": 0.91, - "learning_rate": 1.8270123294735362e-05, - "loss": 0.5417, + "learning_rate": 2.827282230508792e-05, + "loss": 0.5546, "step": 19450 }, { "epoch": 0.91, - "learning_rate": 1.8269654493460222e-05, - "loss": 0.1623, + "learning_rate": 2.82723542352519e-05, + "loss": 0.1655, "step": 19455 }, { "epoch": 0.91, - "learning_rate": 1.8269185692185086e-05, - "loss": 0.0407, + "learning_rate": 2.8271886165415883e-05, + "loss": 0.0895, "step": 19460 }, { "epoch": 0.91, - "learning_rate": 1.8268716890909946e-05, - "loss": 0.0865, + "learning_rate": 2.8271418095579863e-05, + "loss": 0.1524, "step": 19465 }, { "epoch": 0.91, - "learning_rate": 1.8268248089634806e-05, - "loss": 0.1004, + "learning_rate": 2.8270950025743843e-05, + "loss": 0.1442, "step": 19470 }, { "epoch": 0.91, - "learning_rate": 1.8267779288359666e-05, - "loss": 0.1137, + "learning_rate": 2.8270481955907822e-05, + "loss": 0.223, "step": 19475 }, { "epoch": 0.91, - "learning_rate": 1.8267310487084525e-05, - "loss": 0.1354, + "learning_rate": 2.8270013886071802e-05, + "loss": 0.175, "step": 19480 }, { "epoch": 0.91, - "learning_rate": 1.8266841685809385e-05, - "loss": 0.2164, + "learning_rate": 2.8269545816235782e-05, + "loss": 0.2154, "step": 19485 }, { "epoch": 0.91, - "learning_rate": 1.8266372884534245e-05, - "loss": 0.2859, + "learning_rate": 2.8269077746399762e-05, + "loss": 0.3557, "step": 19490 }, { "epoch": 0.91, - "learning_rate": 1.826590408325911e-05, - "loss": 0.3575, + "learning_rate": 2.8268609676563742e-05, + "loss": 0.2562, "step": 19495 }, { "epoch": 0.91, - "learning_rate": 1.826543528198397e-05, - "loss": 0.5411, + "learning_rate": 2.8268141606727725e-05, + "loss": 0.4082, "step": 19500 }, { "epoch": 0.91, - "learning_rate": 1.826496648070883e-05, - "loss": 0.2238, + "learning_rate": 2.8267673536891705e-05, + "loss": 0.2267, "step": 19505 }, { "epoch": 0.91, - "learning_rate": 1.8264497679433692e-05, - "loss": 0.0554, + "learning_rate": 2.8267205467055685e-05, + "loss": 0.0698, "step": 19510 }, { "epoch": 0.91, - "learning_rate": 1.8264028878158552e-05, - "loss": 0.1204, + "learning_rate": 2.8266737397219668e-05, + "loss": 0.1245, "step": 19515 }, { "epoch": 0.91, - "learning_rate": 1.826356007688341e-05, - "loss": 0.1493, + "learning_rate": 2.8266269327383648e-05, + "loss": 0.1111, "step": 19520 }, { "epoch": 0.91, - "learning_rate": 1.826309127560827e-05, - "loss": 0.1468, + "learning_rate": 2.8265801257547627e-05, + "loss": 0.1291, "step": 19525 }, { "epoch": 0.91, - "learning_rate": 1.826262247433313e-05, - "loss": 0.2244, + "learning_rate": 2.8265333187711607e-05, + "loss": 0.1354, "step": 19530 }, { "epoch": 0.91, - "learning_rate": 1.826215367305799e-05, - "loss": 0.1374, + "learning_rate": 2.826486511787559e-05, + "loss": 0.1767, "step": 19535 }, { "epoch": 0.91, - "learning_rate": 1.826168487178285e-05, - "loss": 0.1748, + "learning_rate": 2.826439704803957e-05, + "loss": 0.1922, "step": 19540 }, { "epoch": 0.91, - "learning_rate": 1.826121607050771e-05, - "loss": 0.2261, + "learning_rate": 2.8263928978203547e-05, + "loss": 0.2851, "step": 19545 }, { "epoch": 0.91, - "learning_rate": 1.8260747269232575e-05, - "loss": 0.4271, + "learning_rate": 2.8263460908367527e-05, + "loss": 0.4096, "step": 19550 }, { "epoch": 0.91, - "learning_rate": 1.8260278467957435e-05, - "loss": 0.1653, + "learning_rate": 2.826299283853151e-05, + "loss": 0.1669, "step": 19555 }, { "epoch": 0.91, - "learning_rate": 1.8259809666682295e-05, - "loss": 0.0668, + "learning_rate": 2.826252476869549e-05, + "loss": 0.0812, "step": 19560 }, { "epoch": 0.91, - "learning_rate": 1.8259340865407154e-05, - "loss": 0.1128, + "learning_rate": 2.826205669885947e-05, + "loss": 0.1695, "step": 19565 }, { "epoch": 0.91, - "learning_rate": 1.8258872064132014e-05, - "loss": 0.3179, + "learning_rate": 2.8261588629023453e-05, + "loss": 0.1234, "step": 19570 }, { "epoch": 0.91, - "learning_rate": 1.8258403262856878e-05, - "loss": 0.1556, + "learning_rate": 2.8261120559187432e-05, + "loss": 0.0983, "step": 19575 }, { "epoch": 0.91, - "learning_rate": 1.8257934461581738e-05, - "loss": 0.2226, + "learning_rate": 2.8260652489351412e-05, + "loss": 0.1572, "step": 19580 }, { "epoch": 0.91, - "learning_rate": 1.8257465660306598e-05, - "loss": 0.2039, + "learning_rate": 2.8260184419515392e-05, + "loss": 0.1783, "step": 19585 }, { "epoch": 0.91, - "learning_rate": 1.8256996859031458e-05, - "loss": 0.3087, + "learning_rate": 2.8259716349679375e-05, + "loss": 0.2718, "step": 19590 }, { "epoch": 0.91, - "learning_rate": 1.8256528057756317e-05, - "loss": 0.3327, + "learning_rate": 2.8259248279843355e-05, + "loss": 0.2048, "step": 19595 }, { "epoch": 0.91, - "learning_rate": 1.825605925648118e-05, - "loss": 0.4217, + "learning_rate": 2.8258780210007335e-05, + "loss": 0.6031, "step": 19600 }, { "epoch": 0.91, - "learning_rate": 1.825559045520604e-05, - "loss": 0.1834, + "learning_rate": 2.825831214017131e-05, + "loss": 0.1302, "step": 19605 }, { "epoch": 0.92, - "learning_rate": 1.82551216539309e-05, - "loss": 0.0819, + "learning_rate": 2.8257844070335294e-05, + "loss": 0.0921, "step": 19610 }, { "epoch": 0.92, - "learning_rate": 1.825465285265576e-05, - "loss": 0.1225, + "learning_rate": 2.8257376000499274e-05, + "loss": 0.1714, "step": 19615 }, { "epoch": 0.92, - "learning_rate": 1.825418405138062e-05, - "loss": 0.0862, + "learning_rate": 2.8256907930663254e-05, + "loss": 0.088, "step": 19620 }, { "epoch": 0.92, - "learning_rate": 1.825371525010548e-05, - "loss": 0.2495, + "learning_rate": 2.8256439860827234e-05, + "loss": 0.1346, "step": 19625 }, { "epoch": 0.92, - "learning_rate": 1.825324644883034e-05, - "loss": 0.1874, + "learning_rate": 2.8255971790991217e-05, + "loss": 0.121, "step": 19630 }, { "epoch": 0.92, - "learning_rate": 1.8252777647555204e-05, - "loss": 0.2202, + "learning_rate": 2.8255503721155197e-05, + "loss": 0.2561, "step": 19635 }, { "epoch": 0.92, - "learning_rate": 1.8252308846280064e-05, - "loss": 0.2002, + "learning_rate": 2.8255035651319177e-05, + "loss": 0.1892, "step": 19640 }, { "epoch": 0.92, - "learning_rate": 1.8251840045004924e-05, - "loss": 0.2528, + "learning_rate": 2.825456758148316e-05, + "loss": 0.309, "step": 19645 }, { "epoch": 0.92, - "learning_rate": 1.8251371243729784e-05, - "loss": 0.4702, + "learning_rate": 2.825409951164714e-05, + "loss": 0.5683, "step": 19650 }, { "epoch": 0.92, - "learning_rate": 1.8250902442454647e-05, - "loss": 0.2313, + "learning_rate": 2.825363144181112e-05, + "loss": 0.2245, "step": 19655 }, { "epoch": 0.92, - "learning_rate": 1.8250433641179507e-05, - "loss": 0.0705, + "learning_rate": 2.82531633719751e-05, + "loss": 0.0395, "step": 19660 }, { "epoch": 0.92, - "learning_rate": 1.8249964839904367e-05, - "loss": 0.0752, + "learning_rate": 2.8252695302139083e-05, + "loss": 0.0715, "step": 19665 }, { "epoch": 0.92, - "learning_rate": 1.8249496038629227e-05, - "loss": 0.1123, + "learning_rate": 2.825222723230306e-05, + "loss": 0.064, "step": 19670 }, { "epoch": 0.92, - "learning_rate": 1.8249027237354087e-05, - "loss": 0.1416, + "learning_rate": 2.825175916246704e-05, + "loss": 0.1127, "step": 19675 }, { "epoch": 0.92, - "learning_rate": 1.8248558436078947e-05, - "loss": 0.2429, + "learning_rate": 2.825129109263102e-05, + "loss": 0.2057, "step": 19680 }, { "epoch": 0.92, - "learning_rate": 1.8248089634803806e-05, - "loss": 0.2266, + "learning_rate": 2.8250823022795002e-05, + "loss": 0.2353, "step": 19685 }, { "epoch": 0.92, - "learning_rate": 1.824762083352867e-05, - "loss": 0.2297, + "learning_rate": 2.8250354952958982e-05, + "loss": 0.1715, "step": 19690 }, { "epoch": 0.92, - "learning_rate": 1.824715203225353e-05, - "loss": 0.3014, + "learning_rate": 2.824988688312296e-05, + "loss": 0.2828, "step": 19695 }, { "epoch": 0.92, - "learning_rate": 1.824668323097839e-05, - "loss": 0.4253, + "learning_rate": 2.8249418813286945e-05, + "loss": 0.4615, "step": 19700 }, { "epoch": 0.92, - "learning_rate": 1.824621442970325e-05, - "loss": 0.2434, + "learning_rate": 2.8248950743450925e-05, + "loss": 0.2102, "step": 19705 }, { "epoch": 0.92, - "learning_rate": 1.824574562842811e-05, - "loss": 0.0985, + "learning_rate": 2.8248482673614904e-05, + "loss": 0.1068, "step": 19710 }, { "epoch": 0.92, - "learning_rate": 1.8245276827152973e-05, - "loss": 0.1376, + "learning_rate": 2.8248014603778884e-05, + "loss": 0.0895, "step": 19715 }, { "epoch": 0.92, - "learning_rate": 1.8244808025877833e-05, - "loss": 0.1281, + "learning_rate": 2.8247546533942867e-05, + "loss": 0.1568, "step": 19720 }, { "epoch": 0.92, - "learning_rate": 1.8244339224602693e-05, - "loss": 0.155, + "learning_rate": 2.8247078464106847e-05, + "loss": 0.1136, "step": 19725 }, { "epoch": 0.92, - "learning_rate": 1.8243870423327553e-05, - "loss": 0.1448, + "learning_rate": 2.8246610394270827e-05, + "loss": 0.1937, "step": 19730 }, { "epoch": 0.92, - "learning_rate": 1.8243401622052413e-05, - "loss": 0.1762, + "learning_rate": 2.8246142324434803e-05, + "loss": 0.195, "step": 19735 }, { "epoch": 0.92, - "learning_rate": 1.8242932820777276e-05, - "loss": 0.3514, + "learning_rate": 2.8245674254598787e-05, + "loss": 0.1874, "step": 19740 }, { "epoch": 0.92, - "learning_rate": 1.8242464019502136e-05, - "loss": 0.3415, + "learning_rate": 2.8245206184762766e-05, + "loss": 0.151, "step": 19745 }, { "epoch": 0.92, - "learning_rate": 1.8241995218226996e-05, - "loss": 0.4549, + "learning_rate": 2.8244738114926746e-05, + "loss": 0.4252, "step": 19750 }, { "epoch": 0.92, - "learning_rate": 1.8241526416951856e-05, - "loss": 0.2531, + "learning_rate": 2.824427004509073e-05, + "loss": 0.1778, "step": 19755 }, { "epoch": 0.92, - "learning_rate": 1.8241057615676716e-05, - "loss": 0.0812, + "learning_rate": 2.824380197525471e-05, + "loss": 0.0692, "step": 19760 }, { "epoch": 0.92, - "learning_rate": 1.8240588814401576e-05, - "loss": 0.0823, + "learning_rate": 2.824333390541869e-05, + "loss": 0.0619, "step": 19765 }, { "epoch": 0.92, - "learning_rate": 1.8240120013126435e-05, - "loss": 0.0879, + "learning_rate": 2.824286583558267e-05, + "loss": 0.0554, "step": 19770 }, { "epoch": 0.92, - "learning_rate": 1.8239651211851295e-05, - "loss": 0.0641, + "learning_rate": 2.8242397765746652e-05, + "loss": 0.1056, "step": 19775 }, { "epoch": 0.92, - "learning_rate": 1.823918241057616e-05, - "loss": 0.1483, + "learning_rate": 2.8241929695910632e-05, + "loss": 0.1334, "step": 19780 }, { "epoch": 0.92, - "learning_rate": 1.823871360930102e-05, - "loss": 0.1822, + "learning_rate": 2.8241461626074612e-05, + "loss": 0.2277, "step": 19785 }, { "epoch": 0.92, - "learning_rate": 1.823824480802588e-05, - "loss": 0.2754, + "learning_rate": 2.824099355623859e-05, + "loss": 0.2113, "step": 19790 }, { "epoch": 0.92, - "learning_rate": 1.8237776006750742e-05, - "loss": 0.3038, + "learning_rate": 2.824052548640257e-05, + "loss": 0.3137, "step": 19795 }, { "epoch": 0.92, - "learning_rate": 1.8237307205475602e-05, - "loss": 0.3856, + "learning_rate": 2.824005741656655e-05, + "loss": 0.6677, "step": 19800 }, { "epoch": 0.92, - "learning_rate": 1.8236838404200462e-05, - "loss": 0.2713, + "learning_rate": 2.823958934673053e-05, + "loss": 0.1618, "step": 19805 }, { "epoch": 0.92, - "learning_rate": 1.8236369602925322e-05, - "loss": 0.0589, + "learning_rate": 2.823912127689451e-05, + "loss": 0.0787, "step": 19810 }, { "epoch": 0.92, - "learning_rate": 1.823590080165018e-05, - "loss": 0.0968, + "learning_rate": 2.8238653207058494e-05, + "loss": 0.0507, "step": 19815 }, { "epoch": 0.92, - "learning_rate": 1.823543200037504e-05, - "loss": 0.0974, + "learning_rate": 2.8238185137222474e-05, + "loss": 0.0941, "step": 19820 }, { "epoch": 0.93, - "learning_rate": 1.82349631990999e-05, - "loss": 0.119, + "learning_rate": 2.8237717067386454e-05, + "loss": 0.1134, "step": 19825 }, { "epoch": 0.93, - "learning_rate": 1.8234494397824765e-05, - "loss": 0.1067, + "learning_rate": 2.8237248997550437e-05, + "loss": 0.1516, "step": 19830 }, { "epoch": 0.93, - "learning_rate": 1.8234025596549625e-05, - "loss": 0.1, + "learning_rate": 2.8236780927714417e-05, + "loss": 0.159, "step": 19835 }, { "epoch": 0.93, - "learning_rate": 1.8233556795274485e-05, - "loss": 0.1671, + "learning_rate": 2.8236312857878397e-05, + "loss": 0.221, "step": 19840 }, { "epoch": 0.93, - "learning_rate": 1.8233087993999345e-05, - "loss": 0.2918, + "learning_rate": 2.8235844788042376e-05, + "loss": 0.2901, "step": 19845 }, { "epoch": 0.93, - "learning_rate": 1.8232619192724205e-05, - "loss": 0.6152, + "learning_rate": 2.823537671820636e-05, + "loss": 0.5143, "step": 19850 }, { "epoch": 0.93, - "learning_rate": 1.8232150391449065e-05, - "loss": 0.1955, + "learning_rate": 2.823490864837034e-05, + "loss": 0.1777, "step": 19855 }, { "epoch": 0.93, - "learning_rate": 1.8231681590173928e-05, - "loss": 0.0708, + "learning_rate": 2.8234440578534316e-05, + "loss": 0.118, "step": 19860 }, { "epoch": 0.93, - "learning_rate": 1.8231212788898788e-05, - "loss": 0.0908, + "learning_rate": 2.8233972508698296e-05, + "loss": 0.087, "step": 19865 }, { "epoch": 0.93, - "learning_rate": 1.8230743987623648e-05, - "loss": 0.1555, + "learning_rate": 2.823350443886228e-05, + "loss": 0.0556, "step": 19870 }, { "epoch": 0.93, - "learning_rate": 1.823027518634851e-05, - "loss": 0.1415, + "learning_rate": 2.823303636902626e-05, + "loss": 0.0683, "step": 19875 }, { "epoch": 0.93, - "learning_rate": 1.822980638507337e-05, - "loss": 0.1305, + "learning_rate": 2.823256829919024e-05, + "loss": 0.1517, "step": 19880 }, { "epoch": 0.93, - "learning_rate": 1.822933758379823e-05, - "loss": 0.2142, + "learning_rate": 2.8232100229354222e-05, + "loss": 0.1644, "step": 19885 }, { "epoch": 0.93, - "learning_rate": 1.822886878252309e-05, - "loss": 0.2684, + "learning_rate": 2.82316321595182e-05, + "loss": 0.2846, "step": 19890 }, { "epoch": 0.93, - "learning_rate": 1.822839998124795e-05, - "loss": 0.2105, + "learning_rate": 2.823116408968218e-05, + "loss": 0.1794, "step": 19895 }, { "epoch": 0.93, - "learning_rate": 1.822793117997281e-05, - "loss": 0.4126, + "learning_rate": 2.823069601984616e-05, + "loss": 0.5903, "step": 19900 }, { "epoch": 0.93, - "learning_rate": 1.822746237869767e-05, - "loss": 0.2058, + "learning_rate": 2.8230227950010144e-05, + "loss": 0.2026, "step": 19905 }, { "epoch": 0.93, - "learning_rate": 1.822699357742253e-05, - "loss": 0.0673, + "learning_rate": 2.8229759880174124e-05, + "loss": 0.0524, "step": 19910 }, { "epoch": 0.93, - "learning_rate": 1.822652477614739e-05, - "loss": 0.1449, + "learning_rate": 2.8229291810338104e-05, + "loss": 0.0721, "step": 19915 }, { "epoch": 0.93, - "learning_rate": 1.822605597487225e-05, - "loss": 0.0786, + "learning_rate": 2.8228823740502084e-05, + "loss": 0.1443, "step": 19920 }, { "epoch": 0.93, - "learning_rate": 1.8225587173597114e-05, - "loss": 0.1939, + "learning_rate": 2.8228355670666064e-05, + "loss": 0.1695, "step": 19925 }, { "epoch": 0.93, - "learning_rate": 1.8225118372321974e-05, - "loss": 0.1068, + "learning_rate": 2.8227887600830043e-05, + "loss": 0.1654, "step": 19930 }, { "epoch": 0.93, - "learning_rate": 1.8224649571046834e-05, - "loss": 0.1644, + "learning_rate": 2.8227419530994023e-05, + "loss": 0.1993, "step": 19935 }, { "epoch": 0.93, - "learning_rate": 1.8224180769771697e-05, - "loss": 0.1876, + "learning_rate": 2.8226951461158006e-05, + "loss": 0.1515, "step": 19940 }, { "epoch": 0.93, - "learning_rate": 1.8223711968496557e-05, - "loss": 0.2146, + "learning_rate": 2.8226483391321986e-05, + "loss": 0.2342, "step": 19945 }, { "epoch": 0.93, - "learning_rate": 1.8223243167221417e-05, - "loss": 0.4408, + "learning_rate": 2.8226015321485966e-05, + "loss": 0.4621, "step": 19950 }, { "epoch": 0.93, - "learning_rate": 1.8222774365946277e-05, - "loss": 0.1771, + "learning_rate": 2.8225547251649946e-05, + "loss": 0.1994, "step": 19955 }, { "epoch": 0.93, - "learning_rate": 1.8222305564671137e-05, - "loss": 0.0614, + "learning_rate": 2.822507918181393e-05, + "loss": 0.0835, "step": 19960 }, { "epoch": 0.93, - "learning_rate": 1.8221836763395997e-05, - "loss": 0.0667, + "learning_rate": 2.822461111197791e-05, + "loss": 0.0973, "step": 19965 }, { "epoch": 0.93, - "learning_rate": 1.822136796212086e-05, - "loss": 0.1274, + "learning_rate": 2.822414304214189e-05, + "loss": 0.0795, "step": 19970 }, { "epoch": 0.93, - "learning_rate": 1.822089916084572e-05, - "loss": 0.128, + "learning_rate": 2.822367497230587e-05, + "loss": 0.1491, "step": 19975 }, { "epoch": 0.93, - "learning_rate": 1.822043035957058e-05, - "loss": 0.1434, + "learning_rate": 2.8223206902469852e-05, + "loss": 0.1642, "step": 19980 }, { "epoch": 0.93, - "learning_rate": 1.821996155829544e-05, - "loss": 0.2268, + "learning_rate": 2.8222738832633828e-05, + "loss": 0.135, "step": 19985 }, { "epoch": 0.93, - "learning_rate": 1.82194927570203e-05, - "loss": 0.2602, + "learning_rate": 2.8222270762797808e-05, + "loss": 0.2358, "step": 19990 }, { "epoch": 0.93, - "learning_rate": 1.821902395574516e-05, - "loss": 0.2319, + "learning_rate": 2.822180269296179e-05, + "loss": 0.1661, "step": 19995 }, { "epoch": 0.93, - "learning_rate": 1.821855515447002e-05, - "loss": 0.624, + "learning_rate": 2.822133462312577e-05, + "loss": 0.4391, "step": 20000 }, { "epoch": 0.93, - "learning_rate": 1.8218086353194883e-05, - "loss": 0.1856, + "learning_rate": 2.822086655328975e-05, + "loss": 0.1482, "step": 20005 }, { "epoch": 0.93, - "learning_rate": 1.8217617551919743e-05, - "loss": 0.0824, + "learning_rate": 2.822039848345373e-05, + "loss": 0.0925, "step": 20010 }, { "epoch": 0.93, - "learning_rate": 1.8217148750644603e-05, - "loss": 0.0766, + "learning_rate": 2.8219930413617714e-05, + "loss": 0.0681, "step": 20015 }, { "epoch": 0.93, - "learning_rate": 1.8216679949369466e-05, - "loss": 0.1607, + "learning_rate": 2.8219462343781694e-05, + "loss": 0.082, "step": 20020 }, { "epoch": 0.93, - "learning_rate": 1.8216211148094326e-05, - "loss": 0.1132, + "learning_rate": 2.8218994273945674e-05, + "loss": 0.11, "step": 20025 }, { "epoch": 0.93, - "learning_rate": 1.8215742346819186e-05, - "loss": 0.1429, + "learning_rate": 2.8218526204109653e-05, + "loss": 0.1559, "step": 20030 }, { "epoch": 0.93, - "learning_rate": 1.8215273545544046e-05, - "loss": 0.1928, + "learning_rate": 2.8218058134273637e-05, + "loss": 0.1582, "step": 20035 }, { "epoch": 0.94, - "learning_rate": 1.8214804744268906e-05, - "loss": 0.2072, + "learning_rate": 2.8217590064437616e-05, + "loss": 0.1825, "step": 20040 }, { "epoch": 0.94, - "learning_rate": 1.8214335942993766e-05, - "loss": 0.3352, + "learning_rate": 2.8217121994601596e-05, + "loss": 0.347, "step": 20045 }, { "epoch": 0.94, - "learning_rate": 1.8213867141718626e-05, - "loss": 0.5031, + "learning_rate": 2.8216653924765573e-05, + "loss": 0.5309, "step": 20050 }, { "epoch": 0.94, - "learning_rate": 1.8213398340443486e-05, - "loss": 0.2016, + "learning_rate": 2.8216185854929556e-05, + "loss": 0.1357, "step": 20055 }, { "epoch": 0.94, - "learning_rate": 1.8212929539168346e-05, - "loss": 0.0557, + "learning_rate": 2.8215717785093536e-05, + "loss": 0.0813, "step": 20060 }, { "epoch": 0.94, - "learning_rate": 1.821246073789321e-05, - "loss": 0.0958, + "learning_rate": 2.8215249715257515e-05, + "loss": 0.0771, "step": 20065 }, { "epoch": 0.94, - "learning_rate": 1.821199193661807e-05, - "loss": 0.0978, + "learning_rate": 2.82147816454215e-05, + "loss": 0.1573, "step": 20070 }, { "epoch": 0.94, - "learning_rate": 1.821152313534293e-05, - "loss": 0.0867, + "learning_rate": 2.821431357558548e-05, + "loss": 0.1563, "step": 20075 }, { "epoch": 0.94, - "learning_rate": 1.8211054334067792e-05, - "loss": 0.147, + "learning_rate": 2.821384550574946e-05, + "loss": 0.1515, "step": 20080 }, { "epoch": 0.94, - "learning_rate": 1.8210585532792652e-05, - "loss": 0.1557, + "learning_rate": 2.8213377435913438e-05, + "loss": 0.1877, "step": 20085 }, { "epoch": 0.94, - "learning_rate": 1.8210116731517512e-05, - "loss": 0.2568, + "learning_rate": 2.821290936607742e-05, + "loss": 0.2433, "step": 20090 }, { "epoch": 0.94, - "learning_rate": 1.8209647930242372e-05, - "loss": 0.3529, + "learning_rate": 2.82124412962414e-05, + "loss": 0.2405, "step": 20095 }, { "epoch": 0.94, - "learning_rate": 1.8209179128967232e-05, - "loss": 0.491, + "learning_rate": 2.821197322640538e-05, + "loss": 0.4775, "step": 20100 }, { "epoch": 0.94, - "learning_rate": 1.820871032769209e-05, - "loss": 0.1841, + "learning_rate": 2.821150515656936e-05, + "loss": 0.209, "step": 20105 }, { "epoch": 0.94, - "learning_rate": 1.8208241526416955e-05, - "loss": 0.033, + "learning_rate": 2.821103708673334e-05, + "loss": 0.0433, "step": 20110 }, { "epoch": 0.94, - "learning_rate": 1.8207772725141815e-05, - "loss": 0.0934, + "learning_rate": 2.821056901689732e-05, + "loss": 0.1083, "step": 20115 }, { "epoch": 0.94, - "learning_rate": 1.8207303923866675e-05, - "loss": 0.059, + "learning_rate": 2.82101009470613e-05, + "loss": 0.139, "step": 20120 }, { "epoch": 0.94, - "learning_rate": 1.8206835122591535e-05, - "loss": 0.1281, + "learning_rate": 2.8209632877225283e-05, + "loss": 0.1155, "step": 20125 }, { "epoch": 0.94, - "learning_rate": 1.8206366321316395e-05, - "loss": 0.1659, + "learning_rate": 2.8209164807389263e-05, + "loss": 0.1707, "step": 20130 }, { "epoch": 0.94, - "learning_rate": 1.8205897520041255e-05, - "loss": 0.1677, + "learning_rate": 2.8208696737553243e-05, + "loss": 0.183, "step": 20135 }, { "epoch": 0.94, - "learning_rate": 1.8205428718766115e-05, - "loss": 0.27, + "learning_rate": 2.8208228667717223e-05, + "loss": 0.1512, "step": 20140 }, { "epoch": 0.94, - "learning_rate": 1.8204959917490978e-05, - "loss": 0.2848, + "learning_rate": 2.8207760597881206e-05, + "loss": 0.2248, "step": 20145 }, { "epoch": 0.94, - "learning_rate": 1.8204491116215838e-05, - "loss": 0.5694, + "learning_rate": 2.8207292528045186e-05, + "loss": 0.5306, "step": 20150 }, { "epoch": 0.94, - "learning_rate": 1.8204022314940698e-05, - "loss": 0.2125, + "learning_rate": 2.8206824458209166e-05, + "loss": 0.2311, "step": 20155 }, { "epoch": 0.94, - "learning_rate": 1.820355351366556e-05, - "loss": 0.0379, + "learning_rate": 2.8206356388373146e-05, + "loss": 0.0609, "step": 20160 }, { "epoch": 0.94, - "learning_rate": 1.820308471239042e-05, - "loss": 0.1238, + "learning_rate": 2.820588831853713e-05, + "loss": 0.1172, "step": 20165 }, { "epoch": 0.94, - "learning_rate": 1.820261591111528e-05, - "loss": 0.1555, + "learning_rate": 2.820542024870111e-05, + "loss": 0.0703, "step": 20170 }, { "epoch": 0.94, - "learning_rate": 1.820214710984014e-05, - "loss": 0.141, + "learning_rate": 2.8204952178865085e-05, + "loss": 0.1642, "step": 20175 }, { "epoch": 0.94, - "learning_rate": 1.8201678308565e-05, - "loss": 0.0992, + "learning_rate": 2.8204484109029068e-05, + "loss": 0.1785, "step": 20180 }, { "epoch": 0.94, - "learning_rate": 1.820120950728986e-05, - "loss": 0.1861, + "learning_rate": 2.8204016039193048e-05, + "loss": 0.26, "step": 20185 }, { "epoch": 0.94, - "learning_rate": 1.820074070601472e-05, - "loss": 0.2341, + "learning_rate": 2.8203547969357028e-05, + "loss": 0.132, "step": 20190 }, { "epoch": 0.94, - "learning_rate": 1.820027190473958e-05, - "loss": 0.2778, + "learning_rate": 2.8203079899521008e-05, + "loss": 0.2015, "step": 20195 }, { "epoch": 0.94, - "learning_rate": 1.8199803103464444e-05, - "loss": 0.4585, + "learning_rate": 2.820261182968499e-05, + "loss": 0.4543, "step": 20200 }, { "epoch": 0.94, - "learning_rate": 1.8199334302189304e-05, - "loss": 0.193, + "learning_rate": 2.820214375984897e-05, + "loss": 0.1331, "step": 20205 }, { "epoch": 0.94, - "learning_rate": 1.8198865500914164e-05, - "loss": 0.0783, + "learning_rate": 2.820167569001295e-05, + "loss": 0.0367, "step": 20210 }, { "epoch": 0.94, - "learning_rate": 1.8198396699639024e-05, - "loss": 0.1657, + "learning_rate": 2.820120762017693e-05, + "loss": 0.1203, "step": 20215 }, { "epoch": 0.94, - "learning_rate": 1.8197927898363884e-05, - "loss": 0.0852, + "learning_rate": 2.8200739550340914e-05, + "loss": 0.1229, "step": 20220 }, { "epoch": 0.94, - "learning_rate": 1.8197459097088747e-05, - "loss": 0.1167, + "learning_rate": 2.8200271480504893e-05, + "loss": 0.1072, "step": 20225 }, { "epoch": 0.94, - "learning_rate": 1.8196990295813607e-05, - "loss": 0.1107, + "learning_rate": 2.8199803410668873e-05, + "loss": 0.2379, "step": 20230 }, { "epoch": 0.94, - "learning_rate": 1.8196521494538467e-05, - "loss": 0.1642, + "learning_rate": 2.8199335340832853e-05, + "loss": 0.1161, "step": 20235 }, { "epoch": 0.94, - "learning_rate": 1.8196052693263327e-05, - "loss": 0.2792, + "learning_rate": 2.8198867270996833e-05, + "loss": 0.2194, "step": 20240 }, { "epoch": 0.94, - "learning_rate": 1.8195583891988187e-05, - "loss": 0.37, + "learning_rate": 2.8198399201160813e-05, + "loss": 0.2563, "step": 20245 }, { "epoch": 0.94, - "learning_rate": 1.819511509071305e-05, - "loss": 0.3121, + "learning_rate": 2.8197931131324792e-05, + "loss": 0.6662, "step": 20250 }, { "epoch": 0.95, - "learning_rate": 1.819464628943791e-05, - "loss": 0.2295, + "learning_rate": 2.8197463061488776e-05, + "loss": 0.1148, "step": 20255 }, { "epoch": 0.95, - "learning_rate": 1.819417748816277e-05, - "loss": 0.0714, + "learning_rate": 2.8196994991652755e-05, + "loss": 0.0444, "step": 20260 }, { "epoch": 0.95, - "learning_rate": 1.819370868688763e-05, - "loss": 0.1149, + "learning_rate": 2.8196526921816735e-05, + "loss": 0.0626, "step": 20265 }, { "epoch": 0.95, - "learning_rate": 1.819323988561249e-05, - "loss": 0.1149, + "learning_rate": 2.8196058851980715e-05, + "loss": 0.1184, "step": 20270 }, { "epoch": 0.95, - "learning_rate": 1.819277108433735e-05, - "loss": 0.1848, + "learning_rate": 2.81955907821447e-05, + "loss": 0.0785, "step": 20275 }, { "epoch": 0.95, - "learning_rate": 1.819230228306221e-05, - "loss": 0.1652, + "learning_rate": 2.8195122712308678e-05, + "loss": 0.098, "step": 20280 }, { "epoch": 0.95, - "learning_rate": 1.819183348178707e-05, - "loss": 0.1869, + "learning_rate": 2.8194654642472658e-05, + "loss": 0.2003, "step": 20285 }, { "epoch": 0.95, - "learning_rate": 1.8191364680511933e-05, - "loss": 0.2933, + "learning_rate": 2.8194186572636638e-05, + "loss": 0.1674, "step": 20290 }, { "epoch": 0.95, - "learning_rate": 1.8190895879236793e-05, - "loss": 0.2485, + "learning_rate": 2.819371850280062e-05, + "loss": 0.3642, "step": 20295 }, { "epoch": 0.95, - "learning_rate": 1.8190427077961653e-05, - "loss": 0.5297, + "learning_rate": 2.8193250432964597e-05, + "loss": 0.4801, "step": 20300 }, { "epoch": 0.95, - "learning_rate": 1.8189958276686516e-05, - "loss": 0.2442, + "learning_rate": 2.8192782363128577e-05, + "loss": 0.2435, "step": 20305 }, { "epoch": 0.95, - "learning_rate": 1.8189489475411376e-05, - "loss": 0.0492, + "learning_rate": 2.819231429329256e-05, + "loss": 0.0955, "step": 20310 }, { "epoch": 0.95, - "learning_rate": 1.8189020674136236e-05, - "loss": 0.1049, + "learning_rate": 2.819184622345654e-05, + "loss": 0.0778, "step": 20315 }, { "epoch": 0.95, - "learning_rate": 1.8188551872861096e-05, - "loss": 0.1435, + "learning_rate": 2.819137815362052e-05, + "loss": 0.1128, "step": 20320 }, { "epoch": 0.95, - "learning_rate": 1.8188083071585956e-05, - "loss": 0.1713, + "learning_rate": 2.81909100837845e-05, + "loss": 0.1563, "step": 20325 }, { "epoch": 0.95, - "learning_rate": 1.8187614270310816e-05, - "loss": 0.1232, + "learning_rate": 2.8190442013948483e-05, + "loss": 0.0633, "step": 20330 }, { "epoch": 0.95, - "learning_rate": 1.8187145469035676e-05, - "loss": 0.1662, + "learning_rate": 2.8189973944112463e-05, + "loss": 0.1201, "step": 20335 }, { "epoch": 0.95, - "learning_rate": 1.818667666776054e-05, - "loss": 0.2596, + "learning_rate": 2.8189505874276443e-05, + "loss": 0.1788, "step": 20340 }, { "epoch": 0.95, - "learning_rate": 1.81862078664854e-05, - "loss": 0.285, + "learning_rate": 2.8189037804440423e-05, + "loss": 0.2385, "step": 20345 }, { "epoch": 0.95, - "learning_rate": 1.818573906521026e-05, - "loss": 0.3881, + "learning_rate": 2.8188569734604406e-05, + "loss": 0.662, "step": 20350 }, { "epoch": 0.95, - "learning_rate": 1.818527026393512e-05, - "loss": 0.2469, + "learning_rate": 2.8188101664768386e-05, + "loss": 0.2491, "step": 20355 }, { "epoch": 0.95, - "learning_rate": 1.818480146265998e-05, - "loss": 0.0816, + "learning_rate": 2.8187633594932365e-05, + "loss": 0.1046, "step": 20360 }, { "epoch": 0.95, - "learning_rate": 1.818433266138484e-05, - "loss": 0.159, + "learning_rate": 2.8187165525096345e-05, + "loss": 0.0764, "step": 20365 }, { "epoch": 0.95, - "learning_rate": 1.8183863860109702e-05, - "loss": 0.0941, + "learning_rate": 2.8186697455260325e-05, + "loss": 0.1108, "step": 20370 }, { "epoch": 0.95, - "learning_rate": 1.8183395058834562e-05, - "loss": 0.0982, + "learning_rate": 2.8186229385424305e-05, + "loss": 0.1127, "step": 20375 }, { "epoch": 0.95, - "learning_rate": 1.8182926257559422e-05, - "loss": 0.1584, + "learning_rate": 2.8185761315588285e-05, + "loss": 0.1035, "step": 20380 }, { "epoch": 0.95, - "learning_rate": 1.8182457456284282e-05, - "loss": 0.1844, + "learning_rate": 2.8185293245752268e-05, + "loss": 0.1857, "step": 20385 }, { "epoch": 0.95, - "learning_rate": 1.8181988655009145e-05, - "loss": 0.2987, + "learning_rate": 2.8184825175916248e-05, + "loss": 0.2943, "step": 20390 }, { "epoch": 0.95, - "learning_rate": 1.8181519853734005e-05, - "loss": 0.2073, + "learning_rate": 2.8184357106080227e-05, + "loss": 0.3479, "step": 20395 }, { "epoch": 0.95, - "learning_rate": 1.8181051052458865e-05, - "loss": 0.4283, + "learning_rate": 2.8183889036244207e-05, + "loss": 0.5486, "step": 20400 }, { "epoch": 0.95, - "learning_rate": 1.8180582251183725e-05, - "loss": 0.1415, + "learning_rate": 2.818342096640819e-05, + "loss": 0.1338, "step": 20405 }, { "epoch": 0.95, - "learning_rate": 1.8180113449908585e-05, - "loss": 0.041, + "learning_rate": 2.818295289657217e-05, + "loss": 0.0795, "step": 20410 }, { "epoch": 0.95, - "learning_rate": 1.8179644648633445e-05, - "loss": 0.0686, + "learning_rate": 2.818248482673615e-05, + "loss": 0.065, "step": 20415 }, { "epoch": 0.95, - "learning_rate": 1.8179175847358305e-05, - "loss": 0.0919, + "learning_rate": 2.818201675690013e-05, + "loss": 0.1033, "step": 20420 }, { "epoch": 0.95, - "learning_rate": 1.8178707046083165e-05, - "loss": 0.1709, + "learning_rate": 2.818154868706411e-05, + "loss": 0.1537, "step": 20425 }, { "epoch": 0.95, - "learning_rate": 1.8178238244808028e-05, - "loss": 0.1945, + "learning_rate": 2.818108061722809e-05, + "loss": 0.1581, "step": 20430 }, { "epoch": 0.95, - "learning_rate": 1.8177769443532888e-05, - "loss": 0.2593, + "learning_rate": 2.818061254739207e-05, + "loss": 0.1775, "step": 20435 }, { "epoch": 0.95, - "learning_rate": 1.8177300642257748e-05, - "loss": 0.2522, + "learning_rate": 2.8180144477556053e-05, + "loss": 0.1888, "step": 20440 }, { "epoch": 0.95, - "learning_rate": 1.8176831840982608e-05, - "loss": 0.2546, + "learning_rate": 2.8179676407720032e-05, + "loss": 0.2106, "step": 20445 }, { "epoch": 0.95, - "learning_rate": 1.817636303970747e-05, - "loss": 0.4386, + "learning_rate": 2.8179208337884012e-05, + "loss": 0.3352, "step": 20450 }, { "epoch": 0.95, - "learning_rate": 1.817589423843233e-05, - "loss": 0.1648, + "learning_rate": 2.8178740268047992e-05, + "loss": 0.1744, "step": 20455 }, { "epoch": 0.95, - "learning_rate": 1.817542543715719e-05, - "loss": 0.1356, + "learning_rate": 2.8178272198211975e-05, + "loss": 0.0895, "step": 20460 }, { "epoch": 0.95, - "learning_rate": 1.817495663588205e-05, - "loss": 0.0529, + "learning_rate": 2.8177804128375955e-05, + "loss": 0.0839, "step": 20465 }, { "epoch": 0.96, - "learning_rate": 1.817448783460691e-05, - "loss": 0.1376, + "learning_rate": 2.8177336058539935e-05, + "loss": 0.2121, "step": 20470 }, { "epoch": 0.96, - "learning_rate": 1.817401903333177e-05, - "loss": 0.1926, + "learning_rate": 2.8176867988703915e-05, + "loss": 0.1547, "step": 20475 }, { "epoch": 0.96, - "learning_rate": 1.8173550232056634e-05, - "loss": 0.1433, + "learning_rate": 2.8176399918867898e-05, + "loss": 0.1767, "step": 20480 }, { "epoch": 0.96, - "learning_rate": 1.8173081430781494e-05, - "loss": 0.0786, + "learning_rate": 2.8175931849031878e-05, + "loss": 0.1461, "step": 20485 }, { "epoch": 0.96, - "learning_rate": 1.8172612629506354e-05, - "loss": 0.1545, + "learning_rate": 2.8175463779195854e-05, + "loss": 0.1666, "step": 20490 }, { "epoch": 0.96, - "learning_rate": 1.8172143828231214e-05, - "loss": 0.2676, + "learning_rate": 2.8174995709359837e-05, + "loss": 0.2874, "step": 20495 }, { "epoch": 0.96, - "learning_rate": 1.8171675026956074e-05, - "loss": 0.4635, + "learning_rate": 2.8174527639523817e-05, + "loss": 0.3162, "step": 20500 }, { "epoch": 0.96, - "learning_rate": 1.8171206225680934e-05, - "loss": 0.1845, + "learning_rate": 2.8174059569687797e-05, + "loss": 0.226, "step": 20505 }, { "epoch": 0.96, - "learning_rate": 1.8170737424405797e-05, - "loss": 0.0334, + "learning_rate": 2.8173591499851777e-05, + "loss": 0.0568, "step": 20510 }, { "epoch": 0.96, - "learning_rate": 1.8170268623130657e-05, - "loss": 0.0685, + "learning_rate": 2.817312343001576e-05, + "loss": 0.0747, "step": 20515 }, { "epoch": 0.96, - "learning_rate": 1.8169799821855517e-05, - "loss": 0.139, + "learning_rate": 2.817265536017974e-05, + "loss": 0.0523, "step": 20520 }, { "epoch": 0.96, - "learning_rate": 1.8169331020580377e-05, - "loss": 0.1517, + "learning_rate": 2.817218729034372e-05, + "loss": 0.1438, "step": 20525 }, { "epoch": 0.96, - "learning_rate": 1.816886221930524e-05, - "loss": 0.1412, + "learning_rate": 2.81717192205077e-05, + "loss": 0.109, "step": 20530 }, { "epoch": 0.96, - "learning_rate": 1.81683934180301e-05, - "loss": 0.1334, + "learning_rate": 2.8171251150671683e-05, + "loss": 0.1728, "step": 20535 }, { "epoch": 0.96, - "learning_rate": 1.816792461675496e-05, - "loss": 0.3197, + "learning_rate": 2.8170783080835663e-05, + "loss": 0.2249, "step": 20540 }, { "epoch": 0.96, - "learning_rate": 1.816745581547982e-05, - "loss": 0.3216, + "learning_rate": 2.8170315010999642e-05, + "loss": 0.2156, "step": 20545 }, { "epoch": 0.96, - "learning_rate": 1.816698701420468e-05, - "loss": 0.6545, + "learning_rate": 2.8169846941163626e-05, + "loss": 0.6314, "step": 20550 }, { "epoch": 0.96, - "learning_rate": 1.816651821292954e-05, - "loss": 0.1667, + "learning_rate": 2.8169378871327602e-05, + "loss": 0.1749, "step": 20555 }, { "epoch": 0.96, - "learning_rate": 1.81660494116544e-05, - "loss": 0.0827, + "learning_rate": 2.8168910801491582e-05, + "loss": 0.07, "step": 20560 }, { "epoch": 0.96, - "learning_rate": 1.816558061037926e-05, - "loss": 0.0818, + "learning_rate": 2.816844273165556e-05, + "loss": 0.1051, "step": 20565 }, { "epoch": 0.96, - "learning_rate": 1.816511180910412e-05, - "loss": 0.1156, + "learning_rate": 2.8167974661819545e-05, + "loss": 0.0834, "step": 20570 }, { "epoch": 0.96, - "learning_rate": 1.8164643007828983e-05, - "loss": 0.114, + "learning_rate": 2.8167506591983525e-05, + "loss": 0.104, "step": 20575 }, { "epoch": 0.96, - "learning_rate": 1.8164174206553843e-05, - "loss": 0.1087, + "learning_rate": 2.8167038522147504e-05, + "loss": 0.1638, "step": 20580 }, { "epoch": 0.96, - "learning_rate": 1.8163705405278703e-05, - "loss": 0.1839, + "learning_rate": 2.8166570452311484e-05, + "loss": 0.1461, "step": 20585 }, { "epoch": 0.96, - "learning_rate": 1.8163236604003566e-05, - "loss": 0.3323, + "learning_rate": 2.8166102382475467e-05, + "loss": 0.2532, "step": 20590 }, { "epoch": 0.96, - "learning_rate": 1.8162767802728426e-05, - "loss": 0.3873, + "learning_rate": 2.8165634312639447e-05, + "loss": 0.2593, "step": 20595 }, { "epoch": 0.96, - "learning_rate": 1.8162299001453286e-05, - "loss": 0.45, + "learning_rate": 2.8165166242803427e-05, + "loss": 0.6777, "step": 20600 }, { "epoch": 0.96, - "learning_rate": 1.8161830200178146e-05, - "loss": 0.2414, + "learning_rate": 2.8164698172967407e-05, + "loss": 0.1545, "step": 20605 }, { "epoch": 0.96, - "learning_rate": 1.8161361398903006e-05, - "loss": 0.0611, + "learning_rate": 2.816423010313139e-05, + "loss": 0.0451, "step": 20610 }, { "epoch": 0.96, - "learning_rate": 1.8160892597627866e-05, - "loss": 0.124, + "learning_rate": 2.8163762033295367e-05, + "loss": 0.0954, "step": 20615 }, { "epoch": 0.96, - "learning_rate": 1.816042379635273e-05, - "loss": 0.0909, + "learning_rate": 2.8163293963459346e-05, + "loss": 0.1675, "step": 20620 }, { "epoch": 0.96, - "learning_rate": 1.815995499507759e-05, - "loss": 0.1789, + "learning_rate": 2.816282589362333e-05, + "loss": 0.1969, "step": 20625 }, { "epoch": 0.96, - "learning_rate": 1.815948619380245e-05, - "loss": 0.1668, + "learning_rate": 2.816235782378731e-05, + "loss": 0.1834, "step": 20630 }, { "epoch": 0.96, - "learning_rate": 1.815901739252731e-05, - "loss": 0.1578, + "learning_rate": 2.816188975395129e-05, + "loss": 0.2388, "step": 20635 }, { "epoch": 0.96, - "learning_rate": 1.815854859125217e-05, - "loss": 0.253, + "learning_rate": 2.816142168411527e-05, + "loss": 0.2778, "step": 20640 }, { "epoch": 0.96, - "learning_rate": 1.815807978997703e-05, - "loss": 0.2484, + "learning_rate": 2.8160953614279252e-05, + "loss": 0.1899, "step": 20645 }, { "epoch": 0.96, - "learning_rate": 1.815761098870189e-05, - "loss": 0.4484, + "learning_rate": 2.8160485544443232e-05, + "loss": 0.556, "step": 20650 }, { "epoch": 0.96, - "learning_rate": 1.8157142187426752e-05, - "loss": 0.1914, + "learning_rate": 2.8160017474607212e-05, + "loss": 0.1431, "step": 20655 }, { "epoch": 0.96, - "learning_rate": 1.8156673386151612e-05, - "loss": 0.1153, + "learning_rate": 2.8159549404771192e-05, + "loss": 0.0746, "step": 20660 }, { "epoch": 0.96, - "learning_rate": 1.8156204584876472e-05, - "loss": 0.1582, + "learning_rate": 2.8159081334935175e-05, + "loss": 0.0774, "step": 20665 }, { "epoch": 0.96, - "learning_rate": 1.8155735783601335e-05, - "loss": 0.1714, + "learning_rate": 2.8158613265099155e-05, + "loss": 0.143, "step": 20670 }, { "epoch": 0.96, - "learning_rate": 1.8155266982326195e-05, - "loss": 0.1086, + "learning_rate": 2.8158145195263135e-05, + "loss": 0.1597, "step": 20675 }, { "epoch": 0.96, - "learning_rate": 1.8154798181051055e-05, - "loss": 0.1785, + "learning_rate": 2.8157677125427114e-05, + "loss": 0.168, "step": 20680 }, { "epoch": 0.97, - "learning_rate": 1.8154329379775915e-05, - "loss": 0.1845, + "learning_rate": 2.8157209055591094e-05, + "loss": 0.3065, "step": 20685 }, { "epoch": 0.97, - "learning_rate": 1.8153860578500775e-05, - "loss": 0.3776, + "learning_rate": 2.8156740985755074e-05, + "loss": 0.1927, "step": 20690 }, { "epoch": 0.97, - "learning_rate": 1.8153391777225635e-05, - "loss": 0.1668, + "learning_rate": 2.8156272915919054e-05, + "loss": 0.2088, "step": 20695 }, { "epoch": 0.97, - "learning_rate": 1.8152922975950495e-05, - "loss": 0.5262, + "learning_rate": 2.8155804846083037e-05, + "loss": 0.574, "step": 20700 }, { "epoch": 0.97, - "learning_rate": 1.8152454174675355e-05, - "loss": 0.1545, + "learning_rate": 2.8155336776247017e-05, + "loss": 0.1484, "step": 20705 }, { "epoch": 0.97, - "learning_rate": 1.8151985373400215e-05, - "loss": 0.1112, + "learning_rate": 2.8154868706410997e-05, + "loss": 0.078, "step": 20710 }, { "epoch": 0.97, - "learning_rate": 1.8151516572125078e-05, - "loss": 0.0721, + "learning_rate": 2.8154400636574976e-05, + "loss": 0.0621, "step": 20715 }, { "epoch": 0.97, - "learning_rate": 1.8151047770849938e-05, - "loss": 0.1326, + "learning_rate": 2.815393256673896e-05, + "loss": 0.0976, "step": 20720 }, { "epoch": 0.97, - "learning_rate": 1.8150578969574798e-05, - "loss": 0.1883, + "learning_rate": 2.815346449690294e-05, + "loss": 0.1303, "step": 20725 }, { "epoch": 0.97, - "learning_rate": 1.8150110168299658e-05, - "loss": 0.0903, + "learning_rate": 2.815299642706692e-05, + "loss": 0.1321, "step": 20730 }, { "epoch": 0.97, - "learning_rate": 1.814964136702452e-05, - "loss": 0.2431, + "learning_rate": 2.8152528357230903e-05, + "loss": 0.1589, "step": 20735 }, { "epoch": 0.97, - "learning_rate": 1.814917256574938e-05, - "loss": 0.301, + "learning_rate": 2.8152060287394882e-05, + "loss": 0.2044, "step": 20740 }, { "epoch": 0.97, - "learning_rate": 1.814870376447424e-05, - "loss": 0.3377, + "learning_rate": 2.815159221755886e-05, + "loss": 0.3798, "step": 20745 }, { "epoch": 0.97, - "learning_rate": 1.81482349631991e-05, - "loss": 0.3813, + "learning_rate": 2.815112414772284e-05, + "loss": 0.4709, "step": 20750 }, { "epoch": 0.97, - "learning_rate": 1.814776616192396e-05, - "loss": 0.154, + "learning_rate": 2.8150656077886822e-05, + "loss": 0.196, "step": 20755 }, { "epoch": 0.97, - "learning_rate": 1.8147297360648824e-05, - "loss": 0.0773, + "learning_rate": 2.81501880080508e-05, + "loss": 0.0599, "step": 20760 }, { "epoch": 0.97, - "learning_rate": 1.8146828559373684e-05, - "loss": 0.0761, + "learning_rate": 2.814971993821478e-05, + "loss": 0.0847, "step": 20765 }, { "epoch": 0.97, - "learning_rate": 1.8146359758098544e-05, - "loss": 0.075, + "learning_rate": 2.814925186837876e-05, + "loss": 0.088, "step": 20770 }, { "epoch": 0.97, - "learning_rate": 1.8145890956823404e-05, - "loss": 0.2386, + "learning_rate": 2.8148783798542744e-05, + "loss": 0.0987, "step": 20775 }, { "epoch": 0.97, - "learning_rate": 1.8145422155548264e-05, - "loss": 0.1128, + "learning_rate": 2.8148315728706724e-05, + "loss": 0.2123, "step": 20780 }, { "epoch": 0.97, - "learning_rate": 1.8144953354273124e-05, - "loss": 0.1266, + "learning_rate": 2.8147847658870704e-05, + "loss": 0.2164, "step": 20785 }, { "epoch": 0.97, - "learning_rate": 1.8144484552997984e-05, - "loss": 0.2653, + "learning_rate": 2.8147379589034687e-05, + "loss": 0.1814, "step": 20790 }, { "epoch": 0.97, - "learning_rate": 1.8144015751722844e-05, - "loss": 0.2307, + "learning_rate": 2.8146911519198667e-05, + "loss": 0.3309, "step": 20795 }, { "epoch": 0.97, - "learning_rate": 1.8143546950447707e-05, - "loss": 0.5244, + "learning_rate": 2.8146443449362647e-05, + "loss": 0.4629, "step": 20800 }, { "epoch": 0.97, - "learning_rate": 1.8143078149172567e-05, - "loss": 0.2061, + "learning_rate": 2.8145975379526623e-05, + "loss": 0.1562, "step": 20805 }, { "epoch": 0.97, - "learning_rate": 1.8142609347897427e-05, - "loss": 0.0903, + "learning_rate": 2.8145507309690607e-05, + "loss": 0.027, "step": 20810 }, { "epoch": 0.97, - "learning_rate": 1.814214054662229e-05, - "loss": 0.0667, + "learning_rate": 2.8145039239854586e-05, + "loss": 0.1902, "step": 20815 }, { "epoch": 0.97, - "learning_rate": 1.814167174534715e-05, - "loss": 0.069, + "learning_rate": 2.8144571170018566e-05, + "loss": 0.1324, "step": 20820 }, { "epoch": 0.97, - "learning_rate": 1.814120294407201e-05, - "loss": 0.0872, + "learning_rate": 2.8144103100182546e-05, + "loss": 0.0776, "step": 20825 }, { "epoch": 0.97, - "learning_rate": 1.814073414279687e-05, - "loss": 0.183, + "learning_rate": 2.814363503034653e-05, + "loss": 0.1613, "step": 20830 }, { "epoch": 0.97, - "learning_rate": 1.814026534152173e-05, - "loss": 0.1693, + "learning_rate": 2.814316696051051e-05, + "loss": 0.1971, "step": 20835 }, { "epoch": 0.97, - "learning_rate": 1.813979654024659e-05, - "loss": 0.2612, + "learning_rate": 2.814269889067449e-05, + "loss": 0.2176, "step": 20840 }, { "epoch": 0.97, - "learning_rate": 1.813932773897145e-05, - "loss": 0.2822, + "learning_rate": 2.814223082083847e-05, + "loss": 0.3287, "step": 20845 }, { "epoch": 0.97, - "learning_rate": 1.8138858937696313e-05, - "loss": 0.5785, + "learning_rate": 2.8141762751002452e-05, + "loss": 0.4665, "step": 20850 }, { "epoch": 0.97, - "learning_rate": 1.8138390136421173e-05, - "loss": 0.2277, + "learning_rate": 2.8141294681166432e-05, + "loss": 0.2047, "step": 20855 }, { "epoch": 0.97, - "learning_rate": 1.8137921335146033e-05, - "loss": 0.0781, + "learning_rate": 2.814082661133041e-05, + "loss": 0.0807, "step": 20860 }, { "epoch": 0.97, - "learning_rate": 1.8137452533870893e-05, - "loss": 0.0593, + "learning_rate": 2.8140358541494395e-05, + "loss": 0.1344, "step": 20865 }, { "epoch": 0.97, - "learning_rate": 1.8136983732595753e-05, - "loss": 0.0789, + "learning_rate": 2.813989047165837e-05, + "loss": 0.1654, "step": 20870 }, { "epoch": 0.97, - "learning_rate": 1.8136514931320613e-05, - "loss": 0.1482, + "learning_rate": 2.813942240182235e-05, + "loss": 0.0912, "step": 20875 }, { "epoch": 0.97, - "learning_rate": 1.8136046130045476e-05, - "loss": 0.1335, + "learning_rate": 2.813895433198633e-05, + "loss": 0.1348, "step": 20880 }, { "epoch": 0.97, - "learning_rate": 1.8135577328770336e-05, - "loss": 0.1823, + "learning_rate": 2.8138486262150314e-05, + "loss": 0.1929, "step": 20885 }, { "epoch": 0.97, - "learning_rate": 1.8135108527495196e-05, - "loss": 0.2789, + "learning_rate": 2.8138018192314294e-05, + "loss": 0.1943, "step": 20890 }, { "epoch": 0.97, - "learning_rate": 1.8134639726220056e-05, - "loss": 0.2018, + "learning_rate": 2.8137550122478274e-05, + "loss": 0.1947, "step": 20895 }, { "epoch": 0.98, - "learning_rate": 1.813417092494492e-05, - "loss": 0.4649, + "learning_rate": 2.8137082052642253e-05, + "loss": 0.4322, "step": 20900 }, { "epoch": 0.98, - "learning_rate": 1.813370212366978e-05, - "loss": 0.2208, + "learning_rate": 2.8136613982806237e-05, + "loss": 0.1952, "step": 20905 }, { "epoch": 0.98, - "learning_rate": 1.813323332239464e-05, - "loss": 0.0604, + "learning_rate": 2.8136145912970216e-05, + "loss": 0.0827, "step": 20910 }, { "epoch": 0.98, - "learning_rate": 1.81327645211195e-05, - "loss": 0.0778, + "learning_rate": 2.8135677843134196e-05, + "loss": 0.1029, "step": 20915 }, { "epoch": 0.98, - "learning_rate": 1.813229571984436e-05, - "loss": 0.1314, + "learning_rate": 2.813520977329818e-05, + "loss": 0.0621, "step": 20920 }, { "epoch": 0.98, - "learning_rate": 1.813182691856922e-05, - "loss": 0.1132, + "learning_rate": 2.813474170346216e-05, + "loss": 0.1272, "step": 20925 }, { "epoch": 0.98, - "learning_rate": 1.813135811729408e-05, - "loss": 0.1354, + "learning_rate": 2.813427363362614e-05, + "loss": 0.1242, "step": 20930 }, { "epoch": 0.98, - "learning_rate": 1.813088931601894e-05, - "loss": 0.208, + "learning_rate": 2.8133805563790116e-05, + "loss": 0.2298, "step": 20935 }, { "epoch": 0.98, - "learning_rate": 1.8130420514743802e-05, - "loss": 0.26, + "learning_rate": 2.81333374939541e-05, + "loss": 0.1909, "step": 20940 }, { "epoch": 0.98, - "learning_rate": 1.8129951713468662e-05, - "loss": 0.281, + "learning_rate": 2.813286942411808e-05, + "loss": 0.2004, "step": 20945 }, { "epoch": 0.98, - "learning_rate": 1.8129482912193522e-05, - "loss": 0.4454, + "learning_rate": 2.813240135428206e-05, + "loss": 0.397, "step": 20950 }, { "epoch": 0.98, - "learning_rate": 1.8129014110918382e-05, - "loss": 0.2346, + "learning_rate": 2.8131933284446038e-05, + "loss": 0.1798, "step": 20955 }, { "epoch": 0.98, - "learning_rate": 1.8128545309643245e-05, - "loss": 0.0742, + "learning_rate": 2.813146521461002e-05, + "loss": 0.0624, "step": 20960 }, { "epoch": 0.98, - "learning_rate": 1.8128076508368105e-05, - "loss": 0.104, + "learning_rate": 2.8130997144774e-05, + "loss": 0.0655, "step": 20965 }, { "epoch": 0.98, - "learning_rate": 1.8127607707092965e-05, - "loss": 0.0568, + "learning_rate": 2.813052907493798e-05, + "loss": 0.1675, "step": 20970 }, { "epoch": 0.98, - "learning_rate": 1.8127138905817825e-05, - "loss": 0.1337, + "learning_rate": 2.8130061005101964e-05, + "loss": 0.1494, "step": 20975 }, { "epoch": 0.98, - "learning_rate": 1.8126670104542685e-05, - "loss": 0.1325, + "learning_rate": 2.8129592935265944e-05, + "loss": 0.24, "step": 20980 }, { "epoch": 0.98, - "learning_rate": 1.8126201303267545e-05, - "loss": 0.1653, + "learning_rate": 2.8129124865429924e-05, + "loss": 0.3086, "step": 20985 }, { "epoch": 0.98, - "learning_rate": 1.812573250199241e-05, - "loss": 0.2467, + "learning_rate": 2.8128656795593904e-05, + "loss": 0.1123, "step": 20990 }, { "epoch": 0.98, - "learning_rate": 1.8125263700717268e-05, - "loss": 0.3002, + "learning_rate": 2.8128188725757884e-05, + "loss": 0.2502, "step": 20995 }, { "epoch": 0.98, - "learning_rate": 1.8124794899442128e-05, - "loss": 0.3036, + "learning_rate": 2.8127720655921863e-05, + "loss": 0.4047, "step": 21000 }, { "epoch": 0.98, - "learning_rate": 1.8124326098166988e-05, - "loss": 0.2193, + "learning_rate": 2.8127252586085843e-05, + "loss": 0.2248, "step": 21005 }, { "epoch": 0.98, - "learning_rate": 1.8123857296891848e-05, - "loss": 0.0949, + "learning_rate": 2.8126784516249823e-05, + "loss": 0.043, "step": 21010 }, { "epoch": 0.98, - "learning_rate": 1.8123388495616708e-05, - "loss": 0.0853, + "learning_rate": 2.8126316446413806e-05, + "loss": 0.0671, "step": 21015 }, { "epoch": 0.98, - "learning_rate": 1.812291969434157e-05, - "loss": 0.0995, + "learning_rate": 2.8125848376577786e-05, + "loss": 0.0699, "step": 21020 }, { "epoch": 0.98, - "learning_rate": 1.812245089306643e-05, - "loss": 0.138, + "learning_rate": 2.8125380306741766e-05, + "loss": 0.1004, "step": 21025 }, { "epoch": 0.98, - "learning_rate": 1.812198209179129e-05, - "loss": 0.1339, + "learning_rate": 2.8124912236905746e-05, + "loss": 0.0738, "step": 21030 }, { "epoch": 0.98, - "learning_rate": 1.812151329051615e-05, - "loss": 0.2001, + "learning_rate": 2.812444416706973e-05, + "loss": 0.1827, "step": 21035 }, { "epoch": 0.98, - "learning_rate": 1.8121044489241014e-05, - "loss": 0.2148, + "learning_rate": 2.812397609723371e-05, + "loss": 0.2244, "step": 21040 }, { "epoch": 0.98, - "learning_rate": 1.8120575687965874e-05, - "loss": 0.2632, + "learning_rate": 2.812350802739769e-05, + "loss": 0.3415, "step": 21045 }, { "epoch": 0.98, - "learning_rate": 1.8120106886690734e-05, - "loss": 0.4485, + "learning_rate": 2.8123039957561672e-05, + "loss": 0.4996, "step": 21050 }, { "epoch": 0.98, - "learning_rate": 1.8119638085415594e-05, - "loss": 0.1182, + "learning_rate": 2.812257188772565e-05, + "loss": 0.2611, "step": 21055 }, { "epoch": 0.98, - "learning_rate": 1.8119169284140454e-05, - "loss": 0.0904, + "learning_rate": 2.8122103817889628e-05, + "loss": 0.0686, "step": 21060 }, { "epoch": 0.98, - "learning_rate": 1.8118700482865314e-05, - "loss": 0.1157, + "learning_rate": 2.8121635748053608e-05, + "loss": 0.0538, "step": 21065 }, { "epoch": 0.98, - "learning_rate": 1.8118231681590174e-05, - "loss": 0.1715, + "learning_rate": 2.812116767821759e-05, + "loss": 0.0752, "step": 21070 }, { "epoch": 0.98, - "learning_rate": 1.8117762880315034e-05, - "loss": 0.154, + "learning_rate": 2.812069960838157e-05, + "loss": 0.1292, "step": 21075 }, { "epoch": 0.98, - "learning_rate": 1.8117294079039894e-05, - "loss": 0.2591, + "learning_rate": 2.812023153854555e-05, + "loss": 0.1622, "step": 21080 }, { "epoch": 0.98, - "learning_rate": 1.8116825277764757e-05, - "loss": 0.2791, + "learning_rate": 2.811976346870953e-05, + "loss": 0.1767, "step": 21085 }, { "epoch": 0.98, - "learning_rate": 1.8116356476489617e-05, - "loss": 0.2793, + "learning_rate": 2.8119295398873514e-05, + "loss": 0.1789, "step": 21090 }, { "epoch": 0.98, - "learning_rate": 1.8115887675214477e-05, - "loss": 0.2693, + "learning_rate": 2.8118827329037493e-05, + "loss": 0.3069, "step": 21095 }, { "epoch": 0.98, - "learning_rate": 1.811541887393934e-05, - "loss": 0.3771, + "learning_rate": 2.8118359259201473e-05, + "loss": 0.3624, "step": 21100 }, { "epoch": 0.98, - "learning_rate": 1.81149500726642e-05, - "loss": 0.1693, + "learning_rate": 2.8117891189365456e-05, + "loss": 0.168, "step": 21105 }, { "epoch": 0.99, - "learning_rate": 1.811448127138906e-05, - "loss": 0.1137, + "learning_rate": 2.8117423119529436e-05, + "loss": 0.148, "step": 21110 }, { "epoch": 0.99, - "learning_rate": 1.811401247011392e-05, - "loss": 0.0693, + "learning_rate": 2.8116955049693416e-05, + "loss": 0.0757, "step": 21115 }, { "epoch": 0.99, - "learning_rate": 1.811354366883878e-05, - "loss": 0.0632, + "learning_rate": 2.8116486979857396e-05, + "loss": 0.0868, "step": 21120 }, { "epoch": 0.99, - "learning_rate": 1.811307486756364e-05, - "loss": 0.1544, + "learning_rate": 2.8116018910021376e-05, + "loss": 0.1677, "step": 21125 }, { "epoch": 0.99, - "learning_rate": 1.8112606066288503e-05, - "loss": 0.2066, + "learning_rate": 2.8115550840185356e-05, + "loss": 0.0994, "step": 21130 }, { "epoch": 0.99, - "learning_rate": 1.8112137265013363e-05, - "loss": 0.2304, + "learning_rate": 2.8115082770349335e-05, + "loss": 0.1551, "step": 21135 }, { "epoch": 0.99, - "learning_rate": 1.8111668463738223e-05, - "loss": 0.3096, + "learning_rate": 2.8114614700513315e-05, + "loss": 0.2855, "step": 21140 }, { "epoch": 0.99, - "learning_rate": 1.8111199662463083e-05, - "loss": 0.302, + "learning_rate": 2.81141466306773e-05, + "loss": 0.2286, "step": 21145 }, { "epoch": 0.99, - "learning_rate": 1.8110730861187943e-05, - "loss": 0.3194, + "learning_rate": 2.8113678560841278e-05, + "loss": 0.413, "step": 21150 }, { "epoch": 0.99, - "learning_rate": 1.8110262059912803e-05, - "loss": 0.1765, + "learning_rate": 2.8113210491005258e-05, + "loss": 0.1841, "step": 21155 }, { "epoch": 0.99, - "learning_rate": 1.8109793258637663e-05, - "loss": 0.0763, + "learning_rate": 2.811274242116924e-05, + "loss": 0.0742, "step": 21160 }, { "epoch": 0.99, - "learning_rate": 1.8109324457362526e-05, - "loss": 0.0571, + "learning_rate": 2.811227435133322e-05, + "loss": 0.0712, "step": 21165 }, { "epoch": 0.99, - "learning_rate": 1.8108855656087386e-05, - "loss": 0.1146, + "learning_rate": 2.81118062814972e-05, + "loss": 0.0768, "step": 21170 }, { "epoch": 0.99, - "learning_rate": 1.8108386854812246e-05, - "loss": 0.1685, + "learning_rate": 2.811133821166118e-05, + "loss": 0.0907, "step": 21175 }, { "epoch": 0.99, - "learning_rate": 1.810791805353711e-05, - "loss": 0.1763, + "learning_rate": 2.8110870141825164e-05, + "loss": 0.1989, "step": 21180 }, { "epoch": 0.99, - "learning_rate": 1.810744925226197e-05, - "loss": 0.146, + "learning_rate": 2.811040207198914e-05, + "loss": 0.2145, "step": 21185 }, { "epoch": 0.99, - "learning_rate": 1.810698045098683e-05, - "loss": 0.232, + "learning_rate": 2.810993400215312e-05, + "loss": 0.2252, "step": 21190 }, { "epoch": 0.99, - "learning_rate": 1.810651164971169e-05, - "loss": 0.3637, + "learning_rate": 2.81094659323171e-05, + "loss": 0.238, "step": 21195 }, { "epoch": 0.99, - "learning_rate": 1.810604284843655e-05, - "loss": 0.4796, + "learning_rate": 2.8108997862481083e-05, + "loss": 0.4606, "step": 21200 }, { "epoch": 0.99, - "learning_rate": 1.810557404716141e-05, - "loss": 0.2576, + "learning_rate": 2.8108529792645063e-05, + "loss": 0.2023, "step": 21205 }, { "epoch": 0.99, - "learning_rate": 1.810510524588627e-05, - "loss": 0.0738, + "learning_rate": 2.8108061722809043e-05, + "loss": 0.089, "step": 21210 }, { "epoch": 0.99, - "learning_rate": 1.810463644461113e-05, - "loss": 0.0893, + "learning_rate": 2.8107593652973023e-05, + "loss": 0.1199, "step": 21215 }, { "epoch": 0.99, - "learning_rate": 1.810416764333599e-05, - "loss": 0.1347, + "learning_rate": 2.8107125583137006e-05, + "loss": 0.152, "step": 21220 }, { "epoch": 0.99, - "learning_rate": 1.8103698842060852e-05, - "loss": 0.1224, + "learning_rate": 2.8106657513300986e-05, + "loss": 0.1005, "step": 21225 }, { "epoch": 0.99, - "learning_rate": 1.8103230040785712e-05, - "loss": 0.1012, + "learning_rate": 2.8106189443464965e-05, + "loss": 0.1959, "step": 21230 }, { "epoch": 0.99, - "learning_rate": 1.8102761239510572e-05, - "loss": 0.1355, + "learning_rate": 2.810572137362895e-05, + "loss": 0.1537, "step": 21235 }, { "epoch": 0.99, - "learning_rate": 1.8102292438235432e-05, - "loss": 0.2893, + "learning_rate": 2.810525330379293e-05, + "loss": 0.1304, "step": 21240 }, { "epoch": 0.99, - "learning_rate": 1.8101823636960295e-05, - "loss": 0.2347, + "learning_rate": 2.810478523395691e-05, + "loss": 0.4672, "step": 21245 }, { "epoch": 0.99, - "learning_rate": 1.8101354835685155e-05, - "loss": 0.4496, + "learning_rate": 2.8104317164120885e-05, + "loss": 0.4464, "step": 21250 }, { "epoch": 0.99, - "learning_rate": 1.8100886034410015e-05, - "loss": 0.2292, + "learning_rate": 2.8103849094284868e-05, + "loss": 0.1754, "step": 21255 }, { "epoch": 0.99, - "learning_rate": 1.8100417233134875e-05, - "loss": 0.1001, + "learning_rate": 2.8103381024448848e-05, + "loss": 0.0353, "step": 21260 }, { "epoch": 0.99, - "learning_rate": 1.8099948431859735e-05, - "loss": 0.0865, + "learning_rate": 2.8102912954612828e-05, + "loss": 0.1236, "step": 21265 }, { "epoch": 0.99, - "learning_rate": 1.80994796305846e-05, - "loss": 0.0803, + "learning_rate": 2.8102444884776807e-05, + "loss": 0.0951, "step": 21270 }, { "epoch": 0.99, - "learning_rate": 1.809901082930946e-05, - "loss": 0.1175, + "learning_rate": 2.810197681494079e-05, + "loss": 0.1122, "step": 21275 }, { "epoch": 0.99, - "learning_rate": 1.809854202803432e-05, - "loss": 0.2219, + "learning_rate": 2.810150874510477e-05, + "loss": 0.1565, "step": 21280 }, { "epoch": 0.99, - "learning_rate": 1.809807322675918e-05, - "loss": 0.2215, + "learning_rate": 2.810104067526875e-05, + "loss": 0.1991, "step": 21285 }, { "epoch": 0.99, - "learning_rate": 1.8097604425484038e-05, - "loss": 0.221, + "learning_rate": 2.8100572605432733e-05, + "loss": 0.298, "step": 21290 }, { "epoch": 0.99, - "learning_rate": 1.8097135624208898e-05, - "loss": 0.2907, + "learning_rate": 2.8100104535596713e-05, + "loss": 0.3901, "step": 21295 }, { "epoch": 0.99, - "learning_rate": 1.8096666822933758e-05, - "loss": 0.5423, + "learning_rate": 2.8099636465760693e-05, + "loss": 0.5585, "step": 21300 }, { "epoch": 0.99, - "learning_rate": 1.8096198021658618e-05, - "loss": 0.1497, + "learning_rate": 2.8099168395924673e-05, + "loss": 0.2222, "step": 21305 }, { "epoch": 0.99, - "learning_rate": 1.809572922038348e-05, - "loss": 0.1218, + "learning_rate": 2.8098700326088653e-05, + "loss": 0.0505, "step": 21310 }, { "epoch": 0.99, - "learning_rate": 1.809526041910834e-05, - "loss": 0.1345, + "learning_rate": 2.8098232256252633e-05, + "loss": 0.0697, "step": 21315 }, { "epoch": 0.99, - "learning_rate": 1.80947916178332e-05, - "loss": 0.0717, + "learning_rate": 2.8097764186416612e-05, + "loss": 0.0983, "step": 21320 }, { "epoch": 1.0, - "learning_rate": 1.8094322816558065e-05, - "loss": 0.2388, + "learning_rate": 2.8097296116580592e-05, + "loss": 0.1085, "step": 21325 }, { "epoch": 1.0, - "learning_rate": 1.8093854015282925e-05, - "loss": 0.1201, + "learning_rate": 2.8096828046744575e-05, + "loss": 0.0795, "step": 21330 }, { "epoch": 1.0, - "learning_rate": 1.8093385214007784e-05, - "loss": 0.1755, + "learning_rate": 2.8096359976908555e-05, + "loss": 0.1129, "step": 21335 }, { "epoch": 1.0, - "learning_rate": 1.8092916412732644e-05, - "loss": 0.2016, + "learning_rate": 2.8095891907072535e-05, + "loss": 0.186, "step": 21340 }, { "epoch": 1.0, - "learning_rate": 1.8092447611457504e-05, - "loss": 0.2375, + "learning_rate": 2.8095423837236518e-05, + "loss": 0.3137, "step": 21345 }, { "epoch": 1.0, - "learning_rate": 1.8091978810182364e-05, - "loss": 0.6163, + "learning_rate": 2.8094955767400498e-05, + "loss": 0.4092, "step": 21350 }, { "epoch": 1.0, - "learning_rate": 1.8091510008907224e-05, - "loss": 0.1855, + "learning_rate": 2.8094487697564478e-05, + "loss": 0.2431, "step": 21355 }, { "epoch": 1.0, - "learning_rate": 1.8091041207632084e-05, - "loss": 0.1006, + "learning_rate": 2.8094019627728458e-05, + "loss": 0.0776, "step": 21360 }, { "epoch": 1.0, - "learning_rate": 1.8090572406356947e-05, - "loss": 0.0872, + "learning_rate": 2.809355155789244e-05, + "loss": 0.0519, "step": 21365 }, { "epoch": 1.0, - "learning_rate": 1.8090103605081807e-05, - "loss": 0.0815, + "learning_rate": 2.809308348805642e-05, + "loss": 0.1305, "step": 21370 }, { "epoch": 1.0, - "learning_rate": 1.8089634803806667e-05, - "loss": 0.1751, + "learning_rate": 2.8092615418220397e-05, + "loss": 0.1244, "step": 21375 }, { "epoch": 1.0, - "learning_rate": 1.8089166002531527e-05, - "loss": 0.1182, + "learning_rate": 2.8092147348384377e-05, + "loss": 0.1205, "step": 21380 }, { "epoch": 1.0, - "learning_rate": 1.8088697201256387e-05, - "loss": 0.2401, + "learning_rate": 2.809167927854836e-05, + "loss": 0.2034, "step": 21385 }, { "epoch": 1.0, - "learning_rate": 1.808822839998125e-05, - "loss": 0.2828, + "learning_rate": 2.809121120871234e-05, + "loss": 0.1759, "step": 21390 }, { "epoch": 1.0, - "learning_rate": 1.808775959870611e-05, - "loss": 0.2181, + "learning_rate": 2.809074313887632e-05, + "loss": 0.2639, "step": 21395 }, { "epoch": 1.0, - "learning_rate": 1.808729079743097e-05, - "loss": 0.4759, + "learning_rate": 2.8090275069040303e-05, + "loss": 0.3853, "step": 21400 }, { "epoch": 1.0, - "learning_rate": 1.808682199615583e-05, - "loss": 0.0917, + "learning_rate": 2.8089806999204283e-05, + "loss": 0.1772, "step": 21405 }, { "epoch": 1.0, - "learning_rate": 1.8086353194880694e-05, - "loss": 0.0687, + "learning_rate": 2.8089338929368263e-05, + "loss": 0.0547, "step": 21410 }, { "epoch": 1.0, - "learning_rate": 1.8085884393605554e-05, - "loss": 0.0882, + "learning_rate": 2.8088870859532242e-05, + "loss": 0.1344, "step": 21415 }, { "epoch": 1.0, - "learning_rate": 1.8085415592330413e-05, - "loss": 0.1867, + "learning_rate": 2.8088402789696226e-05, + "loss": 0.1098, "step": 21420 }, { "epoch": 1.0, - "learning_rate": 1.8084946791055273e-05, - "loss": 0.2552, + "learning_rate": 2.8087934719860205e-05, + "loss": 0.2273, "step": 21425 }, { "epoch": 1.0, - "learning_rate": 1.8084477989780133e-05, - "loss": 0.4176, + "learning_rate": 2.8087466650024185e-05, + "loss": 0.2281, "step": 21430 }, { "epoch": 1.0, - "eval_cer": 0.017639530100105133, - "eval_loss": 0.05617095157504082, - "eval_runtime": 420.9463, - "eval_samples_per_second": 45.255, - "eval_steps_per_second": 11.315, - "eval_wer": 0.151395023974025, + "eval_cer": 0.017324029687760277, + "eval_loss": 0.044368449598550797, + "eval_runtime": 417.3503, + "eval_samples_per_second": 45.645, + "eval_steps_per_second": 11.412, + "eval_wer": 0.1453773151691413, "step": 21431 }, { "epoch": 1.0, - "learning_rate": 1.8084009188504993e-05, - "loss": 0.2647, + "learning_rate": 2.8086998580188165e-05, + "loss": 0.3569, "step": 21435 }, { "epoch": 1.0, - "learning_rate": 1.8083540387229853e-05, - "loss": 0.0701, + "learning_rate": 2.8086530510352145e-05, + "loss": 0.0547, "step": 21440 }, { "epoch": 1.0, - "learning_rate": 1.8083071585954713e-05, - "loss": 0.0847, + "learning_rate": 2.8086062440516125e-05, + "loss": 0.1129, "step": 21445 }, { "epoch": 1.0, - "learning_rate": 1.8082602784679576e-05, - "loss": 0.152, + "learning_rate": 2.8085594370680105e-05, + "loss": 0.1289, "step": 21450 }, { "epoch": 1.0, - "learning_rate": 1.8082133983404436e-05, - "loss": 0.1055, + "learning_rate": 2.8085126300844084e-05, + "loss": 0.136, "step": 21455 }, { "epoch": 1.0, - "learning_rate": 1.8081665182129296e-05, - "loss": 0.1901, + "learning_rate": 2.8084658231008068e-05, + "loss": 0.1492, "step": 21460 }, { "epoch": 1.0, - "learning_rate": 1.8081196380854156e-05, - "loss": 0.2548, + "learning_rate": 2.8084190161172047e-05, + "loss": 0.1638, "step": 21465 }, { "epoch": 1.0, - "learning_rate": 1.808072757957902e-05, - "loss": 0.2324, + "learning_rate": 2.8083722091336027e-05, + "loss": 0.2593, "step": 21470 }, { "epoch": 1.0, - "learning_rate": 1.808025877830388e-05, - "loss": 0.2663, + "learning_rate": 2.808325402150001e-05, + "loss": 0.2652, "step": 21475 }, { "epoch": 1.0, - "learning_rate": 1.807978997702874e-05, - "loss": 0.3625, + "learning_rate": 2.808278595166399e-05, + "loss": 0.349, "step": 21480 }, { "epoch": 1.0, - "learning_rate": 1.80793211757536e-05, - "loss": 0.3101, + "learning_rate": 2.808231788182797e-05, + "loss": 0.2494, "step": 21485 }, { "epoch": 1.0, - "learning_rate": 1.807885237447846e-05, - "loss": 0.081, + "learning_rate": 2.808184981199195e-05, + "loss": 0.0287, "step": 21490 }, { "epoch": 1.0, - "learning_rate": 1.807838357320332e-05, - "loss": 0.1012, + "learning_rate": 2.8081381742155933e-05, + "loss": 0.1174, "step": 21495 }, { "epoch": 1.0, - "learning_rate": 1.8077914771928183e-05, - "loss": 0.1227, + "learning_rate": 2.808091367231991e-05, + "loss": 0.084, "step": 21500 }, { "epoch": 1.0, - "learning_rate": 1.8077445970653043e-05, - "loss": 0.1404, + "learning_rate": 2.808044560248389e-05, + "loss": 0.1471, "step": 21505 }, { "epoch": 1.0, - "learning_rate": 1.8076977169377902e-05, - "loss": 0.1724, + "learning_rate": 2.807997753264787e-05, + "loss": 0.0932, "step": 21510 }, { "epoch": 1.0, - "learning_rate": 1.8076508368102762e-05, - "loss": 0.2312, + "learning_rate": 2.8079509462811852e-05, + "loss": 0.1504, "step": 21515 }, { "epoch": 1.0, - "learning_rate": 1.8076039566827622e-05, - "loss": 0.2044, + "learning_rate": 2.8079041392975832e-05, + "loss": 0.1676, "step": 21520 }, { "epoch": 1.0, - "learning_rate": 1.8075570765552482e-05, - "loss": 0.2757, + "learning_rate": 2.8078573323139812e-05, + "loss": 0.2544, "step": 21525 }, { "epoch": 1.0, - "learning_rate": 1.8075101964277346e-05, - "loss": 0.3255, + "learning_rate": 2.8078105253303795e-05, + "loss": 0.5146, "step": 21530 }, { "epoch": 1.0, - "learning_rate": 1.8074633163002206e-05, - "loss": 0.2708, + "learning_rate": 2.8077637183467775e-05, + "loss": 0.2322, "step": 21535 }, { "epoch": 1.01, - "learning_rate": 1.8074164361727065e-05, - "loss": 0.1083, + "learning_rate": 2.8077169113631755e-05, + "loss": 0.0566, "step": 21540 }, { "epoch": 1.01, - "learning_rate": 1.8073695560451925e-05, - "loss": 0.0432, + "learning_rate": 2.8076701043795735e-05, + "loss": 0.0943, "step": 21545 }, { "epoch": 1.01, - "learning_rate": 1.807322675917679e-05, - "loss": 0.0933, + "learning_rate": 2.8076232973959718e-05, + "loss": 0.1057, "step": 21550 }, { "epoch": 1.01, - "learning_rate": 1.807275795790165e-05, - "loss": 0.134, + "learning_rate": 2.8075764904123698e-05, + "loss": 0.118, "step": 21555 }, { "epoch": 1.01, - "learning_rate": 1.807228915662651e-05, - "loss": 0.2026, + "learning_rate": 2.8075296834287677e-05, + "loss": 0.1078, "step": 21560 }, { "epoch": 1.01, - "learning_rate": 1.807182035535137e-05, - "loss": 0.1708, + "learning_rate": 2.8074828764451654e-05, + "loss": 0.1053, "step": 21565 }, { "epoch": 1.01, - "learning_rate": 1.807135155407623e-05, - "loss": 0.1225, + "learning_rate": 2.8074360694615637e-05, + "loss": 0.189, "step": 21570 }, { "epoch": 1.01, - "learning_rate": 1.807088275280109e-05, - "loss": 0.2929, + "learning_rate": 2.8073892624779617e-05, + "loss": 0.1968, "step": 21575 }, { "epoch": 1.01, - "learning_rate": 1.8070413951525948e-05, - "loss": 0.3245, + "learning_rate": 2.8073424554943597e-05, + "loss": 0.3459, "step": 21580 }, { "epoch": 1.01, - "learning_rate": 1.8069945150250808e-05, - "loss": 0.2348, + "learning_rate": 2.807295648510758e-05, + "loss": 0.3776, "step": 21585 }, { "epoch": 1.01, - "learning_rate": 1.8069476348975668e-05, - "loss": 0.0968, + "learning_rate": 2.807248841527156e-05, + "loss": 0.0604, "step": 21590 }, { "epoch": 1.01, - "learning_rate": 1.806900754770053e-05, - "loss": 0.1464, + "learning_rate": 2.807202034543554e-05, + "loss": 0.0643, "step": 21595 }, { "epoch": 1.01, - "learning_rate": 1.806853874642539e-05, - "loss": 0.0781, + "learning_rate": 2.807155227559952e-05, + "loss": 0.1255, "step": 21600 }, { "epoch": 1.01, - "learning_rate": 1.806806994515025e-05, - "loss": 0.1357, + "learning_rate": 2.8071084205763503e-05, + "loss": 0.1175, "step": 21605 }, { "epoch": 1.01, - "learning_rate": 1.8067601143875115e-05, - "loss": 0.1264, + "learning_rate": 2.8070616135927482e-05, + "loss": 0.1656, "step": 21610 }, { "epoch": 1.01, - "learning_rate": 1.8067132342599975e-05, - "loss": 0.2146, + "learning_rate": 2.8070148066091462e-05, + "loss": 0.2271, "step": 21615 }, { "epoch": 1.01, - "learning_rate": 1.8066663541324835e-05, - "loss": 0.1569, + "learning_rate": 2.8069679996255442e-05, + "loss": 0.2579, "step": 21620 }, { "epoch": 1.01, - "learning_rate": 1.8066194740049694e-05, - "loss": 0.1623, + "learning_rate": 2.8069211926419422e-05, + "loss": 0.1913, "step": 21625 }, { "epoch": 1.01, - "learning_rate": 1.8065725938774554e-05, - "loss": 0.5652, + "learning_rate": 2.8068743856583402e-05, + "loss": 0.4454, "step": 21630 }, { "epoch": 1.01, - "learning_rate": 1.8065257137499414e-05, - "loss": 0.347, + "learning_rate": 2.806827578674738e-05, + "loss": 0.3754, "step": 21635 }, { "epoch": 1.01, - "learning_rate": 1.8064788336224278e-05, - "loss": 0.047, + "learning_rate": 2.806780771691136e-05, + "loss": 0.0788, "step": 21640 }, { "epoch": 1.01, - "learning_rate": 1.8064319534949138e-05, - "loss": 0.0867, + "learning_rate": 2.8067339647075345e-05, + "loss": 0.0852, "step": 21645 }, { "epoch": 1.01, - "learning_rate": 1.8063850733673998e-05, - "loss": 0.0751, + "learning_rate": 2.8066871577239324e-05, + "loss": 0.1291, "step": 21650 }, { "epoch": 1.01, - "learning_rate": 1.8063381932398857e-05, - "loss": 0.061, + "learning_rate": 2.8066403507403304e-05, + "loss": 0.12, "step": 21655 }, { "epoch": 1.01, - "learning_rate": 1.8062913131123717e-05, - "loss": 0.1454, + "learning_rate": 2.8065935437567287e-05, + "loss": 0.169, "step": 21660 }, { "epoch": 1.01, - "learning_rate": 1.8062444329848577e-05, - "loss": 0.1297, + "learning_rate": 2.8065467367731267e-05, + "loss": 0.1362, "step": 21665 }, { "epoch": 1.01, - "learning_rate": 1.8061975528573437e-05, - "loss": 0.1807, + "learning_rate": 2.8064999297895247e-05, + "loss": 0.2041, "step": 21670 }, { "epoch": 1.01, - "learning_rate": 1.80615067272983e-05, - "loss": 0.1411, + "learning_rate": 2.8064531228059227e-05, + "loss": 0.254, "step": 21675 }, { "epoch": 1.01, - "learning_rate": 1.806103792602316e-05, - "loss": 0.3258, + "learning_rate": 2.806406315822321e-05, + "loss": 0.2922, "step": 21680 }, { "epoch": 1.01, - "learning_rate": 1.806056912474802e-05, - "loss": 0.3581, + "learning_rate": 2.806359508838719e-05, + "loss": 0.3923, "step": 21685 }, { "epoch": 1.01, - "learning_rate": 1.8060100323472884e-05, - "loss": 0.0666, + "learning_rate": 2.8063127018551166e-05, + "loss": 0.0997, "step": 21690 }, { "epoch": 1.01, - "learning_rate": 1.8059631522197744e-05, - "loss": 0.0615, + "learning_rate": 2.8062658948715146e-05, + "loss": 0.076, "step": 21695 }, { "epoch": 1.01, - "learning_rate": 1.8059162720922604e-05, - "loss": 0.0849, + "learning_rate": 2.806219087887913e-05, + "loss": 0.1171, "step": 21700 }, { "epoch": 1.01, - "learning_rate": 1.8058693919647464e-05, - "loss": 0.1099, + "learning_rate": 2.806172280904311e-05, + "loss": 0.0969, "step": 21705 }, { "epoch": 1.01, - "learning_rate": 1.8058225118372324e-05, - "loss": 0.1481, + "learning_rate": 2.806125473920709e-05, + "loss": 0.089, "step": 21710 }, { "epoch": 1.01, - "learning_rate": 1.8057756317097183e-05, - "loss": 0.149, + "learning_rate": 2.8060786669371072e-05, + "loss": 0.1608, "step": 21715 }, { "epoch": 1.01, - "learning_rate": 1.8057287515822043e-05, - "loss": 0.2365, + "learning_rate": 2.8060318599535052e-05, + "loss": 0.1848, "step": 21720 }, { "epoch": 1.01, - "learning_rate": 1.8056818714546903e-05, - "loss": 0.216, + "learning_rate": 2.8059850529699032e-05, + "loss": 0.2496, "step": 21725 }, { "epoch": 1.01, - "learning_rate": 1.8056349913271763e-05, - "loss": 0.3717, + "learning_rate": 2.805938245986301e-05, + "loss": 0.3286, "step": 21730 }, { "epoch": 1.01, - "learning_rate": 1.8055881111996627e-05, - "loss": 0.3129, + "learning_rate": 2.8058914390026995e-05, + "loss": 0.3203, "step": 21735 }, { "epoch": 1.01, - "learning_rate": 1.8055412310721487e-05, - "loss": 0.0945, + "learning_rate": 2.8058446320190975e-05, + "loss": 0.0839, "step": 21740 }, { "epoch": 1.01, - "learning_rate": 1.8054943509446346e-05, - "loss": 0.0414, + "learning_rate": 2.8057978250354954e-05, + "loss": 0.0886, "step": 21745 }, { "epoch": 1.01, - "learning_rate": 1.8054474708171206e-05, - "loss": 0.0931, + "learning_rate": 2.8057510180518934e-05, + "loss": 0.1171, "step": 21750 }, { "epoch": 1.02, - "learning_rate": 1.805400590689607e-05, - "loss": 0.1486, + "learning_rate": 2.8057042110682914e-05, + "loss": 0.0864, "step": 21755 }, { "epoch": 1.02, - "learning_rate": 1.805353710562093e-05, - "loss": 0.1353, + "learning_rate": 2.8056574040846894e-05, + "loss": 0.1462, "step": 21760 }, { "epoch": 1.02, - "learning_rate": 1.805306830434579e-05, - "loss": 0.2255, + "learning_rate": 2.8056105971010874e-05, + "loss": 0.2781, "step": 21765 }, { "epoch": 1.02, - "learning_rate": 1.805259950307065e-05, - "loss": 0.2076, + "learning_rate": 2.8055637901174857e-05, + "loss": 0.155, "step": 21770 }, { "epoch": 1.02, - "learning_rate": 1.805213070179551e-05, - "loss": 0.2506, + "learning_rate": 2.8055169831338837e-05, + "loss": 0.2439, "step": 21775 }, { "epoch": 1.02, - "learning_rate": 1.8051661900520373e-05, - "loss": 0.2631, + "learning_rate": 2.8054701761502817e-05, + "loss": 0.4164, "step": 21780 }, { "epoch": 1.02, - "learning_rate": 1.8051193099245233e-05, - "loss": 0.3428, + "learning_rate": 2.8054233691666796e-05, + "loss": 0.2868, "step": 21785 }, { "epoch": 1.02, - "learning_rate": 1.8050724297970093e-05, - "loss": 0.0835, + "learning_rate": 2.805376562183078e-05, + "loss": 0.0581, "step": 21790 }, { "epoch": 1.02, - "learning_rate": 1.8050255496694953e-05, - "loss": 0.1205, + "learning_rate": 2.805329755199476e-05, + "loss": 0.0677, "step": 21795 }, { "epoch": 1.02, - "learning_rate": 1.8049786695419812e-05, - "loss": 0.106, + "learning_rate": 2.805282948215874e-05, + "loss": 0.0646, "step": 21800 }, { "epoch": 1.02, - "learning_rate": 1.8049317894144672e-05, - "loss": 0.1363, + "learning_rate": 2.805236141232272e-05, + "loss": 0.0902, "step": 21805 }, { "epoch": 1.02, - "learning_rate": 1.8048849092869532e-05, - "loss": 0.1173, + "learning_rate": 2.8051893342486702e-05, + "loss": 0.166, "step": 21810 }, { "epoch": 1.02, - "learning_rate": 1.8048380291594392e-05, - "loss": 0.1431, + "learning_rate": 2.805142527265068e-05, + "loss": 0.1674, "step": 21815 }, { "epoch": 1.02, - "learning_rate": 1.8047911490319256e-05, - "loss": 0.1746, + "learning_rate": 2.805095720281466e-05, + "loss": 0.183, "step": 21820 }, { "epoch": 1.02, - "learning_rate": 1.8047442689044116e-05, - "loss": 0.2475, + "learning_rate": 2.805048913297864e-05, + "loss": 0.2956, "step": 21825 }, { "epoch": 1.02, - "learning_rate": 1.8046973887768975e-05, - "loss": 0.4129, + "learning_rate": 2.805002106314262e-05, + "loss": 0.341, "step": 21830 }, { "epoch": 1.02, - "learning_rate": 1.804650508649384e-05, - "loss": 0.3637, + "learning_rate": 2.80495529933066e-05, + "loss": 0.2537, "step": 21835 }, { "epoch": 1.02, - "learning_rate": 1.80460362852187e-05, - "loss": 0.0778, + "learning_rate": 2.804908492347058e-05, + "loss": 0.0434, "step": 21840 }, { "epoch": 1.02, - "learning_rate": 1.804556748394356e-05, - "loss": 0.0479, + "learning_rate": 2.8048616853634564e-05, + "loss": 0.0873, "step": 21845 }, { "epoch": 1.02, - "learning_rate": 1.804509868266842e-05, - "loss": 0.1032, + "learning_rate": 2.8048148783798544e-05, + "loss": 0.135, "step": 21850 }, { "epoch": 1.02, - "learning_rate": 1.804462988139328e-05, - "loss": 0.1429, + "learning_rate": 2.8047680713962524e-05, + "loss": 0.1317, "step": 21855 }, { "epoch": 1.02, - "learning_rate": 1.804416108011814e-05, - "loss": 0.1455, + "learning_rate": 2.8047212644126504e-05, + "loss": 0.1786, "step": 21860 }, { "epoch": 1.02, - "learning_rate": 1.8043692278843e-05, - "loss": 0.2018, + "learning_rate": 2.8046744574290487e-05, + "loss": 0.1707, "step": 21865 }, { "epoch": 1.02, - "learning_rate": 1.804322347756786e-05, - "loss": 0.1994, + "learning_rate": 2.8046276504454467e-05, + "loss": 0.2086, "step": 21870 }, { "epoch": 1.02, - "learning_rate": 1.804275467629272e-05, - "loss": 0.2921, + "learning_rate": 2.8045808434618447e-05, + "loss": 0.3222, "step": 21875 }, { "epoch": 1.02, - "learning_rate": 1.804228587501758e-05, - "loss": 0.2647, + "learning_rate": 2.8045340364782423e-05, + "loss": 0.3771, "step": 21880 }, { "epoch": 1.02, - "learning_rate": 1.804181707374244e-05, - "loss": 0.3341, + "learning_rate": 2.8044872294946406e-05, + "loss": 0.3114, "step": 21885 }, { "epoch": 1.02, - "learning_rate": 1.80413482724673e-05, - "loss": 0.0478, + "learning_rate": 2.8044404225110386e-05, + "loss": 0.1046, "step": 21890 }, { "epoch": 1.02, - "learning_rate": 1.8040879471192165e-05, - "loss": 0.0548, + "learning_rate": 2.8043936155274366e-05, + "loss": 0.1055, "step": 21895 }, { "epoch": 1.02, - "learning_rate": 1.8040410669917025e-05, - "loss": 0.0963, + "learning_rate": 2.804346808543835e-05, + "loss": 0.1283, "step": 21900 }, { "epoch": 1.02, - "learning_rate": 1.8039941868641885e-05, - "loss": 0.1291, + "learning_rate": 2.804300001560233e-05, + "loss": 0.147, "step": 21905 }, { "epoch": 1.02, - "learning_rate": 1.8039473067366745e-05, - "loss": 0.0829, + "learning_rate": 2.804253194576631e-05, + "loss": 0.1537, "step": 21910 }, { "epoch": 1.02, - "learning_rate": 1.8039004266091605e-05, - "loss": 0.1468, + "learning_rate": 2.804206387593029e-05, + "loss": 0.1984, "step": 21915 }, { "epoch": 1.02, - "learning_rate": 1.8038535464816468e-05, - "loss": 0.1108, + "learning_rate": 2.8041595806094272e-05, + "loss": 0.2843, "step": 21920 }, { "epoch": 1.02, - "learning_rate": 1.8038066663541328e-05, - "loss": 0.291, + "learning_rate": 2.804112773625825e-05, + "loss": 0.2247, "step": 21925 }, { "epoch": 1.02, - "learning_rate": 1.8037597862266188e-05, - "loss": 0.5618, + "learning_rate": 2.804065966642223e-05, + "loss": 0.3886, "step": 21930 }, { "epoch": 1.02, - "learning_rate": 1.8037129060991048e-05, - "loss": 0.2874, + "learning_rate": 2.804019159658621e-05, + "loss": 0.2921, "step": 21935 }, { "epoch": 1.02, - "learning_rate": 1.8036660259715908e-05, - "loss": 0.1016, + "learning_rate": 2.8039723526750194e-05, + "loss": 0.1001, "step": 21940 }, { "epoch": 1.02, - "learning_rate": 1.8036191458440768e-05, - "loss": 0.0921, + "learning_rate": 2.803925545691417e-05, + "loss": 0.0521, "step": 21945 }, { "epoch": 1.02, - "learning_rate": 1.8035722657165627e-05, - "loss": 0.094, + "learning_rate": 2.803878738707815e-05, + "loss": 0.137, "step": 21950 }, { "epoch": 1.02, - "learning_rate": 1.8035253855890487e-05, - "loss": 0.1329, + "learning_rate": 2.8038319317242134e-05, + "loss": 0.1007, "step": 21955 }, { "epoch": 1.02, - "learning_rate": 1.803478505461535e-05, - "loss": 0.1354, + "learning_rate": 2.8037851247406114e-05, + "loss": 0.2019, "step": 21960 }, { "epoch": 1.02, - "learning_rate": 1.803431625334021e-05, - "loss": 0.1359, + "learning_rate": 2.8037383177570094e-05, + "loss": 0.1404, "step": 21965 }, { "epoch": 1.03, - "learning_rate": 1.803384745206507e-05, - "loss": 0.1957, + "learning_rate": 2.8036915107734073e-05, + "loss": 0.2021, "step": 21970 }, { "epoch": 1.03, - "learning_rate": 1.8033378650789934e-05, - "loss": 0.2334, + "learning_rate": 2.8036447037898057e-05, + "loss": 0.1831, "step": 21975 }, { "epoch": 1.03, - "learning_rate": 1.8032909849514794e-05, - "loss": 0.4382, + "learning_rate": 2.8035978968062036e-05, + "loss": 0.3806, "step": 21980 }, { "epoch": 1.03, - "learning_rate": 1.8032441048239654e-05, - "loss": 0.2213, + "learning_rate": 2.8035510898226016e-05, + "loss": 0.2251, "step": 21985 }, { "epoch": 1.03, - "learning_rate": 1.8031972246964514e-05, - "loss": 0.0824, + "learning_rate": 2.8035042828389996e-05, + "loss": 0.0822, "step": 21990 }, { "epoch": 1.03, - "learning_rate": 1.8031503445689374e-05, - "loss": 0.0964, + "learning_rate": 2.803457475855398e-05, + "loss": 0.1029, "step": 21995 }, { "epoch": 1.03, - "learning_rate": 1.8031034644414234e-05, - "loss": 0.0895, + "learning_rate": 2.803410668871796e-05, + "loss": 0.1215, "step": 22000 }, { "epoch": 1.03, - "learning_rate": 1.8030565843139093e-05, - "loss": 0.1174, + "learning_rate": 2.8033638618881935e-05, + "loss": 0.1059, "step": 22005 }, { "epoch": 1.03, - "learning_rate": 1.8030097041863953e-05, - "loss": 0.2309, + "learning_rate": 2.8033170549045915e-05, + "loss": 0.1448, "step": 22010 }, { "epoch": 1.03, - "learning_rate": 1.8029628240588817e-05, - "loss": 0.2318, + "learning_rate": 2.80327024792099e-05, + "loss": 0.1495, "step": 22015 }, { "epoch": 1.03, - "learning_rate": 1.8029159439313677e-05, - "loss": 0.1976, + "learning_rate": 2.8032234409373878e-05, + "loss": 0.3297, "step": 22020 }, { "epoch": 1.03, - "learning_rate": 1.8028690638038537e-05, - "loss": 0.2326, + "learning_rate": 2.8031766339537858e-05, + "loss": 0.314, "step": 22025 }, { "epoch": 1.03, - "learning_rate": 1.8028221836763397e-05, - "loss": 0.4568, + "learning_rate": 2.803129826970184e-05, + "loss": 0.345, "step": 22030 }, { "epoch": 1.03, - "learning_rate": 1.8027753035488256e-05, - "loss": 0.3818, + "learning_rate": 2.803083019986582e-05, + "loss": 0.3261, "step": 22035 }, { "epoch": 1.03, - "learning_rate": 1.802728423421312e-05, - "loss": 0.0766, + "learning_rate": 2.80303621300298e-05, + "loss": 0.0667, "step": 22040 }, { "epoch": 1.03, - "learning_rate": 1.802681543293798e-05, - "loss": 0.0628, + "learning_rate": 2.802989406019378e-05, + "loss": 0.0971, "step": 22045 }, { "epoch": 1.03, - "learning_rate": 1.802634663166284e-05, - "loss": 0.1297, + "learning_rate": 2.8029425990357764e-05, + "loss": 0.0779, "step": 22050 }, { "epoch": 1.03, - "learning_rate": 1.80258778303877e-05, - "loss": 0.1041, + "learning_rate": 2.8028957920521744e-05, + "loss": 0.1409, "step": 22055 }, { "epoch": 1.03, - "learning_rate": 1.8025409029112563e-05, - "loss": 0.1585, + "learning_rate": 2.8028489850685724e-05, + "loss": 0.1598, "step": 22060 }, { "epoch": 1.03, - "learning_rate": 1.8024940227837423e-05, - "loss": 0.1287, + "learning_rate": 2.8028021780849703e-05, + "loss": 0.1167, "step": 22065 }, { "epoch": 1.03, - "learning_rate": 1.8024471426562283e-05, - "loss": 0.1883, + "learning_rate": 2.8027553711013683e-05, + "loss": 0.156, "step": 22070 }, { "epoch": 1.03, - "learning_rate": 1.8024002625287143e-05, - "loss": 0.3461, + "learning_rate": 2.8027085641177663e-05, + "loss": 0.2611, "step": 22075 }, { "epoch": 1.03, - "learning_rate": 1.8023533824012003e-05, - "loss": 0.4398, + "learning_rate": 2.8026617571341643e-05, + "loss": 0.4057, "step": 22080 }, { "epoch": 1.03, - "learning_rate": 1.8023065022736863e-05, - "loss": 0.2017, + "learning_rate": 2.8026149501505626e-05, + "loss": 0.2867, "step": 22085 }, { "epoch": 1.03, - "learning_rate": 1.8022596221461723e-05, - "loss": 0.0727, + "learning_rate": 2.8025681431669606e-05, + "loss": 0.0698, "step": 22090 }, { "epoch": 1.03, - "learning_rate": 1.8022127420186582e-05, - "loss": 0.0453, + "learning_rate": 2.8025213361833586e-05, + "loss": 0.0762, "step": 22095 }, { "epoch": 1.03, - "learning_rate": 1.8021658618911442e-05, - "loss": 0.0808, + "learning_rate": 2.8024745291997566e-05, + "loss": 0.1022, "step": 22100 }, { "epoch": 1.03, - "learning_rate": 1.8021189817636306e-05, - "loss": 0.1692, + "learning_rate": 2.802427722216155e-05, + "loss": 0.0939, "step": 22105 }, { "epoch": 1.03, - "learning_rate": 1.8020721016361166e-05, - "loss": 0.1024, + "learning_rate": 2.802380915232553e-05, + "loss": 0.1342, "step": 22110 }, { "epoch": 1.03, - "learning_rate": 1.8020252215086026e-05, - "loss": 0.2401, + "learning_rate": 2.802334108248951e-05, + "loss": 0.1678, "step": 22115 }, { "epoch": 1.03, - "learning_rate": 1.801978341381089e-05, - "loss": 0.1554, + "learning_rate": 2.8022873012653488e-05, + "loss": 0.223, "step": 22120 }, { "epoch": 1.03, - "learning_rate": 1.801931461253575e-05, - "loss": 0.3393, + "learning_rate": 2.802240494281747e-05, + "loss": 0.1388, "step": 22125 }, { "epoch": 1.03, - "learning_rate": 1.801884581126061e-05, - "loss": 0.4696, + "learning_rate": 2.802193687298145e-05, + "loss": 0.4349, "step": 22130 }, { "epoch": 1.03, - "learning_rate": 1.801837700998547e-05, - "loss": 0.3602, + "learning_rate": 2.8021468803145428e-05, + "loss": 0.3072, "step": 22135 }, { "epoch": 1.03, - "learning_rate": 1.801790820871033e-05, - "loss": 0.1322, + "learning_rate": 2.802100073330941e-05, + "loss": 0.0539, "step": 22140 }, { "epoch": 1.03, - "learning_rate": 1.801743940743519e-05, - "loss": 0.0692, + "learning_rate": 2.802053266347339e-05, + "loss": 0.0851, "step": 22145 }, { "epoch": 1.03, - "learning_rate": 1.8016970606160052e-05, - "loss": 0.1042, + "learning_rate": 2.802006459363737e-05, + "loss": 0.1224, "step": 22150 }, { "epoch": 1.03, - "learning_rate": 1.8016501804884912e-05, - "loss": 0.1385, + "learning_rate": 2.801959652380135e-05, + "loss": 0.0958, "step": 22155 }, { "epoch": 1.03, - "learning_rate": 1.8016033003609772e-05, - "loss": 0.3227, + "learning_rate": 2.8019128453965334e-05, + "loss": 0.1109, "step": 22160 }, { "epoch": 1.03, - "learning_rate": 1.801556420233463e-05, - "loss": 0.2212, + "learning_rate": 2.8018660384129313e-05, + "loss": 0.1401, "step": 22165 }, { "epoch": 1.03, - "learning_rate": 1.801509540105949e-05, - "loss": 0.2446, + "learning_rate": 2.8018192314293293e-05, + "loss": 0.161, "step": 22170 }, { "epoch": 1.03, - "learning_rate": 1.801462659978435e-05, - "loss": 0.3732, + "learning_rate": 2.8017724244457273e-05, + "loss": 0.295, "step": 22175 }, { "epoch": 1.03, - "learning_rate": 1.801415779850921e-05, - "loss": 0.3812, + "learning_rate": 2.8017256174621256e-05, + "loss": 0.3682, "step": 22180 }, { "epoch": 1.04, - "learning_rate": 1.8013688997234075e-05, - "loss": 0.4351, + "learning_rate": 2.8016788104785236e-05, + "loss": 0.3074, "step": 22185 }, { "epoch": 1.04, - "learning_rate": 1.8013220195958935e-05, - "loss": 0.0613, + "learning_rate": 2.8016320034949216e-05, + "loss": 0.1186, "step": 22190 }, { "epoch": 1.04, - "learning_rate": 1.8012751394683795e-05, - "loss": 0.0991, + "learning_rate": 2.8015851965113196e-05, + "loss": 0.0638, "step": 22195 }, { "epoch": 1.04, - "learning_rate": 1.8012282593408658e-05, - "loss": 0.1864, + "learning_rate": 2.8015383895277175e-05, + "loss": 0.1235, "step": 22200 }, { "epoch": 1.04, - "learning_rate": 1.8011813792133518e-05, - "loss": 0.1468, + "learning_rate": 2.8014915825441155e-05, + "loss": 0.0682, "step": 22205 }, { "epoch": 1.04, - "learning_rate": 1.8011344990858378e-05, - "loss": 0.1907, + "learning_rate": 2.8014447755605135e-05, + "loss": 0.086, "step": 22210 }, { "epoch": 1.04, - "learning_rate": 1.8010876189583238e-05, - "loss": 0.1372, + "learning_rate": 2.8013979685769118e-05, + "loss": 0.1951, "step": 22215 }, { "epoch": 1.04, - "learning_rate": 1.8010407388308098e-05, - "loss": 0.2023, + "learning_rate": 2.8013511615933098e-05, + "loss": 0.2108, "step": 22220 }, { "epoch": 1.04, - "learning_rate": 1.8009938587032958e-05, - "loss": 0.2943, + "learning_rate": 2.8013043546097078e-05, + "loss": 0.2215, "step": 22225 }, { "epoch": 1.04, - "learning_rate": 1.8009469785757818e-05, - "loss": 0.2892, + "learning_rate": 2.8012575476261058e-05, + "loss": 0.3945, "step": 22230 }, { "epoch": 1.04, - "learning_rate": 1.8009000984482678e-05, - "loss": 0.2727, + "learning_rate": 2.801210740642504e-05, + "loss": 0.3312, "step": 22235 }, { "epoch": 1.04, - "learning_rate": 1.8008532183207537e-05, - "loss": 0.0482, + "learning_rate": 2.801163933658902e-05, + "loss": 0.0904, "step": 22240 }, { "epoch": 1.04, - "learning_rate": 1.80080633819324e-05, - "loss": 0.0691, + "learning_rate": 2.8011171266753e-05, + "loss": 0.0737, "step": 22245 }, { "epoch": 1.04, - "learning_rate": 1.800759458065726e-05, - "loss": 0.0779, + "learning_rate": 2.801070319691698e-05, + "loss": 0.0878, "step": 22250 }, { "epoch": 1.04, - "learning_rate": 1.800712577938212e-05, - "loss": 0.1529, + "learning_rate": 2.8010235127080964e-05, + "loss": 0.0806, "step": 22255 }, { "epoch": 1.04, - "learning_rate": 1.800665697810698e-05, - "loss": 0.1618, + "learning_rate": 2.800976705724494e-05, + "loss": 0.1336, "step": 22260 }, { "epoch": 1.04, - "learning_rate": 1.8006188176831844e-05, - "loss": 0.1802, + "learning_rate": 2.800929898740892e-05, + "loss": 0.216, "step": 22265 }, { "epoch": 1.04, - "learning_rate": 1.8005719375556704e-05, - "loss": 0.2725, + "learning_rate": 2.8008830917572903e-05, + "loss": 0.2947, "step": 22270 }, { "epoch": 1.04, - "learning_rate": 1.8005250574281564e-05, - "loss": 0.3094, + "learning_rate": 2.8008362847736883e-05, + "loss": 0.2117, "step": 22275 }, { "epoch": 1.04, - "learning_rate": 1.8004781773006424e-05, - "loss": 0.4009, + "learning_rate": 2.8007894777900863e-05, + "loss": 0.3475, "step": 22280 }, { "epoch": 1.04, - "learning_rate": 1.8004312971731284e-05, - "loss": 0.267, + "learning_rate": 2.8007426708064843e-05, + "loss": 0.2453, "step": 22285 }, { "epoch": 1.04, - "learning_rate": 1.8003844170456147e-05, - "loss": 0.0792, + "learning_rate": 2.8006958638228826e-05, + "loss": 0.1209, "step": 22290 }, { "epoch": 1.04, - "learning_rate": 1.8003375369181007e-05, - "loss": 0.1097, + "learning_rate": 2.8006490568392806e-05, + "loss": 0.1034, "step": 22295 }, { "epoch": 1.04, - "learning_rate": 1.8002906567905867e-05, - "loss": 0.1334, + "learning_rate": 2.8006022498556785e-05, + "loss": 0.0383, "step": 22300 }, { "epoch": 1.04, - "learning_rate": 1.8002437766630727e-05, - "loss": 0.1154, + "learning_rate": 2.8005554428720765e-05, + "loss": 0.1362, "step": 22305 }, { "epoch": 1.04, - "learning_rate": 1.8001968965355587e-05, - "loss": 0.1646, + "learning_rate": 2.800508635888475e-05, + "loss": 0.1696, "step": 22310 }, { "epoch": 1.04, - "learning_rate": 1.8001500164080447e-05, - "loss": 0.2812, + "learning_rate": 2.8004618289048728e-05, + "loss": 0.1563, "step": 22315 }, { "epoch": 1.04, - "learning_rate": 1.8001031362805307e-05, - "loss": 0.1536, + "learning_rate": 2.8004150219212708e-05, + "loss": 0.265, "step": 22320 }, { "epoch": 1.04, - "learning_rate": 1.800056256153017e-05, - "loss": 0.1213, + "learning_rate": 2.8003682149376688e-05, + "loss": 0.2571, "step": 22325 }, { "epoch": 1.04, - "learning_rate": 1.800009376025503e-05, - "loss": 0.4102, + "learning_rate": 2.8003214079540668e-05, + "loss": 0.3525, "step": 22330 }, { "epoch": 1.04, - "learning_rate": 1.799962495897989e-05, - "loss": 0.2346, + "learning_rate": 2.8002746009704647e-05, + "loss": 0.2973, "step": 22335 }, { "epoch": 1.04, - "learning_rate": 1.799915615770475e-05, - "loss": 0.0383, + "learning_rate": 2.8002277939868627e-05, + "loss": 0.0616, "step": 22340 }, { "epoch": 1.04, - "learning_rate": 1.7998687356429613e-05, - "loss": 0.2049, + "learning_rate": 2.800180987003261e-05, + "loss": 0.0927, "step": 22345 }, { "epoch": 1.04, - "learning_rate": 1.7998218555154473e-05, - "loss": 0.0988, + "learning_rate": 2.800134180019659e-05, + "loss": 0.0686, "step": 22350 }, { "epoch": 1.04, - "learning_rate": 1.7997749753879333e-05, - "loss": 0.1249, + "learning_rate": 2.800087373036057e-05, + "loss": 0.154, "step": 22355 }, { "epoch": 1.04, - "learning_rate": 1.7997280952604193e-05, - "loss": 0.0932, + "learning_rate": 2.800040566052455e-05, + "loss": 0.1362, "step": 22360 }, { "epoch": 1.04, - "learning_rate": 1.7996812151329053e-05, - "loss": 0.1902, + "learning_rate": 2.7999937590688533e-05, + "loss": 0.1925, "step": 22365 }, { "epoch": 1.04, - "learning_rate": 1.7996343350053913e-05, - "loss": 0.2656, + "learning_rate": 2.7999469520852513e-05, + "loss": 0.262, "step": 22370 }, { "epoch": 1.04, - "learning_rate": 1.7995874548778773e-05, - "loss": 0.2387, + "learning_rate": 2.7999001451016493e-05, + "loss": 0.2398, "step": 22375 }, { "epoch": 1.04, - "learning_rate": 1.7995405747503633e-05, - "loss": 0.3128, + "learning_rate": 2.7998533381180476e-05, + "loss": 0.414, "step": 22380 }, { "epoch": 1.04, - "learning_rate": 1.7994936946228496e-05, - "loss": 0.2632, + "learning_rate": 2.7998065311344452e-05, + "loss": 0.3237, "step": 22385 }, { "epoch": 1.04, - "learning_rate": 1.7994468144953356e-05, - "loss": 0.0293, + "learning_rate": 2.7997597241508432e-05, + "loss": 0.0841, "step": 22390 }, { "epoch": 1.04, - "learning_rate": 1.7993999343678216e-05, - "loss": 0.1036, + "learning_rate": 2.7997129171672412e-05, + "loss": 0.0513, "step": 22395 }, { "epoch": 1.05, - "learning_rate": 1.7993530542403076e-05, - "loss": 0.1302, + "learning_rate": 2.7996661101836395e-05, + "loss": 0.0424, "step": 22400 }, { "epoch": 1.05, - "learning_rate": 1.799306174112794e-05, - "loss": 0.0677, + "learning_rate": 2.7996193032000375e-05, + "loss": 0.1236, "step": 22405 }, { "epoch": 1.05, - "learning_rate": 1.79925929398528e-05, - "loss": 0.1077, + "learning_rate": 2.7995724962164355e-05, + "loss": 0.1358, "step": 22410 }, { "epoch": 1.05, - "learning_rate": 1.799212413857766e-05, - "loss": 0.2445, + "learning_rate": 2.7995256892328335e-05, + "loss": 0.19, "step": 22415 }, { "epoch": 1.05, - "learning_rate": 1.799165533730252e-05, - "loss": 0.2922, + "learning_rate": 2.7994788822492318e-05, + "loss": 0.1605, "step": 22420 }, { "epoch": 1.05, - "learning_rate": 1.799118653602738e-05, - "loss": 0.3946, + "learning_rate": 2.7994320752656298e-05, + "loss": 0.1292, "step": 22425 }, { "epoch": 1.05, - "learning_rate": 1.7990717734752242e-05, - "loss": 0.3417, + "learning_rate": 2.7993852682820278e-05, + "loss": 0.3035, "step": 22430 }, { "epoch": 1.05, - "learning_rate": 1.7990248933477102e-05, - "loss": 0.2651, + "learning_rate": 2.7993384612984257e-05, + "loss": 0.3058, "step": 22435 }, { "epoch": 1.05, - "learning_rate": 1.7989780132201962e-05, - "loss": 0.0262, + "learning_rate": 2.799291654314824e-05, + "loss": 0.0367, "step": 22440 }, { "epoch": 1.05, - "learning_rate": 1.7989311330926822e-05, - "loss": 0.1012, + "learning_rate": 2.799244847331222e-05, + "loss": 0.0674, "step": 22445 }, { "epoch": 1.05, - "learning_rate": 1.7988842529651682e-05, - "loss": 0.1389, + "learning_rate": 2.7991980403476197e-05, + "loss": 0.1236, "step": 22450 }, { "epoch": 1.05, - "learning_rate": 1.7988373728376542e-05, - "loss": 0.105, + "learning_rate": 2.799151233364018e-05, + "loss": 0.1277, "step": 22455 }, { "epoch": 1.05, - "learning_rate": 1.79879049271014e-05, - "loss": 0.1181, + "learning_rate": 2.799104426380416e-05, + "loss": 0.1916, "step": 22460 }, { "epoch": 1.05, - "learning_rate": 1.798743612582626e-05, - "loss": 0.1825, + "learning_rate": 2.799057619396814e-05, + "loss": 0.2015, "step": 22465 }, { "epoch": 1.05, - "learning_rate": 1.7986967324551125e-05, - "loss": 0.2169, + "learning_rate": 2.799010812413212e-05, + "loss": 0.3296, "step": 22470 }, { "epoch": 1.05, - "learning_rate": 1.7986498523275985e-05, - "loss": 0.2781, + "learning_rate": 2.7989640054296103e-05, + "loss": 0.2275, "step": 22475 }, { "epoch": 1.05, - "learning_rate": 1.7986029722000845e-05, - "loss": 0.3195, + "learning_rate": 2.7989171984460083e-05, + "loss": 0.3404, "step": 22480 }, { "epoch": 1.05, - "learning_rate": 1.7985560920725708e-05, - "loss": 0.2792, + "learning_rate": 2.7988703914624062e-05, + "loss": 0.3814, "step": 22485 }, { "epoch": 1.05, - "learning_rate": 1.7985092119450568e-05, - "loss": 0.0311, + "learning_rate": 2.7988235844788042e-05, + "loss": 0.065, "step": 22490 }, { "epoch": 1.05, - "learning_rate": 1.7984623318175428e-05, - "loss": 0.0569, + "learning_rate": 2.7987767774952025e-05, + "loss": 0.0983, "step": 22495 }, { "epoch": 1.05, - "learning_rate": 1.7984154516900288e-05, - "loss": 0.122, + "learning_rate": 2.7987299705116005e-05, + "loss": 0.0914, "step": 22500 }, { "epoch": 1.05, - "learning_rate": 1.7983685715625148e-05, - "loss": 0.1595, + "learning_rate": 2.7986831635279985e-05, + "loss": 0.0641, "step": 22505 }, { "epoch": 1.05, - "learning_rate": 1.7983216914350008e-05, - "loss": 0.1147, + "learning_rate": 2.7986363565443965e-05, + "loss": 0.1114, "step": 22510 }, { "epoch": 1.05, - "learning_rate": 1.7982748113074868e-05, - "loss": 0.1325, + "learning_rate": 2.7985895495607945e-05, + "loss": 0.1769, "step": 22515 }, { "epoch": 1.05, - "learning_rate": 1.7982279311799728e-05, - "loss": 0.2697, + "learning_rate": 2.7985427425771924e-05, + "loss": 0.2426, "step": 22520 }, { "epoch": 1.05, - "learning_rate": 1.798181051052459e-05, - "loss": 0.3659, + "learning_rate": 2.7984959355935904e-05, + "loss": 0.3089, "step": 22525 }, { "epoch": 1.05, - "learning_rate": 1.798134170924945e-05, - "loss": 0.3407, + "learning_rate": 2.7984491286099887e-05, + "loss": 0.428, "step": 22530 }, { "epoch": 1.05, - "learning_rate": 1.798087290797431e-05, - "loss": 0.2422, + "learning_rate": 2.7984023216263867e-05, + "loss": 0.3321, "step": 22535 }, { "epoch": 1.05, - "learning_rate": 1.798040410669917e-05, - "loss": 0.103, + "learning_rate": 2.7983555146427847e-05, + "loss": 0.0933, "step": 22540 }, { "epoch": 1.05, - "learning_rate": 1.797993530542403e-05, - "loss": 0.1043, + "learning_rate": 2.7983087076591827e-05, + "loss": 0.0801, "step": 22545 }, { "epoch": 1.05, - "learning_rate": 1.7979466504148894e-05, - "loss": 0.1243, + "learning_rate": 2.798261900675581e-05, + "loss": 0.0415, "step": 22550 }, { "epoch": 1.05, - "learning_rate": 1.7978997702873754e-05, - "loss": 0.1655, + "learning_rate": 2.798215093691979e-05, + "loss": 0.0557, "step": 22555 }, { "epoch": 1.05, - "learning_rate": 1.7978528901598614e-05, - "loss": 0.1039, + "learning_rate": 2.798168286708377e-05, + "loss": 0.0842, "step": 22560 }, { "epoch": 1.05, - "learning_rate": 1.7978060100323474e-05, - "loss": 0.1785, + "learning_rate": 2.7981214797247753e-05, + "loss": 0.1134, "step": 22565 }, { "epoch": 1.05, - "learning_rate": 1.7977591299048337e-05, - "loss": 0.1834, + "learning_rate": 2.7980746727411733e-05, + "loss": 0.1539, "step": 22570 }, { "epoch": 1.05, - "learning_rate": 1.7977122497773197e-05, - "loss": 0.2746, + "learning_rate": 2.798027865757571e-05, + "loss": 0.2321, "step": 22575 }, { "epoch": 1.05, - "learning_rate": 1.7976653696498057e-05, - "loss": 0.3149, + "learning_rate": 2.797981058773969e-05, + "loss": 0.352, "step": 22580 }, { "epoch": 1.05, - "learning_rate": 1.7976184895222917e-05, - "loss": 0.2779, + "learning_rate": 2.7979342517903672e-05, + "loss": 0.2247, "step": 22585 }, { "epoch": 1.05, - "learning_rate": 1.7975716093947777e-05, - "loss": 0.1215, + "learning_rate": 2.7978874448067652e-05, + "loss": 0.0818, "step": 22590 }, { "epoch": 1.05, - "learning_rate": 1.7975247292672637e-05, - "loss": 0.0716, + "learning_rate": 2.7978406378231632e-05, + "loss": 0.1145, "step": 22595 }, { "epoch": 1.05, - "learning_rate": 1.7974778491397497e-05, - "loss": 0.0738, + "learning_rate": 2.7977938308395612e-05, + "loss": 0.12, "step": 22600 }, { "epoch": 1.05, - "learning_rate": 1.7974309690122357e-05, - "loss": 0.1404, + "learning_rate": 2.7977470238559595e-05, + "loss": 0.0897, "step": 22605 }, { "epoch": 1.06, - "learning_rate": 1.7973840888847217e-05, - "loss": 0.1767, + "learning_rate": 2.7977002168723575e-05, + "loss": 0.1287, "step": 22610 }, { "epoch": 1.06, - "learning_rate": 1.797337208757208e-05, - "loss": 0.1615, + "learning_rate": 2.7976534098887555e-05, + "loss": 0.2098, "step": 22615 }, { "epoch": 1.06, - "learning_rate": 1.797290328629694e-05, - "loss": 0.1144, + "learning_rate": 2.7976066029051534e-05, + "loss": 0.2627, "step": 22620 }, { "epoch": 1.06, - "learning_rate": 1.79724344850218e-05, - "loss": 0.3295, + "learning_rate": 2.7975597959215518e-05, + "loss": 0.3518, "step": 22625 }, { "epoch": 1.06, - "learning_rate": 1.7971965683746663e-05, - "loss": 0.3561, + "learning_rate": 2.7975129889379497e-05, + "loss": 0.4095, "step": 22630 }, { "epoch": 1.06, - "learning_rate": 1.7971496882471523e-05, - "loss": 0.3627, + "learning_rate": 2.7974661819543477e-05, + "loss": 0.4317, "step": 22635 }, { "epoch": 1.06, - "learning_rate": 1.7971028081196383e-05, - "loss": 0.0388, + "learning_rate": 2.7974193749707457e-05, + "loss": 0.0738, "step": 22640 }, { "epoch": 1.06, - "learning_rate": 1.7970559279921243e-05, - "loss": 0.0624, + "learning_rate": 2.7973725679871437e-05, + "loss": 0.1177, "step": 22645 }, { "epoch": 1.06, - "learning_rate": 1.7970090478646103e-05, - "loss": 0.1464, + "learning_rate": 2.7973257610035417e-05, + "loss": 0.1492, "step": 22650 }, { "epoch": 1.06, - "learning_rate": 1.7969621677370963e-05, - "loss": 0.1578, + "learning_rate": 2.7972789540199396e-05, + "loss": 0.1138, "step": 22655 }, { "epoch": 1.06, - "learning_rate": 1.7969152876095823e-05, - "loss": 0.2061, + "learning_rate": 2.797232147036338e-05, + "loss": 0.0826, "step": 22660 }, { "epoch": 1.06, - "learning_rate": 1.7968684074820686e-05, - "loss": 0.1574, + "learning_rate": 2.797185340052736e-05, + "loss": 0.2128, "step": 22665 }, { "epoch": 1.06, - "learning_rate": 1.7968215273545546e-05, - "loss": 0.293, + "learning_rate": 2.797138533069134e-05, + "loss": 0.1339, "step": 22670 }, { "epoch": 1.06, - "learning_rate": 1.7967746472270406e-05, - "loss": 0.3353, + "learning_rate": 2.797091726085532e-05, + "loss": 0.181, "step": 22675 }, { "epoch": 1.06, - "learning_rate": 1.7967277670995266e-05, - "loss": 0.387, + "learning_rate": 2.7970449191019302e-05, + "loss": 0.4068, "step": 22680 }, { "epoch": 1.06, - "learning_rate": 1.7966808869720126e-05, - "loss": 0.3516, + "learning_rate": 2.7969981121183282e-05, + "loss": 0.2568, "step": 22685 }, { "epoch": 1.06, - "learning_rate": 1.7966340068444986e-05, - "loss": 0.0655, + "learning_rate": 2.7969513051347262e-05, + "loss": 0.0896, "step": 22690 }, { "epoch": 1.06, - "learning_rate": 1.796587126716985e-05, - "loss": 0.0642, + "learning_rate": 2.7969044981511245e-05, + "loss": 0.0615, "step": 22695 }, { "epoch": 1.06, - "learning_rate": 1.796540246589471e-05, - "loss": 0.12, + "learning_rate": 2.796857691167522e-05, + "loss": 0.0534, "step": 22700 }, { "epoch": 1.06, - "learning_rate": 1.796493366461957e-05, - "loss": 0.1054, + "learning_rate": 2.79681088418392e-05, + "loss": 0.1399, "step": 22705 }, { "epoch": 1.06, - "learning_rate": 1.7964464863344432e-05, - "loss": 0.1462, + "learning_rate": 2.796764077200318e-05, + "loss": 0.1286, "step": 22710 }, { "epoch": 1.06, - "learning_rate": 1.7963996062069292e-05, - "loss": 0.1599, + "learning_rate": 2.7967172702167164e-05, + "loss": 0.1565, "step": 22715 }, { "epoch": 1.06, - "learning_rate": 1.7963527260794152e-05, - "loss": 0.2888, + "learning_rate": 2.7966704632331144e-05, + "loss": 0.2351, "step": 22720 }, { "epoch": 1.06, - "learning_rate": 1.7963058459519012e-05, - "loss": 0.1973, + "learning_rate": 2.7966236562495124e-05, + "loss": 0.2478, "step": 22725 }, { "epoch": 1.06, - "learning_rate": 1.7962589658243872e-05, - "loss": 0.3846, + "learning_rate": 2.7965768492659104e-05, + "loss": 0.346, "step": 22730 }, { "epoch": 1.06, - "learning_rate": 1.7962120856968732e-05, - "loss": 0.3066, + "learning_rate": 2.7965300422823087e-05, + "loss": 0.308, "step": 22735 }, { "epoch": 1.06, - "learning_rate": 1.7961652055693592e-05, - "loss": 0.0643, + "learning_rate": 2.7964832352987067e-05, + "loss": 0.0745, "step": 22740 }, { "epoch": 1.06, - "learning_rate": 1.7961183254418452e-05, - "loss": 0.0761, + "learning_rate": 2.7964364283151047e-05, + "loss": 0.0747, "step": 22745 }, { "epoch": 1.06, - "learning_rate": 1.796071445314331e-05, - "loss": 0.0798, + "learning_rate": 2.796389621331503e-05, + "loss": 0.094, "step": 22750 }, { "epoch": 1.06, - "learning_rate": 1.7960245651868175e-05, - "loss": 0.16, + "learning_rate": 2.796342814347901e-05, + "loss": 0.1546, "step": 22755 }, { "epoch": 1.06, - "learning_rate": 1.7959776850593035e-05, - "loss": 0.155, + "learning_rate": 2.796296007364299e-05, + "loss": 0.1048, "step": 22760 }, { "epoch": 1.06, - "learning_rate": 1.7959308049317895e-05, - "loss": 0.2153, + "learning_rate": 2.7962492003806966e-05, + "loss": 0.0816, "step": 22765 }, { "epoch": 1.06, - "learning_rate": 1.7958839248042755e-05, - "loss": 0.191, + "learning_rate": 2.796202393397095e-05, + "loss": 0.1823, "step": 22770 }, { "epoch": 1.06, - "learning_rate": 1.7958370446767618e-05, - "loss": 0.2506, + "learning_rate": 2.796155586413493e-05, + "loss": 0.1807, "step": 22775 }, { "epoch": 1.06, - "learning_rate": 1.7957901645492478e-05, - "loss": 0.4182, + "learning_rate": 2.796108779429891e-05, + "loss": 0.2225, "step": 22780 }, { "epoch": 1.06, - "learning_rate": 1.7957432844217338e-05, - "loss": 0.3069, + "learning_rate": 2.796061972446289e-05, + "loss": 0.451, "step": 22785 }, { "epoch": 1.06, - "learning_rate": 1.7956964042942198e-05, - "loss": 0.0769, + "learning_rate": 2.7960151654626872e-05, + "loss": 0.0427, "step": 22790 }, { "epoch": 1.06, - "learning_rate": 1.7956495241667058e-05, - "loss": 0.1017, + "learning_rate": 2.795968358479085e-05, + "loss": 0.1386, "step": 22795 }, { "epoch": 1.06, - "learning_rate": 1.795602644039192e-05, - "loss": 0.1326, + "learning_rate": 2.795921551495483e-05, + "loss": 0.1519, "step": 22800 }, { "epoch": 1.06, - "learning_rate": 1.795555763911678e-05, - "loss": 0.0708, + "learning_rate": 2.7958747445118815e-05, + "loss": 0.1064, "step": 22805 }, { "epoch": 1.06, - "learning_rate": 1.795508883784164e-05, - "loss": 0.2177, + "learning_rate": 2.7958279375282795e-05, + "loss": 0.1201, "step": 22810 }, { "epoch": 1.06, - "learning_rate": 1.79546200365665e-05, - "loss": 0.1882, + "learning_rate": 2.7957811305446774e-05, + "loss": 0.158, "step": 22815 }, { "epoch": 1.06, - "learning_rate": 1.795415123529136e-05, - "loss": 0.1609, + "learning_rate": 2.7957343235610754e-05, + "loss": 0.2632, "step": 22820 }, { "epoch": 1.07, - "learning_rate": 1.795368243401622e-05, - "loss": 0.1834, + "learning_rate": 2.7956875165774737e-05, + "loss": 0.2432, "step": 22825 }, { "epoch": 1.07, - "learning_rate": 1.795321363274108e-05, - "loss": 0.4985, + "learning_rate": 2.7956407095938714e-05, + "loss": 0.4555, "step": 22830 }, { "epoch": 1.07, - "learning_rate": 1.7952744831465944e-05, - "loss": 0.2931, + "learning_rate": 2.7955939026102694e-05, + "loss": 0.3108, "step": 22835 }, { "epoch": 1.07, - "learning_rate": 1.7952276030190804e-05, - "loss": 0.036, + "learning_rate": 2.7955470956266673e-05, + "loss": 0.0548, "step": 22840 }, { "epoch": 1.07, - "learning_rate": 1.7951807228915664e-05, - "loss": 0.0431, + "learning_rate": 2.7955002886430657e-05, + "loss": 0.1005, "step": 22845 }, { "epoch": 1.07, - "learning_rate": 1.7951338427640524e-05, - "loss": 0.1114, + "learning_rate": 2.7954534816594636e-05, + "loss": 0.063, "step": 22850 }, { "epoch": 1.07, - "learning_rate": 1.7950869626365387e-05, - "loss": 0.1402, + "learning_rate": 2.7954066746758616e-05, + "loss": 0.1485, "step": 22855 }, { "epoch": 1.07, - "learning_rate": 1.7950400825090247e-05, - "loss": 0.1071, + "learning_rate": 2.7953598676922596e-05, + "loss": 0.1224, "step": 22860 }, { "epoch": 1.07, - "learning_rate": 1.7949932023815107e-05, - "loss": 0.2206, + "learning_rate": 2.795313060708658e-05, + "loss": 0.1332, "step": 22865 }, { "epoch": 1.07, - "learning_rate": 1.7949463222539967e-05, - "loss": 0.1546, + "learning_rate": 2.795266253725056e-05, + "loss": 0.1694, "step": 22870 }, { "epoch": 1.07, - "learning_rate": 1.7948994421264827e-05, - "loss": 0.2827, + "learning_rate": 2.795219446741454e-05, + "loss": 0.2077, "step": 22875 }, { "epoch": 1.07, - "learning_rate": 1.7948525619989687e-05, - "loss": 0.384, + "learning_rate": 2.7951726397578522e-05, + "loss": 0.4646, "step": 22880 }, { "epoch": 1.07, - "learning_rate": 1.7948056818714547e-05, - "loss": 0.162, + "learning_rate": 2.7951258327742502e-05, + "loss": 0.2959, "step": 22885 }, { "epoch": 1.07, - "learning_rate": 1.7947588017439407e-05, - "loss": 0.0751, + "learning_rate": 2.795079025790648e-05, + "loss": 0.1028, "step": 22890 }, { "epoch": 1.07, - "learning_rate": 1.794711921616427e-05, - "loss": 0.0633, + "learning_rate": 2.7950322188070458e-05, + "loss": 0.1081, "step": 22895 }, { "epoch": 1.07, - "learning_rate": 1.794665041488913e-05, - "loss": 0.097, + "learning_rate": 2.794985411823444e-05, + "loss": 0.079, "step": 22900 }, { "epoch": 1.07, - "learning_rate": 1.794618161361399e-05, - "loss": 0.0881, + "learning_rate": 2.794938604839842e-05, + "loss": 0.0909, "step": 22905 }, { "epoch": 1.07, - "learning_rate": 1.794571281233885e-05, - "loss": 0.1653, + "learning_rate": 2.79489179785624e-05, + "loss": 0.1335, "step": 22910 }, { "epoch": 1.07, - "learning_rate": 1.7945244011063713e-05, - "loss": 0.212, + "learning_rate": 2.794844990872638e-05, + "loss": 0.1752, "step": 22915 }, { "epoch": 1.07, - "learning_rate": 1.7944775209788573e-05, - "loss": 0.217, + "learning_rate": 2.7947981838890364e-05, + "loss": 0.1535, "step": 22920 }, { "epoch": 1.07, - "learning_rate": 1.7944306408513433e-05, - "loss": 0.2547, + "learning_rate": 2.7947513769054344e-05, + "loss": 0.2721, "step": 22925 }, { "epoch": 1.07, - "learning_rate": 1.7943837607238293e-05, - "loss": 0.3459, + "learning_rate": 2.7947045699218324e-05, + "loss": 0.3652, "step": 22930 }, { "epoch": 1.07, - "learning_rate": 1.7943368805963153e-05, - "loss": 0.3144, + "learning_rate": 2.7946577629382307e-05, + "loss": 0.3648, "step": 22935 }, { "epoch": 1.07, - "learning_rate": 1.7942900004688016e-05, - "loss": 0.0644, + "learning_rate": 2.7946109559546287e-05, + "loss": 0.0835, "step": 22940 }, { "epoch": 1.07, - "learning_rate": 1.7942431203412876e-05, - "loss": 0.0695, + "learning_rate": 2.7945641489710267e-05, + "loss": 0.0997, "step": 22945 }, { "epoch": 1.07, - "learning_rate": 1.7941962402137736e-05, - "loss": 0.0989, + "learning_rate": 2.7945173419874246e-05, + "loss": 0.0862, "step": 22950 }, { "epoch": 1.07, - "learning_rate": 1.7941493600862596e-05, - "loss": 0.0713, + "learning_rate": 2.7944705350038226e-05, + "loss": 0.1361, "step": 22955 }, { "epoch": 1.07, - "learning_rate": 1.7941024799587456e-05, - "loss": 0.1471, + "learning_rate": 2.7944237280202206e-05, + "loss": 0.1504, "step": 22960 }, { "epoch": 1.07, - "learning_rate": 1.7940555998312316e-05, - "loss": 0.7201, + "learning_rate": 2.7943769210366186e-05, + "loss": 0.1236, "step": 22965 }, { "epoch": 1.07, - "learning_rate": 1.7940087197037176e-05, - "loss": 0.3036, + "learning_rate": 2.7943301140530166e-05, + "loss": 0.1653, "step": 22970 }, { "epoch": 1.07, - "learning_rate": 1.7939618395762036e-05, - "loss": 0.2699, + "learning_rate": 2.794283307069415e-05, + "loss": 0.2567, "step": 22975 }, { "epoch": 1.07, - "learning_rate": 1.79391495944869e-05, - "loss": 0.3579, + "learning_rate": 2.794236500085813e-05, + "loss": 0.4976, "step": 22980 }, { "epoch": 1.07, - "learning_rate": 1.793868079321176e-05, - "loss": 0.2728, + "learning_rate": 2.794189693102211e-05, + "loss": 0.3687, "step": 22985 }, { "epoch": 1.07, - "learning_rate": 1.793821199193662e-05, - "loss": 0.0657, + "learning_rate": 2.794142886118609e-05, + "loss": 0.0721, "step": 22990 }, { "epoch": 1.07, - "learning_rate": 1.7937743190661482e-05, - "loss": 0.0762, + "learning_rate": 2.794096079135007e-05, + "loss": 0.0776, "step": 22995 }, { "epoch": 1.07, - "learning_rate": 1.7937274389386342e-05, - "loss": 0.146, + "learning_rate": 2.794049272151405e-05, + "loss": 0.0999, "step": 23000 }, { "epoch": 1.07, - "learning_rate": 1.7936805588111202e-05, - "loss": 0.1352, + "learning_rate": 2.794002465167803e-05, + "loss": 0.0748, "step": 23005 }, { "epoch": 1.07, - "learning_rate": 1.7936336786836062e-05, - "loss": 0.126, + "learning_rate": 2.7939556581842014e-05, + "loss": 0.1476, "step": 23010 }, { "epoch": 1.07, - "learning_rate": 1.7935867985560922e-05, - "loss": 0.1249, + "learning_rate": 2.793908851200599e-05, + "loss": 0.1064, "step": 23015 }, { "epoch": 1.07, - "learning_rate": 1.7935399184285782e-05, - "loss": 0.1431, + "learning_rate": 2.793862044216997e-05, + "loss": 0.2505, "step": 23020 }, { "epoch": 1.07, - "learning_rate": 1.7934930383010642e-05, - "loss": 0.1323, + "learning_rate": 2.793815237233395e-05, + "loss": 0.2573, "step": 23025 }, { "epoch": 1.07, - "learning_rate": 1.7934461581735502e-05, - "loss": 0.3606, + "learning_rate": 2.7937684302497934e-05, + "loss": 0.1895, "step": 23030 }, { "epoch": 1.07, - "learning_rate": 1.7933992780460365e-05, - "loss": 0.2748, + "learning_rate": 2.7937216232661913e-05, + "loss": 0.204, "step": 23035 }, { "epoch": 1.08, - "learning_rate": 1.7933523979185225e-05, - "loss": 0.0852, + "learning_rate": 2.7936748162825893e-05, + "loss": 0.1203, "step": 23040 }, { "epoch": 1.08, - "learning_rate": 1.7933055177910085e-05, - "loss": 0.0636, + "learning_rate": 2.7936280092989873e-05, + "loss": 0.0748, "step": 23045 }, { "epoch": 1.08, - "learning_rate": 1.7932586376634945e-05, - "loss": 0.1535, + "learning_rate": 2.7935812023153856e-05, + "loss": 0.0602, "step": 23050 }, { "epoch": 1.08, - "learning_rate": 1.7932117575359805e-05, - "loss": 0.146, + "learning_rate": 2.7935343953317836e-05, + "loss": 0.1381, "step": 23055 }, { "epoch": 1.08, - "learning_rate": 1.7931648774084668e-05, - "loss": 0.1514, + "learning_rate": 2.7934875883481816e-05, + "loss": 0.133, "step": 23060 }, { "epoch": 1.08, - "learning_rate": 1.7931179972809528e-05, - "loss": 0.1452, + "learning_rate": 2.79344078136458e-05, + "loss": 0.1485, "step": 23065 }, { "epoch": 1.08, - "learning_rate": 1.7930711171534388e-05, - "loss": 0.1974, + "learning_rate": 2.793393974380978e-05, + "loss": 0.167, "step": 23070 }, { "epoch": 1.08, - "learning_rate": 1.7930242370259248e-05, - "loss": 0.338, + "learning_rate": 2.793347167397376e-05, + "loss": 0.3623, "step": 23075 }, { "epoch": 1.08, - "learning_rate": 1.792977356898411e-05, - "loss": 0.3308, + "learning_rate": 2.7933003604137735e-05, + "loss": 0.5458, "step": 23080 }, { "epoch": 1.08, - "learning_rate": 1.792930476770897e-05, - "loss": 0.2528, + "learning_rate": 2.793253553430172e-05, + "loss": 0.3545, "step": 23085 }, { "epoch": 1.08, - "learning_rate": 1.792883596643383e-05, - "loss": 0.0529, + "learning_rate": 2.7932067464465698e-05, + "loss": 0.0485, "step": 23090 }, { "epoch": 1.08, - "learning_rate": 1.792836716515869e-05, - "loss": 0.0403, + "learning_rate": 2.7931599394629678e-05, + "loss": 0.0781, "step": 23095 }, { "epoch": 1.08, - "learning_rate": 1.792789836388355e-05, - "loss": 0.1068, + "learning_rate": 2.7931131324793658e-05, + "loss": 0.1369, "step": 23100 }, { "epoch": 1.08, - "learning_rate": 1.792742956260841e-05, - "loss": 0.1184, + "learning_rate": 2.793066325495764e-05, + "loss": 0.0773, "step": 23105 }, { "epoch": 1.08, - "learning_rate": 1.792696076133327e-05, - "loss": 0.1366, + "learning_rate": 2.793019518512162e-05, + "loss": 0.1182, "step": 23110 }, { "epoch": 1.08, - "learning_rate": 1.792649196005813e-05, - "loss": 0.1412, + "learning_rate": 2.79297271152856e-05, + "loss": 0.1371, "step": 23115 }, { "epoch": 1.08, - "learning_rate": 1.792602315878299e-05, - "loss": 0.3482, + "learning_rate": 2.7929259045449584e-05, + "loss": 0.1681, "step": 23120 }, { "epoch": 1.08, - "learning_rate": 1.7925554357507854e-05, - "loss": 0.2595, + "learning_rate": 2.7928790975613564e-05, + "loss": 0.2092, "step": 23125 }, { "epoch": 1.08, - "learning_rate": 1.7925085556232714e-05, - "loss": 0.3428, + "learning_rate": 2.7928322905777544e-05, + "loss": 0.3097, "step": 23130 }, { "epoch": 1.08, - "learning_rate": 1.7924616754957574e-05, - "loss": 0.3361, + "learning_rate": 2.7927854835941523e-05, + "loss": 0.2789, "step": 23135 }, { "epoch": 1.08, - "learning_rate": 1.7924147953682437e-05, - "loss": 0.0959, + "learning_rate": 2.7927386766105507e-05, + "loss": 0.0853, "step": 23140 }, { "epoch": 1.08, - "learning_rate": 1.7923679152407297e-05, - "loss": 0.0802, + "learning_rate": 2.7926918696269483e-05, + "loss": 0.1227, "step": 23145 }, { "epoch": 1.08, - "learning_rate": 1.7923210351132157e-05, - "loss": 0.1528, + "learning_rate": 2.7926450626433463e-05, + "loss": 0.1103, "step": 23150 }, { "epoch": 1.08, - "learning_rate": 1.7922741549857017e-05, - "loss": 0.0776, + "learning_rate": 2.7925982556597443e-05, + "loss": 0.1223, "step": 23155 }, { "epoch": 1.08, - "learning_rate": 1.7922272748581877e-05, - "loss": 0.1456, + "learning_rate": 2.7925514486761426e-05, + "loss": 0.1283, "step": 23160 }, { "epoch": 1.08, - "learning_rate": 1.7921803947306737e-05, - "loss": 0.1474, + "learning_rate": 2.7925046416925406e-05, + "loss": 0.1876, "step": 23165 }, { "epoch": 1.08, - "learning_rate": 1.7921335146031597e-05, - "loss": 0.1562, + "learning_rate": 2.7924578347089385e-05, + "loss": 0.2919, "step": 23170 }, { "epoch": 1.08, - "learning_rate": 1.792086634475646e-05, - "loss": 0.2387, + "learning_rate": 2.792411027725337e-05, + "loss": 0.2611, "step": 23175 }, { "epoch": 1.08, - "learning_rate": 1.792039754348132e-05, - "loss": 0.4654, + "learning_rate": 2.792364220741735e-05, + "loss": 0.3863, "step": 23180 }, { "epoch": 1.08, - "learning_rate": 1.791992874220618e-05, - "loss": 0.2778, + "learning_rate": 2.7923174137581328e-05, + "loss": 0.2966, "step": 23185 }, { "epoch": 1.08, - "learning_rate": 1.791945994093104e-05, - "loss": 0.0927, + "learning_rate": 2.7922706067745308e-05, + "loss": 0.0797, "step": 23190 }, { "epoch": 1.08, - "learning_rate": 1.79189911396559e-05, - "loss": 0.0786, + "learning_rate": 2.792223799790929e-05, + "loss": 0.0962, "step": 23195 }, { "epoch": 1.08, - "learning_rate": 1.791852233838076e-05, - "loss": 0.0507, + "learning_rate": 2.792176992807327e-05, + "loss": 0.0764, "step": 23200 }, { "epoch": 1.08, - "learning_rate": 1.7918053537105623e-05, - "loss": 0.1528, + "learning_rate": 2.7921301858237248e-05, + "loss": 0.1265, "step": 23205 }, { "epoch": 1.08, - "learning_rate": 1.7917584735830483e-05, - "loss": 0.146, + "learning_rate": 2.7920833788401227e-05, + "loss": 0.1923, "step": 23210 }, { "epoch": 1.08, - "learning_rate": 1.7917115934555343e-05, - "loss": 0.1655, + "learning_rate": 2.792036571856521e-05, + "loss": 0.1019, "step": 23215 }, { "epoch": 1.08, - "learning_rate": 1.7916647133280206e-05, - "loss": 0.1505, + "learning_rate": 2.791989764872919e-05, + "loss": 0.1793, "step": 23220 }, { "epoch": 1.08, - "learning_rate": 1.7916178332005066e-05, - "loss": 0.2023, + "learning_rate": 2.791942957889317e-05, + "loss": 0.2424, "step": 23225 }, { "epoch": 1.08, - "learning_rate": 1.7915709530729926e-05, - "loss": 0.348, + "learning_rate": 2.791896150905715e-05, + "loss": 0.3228, "step": 23230 }, { "epoch": 1.08, - "learning_rate": 1.7915240729454786e-05, - "loss": 0.3989, + "learning_rate": 2.7918493439221133e-05, + "loss": 0.3051, "step": 23235 }, { "epoch": 1.08, - "learning_rate": 1.7914771928179646e-05, - "loss": 0.0733, + "learning_rate": 2.7918025369385113e-05, + "loss": 0.0913, "step": 23240 }, { "epoch": 1.08, - "learning_rate": 1.7914303126904506e-05, - "loss": 0.0842, + "learning_rate": 2.7917557299549093e-05, + "loss": 0.1234, "step": 23245 }, { "epoch": 1.08, - "learning_rate": 1.7913834325629366e-05, - "loss": 0.0824, + "learning_rate": 2.7917089229713076e-05, + "loss": 0.0642, "step": 23250 }, { "epoch": 1.09, - "learning_rate": 1.7913365524354226e-05, - "loss": 0.1814, + "learning_rate": 2.7916621159877056e-05, + "loss": 0.177, "step": 23255 }, { "epoch": 1.09, - "learning_rate": 1.7912896723079086e-05, - "loss": 0.088, + "learning_rate": 2.7916153090041036e-05, + "loss": 0.1408, "step": 23260 }, { "epoch": 1.09, - "learning_rate": 1.791242792180395e-05, - "loss": 0.1784, + "learning_rate": 2.7915685020205016e-05, + "loss": 0.1713, "step": 23265 }, { "epoch": 1.09, - "learning_rate": 1.791195912052881e-05, - "loss": 0.1825, + "learning_rate": 2.7915216950368995e-05, + "loss": 0.1815, "step": 23270 }, { "epoch": 1.09, - "learning_rate": 1.791149031925367e-05, - "loss": 0.1527, + "learning_rate": 2.7914748880532975e-05, + "loss": 0.2341, "step": 23275 }, { "epoch": 1.09, - "learning_rate": 1.7911021517978532e-05, - "loss": 0.2648, + "learning_rate": 2.7914280810696955e-05, + "loss": 0.2827, "step": 23280 }, { "epoch": 1.09, - "learning_rate": 1.7910552716703392e-05, - "loss": 0.4263, + "learning_rate": 2.7913812740860935e-05, + "loss": 0.3039, "step": 23285 }, { "epoch": 1.09, - "learning_rate": 1.7910083915428252e-05, - "loss": 0.062, + "learning_rate": 2.7913344671024918e-05, + "loss": 0.1035, "step": 23290 }, { "epoch": 1.09, - "learning_rate": 1.7909615114153112e-05, - "loss": 0.0452, + "learning_rate": 2.7912876601188898e-05, + "loss": 0.0578, "step": 23295 }, { "epoch": 1.09, - "learning_rate": 1.7909146312877972e-05, - "loss": 0.0897, + "learning_rate": 2.7912408531352878e-05, + "loss": 0.0686, "step": 23300 }, { "epoch": 1.09, - "learning_rate": 1.7908677511602832e-05, - "loss": 0.1544, + "learning_rate": 2.791194046151686e-05, + "loss": 0.1239, "step": 23305 }, { "epoch": 1.09, - "learning_rate": 1.7908208710327692e-05, - "loss": 0.1552, + "learning_rate": 2.791147239168084e-05, + "loss": 0.1145, "step": 23310 }, { "epoch": 1.09, - "learning_rate": 1.7907739909052555e-05, - "loss": 0.2193, + "learning_rate": 2.791100432184482e-05, + "loss": 0.149, "step": 23315 }, { "epoch": 1.09, - "learning_rate": 1.7907271107777415e-05, - "loss": 0.1309, + "learning_rate": 2.79105362520088e-05, + "loss": 0.2288, "step": 23320 }, { "epoch": 1.09, - "learning_rate": 1.7906802306502275e-05, - "loss": 0.2026, + "learning_rate": 2.7910068182172784e-05, + "loss": 0.2054, "step": 23325 }, { "epoch": 1.09, - "learning_rate": 1.7906333505227135e-05, - "loss": 0.3341, + "learning_rate": 2.7909600112336763e-05, + "loss": 0.4812, "step": 23330 }, { "epoch": 1.09, - "learning_rate": 1.7905864703951995e-05, - "loss": 0.3806, + "learning_rate": 2.790913204250074e-05, + "loss": 0.3454, "step": 23335 }, { "epoch": 1.09, - "learning_rate": 1.7905395902676855e-05, - "loss": 0.09, + "learning_rate": 2.790866397266472e-05, + "loss": 0.0272, "step": 23340 }, { "epoch": 1.09, - "learning_rate": 1.790492710140172e-05, - "loss": 0.079, + "learning_rate": 2.7908195902828703e-05, + "loss": 0.12, "step": 23345 }, { "epoch": 1.09, - "learning_rate": 1.7904458300126578e-05, - "loss": 0.155, + "learning_rate": 2.7907727832992683e-05, + "loss": 0.0455, "step": 23350 }, { "epoch": 1.09, - "learning_rate": 1.7903989498851438e-05, - "loss": 0.2088, + "learning_rate": 2.7907259763156662e-05, + "loss": 0.0957, "step": 23355 }, { "epoch": 1.09, - "learning_rate": 1.79035206975763e-05, - "loss": 0.1606, + "learning_rate": 2.7906791693320646e-05, + "loss": 0.1513, "step": 23360 }, { "epoch": 1.09, - "learning_rate": 1.790305189630116e-05, - "loss": 0.109, + "learning_rate": 2.7906323623484625e-05, + "loss": 0.2252, "step": 23365 }, { "epoch": 1.09, - "learning_rate": 1.790258309502602e-05, - "loss": 0.1766, + "learning_rate": 2.7905855553648605e-05, + "loss": 0.2206, "step": 23370 }, { "epoch": 1.09, - "learning_rate": 1.790211429375088e-05, - "loss": 0.1729, + "learning_rate": 2.7905387483812585e-05, + "loss": 0.2257, "step": 23375 }, { "epoch": 1.09, - "learning_rate": 1.790164549247574e-05, - "loss": 0.3656, + "learning_rate": 2.7904919413976568e-05, + "loss": 0.3453, "step": 23380 }, { "epoch": 1.09, - "learning_rate": 1.79011766912006e-05, - "loss": 0.2559, + "learning_rate": 2.7904451344140548e-05, + "loss": 0.2708, "step": 23385 }, { "epoch": 1.09, - "learning_rate": 1.790070788992546e-05, - "loss": 0.0238, + "learning_rate": 2.7903983274304528e-05, + "loss": 0.1018, "step": 23390 }, { "epoch": 1.09, - "learning_rate": 1.790023908865032e-05, - "loss": 0.0779, + "learning_rate": 2.7903515204468504e-05, + "loss": 0.0812, "step": 23395 }, { "epoch": 1.09, - "learning_rate": 1.789977028737518e-05, - "loss": 0.1486, + "learning_rate": 2.7903047134632488e-05, + "loss": 0.1129, "step": 23400 }, { "epoch": 1.09, - "learning_rate": 1.7899301486100044e-05, - "loss": 0.1425, + "learning_rate": 2.7902579064796467e-05, + "loss": 0.0758, "step": 23405 }, { "epoch": 1.09, - "learning_rate": 1.7898832684824904e-05, - "loss": 0.1606, + "learning_rate": 2.7902110994960447e-05, + "loss": 0.1734, "step": 23410 }, { "epoch": 1.09, - "learning_rate": 1.7898363883549764e-05, - "loss": 0.1494, + "learning_rate": 2.790164292512443e-05, + "loss": 0.1572, "step": 23415 }, { "epoch": 1.09, - "learning_rate": 1.7897895082274624e-05, - "loss": 0.1622, + "learning_rate": 2.790117485528841e-05, + "loss": 0.2247, "step": 23420 }, { "epoch": 1.09, - "learning_rate": 1.7897426280999487e-05, - "loss": 0.2253, + "learning_rate": 2.790070678545239e-05, + "loss": 0.3071, "step": 23425 }, { "epoch": 1.09, - "learning_rate": 1.7896957479724347e-05, - "loss": 0.3518, + "learning_rate": 2.790023871561637e-05, + "loss": 0.292, "step": 23430 }, { "epoch": 1.09, - "learning_rate": 1.7896488678449207e-05, - "loss": 0.2974, + "learning_rate": 2.7899770645780353e-05, + "loss": 0.2463, "step": 23435 }, { "epoch": 1.09, - "learning_rate": 1.7896019877174067e-05, - "loss": 0.0553, + "learning_rate": 2.7899302575944333e-05, + "loss": 0.0202, "step": 23440 }, { "epoch": 1.09, - "learning_rate": 1.7895551075898927e-05, - "loss": 0.0576, + "learning_rate": 2.7898834506108313e-05, + "loss": 0.1184, "step": 23445 }, { "epoch": 1.09, - "learning_rate": 1.7895082274623787e-05, - "loss": 0.1191, + "learning_rate": 2.7898366436272293e-05, + "loss": 0.0944, "step": 23450 }, { "epoch": 1.09, - "learning_rate": 1.789461347334865e-05, - "loss": 0.1455, + "learning_rate": 2.7897898366436276e-05, + "loss": 0.176, "step": 23455 }, { "epoch": 1.09, - "learning_rate": 1.789414467207351e-05, - "loss": 0.1459, + "learning_rate": 2.7897430296600252e-05, + "loss": 0.2143, "step": 23460 }, { "epoch": 1.09, - "learning_rate": 1.789367587079837e-05, - "loss": 0.19, + "learning_rate": 2.7896962226764232e-05, + "loss": 0.1107, "step": 23465 }, { "epoch": 1.1, - "learning_rate": 1.789320706952323e-05, - "loss": 0.2075, + "learning_rate": 2.7896494156928212e-05, + "loss": 0.1465, "step": 23470 }, { "epoch": 1.1, - "learning_rate": 1.789273826824809e-05, - "loss": 0.2089, + "learning_rate": 2.7896026087092195e-05, + "loss": 0.332, "step": 23475 }, { "epoch": 1.1, - "learning_rate": 1.789226946697295e-05, - "loss": 0.6432, + "learning_rate": 2.7895558017256175e-05, + "loss": 0.455, "step": 23480 }, { "epoch": 1.1, - "learning_rate": 1.789180066569781e-05, - "loss": 0.293, + "learning_rate": 2.7895089947420155e-05, + "loss": 0.3153, "step": 23485 }, { "epoch": 1.1, - "learning_rate": 1.7891331864422673e-05, - "loss": 0.148, + "learning_rate": 2.7894621877584138e-05, + "loss": 0.0421, "step": 23490 }, { "epoch": 1.1, - "learning_rate": 1.7890863063147533e-05, - "loss": 0.0799, + "learning_rate": 2.7894153807748118e-05, + "loss": 0.1043, "step": 23495 }, { "epoch": 1.1, - "learning_rate": 1.7890394261872393e-05, - "loss": 0.1131, + "learning_rate": 2.7893685737912097e-05, + "loss": 0.1151, "step": 23500 }, { "epoch": 1.1, - "learning_rate": 1.7889925460597257e-05, - "loss": 0.1146, + "learning_rate": 2.7893217668076077e-05, + "loss": 0.1384, "step": 23505 }, { "epoch": 1.1, - "learning_rate": 1.7889456659322116e-05, - "loss": 0.0917, + "learning_rate": 2.789274959824006e-05, + "loss": 0.1662, "step": 23510 }, { "epoch": 1.1, - "learning_rate": 1.7888987858046976e-05, - "loss": 0.2351, + "learning_rate": 2.789228152840404e-05, + "loss": 0.1708, "step": 23515 }, { "epoch": 1.1, - "learning_rate": 1.7888519056771836e-05, - "loss": 0.1878, + "learning_rate": 2.789181345856802e-05, + "loss": 0.1553, "step": 23520 }, { "epoch": 1.1, - "learning_rate": 1.7888050255496696e-05, - "loss": 0.3739, + "learning_rate": 2.7891345388731997e-05, + "loss": 0.1907, "step": 23525 }, { "epoch": 1.1, - "learning_rate": 1.7887581454221556e-05, - "loss": 0.339, + "learning_rate": 2.789087731889598e-05, + "loss": 0.3254, "step": 23530 }, { "epoch": 1.1, - "learning_rate": 1.7887112652946416e-05, - "loss": 0.2131, + "learning_rate": 2.789040924905996e-05, + "loss": 0.3018, "step": 23535 }, { "epoch": 1.1, - "learning_rate": 1.7886643851671276e-05, - "loss": 0.0588, + "learning_rate": 2.788994117922394e-05, + "loss": 0.0582, "step": 23540 }, { "epoch": 1.1, - "learning_rate": 1.788617505039614e-05, - "loss": 0.0581, + "learning_rate": 2.7889473109387923e-05, + "loss": 0.0943, "step": 23545 }, { "epoch": 1.1, - "learning_rate": 1.7885706249121e-05, - "loss": 0.0815, + "learning_rate": 2.7889005039551902e-05, + "loss": 0.0854, "step": 23550 }, { "epoch": 1.1, - "learning_rate": 1.788523744784586e-05, - "loss": 0.116, + "learning_rate": 2.7888536969715882e-05, + "loss": 0.086, "step": 23555 }, { "epoch": 1.1, - "learning_rate": 1.788476864657072e-05, - "loss": 0.0722, + "learning_rate": 2.7888068899879862e-05, + "loss": 0.0775, "step": 23560 }, { "epoch": 1.1, - "learning_rate": 1.788429984529558e-05, - "loss": 0.1832, + "learning_rate": 2.7887600830043845e-05, + "loss": 0.1291, "step": 23565 }, { "epoch": 1.1, - "learning_rate": 1.7883831044020442e-05, - "loss": 0.2784, + "learning_rate": 2.7887132760207825e-05, + "loss": 0.2555, "step": 23570 }, { "epoch": 1.1, - "learning_rate": 1.7883362242745302e-05, - "loss": 0.2925, + "learning_rate": 2.7886664690371805e-05, + "loss": 0.2474, "step": 23575 }, { "epoch": 1.1, - "learning_rate": 1.7882893441470162e-05, - "loss": 0.3034, + "learning_rate": 2.7886196620535785e-05, + "loss": 0.2907, "step": 23580 }, { "epoch": 1.1, - "learning_rate": 1.7882424640195022e-05, - "loss": 0.306, + "learning_rate": 2.7885728550699765e-05, + "loss": 0.219, "step": 23585 }, { "epoch": 1.1, - "learning_rate": 1.7881955838919886e-05, - "loss": 0.042, + "learning_rate": 2.7885260480863744e-05, + "loss": 0.026, "step": 23590 }, { "epoch": 1.1, - "learning_rate": 1.7881487037644746e-05, - "loss": 0.0754, + "learning_rate": 2.7884792411027724e-05, + "loss": 0.0669, "step": 23595 }, { "epoch": 1.1, - "learning_rate": 1.7881018236369605e-05, - "loss": 0.0822, + "learning_rate": 2.7884324341191707e-05, + "loss": 0.0977, "step": 23600 }, { "epoch": 1.1, - "learning_rate": 1.7880549435094465e-05, - "loss": 0.1275, + "learning_rate": 2.7883856271355687e-05, + "loss": 0.0636, "step": 23605 }, { "epoch": 1.1, - "learning_rate": 1.7880080633819325e-05, - "loss": 0.2322, + "learning_rate": 2.7883388201519667e-05, + "loss": 0.1187, "step": 23610 }, { "epoch": 1.1, - "learning_rate": 1.7879611832544185e-05, - "loss": 0.1159, + "learning_rate": 2.7882920131683647e-05, + "loss": 0.1748, "step": 23615 }, { "epoch": 1.1, - "learning_rate": 1.7879143031269045e-05, - "loss": 0.0986, + "learning_rate": 2.788245206184763e-05, + "loss": 0.1696, "step": 23620 }, { "epoch": 1.1, - "learning_rate": 1.7878674229993905e-05, - "loss": 0.3391, + "learning_rate": 2.788198399201161e-05, + "loss": 0.1567, "step": 23625 }, { "epoch": 1.1, - "learning_rate": 1.787820542871877e-05, - "loss": 0.4291, + "learning_rate": 2.788151592217559e-05, + "loss": 0.5083, "step": 23630 }, { "epoch": 1.1, - "learning_rate": 1.787773662744363e-05, - "loss": 0.3598, + "learning_rate": 2.788104785233957e-05, + "loss": 0.2237, "step": 23635 }, { "epoch": 1.1, - "learning_rate": 1.7877267826168488e-05, - "loss": 0.0618, + "learning_rate": 2.7880579782503553e-05, + "loss": 0.0791, "step": 23640 }, { "epoch": 1.1, - "learning_rate": 1.7876799024893348e-05, - "loss": 0.0916, + "learning_rate": 2.7880111712667533e-05, + "loss": 0.0885, "step": 23645 }, { "epoch": 1.1, - "learning_rate": 1.787633022361821e-05, - "loss": 0.1183, + "learning_rate": 2.787964364283151e-05, + "loss": 0.066, "step": 23650 }, { "epoch": 1.1, - "learning_rate": 1.787586142234307e-05, - "loss": 0.2357, + "learning_rate": 2.787917557299549e-05, + "loss": 0.1573, "step": 23655 }, { "epoch": 1.1, - "learning_rate": 1.787539262106793e-05, - "loss": 0.1575, + "learning_rate": 2.7878707503159472e-05, + "loss": 0.1412, "step": 23660 }, { "epoch": 1.1, - "learning_rate": 1.787492381979279e-05, - "loss": 0.1602, + "learning_rate": 2.7878239433323452e-05, + "loss": 0.2119, "step": 23665 }, { "epoch": 1.1, - "learning_rate": 1.787445501851765e-05, - "loss": 0.2106, + "learning_rate": 2.787777136348743e-05, + "loss": 0.1254, "step": 23670 }, { "epoch": 1.1, - "learning_rate": 1.787398621724251e-05, - "loss": 0.2623, + "learning_rate": 2.7877303293651415e-05, + "loss": 0.2836, "step": 23675 }, { "epoch": 1.1, - "learning_rate": 1.787351741596737e-05, - "loss": 0.3984, + "learning_rate": 2.7876835223815395e-05, + "loss": 0.3298, "step": 23680 }, { "epoch": 1.11, - "learning_rate": 1.7873048614692234e-05, - "loss": 0.4425, + "learning_rate": 2.7876367153979374e-05, + "loss": 0.284, "step": 23685 }, { "epoch": 1.11, - "learning_rate": 1.7872579813417094e-05, - "loss": 0.0526, + "learning_rate": 2.7875899084143354e-05, + "loss": 0.0646, "step": 23690 }, { "epoch": 1.11, - "learning_rate": 1.7872111012141954e-05, - "loss": 0.0902, + "learning_rate": 2.7875431014307337e-05, + "loss": 0.087, "step": 23695 }, { "epoch": 1.11, - "learning_rate": 1.7871642210866814e-05, - "loss": 0.0561, + "learning_rate": 2.7874962944471317e-05, + "loss": 0.0796, "step": 23700 }, { "epoch": 1.11, - "learning_rate": 1.7871173409591674e-05, - "loss": 0.1389, + "learning_rate": 2.7874494874635297e-05, + "loss": 0.0773, "step": 23705 }, { "epoch": 1.11, - "learning_rate": 1.7870704608316538e-05, - "loss": 0.1042, + "learning_rate": 2.7874026804799277e-05, + "loss": 0.1057, "step": 23710 }, { "epoch": 1.11, - "learning_rate": 1.7870235807041397e-05, - "loss": 0.2266, + "learning_rate": 2.7873558734963257e-05, + "loss": 0.1404, "step": 23715 }, { "epoch": 1.11, - "learning_rate": 1.7869767005766257e-05, - "loss": 0.2452, + "learning_rate": 2.7873090665127237e-05, + "loss": 0.1837, "step": 23720 }, { "epoch": 1.11, - "learning_rate": 1.7869298204491117e-05, - "loss": 0.2247, + "learning_rate": 2.7872622595291216e-05, + "loss": 0.1783, "step": 23725 }, { "epoch": 1.11, - "learning_rate": 1.786882940321598e-05, - "loss": 0.472, + "learning_rate": 2.78721545254552e-05, + "loss": 0.4584, "step": 23730 }, { "epoch": 1.11, - "learning_rate": 1.786836060194084e-05, - "loss": 0.268, + "learning_rate": 2.787168645561918e-05, + "loss": 0.367, "step": 23735 }, { "epoch": 1.11, - "learning_rate": 1.78678918006657e-05, - "loss": 0.1071, + "learning_rate": 2.787121838578316e-05, + "loss": 0.0392, "step": 23740 }, { "epoch": 1.11, - "learning_rate": 1.786742299939056e-05, - "loss": 0.0653, + "learning_rate": 2.787075031594714e-05, + "loss": 0.0638, "step": 23745 }, { "epoch": 1.11, - "learning_rate": 1.786695419811542e-05, - "loss": 0.089, + "learning_rate": 2.7870282246111122e-05, + "loss": 0.1059, "step": 23750 }, { "epoch": 1.11, - "learning_rate": 1.786648539684028e-05, - "loss": 0.1511, + "learning_rate": 2.7869814176275102e-05, + "loss": 0.1247, "step": 23755 }, { "epoch": 1.11, - "learning_rate": 1.786601659556514e-05, - "loss": 0.2224, + "learning_rate": 2.7869346106439082e-05, + "loss": 0.0934, "step": 23760 }, { "epoch": 1.11, - "learning_rate": 1.786554779429e-05, - "loss": 0.1669, + "learning_rate": 2.786887803660306e-05, + "loss": 0.1016, "step": 23765 }, { "epoch": 1.11, - "learning_rate": 1.786507899301486e-05, - "loss": 0.2287, + "learning_rate": 2.7868409966767045e-05, + "loss": 0.3392, "step": 23770 }, { "epoch": 1.11, - "learning_rate": 1.7864610191739723e-05, - "loss": 0.2261, + "learning_rate": 2.786794189693102e-05, + "loss": 0.205, "step": 23775 }, { "epoch": 1.11, - "learning_rate": 1.7864141390464583e-05, - "loss": 0.2919, + "learning_rate": 2.7867473827095e-05, + "loss": 0.4122, "step": 23780 }, { "epoch": 1.11, - "learning_rate": 1.7863672589189443e-05, - "loss": 0.2836, + "learning_rate": 2.7867005757258984e-05, + "loss": 0.2699, "step": 23785 }, { "epoch": 1.11, - "learning_rate": 1.7863203787914307e-05, - "loss": 0.0762, + "learning_rate": 2.7866537687422964e-05, + "loss": 0.0749, "step": 23790 }, { "epoch": 1.11, - "learning_rate": 1.7862734986639167e-05, - "loss": 0.0781, + "learning_rate": 2.7866069617586944e-05, + "loss": 0.0608, "step": 23795 }, { "epoch": 1.11, - "learning_rate": 1.7862266185364026e-05, - "loss": 0.0974, + "learning_rate": 2.7865601547750924e-05, + "loss": 0.1115, "step": 23800 }, { "epoch": 1.11, - "learning_rate": 1.7861797384088886e-05, - "loss": 0.0707, + "learning_rate": 2.7865133477914907e-05, + "loss": 0.0917, "step": 23805 }, { "epoch": 1.11, - "learning_rate": 1.7861328582813746e-05, - "loss": 0.1144, + "learning_rate": 2.7864665408078887e-05, + "loss": 0.1316, "step": 23810 }, { "epoch": 1.11, - "learning_rate": 1.7860859781538606e-05, - "loss": 0.2205, + "learning_rate": 2.7864197338242867e-05, + "loss": 0.0842, "step": 23815 }, { "epoch": 1.11, - "learning_rate": 1.7860390980263466e-05, - "loss": 0.2307, + "learning_rate": 2.7863729268406846e-05, + "loss": 0.1673, "step": 23820 }, { "epoch": 1.11, - "learning_rate": 1.785992217898833e-05, - "loss": 0.2626, + "learning_rate": 2.786326119857083e-05, + "loss": 0.2385, "step": 23825 }, { "epoch": 1.11, - "learning_rate": 1.785945337771319e-05, - "loss": 0.372, + "learning_rate": 2.786279312873481e-05, + "loss": 0.2514, "step": 23830 }, { "epoch": 1.11, - "learning_rate": 1.785898457643805e-05, - "loss": 0.2216, + "learning_rate": 2.786232505889879e-05, + "loss": 0.3098, "step": 23835 }, { "epoch": 1.11, - "learning_rate": 1.785851577516291e-05, - "loss": 0.0576, + "learning_rate": 2.7861856989062766e-05, + "loss": 0.0623, "step": 23840 }, { "epoch": 1.11, - "learning_rate": 1.785804697388777e-05, - "loss": 0.0833, + "learning_rate": 2.786138891922675e-05, + "loss": 0.0985, "step": 23845 }, { "epoch": 1.11, - "learning_rate": 1.785757817261263e-05, - "loss": 0.1436, + "learning_rate": 2.786092084939073e-05, + "loss": 0.0456, "step": 23850 }, { "epoch": 1.11, - "learning_rate": 1.7857109371337493e-05, - "loss": 0.1121, + "learning_rate": 2.786045277955471e-05, + "loss": 0.1148, "step": 23855 }, { "epoch": 1.11, - "learning_rate": 1.7856640570062352e-05, - "loss": 0.139, + "learning_rate": 2.7859984709718692e-05, + "loss": 0.1061, "step": 23860 }, { "epoch": 1.11, - "learning_rate": 1.7856171768787212e-05, - "loss": 0.2333, + "learning_rate": 2.785951663988267e-05, + "loss": 0.1392, "step": 23865 }, { "epoch": 1.11, - "learning_rate": 1.7855702967512076e-05, - "loss": 0.2352, + "learning_rate": 2.785904857004665e-05, + "loss": 0.1639, "step": 23870 }, { "epoch": 1.11, - "learning_rate": 1.7855234166236936e-05, - "loss": 0.2574, + "learning_rate": 2.785858050021063e-05, + "loss": 0.2892, "step": 23875 }, { "epoch": 1.11, - "learning_rate": 1.7854765364961796e-05, - "loss": 0.5096, + "learning_rate": 2.7858112430374614e-05, + "loss": 0.3988, "step": 23880 }, { "epoch": 1.11, - "learning_rate": 1.7854296563686656e-05, - "loss": 0.3584, + "learning_rate": 2.7857644360538594e-05, + "loss": 0.2502, "step": 23885 }, { "epoch": 1.11, - "learning_rate": 1.7853827762411515e-05, - "loss": 0.0982, + "learning_rate": 2.7857176290702574e-05, + "loss": 0.0541, "step": 23890 }, { "epoch": 1.11, - "learning_rate": 1.7853358961136375e-05, - "loss": 0.0527, + "learning_rate": 2.7856708220866554e-05, + "loss": 0.0181, "step": 23895 }, { "epoch": 1.12, - "learning_rate": 1.7852890159861235e-05, - "loss": 0.0734, + "learning_rate": 2.7856240151030534e-05, + "loss": 0.1351, "step": 23900 }, { "epoch": 1.12, - "learning_rate": 1.7852421358586095e-05, - "loss": 0.0671, + "learning_rate": 2.7855772081194514e-05, + "loss": 0.1144, "step": 23905 }, { "epoch": 1.12, - "learning_rate": 1.7851952557310955e-05, - "loss": 0.1738, + "learning_rate": 2.7855304011358493e-05, + "loss": 0.0976, "step": 23910 }, { "epoch": 1.12, - "learning_rate": 1.785148375603582e-05, - "loss": 0.1695, + "learning_rate": 2.7854835941522477e-05, + "loss": 0.162, "step": 23915 }, { "epoch": 1.12, - "learning_rate": 1.785101495476068e-05, - "loss": 0.2176, + "learning_rate": 2.7854367871686456e-05, + "loss": 0.1764, "step": 23920 }, { "epoch": 1.12, - "learning_rate": 1.785054615348554e-05, - "loss": 0.2628, + "learning_rate": 2.7853899801850436e-05, + "loss": 0.2787, "step": 23925 }, { "epoch": 1.12, - "learning_rate": 1.78500773522104e-05, - "loss": 0.3754, + "learning_rate": 2.7853431732014416e-05, + "loss": 0.255, "step": 23930 }, { "epoch": 1.12, - "learning_rate": 1.784960855093526e-05, - "loss": 0.3783, + "learning_rate": 2.78529636621784e-05, + "loss": 0.2853, "step": 23935 }, { "epoch": 1.12, - "learning_rate": 1.784913974966012e-05, - "loss": 0.0661, + "learning_rate": 2.785249559234238e-05, + "loss": 0.0753, "step": 23940 }, { "epoch": 1.12, - "learning_rate": 1.784867094838498e-05, - "loss": 0.0745, + "learning_rate": 2.785202752250636e-05, + "loss": 0.0491, "step": 23945 }, { "epoch": 1.12, - "learning_rate": 1.784820214710984e-05, - "loss": 0.0752, + "learning_rate": 2.785155945267034e-05, + "loss": 0.0862, "step": 23950 }, { "epoch": 1.12, - "learning_rate": 1.78477333458347e-05, - "loss": 0.0765, + "learning_rate": 2.7851091382834322e-05, + "loss": 0.141, "step": 23955 }, { "epoch": 1.12, - "learning_rate": 1.784726454455956e-05, - "loss": 0.177, + "learning_rate": 2.78506233129983e-05, + "loss": 0.1506, "step": 23960 }, { "epoch": 1.12, - "learning_rate": 1.7846795743284425e-05, - "loss": 0.2159, + "learning_rate": 2.7850155243162278e-05, + "loss": 0.1839, "step": 23965 }, { "epoch": 1.12, - "learning_rate": 1.7846326942009285e-05, - "loss": 0.1894, + "learning_rate": 2.784968717332626e-05, + "loss": 0.1658, "step": 23970 }, { "epoch": 1.12, - "learning_rate": 1.7845858140734145e-05, - "loss": 0.1678, + "learning_rate": 2.784921910349024e-05, + "loss": 0.2148, "step": 23975 }, { "epoch": 1.12, - "learning_rate": 1.7845389339459004e-05, - "loss": 0.3089, + "learning_rate": 2.784875103365422e-05, + "loss": 0.3744, "step": 23980 }, { "epoch": 1.12, - "learning_rate": 1.7844920538183864e-05, - "loss": 0.2526, + "learning_rate": 2.78482829638182e-05, + "loss": 0.4416, "step": 23985 }, { "epoch": 1.12, - "learning_rate": 1.7844451736908724e-05, - "loss": 0.0912, + "learning_rate": 2.7847814893982184e-05, + "loss": 0.0708, "step": 23990 }, { "epoch": 1.12, - "learning_rate": 1.7843982935633584e-05, - "loss": 0.098, + "learning_rate": 2.7847346824146164e-05, + "loss": 0.0429, "step": 23995 }, { "epoch": 1.12, - "learning_rate": 1.7843514134358448e-05, - "loss": 0.0601, + "learning_rate": 2.7846878754310144e-05, + "loss": 0.0519, "step": 24000 }, { "epoch": 1.12, - "learning_rate": 1.7843045333083307e-05, - "loss": 0.1781, + "learning_rate": 2.7846410684474123e-05, + "loss": 0.076, "step": 24005 }, { "epoch": 1.12, - "learning_rate": 1.7842576531808167e-05, - "loss": 0.1413, + "learning_rate": 2.7845942614638107e-05, + "loss": 0.0836, "step": 24010 }, { "epoch": 1.12, - "learning_rate": 1.784210773053303e-05, - "loss": 0.2184, + "learning_rate": 2.7845474544802086e-05, + "loss": 0.2031, "step": 24015 }, { "epoch": 1.12, - "learning_rate": 1.784163892925789e-05, - "loss": 0.2072, + "learning_rate": 2.7845006474966066e-05, + "loss": 0.1877, "step": 24020 }, { "epoch": 1.12, - "learning_rate": 1.784117012798275e-05, - "loss": 0.2547, + "learning_rate": 2.7844538405130046e-05, + "loss": 0.1343, "step": 24025 }, { "epoch": 1.12, - "learning_rate": 1.784070132670761e-05, - "loss": 0.328, + "learning_rate": 2.7844070335294026e-05, + "loss": 0.4115, "step": 24030 }, { "epoch": 1.12, - "learning_rate": 1.784023252543247e-05, - "loss": 0.3129, + "learning_rate": 2.7843602265458006e-05, + "loss": 0.1969, "step": 24035 }, { "epoch": 1.12, - "learning_rate": 1.783976372415733e-05, - "loss": 0.0616, + "learning_rate": 2.7843134195621986e-05, + "loss": 0.0571, "step": 24040 }, { "epoch": 1.12, - "learning_rate": 1.783929492288219e-05, - "loss": 0.0432, + "learning_rate": 2.784266612578597e-05, + "loss": 0.0702, "step": 24045 }, { "epoch": 1.12, - "learning_rate": 1.783882612160705e-05, - "loss": 0.0913, + "learning_rate": 2.784219805594995e-05, + "loss": 0.0763, "step": 24050 }, { "epoch": 1.12, - "learning_rate": 1.7838357320331914e-05, - "loss": 0.1446, + "learning_rate": 2.784172998611393e-05, + "loss": 0.0857, "step": 24055 }, { "epoch": 1.12, - "learning_rate": 1.7837888519056774e-05, - "loss": 0.1433, + "learning_rate": 2.7841261916277908e-05, + "loss": 0.1207, "step": 24060 }, { "epoch": 1.12, - "learning_rate": 1.7837419717781633e-05, - "loss": 0.1023, + "learning_rate": 2.784079384644189e-05, + "loss": 0.2118, "step": 24065 }, { "epoch": 1.12, - "learning_rate": 1.7836950916506493e-05, - "loss": 0.188, + "learning_rate": 2.784032577660587e-05, + "loss": 0.1536, "step": 24070 }, { "epoch": 1.12, - "learning_rate": 1.7836482115231353e-05, - "loss": 0.1815, + "learning_rate": 2.783985770676985e-05, + "loss": 0.3481, "step": 24075 }, { "epoch": 1.12, - "learning_rate": 1.7836013313956217e-05, - "loss": 0.4238, + "learning_rate": 2.783938963693383e-05, + "loss": 0.2797, "step": 24080 }, { "epoch": 1.12, - "learning_rate": 1.7835544512681077e-05, - "loss": 0.3328, + "learning_rate": 2.7838921567097814e-05, + "loss": 0.3007, "step": 24085 }, { "epoch": 1.12, - "learning_rate": 1.7835075711405937e-05, - "loss": 0.1068, + "learning_rate": 2.783845349726179e-05, + "loss": 0.0347, "step": 24090 }, { "epoch": 1.12, - "learning_rate": 1.7834606910130796e-05, - "loss": 0.0785, + "learning_rate": 2.783798542742577e-05, + "loss": 0.0338, "step": 24095 }, { "epoch": 1.12, - "learning_rate": 1.7834138108855656e-05, - "loss": 0.1101, + "learning_rate": 2.7837517357589754e-05, + "loss": 0.1197, "step": 24100 }, { "epoch": 1.12, - "learning_rate": 1.783366930758052e-05, - "loss": 0.169, + "learning_rate": 2.7837049287753733e-05, + "loss": 0.105, "step": 24105 }, { "epoch": 1.13, - "learning_rate": 1.783320050630538e-05, - "loss": 0.1494, + "learning_rate": 2.7836581217917713e-05, + "loss": 0.0927, "step": 24110 }, { "epoch": 1.13, - "learning_rate": 1.783273170503024e-05, - "loss": 0.1267, + "learning_rate": 2.7836113148081693e-05, + "loss": 0.1928, "step": 24115 }, { "epoch": 1.13, - "learning_rate": 1.78322629037551e-05, - "loss": 0.1311, + "learning_rate": 2.7835645078245676e-05, + "loss": 0.1256, "step": 24120 }, { "epoch": 1.13, - "learning_rate": 1.783179410247996e-05, - "loss": 0.299, + "learning_rate": 2.7835177008409656e-05, + "loss": 0.2703, "step": 24125 }, { "epoch": 1.13, - "learning_rate": 1.783132530120482e-05, - "loss": 0.2228, + "learning_rate": 2.7834708938573636e-05, + "loss": 0.5215, "step": 24130 }, { "epoch": 1.13, - "learning_rate": 1.783085649992968e-05, - "loss": 0.309, + "learning_rate": 2.7834240868737616e-05, + "loss": 0.4436, "step": 24135 }, { "epoch": 1.13, - "learning_rate": 1.7830387698654543e-05, - "loss": 0.0578, + "learning_rate": 2.78337727989016e-05, + "loss": 0.0458, "step": 24140 }, { "epoch": 1.13, - "learning_rate": 1.7829918897379403e-05, - "loss": 0.0749, + "learning_rate": 2.783330472906558e-05, + "loss": 0.059, "step": 24145 }, { "epoch": 1.13, - "learning_rate": 1.7829450096104263e-05, - "loss": 0.1276, + "learning_rate": 2.783283665922956e-05, + "loss": 0.0762, "step": 24150 }, { "epoch": 1.13, - "learning_rate": 1.7828981294829122e-05, - "loss": 0.1273, + "learning_rate": 2.7832368589393538e-05, + "loss": 0.0905, "step": 24155 }, { "epoch": 1.13, - "learning_rate": 1.7828512493553986e-05, - "loss": 0.1065, + "learning_rate": 2.7831900519557518e-05, + "loss": 0.0656, "step": 24160 }, { "epoch": 1.13, - "learning_rate": 1.7828043692278846e-05, - "loss": 0.2525, + "learning_rate": 2.7831432449721498e-05, + "loss": 0.1279, "step": 24165 }, { "epoch": 1.13, - "learning_rate": 1.7827574891003706e-05, - "loss": 0.1666, + "learning_rate": 2.7830964379885478e-05, + "loss": 0.1636, "step": 24170 }, { "epoch": 1.13, - "learning_rate": 1.7827106089728566e-05, - "loss": 0.2416, + "learning_rate": 2.783049631004946e-05, + "loss": 0.2044, "step": 24175 }, { "epoch": 1.13, - "learning_rate": 1.7826637288453426e-05, - "loss": 0.5039, + "learning_rate": 2.783002824021344e-05, + "loss": 0.3608, "step": 24180 }, { "epoch": 1.13, - "learning_rate": 1.7826168487178285e-05, - "loss": 0.3964, + "learning_rate": 2.782956017037742e-05, + "loss": 0.3159, "step": 24185 }, { "epoch": 1.13, - "learning_rate": 1.7825699685903145e-05, - "loss": 0.0342, + "learning_rate": 2.78290921005414e-05, + "loss": 0.0909, "step": 24190 }, { "epoch": 1.13, - "learning_rate": 1.782523088462801e-05, - "loss": 0.0864, + "learning_rate": 2.7828624030705384e-05, + "loss": 0.0536, "step": 24195 }, { "epoch": 1.13, - "learning_rate": 1.782476208335287e-05, - "loss": 0.1232, + "learning_rate": 2.7828155960869363e-05, + "loss": 0.0767, "step": 24200 }, { "epoch": 1.13, - "learning_rate": 1.782429328207773e-05, - "loss": 0.149, + "learning_rate": 2.7827687891033343e-05, + "loss": 0.0605, "step": 24205 }, { "epoch": 1.13, - "learning_rate": 1.782382448080259e-05, - "loss": 0.1592, + "learning_rate": 2.7827219821197326e-05, + "loss": 0.1344, "step": 24210 }, { "epoch": 1.13, - "learning_rate": 1.782335567952745e-05, - "loss": 0.2233, + "learning_rate": 2.7826751751361303e-05, + "loss": 0.1458, "step": 24215 }, { "epoch": 1.13, - "learning_rate": 1.7822886878252312e-05, - "loss": 0.1183, + "learning_rate": 2.7826283681525283e-05, + "loss": 0.2126, "step": 24220 }, { "epoch": 1.13, - "learning_rate": 1.782241807697717e-05, - "loss": 0.1203, + "learning_rate": 2.7825815611689263e-05, + "loss": 0.213, "step": 24225 }, { "epoch": 1.13, - "learning_rate": 1.782194927570203e-05, - "loss": 0.2905, + "learning_rate": 2.7825347541853246e-05, + "loss": 0.4755, "step": 24230 }, { "epoch": 1.13, - "learning_rate": 1.782148047442689e-05, - "loss": 0.2715, + "learning_rate": 2.7824879472017226e-05, + "loss": 0.3213, "step": 24235 }, { "epoch": 1.13, - "learning_rate": 1.7821011673151755e-05, - "loss": 0.026, + "learning_rate": 2.7824411402181205e-05, + "loss": 0.0693, "step": 24240 }, { "epoch": 1.13, - "learning_rate": 1.7820542871876615e-05, - "loss": 0.0983, + "learning_rate": 2.7823943332345185e-05, + "loss": 0.0591, "step": 24245 }, { "epoch": 1.13, - "learning_rate": 1.7820074070601475e-05, - "loss": 0.0667, + "learning_rate": 2.782347526250917e-05, + "loss": 0.0504, "step": 24250 }, { "epoch": 1.13, - "learning_rate": 1.7819605269326335e-05, - "loss": 0.1388, + "learning_rate": 2.7823007192673148e-05, + "loss": 0.0842, "step": 24255 }, { "epoch": 1.13, - "learning_rate": 1.7819136468051195e-05, - "loss": 0.0779, + "learning_rate": 2.7822539122837128e-05, + "loss": 0.1454, "step": 24260 }, { "epoch": 1.13, - "learning_rate": 1.7818667666776055e-05, - "loss": 0.2576, + "learning_rate": 2.7822071053001108e-05, + "loss": 0.1549, "step": 24265 }, { "epoch": 1.13, - "learning_rate": 1.7818198865500914e-05, - "loss": 0.2524, + "learning_rate": 2.782160298316509e-05, + "loss": 0.1323, "step": 24270 }, { "epoch": 1.13, - "learning_rate": 1.7817730064225774e-05, - "loss": 0.328, + "learning_rate": 2.782113491332907e-05, + "loss": 0.265, "step": 24275 }, { "epoch": 1.13, - "learning_rate": 1.7817261262950634e-05, - "loss": 0.4354, + "learning_rate": 2.7820666843493047e-05, + "loss": 0.4209, "step": 24280 }, { "epoch": 1.13, - "learning_rate": 1.7816792461675498e-05, - "loss": 0.362, + "learning_rate": 2.782019877365703e-05, + "loss": 0.2853, "step": 24285 }, { "epoch": 1.13, - "learning_rate": 1.7816323660400358e-05, - "loss": 0.0855, + "learning_rate": 2.781973070382101e-05, + "loss": 0.101, "step": 24290 }, { "epoch": 1.13, - "learning_rate": 1.7815854859125218e-05, - "loss": 0.0941, + "learning_rate": 2.781926263398499e-05, + "loss": 0.022, "step": 24295 }, { "epoch": 1.13, - "learning_rate": 1.781538605785008e-05, - "loss": 0.1006, + "learning_rate": 2.781879456414897e-05, + "loss": 0.0391, "step": 24300 }, { "epoch": 1.13, - "learning_rate": 1.781491725657494e-05, - "loss": 0.1203, + "learning_rate": 2.7818326494312953e-05, + "loss": 0.0911, "step": 24305 }, { "epoch": 1.13, - "learning_rate": 1.78144484552998e-05, - "loss": 0.1505, + "learning_rate": 2.7817858424476933e-05, + "loss": 0.1841, "step": 24310 }, { "epoch": 1.13, - "learning_rate": 1.781397965402466e-05, - "loss": 0.3013, + "learning_rate": 2.7817390354640913e-05, + "loss": 0.144, "step": 24315 }, { "epoch": 1.13, - "learning_rate": 1.781351085274952e-05, - "loss": 0.1516, + "learning_rate": 2.7816922284804893e-05, + "loss": 0.1671, "step": 24320 }, { "epoch": 1.14, - "learning_rate": 1.781304205147438e-05, - "loss": 0.1622, + "learning_rate": 2.7816454214968876e-05, + "loss": 0.3196, "step": 24325 }, { "epoch": 1.14, - "learning_rate": 1.781257325019924e-05, - "loss": 0.3296, + "learning_rate": 2.7815986145132856e-05, + "loss": 0.4916, "step": 24330 }, { "epoch": 1.14, - "learning_rate": 1.7812104448924104e-05, - "loss": 0.2447, + "learning_rate": 2.7815518075296835e-05, + "loss": 0.4262, "step": 24335 }, { "epoch": 1.14, - "learning_rate": 1.7811635647648964e-05, - "loss": 0.0976, + "learning_rate": 2.781505000546082e-05, + "loss": 0.1215, "step": 24340 }, { "epoch": 1.14, - "learning_rate": 1.7811166846373824e-05, - "loss": 0.0757, + "learning_rate": 2.7814581935624795e-05, + "loss": 0.0956, "step": 24345 }, { "epoch": 1.14, - "learning_rate": 1.7810698045098684e-05, - "loss": 0.0458, + "learning_rate": 2.7814113865788775e-05, + "loss": 0.1143, "step": 24350 }, { "epoch": 1.14, - "learning_rate": 1.7810229243823544e-05, - "loss": 0.1581, + "learning_rate": 2.7813645795952755e-05, + "loss": 0.0758, "step": 24355 }, { "epoch": 1.14, - "learning_rate": 1.7809760442548403e-05, - "loss": 0.1213, + "learning_rate": 2.7813177726116738e-05, + "loss": 0.1008, "step": 24360 }, { "epoch": 1.14, - "learning_rate": 1.7809291641273267e-05, - "loss": 0.1537, + "learning_rate": 2.7812709656280718e-05, + "loss": 0.1616, "step": 24365 }, { "epoch": 1.14, - "learning_rate": 1.7808822839998127e-05, - "loss": 0.2788, + "learning_rate": 2.7812241586444698e-05, + "loss": 0.1988, "step": 24370 }, { "epoch": 1.14, - "learning_rate": 1.7808354038722987e-05, - "loss": 0.2862, + "learning_rate": 2.7811773516608677e-05, + "loss": 0.1654, "step": 24375 }, { "epoch": 1.14, - "learning_rate": 1.780788523744785e-05, - "loss": 0.3774, + "learning_rate": 2.781130544677266e-05, + "loss": 0.3107, "step": 24380 }, { "epoch": 1.14, - "learning_rate": 1.780741643617271e-05, - "loss": 0.2727, + "learning_rate": 2.781083737693664e-05, + "loss": 0.2834, "step": 24385 }, { "epoch": 1.14, - "learning_rate": 1.780694763489757e-05, - "loss": 0.0509, + "learning_rate": 2.781036930710062e-05, + "loss": 0.1004, "step": 24390 }, { "epoch": 1.14, - "learning_rate": 1.780647883362243e-05, - "loss": 0.0936, + "learning_rate": 2.7809901237264603e-05, + "loss": 0.0424, "step": 24395 }, { "epoch": 1.14, - "learning_rate": 1.780601003234729e-05, - "loss": 0.0708, + "learning_rate": 2.7809433167428583e-05, + "loss": 0.1437, "step": 24400 }, { "epoch": 1.14, - "learning_rate": 1.780554123107215e-05, - "loss": 0.0753, + "learning_rate": 2.780896509759256e-05, + "loss": 0.1121, "step": 24405 }, { "epoch": 1.14, - "learning_rate": 1.780507242979701e-05, - "loss": 0.1762, + "learning_rate": 2.780849702775654e-05, + "loss": 0.1602, "step": 24410 }, { "epoch": 1.14, - "learning_rate": 1.780460362852187e-05, - "loss": 0.1752, + "learning_rate": 2.7808028957920523e-05, + "loss": 0.2569, "step": 24415 }, { "epoch": 1.14, - "learning_rate": 1.780413482724673e-05, - "loss": 0.2408, + "learning_rate": 2.7807560888084502e-05, + "loss": 0.1752, "step": 24420 }, { "epoch": 1.14, - "learning_rate": 1.780366602597159e-05, - "loss": 0.2928, + "learning_rate": 2.7807092818248482e-05, + "loss": 0.2632, "step": 24425 }, { "epoch": 1.14, - "learning_rate": 1.7803197224696453e-05, - "loss": 0.4798, + "learning_rate": 2.7806624748412462e-05, + "loss": 0.3705, "step": 24430 }, { "epoch": 1.14, - "learning_rate": 1.7802728423421313e-05, - "loss": 0.407, + "learning_rate": 2.7806156678576445e-05, + "loss": 0.3963, "step": 24435 }, { "epoch": 1.14, - "learning_rate": 1.7802259622146173e-05, - "loss": 0.1121, + "learning_rate": 2.7805688608740425e-05, + "loss": 0.0922, "step": 24440 }, { "epoch": 1.14, - "learning_rate": 1.7801790820871036e-05, - "loss": 0.0546, + "learning_rate": 2.7805220538904405e-05, + "loss": 0.0646, "step": 24445 }, { "epoch": 1.14, - "learning_rate": 1.7801322019595896e-05, - "loss": 0.078, + "learning_rate": 2.7804752469068385e-05, + "loss": 0.036, "step": 24450 }, { "epoch": 1.14, - "learning_rate": 1.7800853218320756e-05, - "loss": 0.1126, + "learning_rate": 2.7804284399232368e-05, + "loss": 0.1185, "step": 24455 }, { "epoch": 1.14, - "learning_rate": 1.7800384417045616e-05, - "loss": 0.2212, + "learning_rate": 2.7803816329396348e-05, + "loss": 0.1575, "step": 24460 }, { "epoch": 1.14, - "learning_rate": 1.7799915615770476e-05, - "loss": 0.1623, + "learning_rate": 2.7803348259560328e-05, + "loss": 0.0933, "step": 24465 }, { "epoch": 1.14, - "learning_rate": 1.7799446814495336e-05, - "loss": 0.3043, + "learning_rate": 2.7802880189724307e-05, + "loss": 0.1831, "step": 24470 }, { "epoch": 1.14, - "learning_rate": 1.77989780132202e-05, - "loss": 0.3083, + "learning_rate": 2.7802412119888287e-05, + "loss": 0.2376, "step": 24475 }, { "epoch": 1.14, - "learning_rate": 1.779850921194506e-05, - "loss": 0.3038, + "learning_rate": 2.7801944050052267e-05, + "loss": 0.3066, "step": 24480 }, { "epoch": 1.14, - "learning_rate": 1.779804041066992e-05, - "loss": 0.3422, + "learning_rate": 2.7801475980216247e-05, + "loss": 0.3635, "step": 24485 }, { "epoch": 1.14, - "learning_rate": 1.779757160939478e-05, - "loss": 0.0868, + "learning_rate": 2.780100791038023e-05, + "loss": 0.0318, "step": 24490 }, { "epoch": 1.14, - "learning_rate": 1.779710280811964e-05, - "loss": 0.0834, + "learning_rate": 2.780053984054421e-05, + "loss": 0.0678, "step": 24495 }, { "epoch": 1.14, - "learning_rate": 1.77966340068445e-05, - "loss": 0.0701, + "learning_rate": 2.780007177070819e-05, + "loss": 0.1153, "step": 24500 }, { "epoch": 1.14, - "learning_rate": 1.779616520556936e-05, - "loss": 0.1719, + "learning_rate": 2.779960370087217e-05, + "loss": 0.1895, "step": 24505 }, { "epoch": 1.14, - "learning_rate": 1.7795696404294222e-05, - "loss": 0.0946, + "learning_rate": 2.7799135631036153e-05, + "loss": 0.1513, "step": 24510 }, { "epoch": 1.14, - "learning_rate": 1.7795227603019082e-05, - "loss": 0.1765, + "learning_rate": 2.7798667561200133e-05, + "loss": 0.0862, "step": 24515 }, { "epoch": 1.14, - "learning_rate": 1.779475880174394e-05, - "loss": 0.256, + "learning_rate": 2.7798199491364112e-05, + "loss": 0.2052, "step": 24520 }, { "epoch": 1.14, - "learning_rate": 1.7794290000468805e-05, - "loss": 0.326, + "learning_rate": 2.7797731421528096e-05, + "loss": 0.2323, "step": 24525 }, { "epoch": 1.14, - "learning_rate": 1.7793821199193665e-05, - "loss": 0.2897, + "learning_rate": 2.7797263351692075e-05, + "loss": 0.4008, "step": 24530 }, { "epoch": 1.14, - "learning_rate": 1.7793352397918525e-05, - "loss": 0.2758, + "learning_rate": 2.7796795281856052e-05, + "loss": 0.3173, "step": 24535 }, { "epoch": 1.15, - "learning_rate": 1.7792883596643385e-05, - "loss": 0.0219, + "learning_rate": 2.779632721202003e-05, + "loss": 0.0315, "step": 24540 }, { "epoch": 1.15, - "learning_rate": 1.7792414795368245e-05, - "loss": 0.1285, + "learning_rate": 2.7795859142184015e-05, + "loss": 0.1739, "step": 24545 }, { "epoch": 1.15, - "learning_rate": 1.7791945994093105e-05, - "loss": 0.1647, + "learning_rate": 2.7795391072347995e-05, + "loss": 0.0783, "step": 24550 }, { "epoch": 1.15, - "learning_rate": 1.7791477192817965e-05, - "loss": 0.0803, + "learning_rate": 2.7794923002511975e-05, + "loss": 0.1348, "step": 24555 }, { "epoch": 1.15, - "learning_rate": 1.7791008391542825e-05, - "loss": 0.0733, + "learning_rate": 2.7794454932675954e-05, + "loss": 0.1921, "step": 24560 }, { "epoch": 1.15, - "learning_rate": 1.7790539590267688e-05, - "loss": 0.1594, + "learning_rate": 2.7793986862839938e-05, + "loss": 0.1798, "step": 24565 }, { "epoch": 1.15, - "learning_rate": 1.7790070788992548e-05, - "loss": 0.1789, + "learning_rate": 2.7793518793003917e-05, + "loss": 0.1721, "step": 24570 }, { "epoch": 1.15, - "learning_rate": 1.7789601987717408e-05, - "loss": 0.2286, + "learning_rate": 2.7793050723167897e-05, + "loss": 0.2046, "step": 24575 }, { "epoch": 1.15, - "learning_rate": 1.7789133186442268e-05, - "loss": 0.269, + "learning_rate": 2.779258265333188e-05, + "loss": 0.3995, "step": 24580 }, { "epoch": 1.15, - "learning_rate": 1.7788664385167128e-05, - "loss": 0.2611, + "learning_rate": 2.779211458349586e-05, + "loss": 0.3411, "step": 24585 }, { "epoch": 1.15, - "learning_rate": 1.778819558389199e-05, - "loss": 0.0783, + "learning_rate": 2.779164651365984e-05, + "loss": 0.0392, "step": 24590 }, { "epoch": 1.15, - "learning_rate": 1.778772678261685e-05, - "loss": 0.0946, + "learning_rate": 2.7791178443823816e-05, + "loss": 0.0342, "step": 24595 }, { "epoch": 1.15, - "learning_rate": 1.778725798134171e-05, - "loss": 0.0734, + "learning_rate": 2.77907103739878e-05, + "loss": 0.03, "step": 24600 }, { "epoch": 1.15, - "learning_rate": 1.778678918006657e-05, - "loss": 0.1414, + "learning_rate": 2.779024230415178e-05, + "loss": 0.0792, "step": 24605 }, { "epoch": 1.15, - "learning_rate": 1.778632037879143e-05, - "loss": 0.2184, + "learning_rate": 2.778977423431576e-05, + "loss": 0.1117, "step": 24610 }, { "epoch": 1.15, - "learning_rate": 1.7785851577516294e-05, - "loss": 0.129, + "learning_rate": 2.778930616447974e-05, + "loss": 0.186, "step": 24615 }, { "epoch": 1.15, - "learning_rate": 1.7785382776241154e-05, - "loss": 0.1087, + "learning_rate": 2.7788838094643722e-05, + "loss": 0.2702, "step": 24620 }, { "epoch": 1.15, - "learning_rate": 1.7784913974966014e-05, - "loss": 0.2552, + "learning_rate": 2.7788370024807702e-05, + "loss": 0.2745, "step": 24625 }, { "epoch": 1.15, - "learning_rate": 1.7784445173690874e-05, - "loss": 0.2631, + "learning_rate": 2.7787901954971682e-05, + "loss": 0.3906, "step": 24630 }, { "epoch": 1.15, - "learning_rate": 1.7783976372415734e-05, - "loss": 0.2573, + "learning_rate": 2.7787433885135662e-05, + "loss": 0.2601, "step": 24635 }, { "epoch": 1.15, - "learning_rate": 1.7783507571140594e-05, - "loss": 0.1247, + "learning_rate": 2.7786965815299645e-05, + "loss": 0.0918, "step": 24640 }, { "epoch": 1.15, - "learning_rate": 1.7783038769865454e-05, - "loss": 0.0605, + "learning_rate": 2.7786497745463625e-05, + "loss": 0.0976, "step": 24645 }, { "epoch": 1.15, - "learning_rate": 1.7782569968590317e-05, - "loss": 0.0973, + "learning_rate": 2.7786029675627605e-05, + "loss": 0.0672, "step": 24650 }, { "epoch": 1.15, - "learning_rate": 1.7782101167315177e-05, - "loss": 0.1361, + "learning_rate": 2.7785561605791588e-05, + "loss": 0.078, "step": 24655 }, { "epoch": 1.15, - "learning_rate": 1.7781632366040037e-05, - "loss": 0.1205, + "learning_rate": 2.7785093535955564e-05, + "loss": 0.0773, "step": 24660 }, { "epoch": 1.15, - "learning_rate": 1.7781163564764897e-05, - "loss": 0.1463, + "learning_rate": 2.7784625466119544e-05, + "loss": 0.127, "step": 24665 }, { "epoch": 1.15, - "learning_rate": 1.778069476348976e-05, - "loss": 0.2136, + "learning_rate": 2.7784157396283524e-05, + "loss": 0.1797, "step": 24670 }, { "epoch": 1.15, - "learning_rate": 1.778022596221462e-05, - "loss": 0.2491, + "learning_rate": 2.7783689326447507e-05, + "loss": 0.2975, "step": 24675 }, { "epoch": 1.15, - "learning_rate": 1.777975716093948e-05, - "loss": 0.4697, + "learning_rate": 2.7783221256611487e-05, + "loss": 0.461, "step": 24680 }, { "epoch": 1.15, - "learning_rate": 1.777928835966434e-05, - "loss": 0.296, + "learning_rate": 2.7782753186775467e-05, + "loss": 0.2972, "step": 24685 }, { "epoch": 1.15, - "learning_rate": 1.77788195583892e-05, - "loss": 0.0517, + "learning_rate": 2.7782285116939447e-05, + "loss": 0.1419, "step": 24690 }, { "epoch": 1.15, - "learning_rate": 1.777835075711406e-05, - "loss": 0.0591, + "learning_rate": 2.778181704710343e-05, + "loss": 0.1212, "step": 24695 }, { "epoch": 1.15, - "learning_rate": 1.777788195583892e-05, - "loss": 0.0752, + "learning_rate": 2.778134897726741e-05, + "loss": 0.1443, "step": 24700 }, { "epoch": 1.15, - "learning_rate": 1.7777413154563783e-05, - "loss": 0.1123, + "learning_rate": 2.778088090743139e-05, + "loss": 0.1086, "step": 24705 }, { "epoch": 1.15, - "learning_rate": 1.7776944353288643e-05, - "loss": 0.2295, + "learning_rate": 2.7780412837595373e-05, + "loss": 0.1421, "step": 24710 }, { "epoch": 1.15, - "learning_rate": 1.7776475552013503e-05, - "loss": 0.1714, + "learning_rate": 2.7779944767759352e-05, + "loss": 0.1977, "step": 24715 }, { "epoch": 1.15, - "learning_rate": 1.7776006750738363e-05, - "loss": 0.18, + "learning_rate": 2.7779476697923332e-05, + "loss": 0.2013, "step": 24720 }, { "epoch": 1.15, - "learning_rate": 1.7775537949463223e-05, - "loss": 0.2655, + "learning_rate": 2.777900862808731e-05, + "loss": 0.2399, "step": 24725 }, { "epoch": 1.15, - "learning_rate": 1.7775069148188086e-05, - "loss": 0.2824, + "learning_rate": 2.7778540558251292e-05, + "loss": 0.3368, "step": 24730 }, { "epoch": 1.15, - "learning_rate": 1.7774600346912946e-05, - "loss": 0.2489, + "learning_rate": 2.777807248841527e-05, + "loss": 0.3361, "step": 24735 }, { "epoch": 1.15, - "learning_rate": 1.7774131545637806e-05, - "loss": 0.0462, + "learning_rate": 2.777760441857925e-05, + "loss": 0.0392, "step": 24740 }, { "epoch": 1.15, - "learning_rate": 1.7773662744362666e-05, - "loss": 0.0724, + "learning_rate": 2.777713634874323e-05, + "loss": 0.116, "step": 24745 }, { "epoch": 1.15, - "learning_rate": 1.7773193943087526e-05, - "loss": 0.1316, + "learning_rate": 2.7776668278907215e-05, + "loss": 0.1077, "step": 24750 }, { "epoch": 1.16, - "learning_rate": 1.777272514181239e-05, - "loss": 0.1558, + "learning_rate": 2.7776200209071194e-05, + "loss": 0.1151, "step": 24755 }, { "epoch": 1.16, - "learning_rate": 1.777225634053725e-05, - "loss": 0.1752, + "learning_rate": 2.7775732139235174e-05, + "loss": 0.1359, "step": 24760 }, { "epoch": 1.16, - "learning_rate": 1.777178753926211e-05, - "loss": 0.171, + "learning_rate": 2.7775264069399157e-05, + "loss": 0.1645, "step": 24765 }, { "epoch": 1.16, - "learning_rate": 1.777131873798697e-05, - "loss": 0.1807, + "learning_rate": 2.7774795999563137e-05, + "loss": 0.2649, "step": 24770 }, { "epoch": 1.16, - "learning_rate": 1.777084993671183e-05, - "loss": 0.3059, + "learning_rate": 2.7774327929727117e-05, + "loss": 0.2646, "step": 24775 }, { "epoch": 1.16, - "learning_rate": 1.777038113543669e-05, - "loss": 0.3799, + "learning_rate": 2.7773859859891097e-05, + "loss": 0.4178, "step": 24780 }, { "epoch": 1.16, - "learning_rate": 1.776991233416155e-05, - "loss": 0.2705, + "learning_rate": 2.7773391790055077e-05, + "loss": 0.2718, "step": 24785 }, { "epoch": 1.16, - "learning_rate": 1.776944353288641e-05, - "loss": 0.0925, + "learning_rate": 2.7772923720219056e-05, + "loss": 0.0642, "step": 24790 }, { "epoch": 1.16, - "learning_rate": 1.7768974731611272e-05, - "loss": 0.1392, + "learning_rate": 2.7772455650383036e-05, + "loss": 0.1047, "step": 24795 }, { "epoch": 1.16, - "learning_rate": 1.7768505930336132e-05, - "loss": 0.1734, + "learning_rate": 2.7771987580547016e-05, + "loss": 0.1553, "step": 24800 }, { "epoch": 1.16, - "learning_rate": 1.7768037129060992e-05, - "loss": 0.0997, + "learning_rate": 2.7771519510711e-05, + "loss": 0.147, "step": 24805 }, { "epoch": 1.16, - "learning_rate": 1.7767568327785855e-05, - "loss": 0.1075, + "learning_rate": 2.777105144087498e-05, + "loss": 0.1093, "step": 24810 }, { "epoch": 1.16, - "learning_rate": 1.7767099526510715e-05, - "loss": 0.1196, + "learning_rate": 2.777058337103896e-05, + "loss": 0.157, "step": 24815 }, { "epoch": 1.16, - "learning_rate": 1.7766630725235575e-05, - "loss": 0.2809, + "learning_rate": 2.7770115301202942e-05, + "loss": 0.2539, "step": 24820 }, { "epoch": 1.16, - "learning_rate": 1.7766161923960435e-05, - "loss": 0.2512, + "learning_rate": 2.7769647231366922e-05, + "loss": 0.1965, "step": 24825 }, { "epoch": 1.16, - "learning_rate": 1.7765693122685295e-05, - "loss": 0.3496, + "learning_rate": 2.7769179161530902e-05, + "loss": 0.2952, "step": 24830 }, { "epoch": 1.16, - "learning_rate": 1.7765224321410155e-05, - "loss": 0.3198, + "learning_rate": 2.776871109169488e-05, + "loss": 0.2187, "step": 24835 }, { "epoch": 1.16, - "learning_rate": 1.7764755520135015e-05, - "loss": 0.0283, + "learning_rate": 2.7768243021858865e-05, + "loss": 0.0681, "step": 24840 }, { "epoch": 1.16, - "learning_rate": 1.7764286718859878e-05, - "loss": 0.1024, + "learning_rate": 2.7767774952022845e-05, + "loss": 0.0781, "step": 24845 }, { "epoch": 1.16, - "learning_rate": 1.7763817917584738e-05, - "loss": 0.117, + "learning_rate": 2.776730688218682e-05, + "loss": 0.1127, "step": 24850 }, { "epoch": 1.16, - "learning_rate": 1.7763349116309598e-05, - "loss": 0.1471, + "learning_rate": 2.77668388123508e-05, + "loss": 0.1195, "step": 24855 }, { "epoch": 1.16, - "learning_rate": 1.7762880315034458e-05, - "loss": 0.1288, + "learning_rate": 2.7766370742514784e-05, + "loss": 0.1037, "step": 24860 }, { "epoch": 1.16, - "learning_rate": 1.7762411513759318e-05, - "loss": 0.1377, + "learning_rate": 2.7765902672678764e-05, + "loss": 0.16, "step": 24865 }, { "epoch": 1.16, - "learning_rate": 1.7761942712484178e-05, - "loss": 0.252, + "learning_rate": 2.7765434602842744e-05, + "loss": 0.249, "step": 24870 }, { "epoch": 1.16, - "learning_rate": 1.776147391120904e-05, - "loss": 0.401, + "learning_rate": 2.7764966533006724e-05, + "loss": 0.2931, "step": 24875 }, { "epoch": 1.16, - "learning_rate": 1.77610051099339e-05, - "loss": 0.3525, + "learning_rate": 2.7764498463170707e-05, + "loss": 0.4896, "step": 24880 }, { "epoch": 1.16, - "learning_rate": 1.776053630865876e-05, - "loss": 0.187, + "learning_rate": 2.7764030393334687e-05, + "loss": 0.1716, "step": 24885 }, { "epoch": 1.16, - "learning_rate": 1.7760067507383624e-05, - "loss": 0.0264, + "learning_rate": 2.7763562323498666e-05, + "loss": 0.0742, "step": 24890 }, { "epoch": 1.16, - "learning_rate": 1.7759598706108484e-05, - "loss": 0.0683, + "learning_rate": 2.776309425366265e-05, + "loss": 0.1061, "step": 24895 }, { "epoch": 1.16, - "learning_rate": 1.7759129904833344e-05, - "loss": 0.0647, + "learning_rate": 2.776262618382663e-05, + "loss": 0.1011, "step": 24900 }, { "epoch": 1.16, - "learning_rate": 1.7758661103558204e-05, - "loss": 0.1196, + "learning_rate": 2.776215811399061e-05, + "loss": 0.1582, "step": 24905 }, { "epoch": 1.16, - "learning_rate": 1.7758192302283064e-05, - "loss": 0.1604, + "learning_rate": 2.776169004415459e-05, + "loss": 0.1836, "step": 24910 }, { "epoch": 1.16, - "learning_rate": 1.7757723501007924e-05, - "loss": 0.1187, + "learning_rate": 2.776122197431857e-05, + "loss": 0.1387, "step": 24915 }, { "epoch": 1.16, - "learning_rate": 1.7757254699732784e-05, - "loss": 0.2915, + "learning_rate": 2.776075390448255e-05, + "loss": 0.2108, "step": 24920 }, { "epoch": 1.16, - "learning_rate": 1.7756785898457644e-05, - "loss": 0.268, + "learning_rate": 2.776028583464653e-05, + "loss": 0.2764, "step": 24925 }, { "epoch": 1.16, - "learning_rate": 1.7756317097182504e-05, - "loss": 0.3793, + "learning_rate": 2.7759817764810508e-05, + "loss": 0.3323, "step": 24930 }, { "epoch": 1.16, - "learning_rate": 1.7755848295907364e-05, - "loss": 0.4032, + "learning_rate": 2.775934969497449e-05, + "loss": 0.2756, "step": 24935 }, { "epoch": 1.16, - "learning_rate": 1.7755379494632227e-05, - "loss": 0.1255, + "learning_rate": 2.775888162513847e-05, + "loss": 0.0872, "step": 24940 }, { "epoch": 1.16, - "learning_rate": 1.7754910693357087e-05, - "loss": 0.0745, + "learning_rate": 2.775841355530245e-05, + "loss": 0.1389, "step": 24945 }, { "epoch": 1.16, - "learning_rate": 1.7754441892081947e-05, - "loss": 0.121, + "learning_rate": 2.7757945485466434e-05, + "loss": 0.0912, "step": 24950 }, { "epoch": 1.16, - "learning_rate": 1.775397309080681e-05, - "loss": 0.1057, + "learning_rate": 2.7757477415630414e-05, + "loss": 0.0729, "step": 24955 }, { "epoch": 1.16, - "learning_rate": 1.775350428953167e-05, - "loss": 0.1091, + "learning_rate": 2.7757009345794394e-05, + "loss": 0.0942, "step": 24960 }, { "epoch": 1.16, - "learning_rate": 1.775303548825653e-05, - "loss": 0.1403, + "learning_rate": 2.7756541275958374e-05, + "loss": 0.1227, "step": 24965 }, { "epoch": 1.17, - "learning_rate": 1.775256668698139e-05, - "loss": 0.2515, + "learning_rate": 2.7756073206122357e-05, + "loss": 0.1822, "step": 24970 }, { "epoch": 1.17, - "learning_rate": 1.775209788570625e-05, - "loss": 0.3576, + "learning_rate": 2.7755605136286333e-05, + "loss": 0.2144, "step": 24975 }, { "epoch": 1.17, - "learning_rate": 1.775162908443111e-05, - "loss": 0.3469, + "learning_rate": 2.7755137066450313e-05, + "loss": 0.5167, "step": 24980 }, { "epoch": 1.17, - "learning_rate": 1.7751160283155973e-05, - "loss": 0.3998, + "learning_rate": 2.7754668996614293e-05, + "loss": 0.2825, "step": 24985 }, { "epoch": 1.17, - "learning_rate": 1.7750691481880833e-05, - "loss": 0.1192, + "learning_rate": 2.7754200926778276e-05, + "loss": 0.045, "step": 24990 }, { "epoch": 1.17, - "learning_rate": 1.7750222680605693e-05, - "loss": 0.0596, + "learning_rate": 2.7753732856942256e-05, + "loss": 0.0452, "step": 24995 }, { "epoch": 1.17, - "learning_rate": 1.7749753879330553e-05, - "loss": 0.0592, + "learning_rate": 2.7753264787106236e-05, + "loss": 0.071, "step": 25000 }, { "epoch": 1.17, - "learning_rate": 1.7749285078055413e-05, - "loss": 0.0797, + "learning_rate": 2.775279671727022e-05, + "loss": 0.1491, "step": 25005 }, { "epoch": 1.17, - "learning_rate": 1.7748816276780273e-05, - "loss": 0.1481, + "learning_rate": 2.77523286474342e-05, + "loss": 0.1434, "step": 25010 }, { "epoch": 1.17, - "learning_rate": 1.7748347475505136e-05, - "loss": 0.1276, + "learning_rate": 2.775186057759818e-05, + "loss": 0.151, "step": 25015 }, { "epoch": 1.17, - "learning_rate": 1.7747878674229996e-05, - "loss": 0.145, + "learning_rate": 2.775139250776216e-05, + "loss": 0.1819, "step": 25020 }, { "epoch": 1.17, - "learning_rate": 1.7747409872954856e-05, - "loss": 0.3773, + "learning_rate": 2.7750924437926142e-05, + "loss": 0.2723, "step": 25025 }, { "epoch": 1.17, - "learning_rate": 1.7746941071679716e-05, - "loss": 0.3071, + "learning_rate": 2.775045636809012e-05, + "loss": 0.2587, "step": 25030 }, { "epoch": 1.17, - "learning_rate": 1.774647227040458e-05, - "loss": 0.2949, + "learning_rate": 2.77499882982541e-05, + "loss": 0.2854, "step": 25035 }, { "epoch": 1.17, - "learning_rate": 1.774600346912944e-05, - "loss": 0.0725, + "learning_rate": 2.7749520228418078e-05, + "loss": 0.0922, "step": 25040 }, { "epoch": 1.17, - "learning_rate": 1.77455346678543e-05, - "loss": 0.0693, + "learning_rate": 2.774905215858206e-05, + "loss": 0.0191, "step": 25045 }, { "epoch": 1.17, - "learning_rate": 1.774506586657916e-05, - "loss": 0.0615, + "learning_rate": 2.774858408874604e-05, + "loss": 0.0941, "step": 25050 }, { "epoch": 1.17, - "learning_rate": 1.774459706530402e-05, - "loss": 0.1353, + "learning_rate": 2.774811601891002e-05, + "loss": 0.1208, "step": 25055 }, { "epoch": 1.17, - "learning_rate": 1.774412826402888e-05, - "loss": 0.1324, + "learning_rate": 2.7747647949074e-05, + "loss": 0.1192, "step": 25060 }, { "epoch": 1.17, - "learning_rate": 1.774365946275374e-05, - "loss": 0.1406, + "learning_rate": 2.7747179879237984e-05, + "loss": 0.1651, "step": 25065 }, { "epoch": 1.17, - "learning_rate": 1.77431906614786e-05, - "loss": 0.1729, + "learning_rate": 2.7746711809401963e-05, + "loss": 0.2296, "step": 25070 }, { "epoch": 1.17, - "learning_rate": 1.774272186020346e-05, - "loss": 0.2353, + "learning_rate": 2.7746243739565943e-05, + "loss": 0.21, "step": 25075 }, { "epoch": 1.17, - "learning_rate": 1.7742253058928322e-05, - "loss": 0.3157, + "learning_rate": 2.7745775669729927e-05, + "loss": 0.3513, "step": 25080 }, { "epoch": 1.17, - "learning_rate": 1.7741784257653182e-05, - "loss": 0.3223, + "learning_rate": 2.7745307599893906e-05, + "loss": 0.3436, "step": 25085 }, { "epoch": 1.17, - "learning_rate": 1.7741315456378042e-05, - "loss": 0.0615, + "learning_rate": 2.7744839530057886e-05, + "loss": 0.0526, "step": 25090 }, { "epoch": 1.17, - "learning_rate": 1.7740846655102905e-05, - "loss": 0.124, + "learning_rate": 2.7744371460221866e-05, + "loss": 0.0514, "step": 25095 }, { "epoch": 1.17, - "learning_rate": 1.7740377853827765e-05, - "loss": 0.0724, + "learning_rate": 2.7743903390385846e-05, + "loss": 0.1136, "step": 25100 }, { "epoch": 1.17, - "learning_rate": 1.7739909052552625e-05, - "loss": 0.077, + "learning_rate": 2.7743435320549826e-05, + "loss": 0.0964, "step": 25105 }, { "epoch": 1.17, - "learning_rate": 1.7739440251277485e-05, - "loss": 0.1978, + "learning_rate": 2.7742967250713805e-05, + "loss": 0.0994, "step": 25110 }, { "epoch": 1.17, - "learning_rate": 1.7738971450002345e-05, - "loss": 0.1213, + "learning_rate": 2.7742499180877785e-05, + "loss": 0.1668, "step": 25115 }, { "epoch": 1.17, - "learning_rate": 1.7738502648727205e-05, - "loss": 0.2148, + "learning_rate": 2.774203111104177e-05, + "loss": 0.172, "step": 25120 }, { "epoch": 1.17, - "learning_rate": 1.7738033847452068e-05, - "loss": 0.3011, + "learning_rate": 2.7741563041205748e-05, + "loss": 0.2633, "step": 25125 }, { "epoch": 1.17, - "learning_rate": 1.7737565046176928e-05, - "loss": 0.3277, + "learning_rate": 2.7741094971369728e-05, + "loss": 0.4466, "step": 25130 }, { "epoch": 1.17, - "learning_rate": 1.7737096244901788e-05, - "loss": 0.2688, + "learning_rate": 2.774062690153371e-05, + "loss": 0.3911, "step": 25135 }, { "epoch": 1.17, - "learning_rate": 1.7736627443626648e-05, - "loss": 0.0598, + "learning_rate": 2.774015883169769e-05, + "loss": 0.0396, "step": 25140 }, { "epoch": 1.17, - "learning_rate": 1.7736158642351508e-05, - "loss": 0.041, + "learning_rate": 2.773969076186167e-05, + "loss": 0.0798, "step": 25145 }, { "epoch": 1.17, - "learning_rate": 1.7735689841076368e-05, - "loss": 0.1021, + "learning_rate": 2.773922269202565e-05, + "loss": 0.0737, "step": 25150 }, { "epoch": 1.17, - "learning_rate": 1.7735221039801228e-05, - "loss": 0.1441, + "learning_rate": 2.7738754622189634e-05, + "loss": 0.0927, "step": 25155 }, { "epoch": 1.17, - "learning_rate": 1.773475223852609e-05, - "loss": 0.1063, + "learning_rate": 2.7738286552353614e-05, + "loss": 0.1029, "step": 25160 }, { "epoch": 1.17, - "learning_rate": 1.773428343725095e-05, - "loss": 0.109, + "learning_rate": 2.773781848251759e-05, + "loss": 0.1644, "step": 25165 }, { "epoch": 1.17, - "learning_rate": 1.773381463597581e-05, - "loss": 0.2057, + "learning_rate": 2.773735041268157e-05, + "loss": 0.2245, "step": 25170 }, { "epoch": 1.17, - "learning_rate": 1.7733345834700674e-05, - "loss": 0.2269, + "learning_rate": 2.7736882342845553e-05, + "loss": 0.21, "step": 25175 }, { "epoch": 1.17, - "learning_rate": 1.7732877033425534e-05, - "loss": 0.428, + "learning_rate": 2.7736414273009533e-05, + "loss": 0.2846, "step": 25180 }, { "epoch": 1.18, - "learning_rate": 1.7732408232150394e-05, - "loss": 0.3289, + "learning_rate": 2.7735946203173513e-05, + "loss": 0.3117, "step": 25185 }, { "epoch": 1.18, - "learning_rate": 1.7731939430875254e-05, - "loss": 0.0549, + "learning_rate": 2.7735478133337496e-05, + "loss": 0.0897, "step": 25190 }, { "epoch": 1.18, - "learning_rate": 1.7731470629600114e-05, - "loss": 0.1167, + "learning_rate": 2.7735010063501476e-05, + "loss": 0.0676, "step": 25195 }, { "epoch": 1.18, - "learning_rate": 1.7731001828324974e-05, - "loss": 0.0702, + "learning_rate": 2.7734541993665456e-05, + "loss": 0.0712, "step": 25200 }, { "epoch": 1.18, - "learning_rate": 1.7730533027049834e-05, - "loss": 0.1164, + "learning_rate": 2.7734073923829436e-05, + "loss": 0.0796, "step": 25205 }, { "epoch": 1.18, - "learning_rate": 1.7730064225774694e-05, - "loss": 0.1205, + "learning_rate": 2.773360585399342e-05, + "loss": 0.089, "step": 25210 }, { "epoch": 1.18, - "learning_rate": 1.7729595424499557e-05, - "loss": 0.1234, + "learning_rate": 2.77331377841574e-05, + "loss": 0.1672, "step": 25215 }, { "epoch": 1.18, - "learning_rate": 1.7729126623224417e-05, - "loss": 0.1769, + "learning_rate": 2.773266971432138e-05, + "loss": 0.2702, "step": 25220 }, { "epoch": 1.18, - "learning_rate": 1.7728657821949277e-05, - "loss": 0.2162, + "learning_rate": 2.7732201644485358e-05, + "loss": 0.2624, "step": 25225 }, { "epoch": 1.18, - "learning_rate": 1.7728189020674137e-05, - "loss": 0.3294, + "learning_rate": 2.7731733574649338e-05, + "loss": 0.3468, "step": 25230 }, { "epoch": 1.18, - "learning_rate": 1.7727720219398997e-05, - "loss": 0.2073, + "learning_rate": 2.7731265504813318e-05, + "loss": 0.3082, "step": 25235 }, { "epoch": 1.18, - "learning_rate": 1.772725141812386e-05, - "loss": 0.0587, + "learning_rate": 2.7730797434977298e-05, + "loss": 0.0881, "step": 25240 }, { "epoch": 1.18, - "learning_rate": 1.772678261684872e-05, - "loss": 0.0679, + "learning_rate": 2.7730329365141277e-05, + "loss": 0.0347, "step": 25245 }, { "epoch": 1.18, - "learning_rate": 1.772631381557358e-05, - "loss": 0.1205, + "learning_rate": 2.772986129530526e-05, + "loss": 0.105, "step": 25250 }, { "epoch": 1.18, - "learning_rate": 1.772584501429844e-05, - "loss": 0.0763, + "learning_rate": 2.772939322546924e-05, + "loss": 0.0889, "step": 25255 }, { "epoch": 1.18, - "learning_rate": 1.77253762130233e-05, - "loss": 0.1503, + "learning_rate": 2.772892515563322e-05, + "loss": 0.0521, "step": 25260 }, { "epoch": 1.18, - "learning_rate": 1.7724907411748163e-05, - "loss": 0.208, + "learning_rate": 2.7728457085797203e-05, + "loss": 0.1286, "step": 25265 }, { "epoch": 1.18, - "learning_rate": 1.7724438610473023e-05, - "loss": 0.1812, + "learning_rate": 2.7727989015961183e-05, + "loss": 0.1604, "step": 25270 }, { "epoch": 1.18, - "learning_rate": 1.7723969809197883e-05, - "loss": 0.2399, + "learning_rate": 2.7727520946125163e-05, + "loss": 0.2217, "step": 25275 }, { "epoch": 1.18, - "learning_rate": 1.7723501007922743e-05, - "loss": 0.276, + "learning_rate": 2.7727052876289143e-05, + "loss": 0.4269, "step": 25280 }, { "epoch": 1.18, - "learning_rate": 1.7723032206647603e-05, - "loss": 0.3312, + "learning_rate": 2.7726584806453126e-05, + "loss": 0.3102, "step": 25285 }, { "epoch": 1.18, - "learning_rate": 1.7722563405372463e-05, - "loss": 0.0309, + "learning_rate": 2.7726116736617103e-05, + "loss": 0.0935, "step": 25290 }, { "epoch": 1.18, - "learning_rate": 1.7722094604097323e-05, - "loss": 0.0898, + "learning_rate": 2.7725648666781082e-05, + "loss": 0.0656, "step": 25295 }, { "epoch": 1.18, - "learning_rate": 1.7721625802822183e-05, - "loss": 0.1028, + "learning_rate": 2.7725180596945062e-05, + "loss": 0.0585, "step": 25300 }, { "epoch": 1.18, - "learning_rate": 1.7721157001547046e-05, - "loss": 0.1351, + "learning_rate": 2.7724712527109045e-05, + "loss": 0.0882, "step": 25305 }, { "epoch": 1.18, - "learning_rate": 1.7720688200271906e-05, - "loss": 0.1858, + "learning_rate": 2.7724244457273025e-05, + "loss": 0.155, "step": 25310 }, { "epoch": 1.18, - "learning_rate": 1.7720219398996766e-05, - "loss": 0.0857, + "learning_rate": 2.7723776387437005e-05, + "loss": 0.1676, "step": 25315 }, { "epoch": 1.18, - "learning_rate": 1.771975059772163e-05, - "loss": 0.2156, + "learning_rate": 2.7723308317600988e-05, + "loss": 0.1588, "step": 25320 }, { "epoch": 1.18, - "learning_rate": 1.771928179644649e-05, - "loss": 0.3597, + "learning_rate": 2.7722840247764968e-05, + "loss": 0.2572, "step": 25325 }, { "epoch": 1.18, - "learning_rate": 1.771881299517135e-05, - "loss": 0.2196, + "learning_rate": 2.7722372177928948e-05, + "loss": 0.375, "step": 25330 }, { "epoch": 1.18, - "learning_rate": 1.771834419389621e-05, - "loss": 0.2814, + "learning_rate": 2.7721904108092928e-05, + "loss": 0.2919, "step": 25335 }, { "epoch": 1.18, - "learning_rate": 1.771787539262107e-05, - "loss": 0.0829, + "learning_rate": 2.772143603825691e-05, + "loss": 0.0853, "step": 25340 }, { "epoch": 1.18, - "learning_rate": 1.771740659134593e-05, - "loss": 0.0587, + "learning_rate": 2.772096796842089e-05, + "loss": 0.1015, "step": 25345 }, { "epoch": 1.18, - "learning_rate": 1.771693779007079e-05, - "loss": 0.1198, + "learning_rate": 2.772049989858487e-05, + "loss": 0.0854, "step": 25350 }, { "epoch": 1.18, - "learning_rate": 1.7716468988795652e-05, - "loss": 0.1103, + "learning_rate": 2.7720031828748847e-05, + "loss": 0.1539, "step": 25355 }, { "epoch": 1.18, - "learning_rate": 1.7716000187520512e-05, - "loss": 0.0928, + "learning_rate": 2.771956375891283e-05, + "loss": 0.102, "step": 25360 }, { "epoch": 1.18, - "learning_rate": 1.7715531386245372e-05, - "loss": 0.1933, + "learning_rate": 2.771909568907681e-05, + "loss": 0.1338, "step": 25365 }, { "epoch": 1.18, - "learning_rate": 1.7715062584970232e-05, - "loss": 0.246, + "learning_rate": 2.771862761924079e-05, + "loss": 0.1894, "step": 25370 }, { "epoch": 1.18, - "learning_rate": 1.7714593783695092e-05, - "loss": 0.2511, + "learning_rate": 2.7718159549404773e-05, + "loss": 0.1858, "step": 25375 }, { "epoch": 1.18, - "learning_rate": 1.7714124982419952e-05, - "loss": 0.3878, + "learning_rate": 2.7717691479568753e-05, + "loss": 0.5424, "step": 25380 }, { "epoch": 1.18, - "learning_rate": 1.7713656181144815e-05, - "loss": 0.2689, + "learning_rate": 2.7717223409732733e-05, + "loss": 0.3205, "step": 25385 }, { "epoch": 1.18, - "learning_rate": 1.7713187379869675e-05, - "loss": 0.0576, + "learning_rate": 2.7716755339896712e-05, + "loss": 0.061, "step": 25390 }, { "epoch": 1.18, - "learning_rate": 1.7712718578594535e-05, - "loss": 0.0802, + "learning_rate": 2.7716287270060696e-05, + "loss": 0.0828, "step": 25395 }, { "epoch": 1.19, - "learning_rate": 1.7712249777319395e-05, - "loss": 0.1151, + "learning_rate": 2.7715819200224676e-05, + "loss": 0.1036, "step": 25400 }, { "epoch": 1.19, - "learning_rate": 1.771178097604426e-05, - "loss": 0.1614, + "learning_rate": 2.7715351130388655e-05, + "loss": 0.1288, "step": 25405 }, { "epoch": 1.19, - "learning_rate": 1.7711312174769118e-05, - "loss": 0.2646, + "learning_rate": 2.7714883060552635e-05, + "loss": 0.134, "step": 25410 }, { "epoch": 1.19, - "learning_rate": 1.7710843373493978e-05, - "loss": 0.1363, + "learning_rate": 2.771441499071662e-05, + "loss": 0.2133, "step": 25415 }, { "epoch": 1.19, - "learning_rate": 1.7710374572218838e-05, - "loss": 0.271, + "learning_rate": 2.7713946920880595e-05, + "loss": 0.2697, "step": 25420 }, { "epoch": 1.19, - "learning_rate": 1.7709905770943698e-05, - "loss": 0.2105, + "learning_rate": 2.7713478851044575e-05, + "loss": 0.2125, "step": 25425 }, { "epoch": 1.19, - "learning_rate": 1.7709436969668558e-05, - "loss": 0.4278, + "learning_rate": 2.7713010781208554e-05, + "loss": 0.4802, "step": 25430 }, { "epoch": 1.19, - "learning_rate": 1.7708968168393418e-05, - "loss": 0.304, + "learning_rate": 2.7712542711372538e-05, + "loss": 0.2832, "step": 25435 }, { "epoch": 1.19, - "learning_rate": 1.7708499367118278e-05, - "loss": 0.096, + "learning_rate": 2.7712074641536517e-05, + "loss": 0.0427, "step": 25440 }, { "epoch": 1.19, - "learning_rate": 1.770803056584314e-05, - "loss": 0.0942, + "learning_rate": 2.7711606571700497e-05, + "loss": 0.0404, "step": 25445 }, { "epoch": 1.19, - "learning_rate": 1.7707561764568e-05, - "loss": 0.0986, + "learning_rate": 2.771113850186448e-05, + "loss": 0.1058, "step": 25450 }, { "epoch": 1.19, - "learning_rate": 1.770709296329286e-05, - "loss": 0.1379, + "learning_rate": 2.771067043202846e-05, + "loss": 0.1017, "step": 25455 }, { "epoch": 1.19, - "learning_rate": 1.770662416201772e-05, - "loss": 0.1218, + "learning_rate": 2.771020236219244e-05, + "loss": 0.1037, "step": 25460 }, { "epoch": 1.19, - "learning_rate": 1.7706155360742584e-05, - "loss": 0.172, + "learning_rate": 2.770973429235642e-05, + "loss": 0.2486, "step": 25465 }, { "epoch": 1.19, - "learning_rate": 1.7705686559467444e-05, - "loss": 0.175, + "learning_rate": 2.7709266222520403e-05, + "loss": 0.179, "step": 25470 }, { "epoch": 1.19, - "learning_rate": 1.7705217758192304e-05, - "loss": 0.1812, + "learning_rate": 2.7708798152684383e-05, + "loss": 0.265, "step": 25475 }, { "epoch": 1.19, - "learning_rate": 1.7704748956917164e-05, - "loss": 0.4416, + "learning_rate": 2.770833008284836e-05, + "loss": 0.4503, "step": 25480 }, { "epoch": 1.19, - "learning_rate": 1.7704280155642024e-05, - "loss": 0.3549, + "learning_rate": 2.770786201301234e-05, + "loss": 0.4062, "step": 25485 }, { "epoch": 1.19, - "learning_rate": 1.7703811354366884e-05, - "loss": 0.0537, + "learning_rate": 2.7707393943176322e-05, + "loss": 0.101, "step": 25490 }, { "epoch": 1.19, - "learning_rate": 1.7703342553091747e-05, - "loss": 0.0699, + "learning_rate": 2.7706925873340302e-05, + "loss": 0.0385, "step": 25495 }, { "epoch": 1.19, - "learning_rate": 1.7702873751816607e-05, - "loss": 0.1461, + "learning_rate": 2.7706457803504282e-05, + "loss": 0.0892, "step": 25500 }, { "epoch": 1.19, - "learning_rate": 1.7702404950541467e-05, - "loss": 0.181, + "learning_rate": 2.7705989733668265e-05, + "loss": 0.1343, "step": 25505 }, { "epoch": 1.19, - "learning_rate": 1.7701936149266327e-05, - "loss": 0.2071, + "learning_rate": 2.7705521663832245e-05, + "loss": 0.0809, "step": 25510 }, { "epoch": 1.19, - "learning_rate": 1.7701467347991187e-05, - "loss": 0.1548, + "learning_rate": 2.7705053593996225e-05, + "loss": 0.1952, "step": 25515 }, { "epoch": 1.19, - "learning_rate": 1.7700998546716047e-05, - "loss": 0.2228, + "learning_rate": 2.7704585524160205e-05, + "loss": 0.2179, "step": 25520 }, { "epoch": 1.19, - "learning_rate": 1.770052974544091e-05, - "loss": 0.232, + "learning_rate": 2.7704117454324188e-05, + "loss": 0.2581, "step": 25525 }, { "epoch": 1.19, - "learning_rate": 1.770006094416577e-05, - "loss": 0.3807, + "learning_rate": 2.7703649384488168e-05, + "loss": 0.3992, "step": 25530 }, { "epoch": 1.19, - "learning_rate": 1.769959214289063e-05, - "loss": 0.3537, + "learning_rate": 2.7703181314652148e-05, + "loss": 0.3823, "step": 25535 }, { "epoch": 1.19, - "learning_rate": 1.769912334161549e-05, - "loss": 0.0648, + "learning_rate": 2.7702713244816127e-05, + "loss": 0.0789, "step": 25540 }, { "epoch": 1.19, - "learning_rate": 1.7698654540340353e-05, - "loss": 0.0903, + "learning_rate": 2.7702245174980107e-05, + "loss": 0.0499, "step": 25545 }, { "epoch": 1.19, - "learning_rate": 1.7698185739065213e-05, - "loss": 0.1323, + "learning_rate": 2.7701777105144087e-05, + "loss": 0.073, "step": 25550 }, { "epoch": 1.19, - "learning_rate": 1.7697716937790073e-05, - "loss": 0.106, + "learning_rate": 2.7701309035308067e-05, + "loss": 0.1008, "step": 25555 }, { "epoch": 1.19, - "learning_rate": 1.7697248136514933e-05, - "loss": 0.0727, + "learning_rate": 2.770084096547205e-05, + "loss": 0.1142, "step": 25560 }, { "epoch": 1.19, - "learning_rate": 1.7696779335239793e-05, - "loss": 0.1354, + "learning_rate": 2.770037289563603e-05, + "loss": 0.1204, "step": 25565 }, { "epoch": 1.19, - "learning_rate": 1.7696310533964653e-05, - "loss": 0.2474, + "learning_rate": 2.769990482580001e-05, + "loss": 0.1725, "step": 25570 }, { "epoch": 1.19, - "learning_rate": 1.7695841732689513e-05, - "loss": 0.2366, + "learning_rate": 2.769943675596399e-05, + "loss": 0.1671, "step": 25575 }, { "epoch": 1.19, - "learning_rate": 1.7695372931414373e-05, - "loss": 0.3618, + "learning_rate": 2.7698968686127973e-05, + "loss": 0.4282, "step": 25580 }, { "epoch": 1.19, - "learning_rate": 1.7694904130139233e-05, - "loss": 0.294, + "learning_rate": 2.7698500616291952e-05, + "loss": 0.2291, "step": 25585 }, { "epoch": 1.19, - "learning_rate": 1.7694435328864096e-05, - "loss": 0.0909, + "learning_rate": 2.7698032546455932e-05, + "loss": 0.0799, "step": 25590 }, { "epoch": 1.19, - "learning_rate": 1.7693966527588956e-05, - "loss": 0.0789, + "learning_rate": 2.7697564476619912e-05, + "loss": 0.1047, "step": 25595 }, { "epoch": 1.19, - "learning_rate": 1.7693497726313816e-05, - "loss": 0.1543, + "learning_rate": 2.7697096406783895e-05, + "loss": 0.1213, "step": 25600 }, { "epoch": 1.19, - "learning_rate": 1.769302892503868e-05, - "loss": 0.1547, + "learning_rate": 2.7696628336947872e-05, + "loss": 0.1082, "step": 25605 }, { "epoch": 1.19, - "learning_rate": 1.769256012376354e-05, - "loss": 0.1539, + "learning_rate": 2.769616026711185e-05, + "loss": 0.1425, "step": 25610 }, { "epoch": 1.2, - "learning_rate": 1.76920913224884e-05, - "loss": 0.1479, + "learning_rate": 2.7695692197275835e-05, + "loss": 0.1121, "step": 25615 }, { "epoch": 1.2, - "learning_rate": 1.769162252121326e-05, - "loss": 0.2259, + "learning_rate": 2.7695224127439815e-05, + "loss": 0.225, "step": 25620 }, { "epoch": 1.2, - "learning_rate": 1.769115371993812e-05, - "loss": 0.237, + "learning_rate": 2.7694756057603794e-05, + "loss": 0.2783, "step": 25625 }, { "epoch": 1.2, - "learning_rate": 1.769068491866298e-05, - "loss": 0.2004, + "learning_rate": 2.7694287987767774e-05, + "loss": 0.3588, "step": 25630 }, { "epoch": 1.2, - "learning_rate": 1.7690216117387842e-05, - "loss": 0.4021, + "learning_rate": 2.7693819917931757e-05, + "loss": 0.3846, "step": 25635 }, { "epoch": 1.2, - "learning_rate": 1.7689747316112702e-05, - "loss": 0.102, + "learning_rate": 2.7693351848095737e-05, + "loss": 0.0537, "step": 25640 }, { "epoch": 1.2, - "learning_rate": 1.7689278514837562e-05, - "loss": 0.0637, + "learning_rate": 2.7692883778259717e-05, + "loss": 0.0729, "step": 25645 }, { "epoch": 1.2, - "learning_rate": 1.7688809713562422e-05, - "loss": 0.0587, + "learning_rate": 2.7692415708423697e-05, + "loss": 0.1688, "step": 25650 }, { "epoch": 1.2, - "learning_rate": 1.7688340912287282e-05, - "loss": 0.112, + "learning_rate": 2.769194763858768e-05, + "loss": 0.1576, "step": 25655 }, { "epoch": 1.2, - "learning_rate": 1.7687872111012142e-05, - "loss": 0.1684, + "learning_rate": 2.769147956875166e-05, + "loss": 0.1189, "step": 25660 }, { "epoch": 1.2, - "learning_rate": 1.7687403309737002e-05, - "loss": 0.2316, + "learning_rate": 2.769101149891564e-05, + "loss": 0.1946, "step": 25665 }, { "epoch": 1.2, - "learning_rate": 1.7686934508461865e-05, - "loss": 0.2574, + "learning_rate": 2.7690543429079616e-05, + "loss": 0.2424, "step": 25670 }, { "epoch": 1.2, - "learning_rate": 1.7686465707186725e-05, - "loss": 0.1927, + "learning_rate": 2.76900753592436e-05, + "loss": 0.266, "step": 25675 }, { "epoch": 1.2, - "learning_rate": 1.7685996905911585e-05, - "loss": 0.3456, + "learning_rate": 2.768960728940758e-05, + "loss": 0.4794, "step": 25680 }, { "epoch": 1.2, - "learning_rate": 1.768552810463645e-05, - "loss": 0.2925, + "learning_rate": 2.768913921957156e-05, + "loss": 0.2383, "step": 25685 }, { "epoch": 1.2, - "learning_rate": 1.768505930336131e-05, - "loss": 0.056, + "learning_rate": 2.7688671149735542e-05, + "loss": 0.0965, "step": 25690 }, { "epoch": 1.2, - "learning_rate": 1.768459050208617e-05, - "loss": 0.1489, + "learning_rate": 2.7688203079899522e-05, + "loss": 0.0779, "step": 25695 }, { "epoch": 1.2, - "learning_rate": 1.7684121700811028e-05, - "loss": 0.0954, + "learning_rate": 2.7687735010063502e-05, + "loss": 0.1129, "step": 25700 }, { "epoch": 1.2, - "learning_rate": 1.7683652899535888e-05, - "loss": 0.079, + "learning_rate": 2.768726694022748e-05, + "loss": 0.1068, "step": 25705 }, { "epoch": 1.2, - "learning_rate": 1.7683184098260748e-05, - "loss": 0.0937, + "learning_rate": 2.7686798870391465e-05, + "loss": 0.0992, "step": 25710 }, { "epoch": 1.2, - "learning_rate": 1.7682715296985608e-05, - "loss": 0.1415, + "learning_rate": 2.7686330800555445e-05, + "loss": 0.1579, "step": 25715 }, { "epoch": 1.2, - "learning_rate": 1.7682246495710468e-05, - "loss": 0.1691, + "learning_rate": 2.7685862730719424e-05, + "loss": 0.1385, "step": 25720 }, { "epoch": 1.2, - "learning_rate": 1.7681777694435328e-05, - "loss": 0.3064, + "learning_rate": 2.7685394660883404e-05, + "loss": 0.2129, "step": 25725 }, { "epoch": 1.2, - "learning_rate": 1.768130889316019e-05, - "loss": 0.3589, + "learning_rate": 2.7684926591047388e-05, + "loss": 0.4603, "step": 25730 }, { "epoch": 1.2, - "learning_rate": 1.768084009188505e-05, - "loss": 0.2006, + "learning_rate": 2.7684458521211364e-05, + "loss": 0.2509, "step": 25735 }, { "epoch": 1.2, - "learning_rate": 1.768037129060991e-05, - "loss": 0.0595, + "learning_rate": 2.7683990451375344e-05, + "loss": 0.0231, "step": 25740 }, { "epoch": 1.2, - "learning_rate": 1.767990248933477e-05, - "loss": 0.0753, + "learning_rate": 2.7683522381539327e-05, + "loss": 0.1146, "step": 25745 }, { "epoch": 1.2, - "learning_rate": 1.7679433688059634e-05, - "loss": 0.0849, + "learning_rate": 2.7683054311703307e-05, + "loss": 0.0884, "step": 25750 }, { "epoch": 1.2, - "learning_rate": 1.7678964886784494e-05, - "loss": 0.1281, + "learning_rate": 2.7682586241867287e-05, + "loss": 0.1142, "step": 25755 }, { "epoch": 1.2, - "learning_rate": 1.7678496085509354e-05, - "loss": 0.1522, + "learning_rate": 2.7682118172031266e-05, + "loss": 0.0975, "step": 25760 }, { "epoch": 1.2, - "learning_rate": 1.7678027284234214e-05, - "loss": 0.0967, + "learning_rate": 2.768165010219525e-05, + "loss": 0.1035, "step": 25765 }, { "epoch": 1.2, - "learning_rate": 1.7677558482959074e-05, - "loss": 0.1523, + "learning_rate": 2.768118203235923e-05, + "loss": 0.2098, "step": 25770 }, { "epoch": 1.2, - "learning_rate": 1.7677089681683937e-05, - "loss": 0.1729, + "learning_rate": 2.768071396252321e-05, + "loss": 0.2113, "step": 25775 }, { "epoch": 1.2, - "learning_rate": 1.7676620880408797e-05, - "loss": 0.4448, + "learning_rate": 2.768024589268719e-05, + "loss": 0.2928, "step": 25780 }, { "epoch": 1.2, - "learning_rate": 1.7676152079133657e-05, - "loss": 0.3308, + "learning_rate": 2.7679777822851172e-05, + "loss": 0.2965, "step": 25785 }, { "epoch": 1.2, - "learning_rate": 1.7675683277858517e-05, - "loss": 0.0479, + "learning_rate": 2.7679309753015152e-05, + "loss": 0.0334, "step": 25790 }, { "epoch": 1.2, - "learning_rate": 1.7675214476583377e-05, - "loss": 0.0691, + "learning_rate": 2.767884168317913e-05, + "loss": 0.0585, "step": 25795 }, { "epoch": 1.2, - "learning_rate": 1.7674745675308237e-05, - "loss": 0.0718, + "learning_rate": 2.7678373613343112e-05, + "loss": 0.0588, "step": 25800 }, { "epoch": 1.2, - "learning_rate": 1.7674276874033097e-05, - "loss": 0.1133, + "learning_rate": 2.767790554350709e-05, + "loss": 0.1113, "step": 25805 }, { "epoch": 1.2, - "learning_rate": 1.7673808072757957e-05, - "loss": 0.1421, + "learning_rate": 2.767743747367107e-05, + "loss": 0.1197, "step": 25810 }, { "epoch": 1.2, - "learning_rate": 1.767333927148282e-05, - "loss": 0.2539, + "learning_rate": 2.767696940383505e-05, + "loss": 0.1716, "step": 25815 }, { "epoch": 1.2, - "learning_rate": 1.767287047020768e-05, - "loss": 0.2135, + "learning_rate": 2.7676501333999034e-05, + "loss": 0.2334, "step": 25820 }, { "epoch": 1.21, - "learning_rate": 1.767240166893254e-05, - "loss": 0.311, + "learning_rate": 2.7676033264163014e-05, + "loss": 0.2301, "step": 25825 }, { "epoch": 1.21, - "learning_rate": 1.7671932867657404e-05, - "loss": 0.3515, + "learning_rate": 2.7675565194326994e-05, + "loss": 0.2914, "step": 25830 }, { "epoch": 1.21, - "learning_rate": 1.7671464066382263e-05, - "loss": 0.2823, + "learning_rate": 2.7675097124490974e-05, + "loss": 0.2337, "step": 25835 }, { "epoch": 1.21, - "learning_rate": 1.7670995265107123e-05, - "loss": 0.0406, + "learning_rate": 2.7674629054654957e-05, + "loss": 0.0694, "step": 25840 }, { "epoch": 1.21, - "learning_rate": 1.7670526463831983e-05, - "loss": 0.0476, + "learning_rate": 2.7674160984818937e-05, + "loss": 0.075, "step": 25845 }, { "epoch": 1.21, - "learning_rate": 1.7670057662556843e-05, - "loss": 0.158, + "learning_rate": 2.7673692914982917e-05, + "loss": 0.1596, "step": 25850 }, { "epoch": 1.21, - "learning_rate": 1.7669588861281703e-05, - "loss": 0.1091, + "learning_rate": 2.7673224845146897e-05, + "loss": 0.0813, "step": 25855 }, { "epoch": 1.21, - "learning_rate": 1.7669120060006563e-05, - "loss": 0.1941, + "learning_rate": 2.7672756775310876e-05, + "loss": 0.1492, "step": 25860 }, { "epoch": 1.21, - "learning_rate": 1.7668651258731426e-05, - "loss": 0.1813, + "learning_rate": 2.7672288705474856e-05, + "loss": 0.2187, "step": 25865 }, { "epoch": 1.21, - "learning_rate": 1.7668182457456286e-05, - "loss": 0.2163, + "learning_rate": 2.7671820635638836e-05, + "loss": 0.2334, "step": 25870 }, { "epoch": 1.21, - "learning_rate": 1.7667713656181146e-05, - "loss": 0.2309, + "learning_rate": 2.767135256580282e-05, + "loss": 0.2039, "step": 25875 }, { "epoch": 1.21, - "learning_rate": 1.7667244854906006e-05, - "loss": 0.3899, + "learning_rate": 2.76708844959668e-05, + "loss": 0.459, "step": 25880 }, { "epoch": 1.21, - "learning_rate": 1.7666776053630866e-05, - "loss": 0.409, + "learning_rate": 2.767041642613078e-05, + "loss": 0.4232, "step": 25885 }, { "epoch": 1.21, - "learning_rate": 1.7666307252355726e-05, - "loss": 0.0603, + "learning_rate": 2.766994835629476e-05, + "loss": 0.0428, "step": 25890 }, { "epoch": 1.21, - "learning_rate": 1.766583845108059e-05, - "loss": 0.0614, + "learning_rate": 2.7669480286458742e-05, + "loss": 0.093, "step": 25895 }, { "epoch": 1.21, - "learning_rate": 1.766536964980545e-05, - "loss": 0.0755, + "learning_rate": 2.766901221662272e-05, + "loss": 0.0938, "step": 25900 }, { "epoch": 1.21, - "learning_rate": 1.766490084853031e-05, - "loss": 0.0907, + "learning_rate": 2.76685441467867e-05, + "loss": 0.1127, "step": 25905 }, { "epoch": 1.21, - "learning_rate": 1.766443204725517e-05, - "loss": 0.2172, + "learning_rate": 2.766807607695068e-05, + "loss": 0.1284, "step": 25910 }, { "epoch": 1.21, - "learning_rate": 1.7663963245980033e-05, - "loss": 0.1953, + "learning_rate": 2.7667608007114664e-05, + "loss": 0.1795, "step": 25915 }, { "epoch": 1.21, - "learning_rate": 1.7663494444704892e-05, - "loss": 0.2066, + "learning_rate": 2.7667139937278644e-05, + "loss": 0.2153, "step": 25920 }, { "epoch": 1.21, - "learning_rate": 1.7663025643429752e-05, - "loss": 0.2019, + "learning_rate": 2.766667186744262e-05, + "loss": 0.1978, "step": 25925 }, { "epoch": 1.21, - "learning_rate": 1.7662556842154612e-05, - "loss": 0.2587, + "learning_rate": 2.7666203797606604e-05, + "loss": 0.4028, "step": 25930 }, { "epoch": 1.21, - "learning_rate": 1.7662088040879472e-05, - "loss": 0.328, + "learning_rate": 2.7665735727770584e-05, + "loss": 0.3551, "step": 25935 }, { "epoch": 1.21, - "learning_rate": 1.7661619239604332e-05, - "loss": 0.0652, + "learning_rate": 2.7665267657934564e-05, + "loss": 0.0499, "step": 25940 }, { "epoch": 1.21, - "learning_rate": 1.7661150438329192e-05, - "loss": 0.1316, + "learning_rate": 2.7664799588098543e-05, + "loss": 0.0746, "step": 25945 }, { "epoch": 1.21, - "learning_rate": 1.7660681637054052e-05, - "loss": 0.1019, + "learning_rate": 2.7664331518262527e-05, + "loss": 0.0827, "step": 25950 }, { "epoch": 1.21, - "learning_rate": 1.7660212835778915e-05, - "loss": 0.1534, + "learning_rate": 2.7663863448426506e-05, + "loss": 0.0656, "step": 25955 }, { "epoch": 1.21, - "learning_rate": 1.7659744034503775e-05, - "loss": 0.1238, + "learning_rate": 2.7663395378590486e-05, + "loss": 0.0846, "step": 25960 }, { "epoch": 1.21, - "learning_rate": 1.7659275233228635e-05, - "loss": 0.1582, + "learning_rate": 2.7662927308754466e-05, + "loss": 0.2031, "step": 25965 }, { "epoch": 1.21, - "learning_rate": 1.7658806431953495e-05, - "loss": 0.2585, + "learning_rate": 2.766245923891845e-05, + "loss": 0.162, "step": 25970 }, { "epoch": 1.21, - "learning_rate": 1.765833763067836e-05, - "loss": 0.3096, + "learning_rate": 2.766199116908243e-05, + "loss": 0.2951, "step": 25975 }, { "epoch": 1.21, - "learning_rate": 1.765786882940322e-05, - "loss": 0.4421, + "learning_rate": 2.766152309924641e-05, + "loss": 0.3014, "step": 25980 }, { "epoch": 1.21, - "learning_rate": 1.765740002812808e-05, - "loss": 0.2735, + "learning_rate": 2.766105502941039e-05, + "loss": 0.2334, "step": 25985 }, { "epoch": 1.21, - "learning_rate": 1.765693122685294e-05, - "loss": 0.0522, + "learning_rate": 2.766058695957437e-05, + "loss": 0.0926, "step": 25990 }, { "epoch": 1.21, - "learning_rate": 1.7656462425577798e-05, - "loss": 0.0711, + "learning_rate": 2.766011888973835e-05, + "loss": 0.0547, "step": 25995 }, { "epoch": 1.21, - "learning_rate": 1.7655993624302658e-05, - "loss": 0.0588, + "learning_rate": 2.7659650819902328e-05, + "loss": 0.0435, "step": 26000 }, { "epoch": 1.21, - "learning_rate": 1.765552482302752e-05, - "loss": 0.1255, + "learning_rate": 2.765918275006631e-05, + "loss": 0.0942, "step": 26005 }, { "epoch": 1.21, - "learning_rate": 1.765505602175238e-05, - "loss": 0.0974, + "learning_rate": 2.765871468023029e-05, + "loss": 0.1461, "step": 26010 }, { "epoch": 1.21, - "learning_rate": 1.765458722047724e-05, - "loss": 0.162, + "learning_rate": 2.765824661039427e-05, + "loss": 0.0797, "step": 26015 }, { "epoch": 1.21, - "learning_rate": 1.76541184192021e-05, - "loss": 0.2493, + "learning_rate": 2.765777854055825e-05, + "loss": 0.2177, "step": 26020 }, { "epoch": 1.21, - "learning_rate": 1.765364961792696e-05, - "loss": 0.228, + "learning_rate": 2.7657310470722234e-05, + "loss": 0.1941, "step": 26025 }, { "epoch": 1.21, - "learning_rate": 1.765318081665182e-05, - "loss": 0.3248, + "learning_rate": 2.7656842400886214e-05, + "loss": 0.3445, "step": 26030 }, { "epoch": 1.21, - "learning_rate": 1.7652712015376684e-05, - "loss": 0.3221, + "learning_rate": 2.7656374331050194e-05, + "loss": 0.2635, "step": 26035 }, { "epoch": 1.22, - "learning_rate": 1.7652243214101544e-05, - "loss": 0.0706, + "learning_rate": 2.7655906261214173e-05, + "loss": 0.1444, "step": 26040 }, { "epoch": 1.22, - "learning_rate": 1.7651774412826404e-05, - "loss": 0.1151, + "learning_rate": 2.7655438191378157e-05, + "loss": 0.0868, "step": 26045 }, { "epoch": 1.22, - "learning_rate": 1.7651305611551264e-05, - "loss": 0.0961, + "learning_rate": 2.7654970121542133e-05, + "loss": 0.1137, "step": 26050 }, { "epoch": 1.22, - "learning_rate": 1.7650836810276128e-05, - "loss": 0.1412, + "learning_rate": 2.7654502051706113e-05, + "loss": 0.0746, "step": 26055 }, { "epoch": 1.22, - "learning_rate": 1.7650368009000988e-05, - "loss": 0.1789, + "learning_rate": 2.7654033981870096e-05, + "loss": 0.0848, "step": 26060 }, { "epoch": 1.22, - "learning_rate": 1.7649899207725847e-05, - "loss": 0.1407, + "learning_rate": 2.7653565912034076e-05, + "loss": 0.131, "step": 26065 }, { "epoch": 1.22, - "learning_rate": 1.7649430406450707e-05, - "loss": 0.2843, + "learning_rate": 2.7653097842198056e-05, + "loss": 0.2067, "step": 26070 }, { "epoch": 1.22, - "learning_rate": 1.7648961605175567e-05, - "loss": 0.2238, + "learning_rate": 2.7652629772362036e-05, + "loss": 0.2435, "step": 26075 }, { "epoch": 1.22, - "learning_rate": 1.7648492803900427e-05, - "loss": 0.4524, + "learning_rate": 2.765216170252602e-05, + "loss": 0.2184, "step": 26080 }, { "epoch": 1.22, - "learning_rate": 1.7648024002625287e-05, - "loss": 0.2933, + "learning_rate": 2.765169363269e-05, + "loss": 0.3624, "step": 26085 }, { "epoch": 1.22, - "learning_rate": 1.7647555201350147e-05, - "loss": 0.0408, + "learning_rate": 2.765122556285398e-05, + "loss": 0.097, "step": 26090 }, { "epoch": 1.22, - "learning_rate": 1.7647086400075007e-05, - "loss": 0.1177, + "learning_rate": 2.7650757493017958e-05, + "loss": 0.1063, "step": 26095 }, { "epoch": 1.22, - "learning_rate": 1.764661759879987e-05, - "loss": 0.1175, + "learning_rate": 2.765028942318194e-05, + "loss": 0.1098, "step": 26100 }, { "epoch": 1.22, - "learning_rate": 1.764614879752473e-05, - "loss": 0.1405, + "learning_rate": 2.764982135334592e-05, + "loss": 0.1686, "step": 26105 }, { "epoch": 1.22, - "learning_rate": 1.764567999624959e-05, - "loss": 0.1418, + "learning_rate": 2.76493532835099e-05, + "loss": 0.1633, "step": 26110 }, { "epoch": 1.22, - "learning_rate": 1.7645211194974454e-05, - "loss": 0.1756, + "learning_rate": 2.764888521367388e-05, + "loss": 0.1439, "step": 26115 }, { "epoch": 1.22, - "learning_rate": 1.7644742393699314e-05, - "loss": 0.1311, + "learning_rate": 2.764841714383786e-05, + "loss": 0.1674, "step": 26120 }, { "epoch": 1.22, - "learning_rate": 1.7644273592424173e-05, - "loss": 0.2413, + "learning_rate": 2.764794907400184e-05, + "loss": 0.1911, "step": 26125 }, { "epoch": 1.22, - "learning_rate": 1.7643804791149033e-05, - "loss": 0.2902, + "learning_rate": 2.764748100416582e-05, + "loss": 0.2752, "step": 26130 }, { "epoch": 1.22, - "learning_rate": 1.7643335989873893e-05, - "loss": 0.4399, + "learning_rate": 2.7647012934329804e-05, + "loss": 0.4117, "step": 26135 }, { "epoch": 1.22, - "learning_rate": 1.7642867188598753e-05, - "loss": 0.0452, + "learning_rate": 2.7646544864493783e-05, + "loss": 0.0602, "step": 26140 }, { "epoch": 1.22, - "learning_rate": 1.7642398387323617e-05, - "loss": 0.0868, + "learning_rate": 2.7646076794657763e-05, + "loss": 0.0713, "step": 26145 }, { "epoch": 1.22, - "learning_rate": 1.7641929586048477e-05, - "loss": 0.0898, + "learning_rate": 2.7645608724821743e-05, + "loss": 0.0761, "step": 26150 }, { "epoch": 1.22, - "learning_rate": 1.7641460784773336e-05, - "loss": 0.1588, + "learning_rate": 2.7645140654985726e-05, + "loss": 0.0877, "step": 26155 }, { "epoch": 1.22, - "learning_rate": 1.7640991983498196e-05, - "loss": 0.1818, + "learning_rate": 2.7644672585149706e-05, + "loss": 0.1915, "step": 26160 }, { "epoch": 1.22, - "learning_rate": 1.7640523182223056e-05, - "loss": 0.241, + "learning_rate": 2.7644204515313686e-05, + "loss": 0.1301, "step": 26165 }, { "epoch": 1.22, - "learning_rate": 1.7640054380947916e-05, - "loss": 0.1719, + "learning_rate": 2.764373644547767e-05, + "loss": 0.1413, "step": 26170 }, { "epoch": 1.22, - "learning_rate": 1.7639585579672776e-05, - "loss": 0.219, + "learning_rate": 2.7643268375641646e-05, + "loss": 0.2631, "step": 26175 }, { "epoch": 1.22, - "learning_rate": 1.763911677839764e-05, - "loss": 0.3367, + "learning_rate": 2.7642800305805625e-05, + "loss": 0.3781, "step": 26180 }, { "epoch": 1.22, - "learning_rate": 1.76386479771225e-05, - "loss": 0.2374, + "learning_rate": 2.7642332235969605e-05, + "loss": 0.2615, "step": 26185 }, { "epoch": 1.22, - "learning_rate": 1.763817917584736e-05, - "loss": 0.0655, + "learning_rate": 2.764186416613359e-05, + "loss": 0.1036, "step": 26190 }, { "epoch": 1.22, - "learning_rate": 1.7637710374572223e-05, - "loss": 0.0818, + "learning_rate": 2.7641396096297568e-05, + "loss": 0.1248, "step": 26195 }, { "epoch": 1.22, - "learning_rate": 1.7637241573297083e-05, - "loss": 0.1566, + "learning_rate": 2.7640928026461548e-05, + "loss": 0.1448, "step": 26200 }, { "epoch": 1.22, - "learning_rate": 1.7636772772021943e-05, - "loss": 0.1525, + "learning_rate": 2.7640459956625528e-05, + "loss": 0.1155, "step": 26205 }, { "epoch": 1.22, - "learning_rate": 1.7636303970746803e-05, - "loss": 0.1549, + "learning_rate": 2.763999188678951e-05, + "loss": 0.1462, "step": 26210 }, { "epoch": 1.22, - "learning_rate": 1.7635835169471662e-05, - "loss": 0.2122, + "learning_rate": 2.763952381695349e-05, + "loss": 0.0997, "step": 26215 }, { "epoch": 1.22, - "learning_rate": 1.7635366368196522e-05, - "loss": 0.1392, + "learning_rate": 2.763905574711747e-05, + "loss": 0.238, "step": 26220 }, { "epoch": 1.22, - "learning_rate": 1.7634897566921382e-05, - "loss": 0.3111, + "learning_rate": 2.7638587677281454e-05, + "loss": 0.214, "step": 26225 }, { "epoch": 1.22, - "learning_rate": 1.7634428765646242e-05, - "loss": 0.4944, + "learning_rate": 2.7638119607445434e-05, + "loss": 0.2839, "step": 26230 }, { "epoch": 1.22, - "learning_rate": 1.7633959964371102e-05, - "loss": 0.3476, + "learning_rate": 2.7637651537609413e-05, + "loss": 0.2874, "step": 26235 }, { "epoch": 1.22, - "learning_rate": 1.7633491163095965e-05, - "loss": 0.0758, + "learning_rate": 2.763718346777339e-05, + "loss": 0.0405, "step": 26240 }, { "epoch": 1.22, - "learning_rate": 1.7633022361820825e-05, - "loss": 0.1091, + "learning_rate": 2.7636715397937373e-05, + "loss": 0.0466, "step": 26245 }, { "epoch": 1.22, - "learning_rate": 1.7632553560545685e-05, - "loss": 0.1075, + "learning_rate": 2.7636247328101353e-05, + "loss": 0.0732, "step": 26250 }, { "epoch": 1.23, - "learning_rate": 1.7632084759270545e-05, - "loss": 0.0975, + "learning_rate": 2.7635779258265333e-05, + "loss": 0.1521, "step": 26255 }, { "epoch": 1.23, - "learning_rate": 1.763161595799541e-05, - "loss": 0.1404, + "learning_rate": 2.7635311188429313e-05, + "loss": 0.1306, "step": 26260 }, { "epoch": 1.23, - "learning_rate": 1.763114715672027e-05, - "loss": 0.1439, + "learning_rate": 2.7634843118593296e-05, + "loss": 0.1572, "step": 26265 }, { "epoch": 1.23, - "learning_rate": 1.763067835544513e-05, - "loss": 0.1963, + "learning_rate": 2.7634375048757276e-05, + "loss": 0.1458, "step": 26270 }, { "epoch": 1.23, - "learning_rate": 1.763020955416999e-05, - "loss": 0.2833, + "learning_rate": 2.7633906978921255e-05, + "loss": 0.1574, "step": 26275 }, { "epoch": 1.23, - "learning_rate": 1.762974075289485e-05, - "loss": 0.4187, + "learning_rate": 2.7633438909085235e-05, + "loss": 0.2982, "step": 26280 }, { "epoch": 1.23, - "learning_rate": 1.762927195161971e-05, - "loss": 0.3837, + "learning_rate": 2.763297083924922e-05, + "loss": 0.2551, "step": 26285 }, { "epoch": 1.23, - "learning_rate": 1.762880315034457e-05, - "loss": 0.0572, + "learning_rate": 2.7632502769413198e-05, + "loss": 0.0704, "step": 26290 }, { "epoch": 1.23, - "learning_rate": 1.762833434906943e-05, - "loss": 0.1154, + "learning_rate": 2.7632034699577178e-05, + "loss": 0.0801, "step": 26295 }, { "epoch": 1.23, - "learning_rate": 1.762786554779429e-05, - "loss": 0.1305, + "learning_rate": 2.7631566629741158e-05, + "loss": 0.099, "step": 26300 }, { "epoch": 1.23, - "learning_rate": 1.762739674651915e-05, - "loss": 0.0764, + "learning_rate": 2.7631098559905138e-05, + "loss": 0.1362, "step": 26305 }, { "epoch": 1.23, - "learning_rate": 1.762692794524401e-05, - "loss": 0.172, + "learning_rate": 2.7630630490069118e-05, + "loss": 0.1717, "step": 26310 }, { "epoch": 1.23, - "learning_rate": 1.762645914396887e-05, - "loss": 0.2716, + "learning_rate": 2.7630162420233097e-05, + "loss": 0.2557, "step": 26315 }, { "epoch": 1.23, - "learning_rate": 1.762599034269373e-05, - "loss": 0.1888, + "learning_rate": 2.762969435039708e-05, + "loss": 0.158, "step": 26320 }, { "epoch": 1.23, - "learning_rate": 1.7625521541418595e-05, - "loss": 0.2317, + "learning_rate": 2.762922628056106e-05, + "loss": 0.2529, "step": 26325 }, { "epoch": 1.23, - "learning_rate": 1.7625052740143454e-05, - "loss": 0.3846, + "learning_rate": 2.762875821072504e-05, + "loss": 0.3616, "step": 26330 }, { "epoch": 1.23, - "learning_rate": 1.7624583938868314e-05, - "loss": 0.3627, + "learning_rate": 2.762829014088902e-05, + "loss": 0.2514, "step": 26335 }, { "epoch": 1.23, - "learning_rate": 1.7624115137593178e-05, - "loss": 0.0344, + "learning_rate": 2.7627822071053003e-05, + "loss": 0.0665, "step": 26340 }, { "epoch": 1.23, - "learning_rate": 1.7623646336318038e-05, - "loss": 0.0382, + "learning_rate": 2.7627354001216983e-05, + "loss": 0.0737, "step": 26345 }, { "epoch": 1.23, - "learning_rate": 1.7623177535042898e-05, - "loss": 0.0994, + "learning_rate": 2.7626885931380963e-05, + "loss": 0.1017, "step": 26350 }, { "epoch": 1.23, - "learning_rate": 1.7622708733767758e-05, - "loss": 0.0887, + "learning_rate": 2.7626417861544946e-05, + "loss": 0.0793, "step": 26355 }, { "epoch": 1.23, - "learning_rate": 1.7622239932492617e-05, - "loss": 0.107, + "learning_rate": 2.7625949791708926e-05, + "loss": 0.1185, "step": 26360 }, { "epoch": 1.23, - "learning_rate": 1.7621771131217477e-05, - "loss": 0.2014, + "learning_rate": 2.7625481721872902e-05, + "loss": 0.1498, "step": 26365 }, { "epoch": 1.23, - "learning_rate": 1.7621302329942337e-05, - "loss": 0.1593, + "learning_rate": 2.7625013652036882e-05, + "loss": 0.0816, "step": 26370 }, { "epoch": 1.23, - "learning_rate": 1.7620833528667197e-05, - "loss": 0.2043, + "learning_rate": 2.7624545582200865e-05, + "loss": 0.2107, "step": 26375 }, { "epoch": 1.23, - "learning_rate": 1.762036472739206e-05, - "loss": 0.2195, + "learning_rate": 2.7624077512364845e-05, + "loss": 0.361, "step": 26380 }, { "epoch": 1.23, - "learning_rate": 1.761989592611692e-05, - "loss": 0.2368, + "learning_rate": 2.7623609442528825e-05, + "loss": 0.2103, "step": 26385 }, { "epoch": 1.23, - "learning_rate": 1.761942712484178e-05, - "loss": 0.0934, + "learning_rate": 2.7623141372692805e-05, + "loss": 0.0649, "step": 26390 }, { "epoch": 1.23, - "learning_rate": 1.761895832356664e-05, - "loss": 0.1525, + "learning_rate": 2.7622673302856788e-05, + "loss": 0.0719, "step": 26395 }, { "epoch": 1.23, - "learning_rate": 1.76184895222915e-05, - "loss": 0.1338, + "learning_rate": 2.7622205233020768e-05, + "loss": 0.095, "step": 26400 }, { "epoch": 1.23, - "learning_rate": 1.7618020721016364e-05, - "loss": 0.1834, + "learning_rate": 2.7621737163184748e-05, + "loss": 0.1392, "step": 26405 }, { "epoch": 1.23, - "learning_rate": 1.7617551919741224e-05, - "loss": 0.1386, + "learning_rate": 2.762126909334873e-05, + "loss": 0.1261, "step": 26410 }, { "epoch": 1.23, - "learning_rate": 1.7617083118466084e-05, - "loss": 0.2247, + "learning_rate": 2.762080102351271e-05, + "loss": 0.2235, "step": 26415 }, { "epoch": 1.23, - "learning_rate": 1.7616614317190943e-05, - "loss": 0.2742, + "learning_rate": 2.762033295367669e-05, + "loss": 0.1772, "step": 26420 }, { "epoch": 1.23, - "learning_rate": 1.7616145515915807e-05, - "loss": 0.2639, + "learning_rate": 2.761986488384067e-05, + "loss": 0.2098, "step": 26425 }, { "epoch": 1.23, - "learning_rate": 1.7615676714640667e-05, - "loss": 0.2932, + "learning_rate": 2.761939681400465e-05, + "loss": 0.5498, "step": 26430 }, { "epoch": 1.23, - "learning_rate": 1.7615207913365527e-05, - "loss": 0.2663, + "learning_rate": 2.761892874416863e-05, + "loss": 0.283, "step": 26435 }, { "epoch": 1.23, - "learning_rate": 1.7614739112090387e-05, - "loss": 0.0708, + "learning_rate": 2.761846067433261e-05, + "loss": 0.0199, "step": 26440 }, { "epoch": 1.23, - "learning_rate": 1.7614270310815246e-05, - "loss": 0.0757, + "learning_rate": 2.761799260449659e-05, + "loss": 0.0774, "step": 26445 }, { "epoch": 1.23, - "learning_rate": 1.7613801509540106e-05, - "loss": 0.0615, + "learning_rate": 2.7617524534660573e-05, + "loss": 0.0804, "step": 26450 }, { "epoch": 1.23, - "learning_rate": 1.7613332708264966e-05, - "loss": 0.1459, + "learning_rate": 2.7617056464824553e-05, + "loss": 0.054, "step": 26455 }, { "epoch": 1.23, - "learning_rate": 1.7612863906989826e-05, - "loss": 0.1307, + "learning_rate": 2.7616588394988532e-05, + "loss": 0.1484, "step": 26460 }, { "epoch": 1.23, - "learning_rate": 1.761239510571469e-05, - "loss": 0.1669, + "learning_rate": 2.7616120325152512e-05, + "loss": 0.1441, "step": 26465 }, { "epoch": 1.24, - "learning_rate": 1.761192630443955e-05, - "loss": 0.2281, + "learning_rate": 2.7615652255316495e-05, + "loss": 0.18, "step": 26470 }, { "epoch": 1.24, - "learning_rate": 1.761145750316441e-05, - "loss": 0.2489, + "learning_rate": 2.7615184185480475e-05, + "loss": 0.1979, "step": 26475 }, { "epoch": 1.24, - "learning_rate": 1.7610988701889273e-05, - "loss": 0.3256, + "learning_rate": 2.7614716115644455e-05, + "loss": 0.2328, "step": 26480 }, { "epoch": 1.24, - "learning_rate": 1.7610519900614133e-05, - "loss": 0.2399, + "learning_rate": 2.7614248045808438e-05, + "loss": 0.3263, "step": 26485 }, { "epoch": 1.24, - "learning_rate": 1.7610051099338993e-05, - "loss": 0.0681, + "learning_rate": 2.7613779975972415e-05, + "loss": 0.0549, "step": 26490 }, { "epoch": 1.24, - "learning_rate": 1.7609582298063853e-05, - "loss": 0.1018, + "learning_rate": 2.7613311906136394e-05, + "loss": 0.0571, "step": 26495 }, { "epoch": 1.24, - "learning_rate": 1.7609113496788713e-05, - "loss": 0.09, + "learning_rate": 2.7612843836300374e-05, + "loss": 0.0743, "step": 26500 }, { "epoch": 1.24, - "learning_rate": 1.7608644695513572e-05, - "loss": 0.1208, + "learning_rate": 2.7612375766464358e-05, + "loss": 0.0909, "step": 26505 }, { "epoch": 1.24, - "learning_rate": 1.7608175894238432e-05, - "loss": 0.1643, + "learning_rate": 2.7611907696628337e-05, + "loss": 0.1282, "step": 26510 }, { "epoch": 1.24, - "learning_rate": 1.7607707092963296e-05, - "loss": 0.2492, + "learning_rate": 2.7611439626792317e-05, + "loss": 0.1184, "step": 26515 }, { "epoch": 1.24, - "learning_rate": 1.7607238291688156e-05, - "loss": 0.2035, + "learning_rate": 2.7610971556956297e-05, + "loss": 0.1973, "step": 26520 }, { "epoch": 1.24, - "learning_rate": 1.7606769490413016e-05, - "loss": 0.1378, + "learning_rate": 2.761050348712028e-05, + "loss": 0.1799, "step": 26525 }, { "epoch": 1.24, - "learning_rate": 1.7606300689137876e-05, - "loss": 0.3092, + "learning_rate": 2.761003541728426e-05, + "loss": 0.3093, "step": 26530 }, { "epoch": 1.24, - "learning_rate": 1.7605831887862735e-05, - "loss": 0.2494, + "learning_rate": 2.760956734744824e-05, + "loss": 0.3023, "step": 26535 }, { "epoch": 1.24, - "learning_rate": 1.7605363086587595e-05, - "loss": 0.0539, + "learning_rate": 2.7609099277612223e-05, + "loss": 0.054, "step": 26540 }, { "epoch": 1.24, - "learning_rate": 1.760489428531246e-05, - "loss": 0.1035, + "learning_rate": 2.7608631207776203e-05, + "loss": 0.0806, "step": 26545 }, { "epoch": 1.24, - "learning_rate": 1.760442548403732e-05, - "loss": 0.079, + "learning_rate": 2.7608163137940183e-05, + "loss": 0.0825, "step": 26550 }, { "epoch": 1.24, - "learning_rate": 1.760395668276218e-05, - "loss": 0.0964, + "learning_rate": 2.760769506810416e-05, + "loss": 0.0995, "step": 26555 }, { "epoch": 1.24, - "learning_rate": 1.760348788148704e-05, - "loss": 0.1856, + "learning_rate": 2.7607226998268142e-05, + "loss": 0.0734, "step": 26560 }, { "epoch": 1.24, - "learning_rate": 1.7603019080211902e-05, - "loss": 0.1496, + "learning_rate": 2.7606758928432122e-05, + "loss": 0.1258, "step": 26565 }, { "epoch": 1.24, - "learning_rate": 1.7602550278936762e-05, - "loss": 0.1282, + "learning_rate": 2.7606290858596102e-05, + "loss": 0.2489, "step": 26570 }, { "epoch": 1.24, - "learning_rate": 1.7602081477661622e-05, - "loss": 0.2813, + "learning_rate": 2.7605822788760082e-05, + "loss": 0.2083, "step": 26575 }, { "epoch": 1.24, - "learning_rate": 1.760161267638648e-05, - "loss": 0.3293, + "learning_rate": 2.7605354718924065e-05, + "loss": 0.3093, "step": 26580 }, { "epoch": 1.24, - "learning_rate": 1.760114387511134e-05, - "loss": 0.1481, + "learning_rate": 2.7604886649088045e-05, + "loss": 0.3207, "step": 26585 }, { "epoch": 1.24, - "learning_rate": 1.76006750738362e-05, - "loss": 0.0804, + "learning_rate": 2.7604418579252025e-05, + "loss": 0.0797, "step": 26590 }, { "epoch": 1.24, - "learning_rate": 1.760020627256106e-05, - "loss": 0.0621, + "learning_rate": 2.7603950509416008e-05, + "loss": 0.1105, "step": 26595 }, { "epoch": 1.24, - "learning_rate": 1.759973747128592e-05, - "loss": 0.1511, + "learning_rate": 2.7603482439579988e-05, + "loss": 0.0829, "step": 26600 }, { "epoch": 1.24, - "learning_rate": 1.759926867001078e-05, - "loss": 0.1558, + "learning_rate": 2.7603014369743967e-05, + "loss": 0.097, "step": 26605 }, { "epoch": 1.24, - "learning_rate": 1.7598799868735645e-05, - "loss": 0.1994, + "learning_rate": 2.7602546299907947e-05, + "loss": 0.0403, "step": 26610 }, { "epoch": 1.24, - "learning_rate": 1.7598331067460505e-05, - "loss": 0.1426, + "learning_rate": 2.760207823007193e-05, + "loss": 0.1353, "step": 26615 }, { "epoch": 1.24, - "learning_rate": 1.7597862266185364e-05, - "loss": 0.2883, + "learning_rate": 2.7601610160235907e-05, + "loss": 0.2431, "step": 26620 }, { "epoch": 1.24, - "learning_rate": 1.7597393464910228e-05, - "loss": 0.1739, + "learning_rate": 2.7601142090399887e-05, + "loss": 0.2593, "step": 26625 }, { "epoch": 1.24, - "learning_rate": 1.7596924663635088e-05, - "loss": 0.3145, + "learning_rate": 2.7600674020563867e-05, + "loss": 0.4024, "step": 26630 }, { "epoch": 1.24, - "learning_rate": 1.7596455862359948e-05, - "loss": 0.3456, + "learning_rate": 2.760020595072785e-05, + "loss": 0.2005, "step": 26635 }, { "epoch": 1.24, - "learning_rate": 1.7595987061084808e-05, - "loss": 0.0483, + "learning_rate": 2.759973788089183e-05, + "loss": 0.0299, "step": 26640 }, { "epoch": 1.24, - "learning_rate": 1.7595518259809668e-05, - "loss": 0.0308, + "learning_rate": 2.759926981105581e-05, + "loss": 0.0994, "step": 26645 }, { "epoch": 1.24, - "learning_rate": 1.7595049458534527e-05, - "loss": 0.0719, + "learning_rate": 2.759880174121979e-05, + "loss": 0.0605, "step": 26650 }, { "epoch": 1.24, - "learning_rate": 1.759458065725939e-05, - "loss": 0.0575, + "learning_rate": 2.7598333671383772e-05, + "loss": 0.1023, "step": 26655 }, { "epoch": 1.24, - "learning_rate": 1.759411185598425e-05, - "loss": 0.123, + "learning_rate": 2.7597865601547752e-05, + "loss": 0.0889, "step": 26660 }, { "epoch": 1.24, - "learning_rate": 1.759364305470911e-05, - "loss": 0.17, + "learning_rate": 2.7597397531711732e-05, + "loss": 0.1332, "step": 26665 }, { "epoch": 1.24, - "learning_rate": 1.759317425343397e-05, - "loss": 0.2294, + "learning_rate": 2.7596929461875715e-05, + "loss": 0.2111, "step": 26670 }, { "epoch": 1.24, - "learning_rate": 1.759270545215883e-05, - "loss": 0.357, + "learning_rate": 2.7596461392039695e-05, + "loss": 0.2561, "step": 26675 }, { "epoch": 1.24, - "learning_rate": 1.759223665088369e-05, - "loss": 0.32, + "learning_rate": 2.759599332220367e-05, + "loss": 0.319, "step": 26680 }, { "epoch": 1.25, - "learning_rate": 1.759176784960855e-05, - "loss": 0.2794, + "learning_rate": 2.759552525236765e-05, + "loss": 0.2339, "step": 26685 }, { "epoch": 1.25, - "learning_rate": 1.7591299048333414e-05, - "loss": 0.0375, + "learning_rate": 2.7595057182531634e-05, + "loss": 0.0774, "step": 26690 }, { "epoch": 1.25, - "learning_rate": 1.7590830247058274e-05, - "loss": 0.0531, + "learning_rate": 2.7594589112695614e-05, + "loss": 0.044, "step": 26695 }, { "epoch": 1.25, - "learning_rate": 1.7590361445783134e-05, - "loss": 0.1184, + "learning_rate": 2.7594121042859594e-05, + "loss": 0.0623, "step": 26700 }, { "epoch": 1.25, - "learning_rate": 1.7589892644507997e-05, - "loss": 0.0585, + "learning_rate": 2.7593652973023574e-05, + "loss": 0.132, "step": 26705 }, { "epoch": 1.25, - "learning_rate": 1.7589423843232857e-05, - "loss": 0.1184, + "learning_rate": 2.7593184903187557e-05, + "loss": 0.1076, "step": 26710 }, { "epoch": 1.25, - "learning_rate": 1.7588955041957717e-05, - "loss": 0.2323, + "learning_rate": 2.7592716833351537e-05, + "loss": 0.0853, "step": 26715 }, { "epoch": 1.25, - "learning_rate": 1.7588486240682577e-05, - "loss": 0.174, + "learning_rate": 2.7592248763515517e-05, + "loss": 0.2748, "step": 26720 }, { "epoch": 1.25, - "learning_rate": 1.7588017439407437e-05, - "loss": 0.2886, + "learning_rate": 2.75917806936795e-05, + "loss": 0.2715, "step": 26725 }, { "epoch": 1.25, - "learning_rate": 1.7587548638132297e-05, - "loss": 0.3716, + "learning_rate": 2.759131262384348e-05, + "loss": 0.1765, "step": 26730 }, { "epoch": 1.25, - "learning_rate": 1.7587079836857157e-05, - "loss": 0.3872, + "learning_rate": 2.759084455400746e-05, + "loss": 0.3085, "step": 26735 }, { "epoch": 1.25, - "learning_rate": 1.7586611035582016e-05, - "loss": 0.0421, + "learning_rate": 2.759037648417144e-05, + "loss": 0.0501, "step": 26740 }, { "epoch": 1.25, - "learning_rate": 1.7586142234306876e-05, - "loss": 0.1046, + "learning_rate": 2.758990841433542e-05, + "loss": 0.0654, "step": 26745 }, { "epoch": 1.25, - "learning_rate": 1.758567343303174e-05, - "loss": 0.081, + "learning_rate": 2.75894403444994e-05, + "loss": 0.0644, "step": 26750 }, { "epoch": 1.25, - "learning_rate": 1.75852046317566e-05, - "loss": 0.0735, + "learning_rate": 2.758897227466338e-05, + "loss": 0.0433, "step": 26755 }, { "epoch": 1.25, - "learning_rate": 1.758473583048146e-05, - "loss": 0.1882, + "learning_rate": 2.758850420482736e-05, + "loss": 0.1507, "step": 26760 }, { "epoch": 1.25, - "learning_rate": 1.758426702920632e-05, - "loss": 0.1912, + "learning_rate": 2.7588036134991342e-05, + "loss": 0.1453, "step": 26765 }, { "epoch": 1.25, - "learning_rate": 1.7583798227931183e-05, - "loss": 0.1503, + "learning_rate": 2.7587568065155322e-05, + "loss": 0.1428, "step": 26770 }, { "epoch": 1.25, - "learning_rate": 1.7583329426656043e-05, - "loss": 0.2477, + "learning_rate": 2.75870999953193e-05, + "loss": 0.1616, "step": 26775 }, { "epoch": 1.25, - "learning_rate": 1.7582860625380903e-05, - "loss": 0.3636, + "learning_rate": 2.7586631925483285e-05, + "loss": 0.2276, "step": 26780 }, { "epoch": 1.25, - "learning_rate": 1.7582391824105763e-05, - "loss": 0.3664, + "learning_rate": 2.7586163855647265e-05, + "loss": 0.2257, "step": 26785 }, { "epoch": 1.25, - "learning_rate": 1.7581923022830623e-05, - "loss": 0.0495, + "learning_rate": 2.7585695785811244e-05, + "loss": 0.0895, "step": 26790 }, { "epoch": 1.25, - "learning_rate": 1.7581454221555486e-05, - "loss": 0.1003, + "learning_rate": 2.7585227715975224e-05, + "loss": 0.1216, "step": 26795 }, { "epoch": 1.25, - "learning_rate": 1.7580985420280346e-05, - "loss": 0.0936, + "learning_rate": 2.7584759646139207e-05, + "loss": 0.0717, "step": 26800 }, { "epoch": 1.25, - "learning_rate": 1.7580516619005206e-05, - "loss": 0.1304, + "learning_rate": 2.7584291576303187e-05, + "loss": 0.0901, "step": 26805 }, { "epoch": 1.25, - "learning_rate": 1.7580047817730066e-05, - "loss": 0.1174, + "learning_rate": 2.7583823506467164e-05, + "loss": 0.1055, "step": 26810 }, { "epoch": 1.25, - "learning_rate": 1.7579579016454926e-05, - "loss": 0.1183, + "learning_rate": 2.7583355436631143e-05, + "loss": 0.1052, "step": 26815 }, { "epoch": 1.25, - "learning_rate": 1.7579110215179786e-05, - "loss": 0.3376, + "learning_rate": 2.7582887366795127e-05, + "loss": 0.1673, "step": 26820 }, { "epoch": 1.25, - "learning_rate": 1.7578641413904645e-05, - "loss": 0.212, + "learning_rate": 2.7582419296959107e-05, + "loss": 0.2426, "step": 26825 }, { "epoch": 1.25, - "learning_rate": 1.757817261262951e-05, - "loss": 0.3103, + "learning_rate": 2.7581951227123086e-05, + "loss": 0.3206, "step": 26830 }, { "epoch": 1.25, - "learning_rate": 1.757770381135437e-05, - "loss": 0.204, + "learning_rate": 2.7581483157287066e-05, + "loss": 0.3846, "step": 26835 }, { "epoch": 1.25, - "learning_rate": 1.757723501007923e-05, - "loss": 0.0467, + "learning_rate": 2.758101508745105e-05, + "loss": 0.0558, "step": 26840 }, { "epoch": 1.25, - "learning_rate": 1.757676620880409e-05, - "loss": 0.0806, + "learning_rate": 2.758054701761503e-05, + "loss": 0.133, "step": 26845 }, { "epoch": 1.25, - "learning_rate": 1.7576297407528952e-05, - "loss": 0.1039, + "learning_rate": 2.758007894777901e-05, + "loss": 0.0974, "step": 26850 }, { "epoch": 1.25, - "learning_rate": 1.7575828606253812e-05, - "loss": 0.1226, + "learning_rate": 2.7579610877942992e-05, + "loss": 0.1085, "step": 26855 }, { "epoch": 1.25, - "learning_rate": 1.7575359804978672e-05, - "loss": 0.0761, + "learning_rate": 2.7579142808106972e-05, + "loss": 0.1434, "step": 26860 }, { "epoch": 1.25, - "learning_rate": 1.7574891003703532e-05, - "loss": 0.197, + "learning_rate": 2.7578674738270952e-05, + "loss": 0.159, "step": 26865 }, { "epoch": 1.25, - "learning_rate": 1.757442220242839e-05, - "loss": 0.2467, + "learning_rate": 2.7578206668434928e-05, + "loss": 0.2478, "step": 26870 }, { "epoch": 1.25, - "learning_rate": 1.757395340115325e-05, - "loss": 0.303, + "learning_rate": 2.757773859859891e-05, + "loss": 0.2168, "step": 26875 }, { "epoch": 1.25, - "learning_rate": 1.757348459987811e-05, - "loss": 0.4572, + "learning_rate": 2.757727052876289e-05, + "loss": 0.5468, "step": 26880 }, { "epoch": 1.25, - "learning_rate": 1.757301579860297e-05, - "loss": 0.3963, + "learning_rate": 2.757680245892687e-05, + "loss": 0.2508, "step": 26885 }, { "epoch": 1.25, - "learning_rate": 1.7572546997327835e-05, - "loss": 0.0959, + "learning_rate": 2.757633438909085e-05, + "loss": 0.0252, "step": 26890 }, { "epoch": 1.25, - "learning_rate": 1.7572078196052695e-05, - "loss": 0.147, + "learning_rate": 2.7575866319254834e-05, + "loss": 0.0561, "step": 26895 }, { "epoch": 1.26, - "learning_rate": 1.7571609394777555e-05, - "loss": 0.078, + "learning_rate": 2.7575398249418814e-05, + "loss": 0.096, "step": 26900 }, { "epoch": 1.26, - "learning_rate": 1.7571140593502415e-05, - "loss": 0.1146, + "learning_rate": 2.7574930179582794e-05, + "loss": 0.1182, "step": 26905 }, { "epoch": 1.26, - "learning_rate": 1.7570671792227278e-05, - "loss": 0.1608, + "learning_rate": 2.7574462109746777e-05, + "loss": 0.227, "step": 26910 }, { "epoch": 1.26, - "learning_rate": 1.7570202990952138e-05, - "loss": 0.1956, + "learning_rate": 2.7573994039910757e-05, + "loss": 0.2021, "step": 26915 }, { "epoch": 1.26, - "learning_rate": 1.7569734189676998e-05, - "loss": 0.2593, + "learning_rate": 2.7573525970074737e-05, + "loss": 0.2733, "step": 26920 }, { "epoch": 1.26, - "learning_rate": 1.7569265388401858e-05, - "loss": 0.3484, + "learning_rate": 2.7573057900238716e-05, + "loss": 0.284, "step": 26925 }, { "epoch": 1.26, - "learning_rate": 1.7568796587126718e-05, - "loss": 0.2616, + "learning_rate": 2.75725898304027e-05, + "loss": 0.4521, "step": 26930 }, { "epoch": 1.26, - "learning_rate": 1.756832778585158e-05, - "loss": 0.3386, + "learning_rate": 2.7572121760566676e-05, + "loss": 0.2281, "step": 26935 }, { "epoch": 1.26, - "learning_rate": 1.756785898457644e-05, - "loss": 0.0567, + "learning_rate": 2.7571653690730656e-05, + "loss": 0.1074, "step": 26940 }, { "epoch": 1.26, - "learning_rate": 1.75673901833013e-05, - "loss": 0.0479, + "learning_rate": 2.7571185620894636e-05, + "loss": 0.1161, "step": 26945 }, { "epoch": 1.26, - "learning_rate": 1.756692138202616e-05, - "loss": 0.1017, + "learning_rate": 2.757071755105862e-05, + "loss": 0.0888, "step": 26950 }, { "epoch": 1.26, - "learning_rate": 1.756645258075102e-05, - "loss": 0.1064, + "learning_rate": 2.75702494812226e-05, + "loss": 0.1003, "step": 26955 }, { "epoch": 1.26, - "learning_rate": 1.756598377947588e-05, - "loss": 0.1581, + "learning_rate": 2.756978141138658e-05, + "loss": 0.1212, "step": 26960 }, { "epoch": 1.26, - "learning_rate": 1.756551497820074e-05, - "loss": 0.0933, + "learning_rate": 2.7569313341550562e-05, + "loss": 0.1769, "step": 26965 }, { "epoch": 1.26, - "learning_rate": 1.75650461769256e-05, - "loss": 0.1222, + "learning_rate": 2.756884527171454e-05, + "loss": 0.2336, "step": 26970 }, { "epoch": 1.26, - "learning_rate": 1.7564577375650464e-05, - "loss": 0.275, + "learning_rate": 2.756837720187852e-05, + "loss": 0.2811, "step": 26975 }, { "epoch": 1.26, - "learning_rate": 1.7564108574375324e-05, - "loss": 0.3865, + "learning_rate": 2.75679091320425e-05, + "loss": 0.3899, "step": 26980 }, { "epoch": 1.26, - "learning_rate": 1.7563639773100184e-05, - "loss": 0.3438, + "learning_rate": 2.7567441062206484e-05, + "loss": 0.2493, "step": 26985 }, { "epoch": 1.26, - "learning_rate": 1.7563170971825047e-05, - "loss": 0.0642, + "learning_rate": 2.7566972992370464e-05, + "loss": 0.0727, "step": 26990 }, { "epoch": 1.26, - "learning_rate": 1.7562702170549907e-05, - "loss": 0.0703, + "learning_rate": 2.756650492253444e-05, + "loss": 0.0782, "step": 26995 }, { "epoch": 1.26, - "learning_rate": 1.7562233369274767e-05, - "loss": 0.0959, + "learning_rate": 2.756603685269842e-05, + "loss": 0.0617, "step": 27000 }, { "epoch": 1.26, - "learning_rate": 1.7561764567999627e-05, - "loss": 0.0822, + "learning_rate": 2.7565568782862404e-05, + "loss": 0.1509, "step": 27005 }, { "epoch": 1.26, - "learning_rate": 1.7561295766724487e-05, - "loss": 0.1104, + "learning_rate": 2.7565100713026383e-05, + "loss": 0.1272, "step": 27010 }, { "epoch": 1.26, - "learning_rate": 1.7560826965449347e-05, - "loss": 0.1466, + "learning_rate": 2.7564632643190363e-05, + "loss": 0.1887, "step": 27015 }, { "epoch": 1.26, - "learning_rate": 1.7560358164174207e-05, - "loss": 0.2263, + "learning_rate": 2.7564164573354346e-05, + "loss": 0.1928, "step": 27020 }, { "epoch": 1.26, - "learning_rate": 1.7559889362899067e-05, - "loss": 0.3172, + "learning_rate": 2.7563696503518326e-05, + "loss": 0.206, "step": 27025 }, { "epoch": 1.26, - "learning_rate": 1.755942056162393e-05, - "loss": 0.3317, + "learning_rate": 2.7563228433682306e-05, + "loss": 0.3126, "step": 27030 }, { "epoch": 1.26, - "learning_rate": 1.755895176034879e-05, - "loss": 0.3048, + "learning_rate": 2.7562760363846286e-05, + "loss": 0.2151, "step": 27035 }, { "epoch": 1.26, - "learning_rate": 1.755848295907365e-05, - "loss": 0.0425, + "learning_rate": 2.756229229401027e-05, + "loss": 0.0827, "step": 27040 }, { "epoch": 1.26, - "learning_rate": 1.755801415779851e-05, - "loss": 0.077, + "learning_rate": 2.756182422417425e-05, + "loss": 0.0308, "step": 27045 }, { "epoch": 1.26, - "learning_rate": 1.755754535652337e-05, - "loss": 0.1112, + "learning_rate": 2.756135615433823e-05, + "loss": 0.0792, "step": 27050 }, { "epoch": 1.26, - "learning_rate": 1.7557076555248233e-05, - "loss": 0.0855, + "learning_rate": 2.756088808450221e-05, + "loss": 0.1393, "step": 27055 }, { "epoch": 1.26, - "learning_rate": 1.7556607753973093e-05, - "loss": 0.1392, + "learning_rate": 2.756042001466619e-05, + "loss": 0.096, "step": 27060 }, { "epoch": 1.26, - "learning_rate": 1.7556138952697953e-05, - "loss": 0.1075, + "learning_rate": 2.7559951944830168e-05, + "loss": 0.1089, "step": 27065 }, { "epoch": 1.26, - "learning_rate": 1.7555670151422813e-05, - "loss": 0.2517, + "learning_rate": 2.7559483874994148e-05, + "loss": 0.1666, "step": 27070 }, { "epoch": 1.26, - "learning_rate": 1.7555201350147676e-05, - "loss": 0.3461, + "learning_rate": 2.7559015805158128e-05, + "loss": 0.2164, "step": 27075 }, { "epoch": 1.26, - "learning_rate": 1.7554732548872536e-05, - "loss": 0.4033, + "learning_rate": 2.755854773532211e-05, + "loss": 0.3704, "step": 27080 }, { "epoch": 1.26, - "learning_rate": 1.7554263747597396e-05, - "loss": 0.3499, + "learning_rate": 2.755807966548609e-05, + "loss": 0.218, "step": 27085 }, { "epoch": 1.26, - "learning_rate": 1.7553794946322256e-05, - "loss": 0.0371, + "learning_rate": 2.755761159565007e-05, + "loss": 0.0739, "step": 27090 }, { "epoch": 1.26, - "learning_rate": 1.7553326145047116e-05, - "loss": 0.0903, + "learning_rate": 2.7557143525814054e-05, + "loss": 0.1344, "step": 27095 }, { "epoch": 1.26, - "learning_rate": 1.7552857343771976e-05, - "loss": 0.0927, + "learning_rate": 2.7556675455978034e-05, + "loss": 0.1578, "step": 27100 }, { "epoch": 1.26, - "learning_rate": 1.7552388542496836e-05, - "loss": 0.1295, + "learning_rate": 2.7556207386142014e-05, + "loss": 0.2067, "step": 27105 }, { "epoch": 1.26, - "learning_rate": 1.7551919741221696e-05, - "loss": 0.1342, + "learning_rate": 2.7555739316305993e-05, + "loss": 0.1024, "step": 27110 }, { "epoch": 1.27, - "learning_rate": 1.7551450939946556e-05, - "loss": 0.2217, + "learning_rate": 2.7555271246469977e-05, + "loss": 0.1475, "step": 27115 }, { "epoch": 1.27, - "learning_rate": 1.755098213867142e-05, - "loss": 0.1572, + "learning_rate": 2.7554803176633956e-05, + "loss": 0.1925, "step": 27120 }, { "epoch": 1.27, - "learning_rate": 1.755051333739628e-05, - "loss": 0.2619, + "learning_rate": 2.7554335106797933e-05, + "loss": 0.3717, "step": 27125 }, { "epoch": 1.27, - "learning_rate": 1.755004453612114e-05, - "loss": 0.303, + "learning_rate": 2.7553867036961913e-05, + "loss": 0.3782, "step": 27130 }, { "epoch": 1.27, - "learning_rate": 1.7549575734846002e-05, - "loss": 0.3301, + "learning_rate": 2.7553398967125896e-05, + "loss": 0.2524, "step": 27135 }, { "epoch": 1.27, - "learning_rate": 1.7549106933570862e-05, - "loss": 0.0658, + "learning_rate": 2.7552930897289876e-05, + "loss": 0.0635, "step": 27140 }, { "epoch": 1.27, - "learning_rate": 1.7548638132295722e-05, - "loss": 0.0525, + "learning_rate": 2.7552462827453855e-05, + "loss": 0.0427, "step": 27145 }, { "epoch": 1.27, - "learning_rate": 1.7548169331020582e-05, - "loss": 0.1088, + "learning_rate": 2.755199475761784e-05, + "loss": 0.0806, "step": 27150 }, { "epoch": 1.27, - "learning_rate": 1.7547700529745442e-05, - "loss": 0.1151, + "learning_rate": 2.755152668778182e-05, + "loss": 0.1157, "step": 27155 }, { "epoch": 1.27, - "learning_rate": 1.7547231728470302e-05, - "loss": 0.1102, + "learning_rate": 2.75510586179458e-05, + "loss": 0.1437, "step": 27160 }, { "epoch": 1.27, - "learning_rate": 1.7546762927195165e-05, - "loss": 0.1528, + "learning_rate": 2.7550590548109778e-05, + "loss": 0.1757, "step": 27165 }, { "epoch": 1.27, - "learning_rate": 1.7546294125920025e-05, - "loss": 0.1441, + "learning_rate": 2.755012247827376e-05, + "loss": 0.1635, "step": 27170 }, { "epoch": 1.27, - "learning_rate": 1.7545825324644885e-05, - "loss": 0.2677, + "learning_rate": 2.754965440843774e-05, + "loss": 0.2316, "step": 27175 }, { "epoch": 1.27, - "learning_rate": 1.7545356523369745e-05, - "loss": 0.3605, + "learning_rate": 2.754918633860172e-05, + "loss": 0.3009, "step": 27180 }, { "epoch": 1.27, - "learning_rate": 1.7544887722094605e-05, - "loss": 0.2491, + "learning_rate": 2.7548718268765697e-05, + "loss": 0.2999, "step": 27185 }, { "epoch": 1.27, - "learning_rate": 1.7544418920819465e-05, - "loss": 0.0406, + "learning_rate": 2.754825019892968e-05, + "loss": 0.1279, "step": 27190 }, { "epoch": 1.27, - "learning_rate": 1.7543950119544325e-05, - "loss": 0.0386, + "learning_rate": 2.754778212909366e-05, + "loss": 0.0824, "step": 27195 }, { "epoch": 1.27, - "learning_rate": 1.7543481318269188e-05, - "loss": 0.0862, + "learning_rate": 2.754731405925764e-05, + "loss": 0.0984, "step": 27200 }, { "epoch": 1.27, - "learning_rate": 1.7543012516994048e-05, - "loss": 0.1498, + "learning_rate": 2.7546845989421623e-05, + "loss": 0.0874, "step": 27205 }, { "epoch": 1.27, - "learning_rate": 1.7542543715718908e-05, - "loss": 0.098, + "learning_rate": 2.7546377919585603e-05, + "loss": 0.0781, "step": 27210 }, { "epoch": 1.27, - "learning_rate": 1.754207491444377e-05, - "loss": 0.1449, + "learning_rate": 2.7545909849749583e-05, + "loss": 0.0944, "step": 27215 }, { "epoch": 1.27, - "learning_rate": 1.754160611316863e-05, - "loss": 0.1432, + "learning_rate": 2.7545441779913563e-05, + "loss": 0.1486, "step": 27220 }, { "epoch": 1.27, - "learning_rate": 1.754113731189349e-05, - "loss": 0.2482, + "learning_rate": 2.7544973710077546e-05, + "loss": 0.2477, "step": 27225 }, { "epoch": 1.27, - "learning_rate": 1.754066851061835e-05, - "loss": 0.5192, + "learning_rate": 2.7544505640241526e-05, + "loss": 0.3283, "step": 27230 }, { "epoch": 1.27, - "learning_rate": 1.754019970934321e-05, - "loss": 0.2192, + "learning_rate": 2.7544037570405506e-05, + "loss": 0.3542, "step": 27235 }, { "epoch": 1.27, - "learning_rate": 1.753973090806807e-05, - "loss": 0.0538, + "learning_rate": 2.7543569500569486e-05, + "loss": 0.0884, "step": 27240 }, { "epoch": 1.27, - "learning_rate": 1.753926210679293e-05, - "loss": 0.0637, + "learning_rate": 2.754310143073347e-05, + "loss": 0.0939, "step": 27245 }, { "epoch": 1.27, - "learning_rate": 1.753879330551779e-05, - "loss": 0.0584, + "learning_rate": 2.7542633360897445e-05, + "loss": 0.1591, "step": 27250 }, { "epoch": 1.27, - "learning_rate": 1.753832450424265e-05, - "loss": 0.1314, + "learning_rate": 2.7542165291061425e-05, + "loss": 0.0898, "step": 27255 }, { "epoch": 1.27, - "learning_rate": 1.7537855702967514e-05, - "loss": 0.0987, + "learning_rate": 2.7541697221225405e-05, + "loss": 0.1615, "step": 27260 }, { "epoch": 1.27, - "learning_rate": 1.7537386901692374e-05, - "loss": 0.101, + "learning_rate": 2.7541229151389388e-05, + "loss": 0.1586, "step": 27265 }, { "epoch": 1.27, - "learning_rate": 1.7536918100417234e-05, - "loss": 0.117, + "learning_rate": 2.7540761081553368e-05, + "loss": 0.1517, "step": 27270 }, { "epoch": 1.27, - "learning_rate": 1.7536449299142094e-05, - "loss": 0.2577, + "learning_rate": 2.7540293011717348e-05, + "loss": 0.1381, "step": 27275 }, { "epoch": 1.27, - "learning_rate": 1.7535980497866957e-05, - "loss": 0.3543, + "learning_rate": 2.753982494188133e-05, + "loss": 0.3747, "step": 27280 }, { "epoch": 1.27, - "learning_rate": 1.7535511696591817e-05, - "loss": 0.2811, + "learning_rate": 2.753935687204531e-05, + "loss": 0.3374, "step": 27285 }, { "epoch": 1.27, - "learning_rate": 1.7535042895316677e-05, - "loss": 0.0357, + "learning_rate": 2.753888880220929e-05, + "loss": 0.0837, "step": 27290 }, { "epoch": 1.27, - "learning_rate": 1.7534574094041537e-05, - "loss": 0.0465, + "learning_rate": 2.753842073237327e-05, + "loss": 0.097, "step": 27295 }, { "epoch": 1.27, - "learning_rate": 1.7534105292766397e-05, - "loss": 0.1089, + "learning_rate": 2.7537952662537254e-05, + "loss": 0.118, "step": 27300 }, { "epoch": 1.27, - "learning_rate": 1.753363649149126e-05, - "loss": 0.1224, + "learning_rate": 2.7537484592701233e-05, + "loss": 0.0809, "step": 27305 }, { "epoch": 1.27, - "learning_rate": 1.753316769021612e-05, - "loss": 0.1423, + "learning_rate": 2.7537016522865213e-05, + "loss": 0.1227, "step": 27310 }, { "epoch": 1.27, - "learning_rate": 1.753269888894098e-05, - "loss": 0.1598, + "learning_rate": 2.753654845302919e-05, + "loss": 0.2199, "step": 27315 }, { "epoch": 1.27, - "learning_rate": 1.753223008766584e-05, - "loss": 0.1652, + "learning_rate": 2.7536080383193173e-05, + "loss": 0.2727, "step": 27320 }, { "epoch": 1.28, - "learning_rate": 1.75317612863907e-05, - "loss": 0.2521, + "learning_rate": 2.7535612313357153e-05, + "loss": 0.1952, "step": 27325 }, { "epoch": 1.28, - "learning_rate": 1.753129248511556e-05, - "loss": 0.3785, + "learning_rate": 2.7535144243521132e-05, + "loss": 0.3614, "step": 27330 }, { "epoch": 1.28, - "learning_rate": 1.753082368384042e-05, - "loss": 0.3144, + "learning_rate": 2.7534676173685116e-05, + "loss": 0.2547, "step": 27335 }, { "epoch": 1.28, - "learning_rate": 1.7530354882565283e-05, - "loss": 0.0735, + "learning_rate": 2.7534208103849095e-05, + "loss": 0.0119, "step": 27340 }, { "epoch": 1.28, - "learning_rate": 1.7529886081290143e-05, - "loss": 0.0404, + "learning_rate": 2.7533740034013075e-05, + "loss": 0.0574, "step": 27345 }, { "epoch": 1.28, - "learning_rate": 1.7529417280015003e-05, - "loss": 0.1386, + "learning_rate": 2.7533271964177055e-05, + "loss": 0.0443, "step": 27350 }, { "epoch": 1.28, - "learning_rate": 1.7528948478739863e-05, - "loss": 0.0787, + "learning_rate": 2.753280389434104e-05, + "loss": 0.0841, "step": 27355 }, { "epoch": 1.28, - "learning_rate": 1.7528479677464726e-05, - "loss": 0.1407, + "learning_rate": 2.7532335824505018e-05, + "loss": 0.0898, "step": 27360 }, { "epoch": 1.28, - "learning_rate": 1.7528010876189586e-05, - "loss": 0.1503, + "learning_rate": 2.7531867754668998e-05, + "loss": 0.1118, "step": 27365 }, { "epoch": 1.28, - "learning_rate": 1.7527542074914446e-05, - "loss": 0.2271, + "learning_rate": 2.7531399684832978e-05, + "loss": 0.1412, "step": 27370 }, { "epoch": 1.28, - "learning_rate": 1.7527073273639306e-05, - "loss": 0.2996, + "learning_rate": 2.7530931614996958e-05, + "loss": 0.2171, "step": 27375 }, { "epoch": 1.28, - "learning_rate": 1.7526604472364166e-05, - "loss": 0.4044, + "learning_rate": 2.7530463545160937e-05, + "loss": 0.365, "step": 27380 }, { "epoch": 1.28, - "learning_rate": 1.7526135671089026e-05, - "loss": 0.1628, + "learning_rate": 2.7529995475324917e-05, + "loss": 0.208, "step": 27385 }, { "epoch": 1.28, - "learning_rate": 1.7525666869813886e-05, - "loss": 0.0612, + "learning_rate": 2.75295274054889e-05, + "loss": 0.0513, "step": 27390 }, { "epoch": 1.28, - "learning_rate": 1.7525198068538746e-05, - "loss": 0.0614, + "learning_rate": 2.752905933565288e-05, + "loss": 0.1209, "step": 27395 }, { "epoch": 1.28, - "learning_rate": 1.752472926726361e-05, - "loss": 0.0768, + "learning_rate": 2.752859126581686e-05, + "loss": 0.0542, "step": 27400 }, { "epoch": 1.28, - "learning_rate": 1.752426046598847e-05, - "loss": 0.1372, + "learning_rate": 2.752812319598084e-05, + "loss": 0.1033, "step": 27405 }, { "epoch": 1.28, - "learning_rate": 1.752379166471333e-05, - "loss": 0.1258, + "learning_rate": 2.7527655126144823e-05, + "loss": 0.1326, "step": 27410 }, { "epoch": 1.28, - "learning_rate": 1.752332286343819e-05, - "loss": 0.1211, + "learning_rate": 2.7527187056308803e-05, + "loss": 0.1027, "step": 27415 }, { "epoch": 1.28, - "learning_rate": 1.7522854062163052e-05, - "loss": 0.1808, + "learning_rate": 2.7526718986472783e-05, + "loss": 0.1504, "step": 27420 }, { "epoch": 1.28, - "learning_rate": 1.7522385260887912e-05, - "loss": 0.3216, + "learning_rate": 2.7526250916636763e-05, + "loss": 0.1953, "step": 27425 }, { "epoch": 1.28, - "learning_rate": 1.7521916459612772e-05, - "loss": 0.3866, + "learning_rate": 2.7525782846800746e-05, + "loss": 0.3413, "step": 27430 }, { "epoch": 1.28, - "learning_rate": 1.7521447658337632e-05, - "loss": 0.214, + "learning_rate": 2.7525314776964726e-05, + "loss": 0.3712, "step": 27435 }, { "epoch": 1.28, - "learning_rate": 1.7520978857062492e-05, - "loss": 0.0791, + "learning_rate": 2.7524846707128702e-05, + "loss": 0.0262, "step": 27440 }, { "epoch": 1.28, - "learning_rate": 1.7520510055787355e-05, - "loss": 0.0851, + "learning_rate": 2.7524378637292682e-05, + "loss": 0.0384, "step": 27445 }, { "epoch": 1.28, - "learning_rate": 1.7520041254512215e-05, - "loss": 0.1055, + "learning_rate": 2.7523910567456665e-05, + "loss": 0.0576, "step": 27450 }, { "epoch": 1.28, - "learning_rate": 1.7519572453237075e-05, - "loss": 0.0761, + "learning_rate": 2.7523442497620645e-05, + "loss": 0.0831, "step": 27455 }, { "epoch": 1.28, - "learning_rate": 1.7519103651961935e-05, - "loss": 0.1543, + "learning_rate": 2.7522974427784625e-05, + "loss": 0.1302, "step": 27460 }, { "epoch": 1.28, - "learning_rate": 1.7518634850686795e-05, - "loss": 0.1972, + "learning_rate": 2.7522506357948608e-05, + "loss": 0.1653, "step": 27465 }, { "epoch": 1.28, - "learning_rate": 1.7518166049411655e-05, - "loss": 0.2102, + "learning_rate": 2.7522038288112588e-05, + "loss": 0.1907, "step": 27470 }, { "epoch": 1.28, - "learning_rate": 1.7517697248136515e-05, - "loss": 0.3063, + "learning_rate": 2.7521570218276568e-05, + "loss": 0.2658, "step": 27475 }, { "epoch": 1.28, - "learning_rate": 1.7517228446861375e-05, - "loss": 0.4658, + "learning_rate": 2.7521102148440547e-05, + "loss": 0.3106, "step": 27480 }, { "epoch": 1.28, - "learning_rate": 1.7516759645586238e-05, - "loss": 0.2809, + "learning_rate": 2.752063407860453e-05, + "loss": 0.331, "step": 27485 }, { "epoch": 1.28, - "learning_rate": 1.7516290844311098e-05, - "loss": 0.0563, + "learning_rate": 2.752016600876851e-05, + "loss": 0.0404, "step": 27490 }, { "epoch": 1.28, - "learning_rate": 1.7515822043035958e-05, - "loss": 0.077, + "learning_rate": 2.751969793893249e-05, + "loss": 0.0581, "step": 27495 }, { "epoch": 1.28, - "learning_rate": 1.751535324176082e-05, - "loss": 0.093, + "learning_rate": 2.751922986909647e-05, + "loss": 0.1581, "step": 27500 }, { "epoch": 1.28, - "learning_rate": 1.751488444048568e-05, - "loss": 0.11, + "learning_rate": 2.751876179926045e-05, + "loss": 0.1493, "step": 27505 }, { "epoch": 1.28, - "learning_rate": 1.751441563921054e-05, - "loss": 0.1683, + "learning_rate": 2.751829372942443e-05, + "loss": 0.102, "step": 27510 }, { "epoch": 1.28, - "learning_rate": 1.75139468379354e-05, - "loss": 0.1233, + "learning_rate": 2.751782565958841e-05, + "loss": 0.1639, "step": 27515 }, { "epoch": 1.28, - "learning_rate": 1.751347803666026e-05, - "loss": 0.207, + "learning_rate": 2.7517357589752393e-05, + "loss": 0.0845, "step": 27520 }, { "epoch": 1.28, - "learning_rate": 1.751300923538512e-05, - "loss": 0.3287, + "learning_rate": 2.7516889519916372e-05, + "loss": 0.2207, "step": 27525 }, { "epoch": 1.28, - "learning_rate": 1.751254043410998e-05, - "loss": 0.4735, + "learning_rate": 2.7516421450080352e-05, + "loss": 0.3716, "step": 27530 }, { "epoch": 1.28, - "learning_rate": 1.751207163283484e-05, - "loss": 0.2638, + "learning_rate": 2.7515953380244332e-05, + "loss": 0.2371, "step": 27535 }, { "epoch": 1.29, - "learning_rate": 1.7511602831559704e-05, - "loss": 0.0586, + "learning_rate": 2.7515485310408315e-05, + "loss": 0.0889, "step": 27540 }, { "epoch": 1.29, - "learning_rate": 1.7511134030284564e-05, - "loss": 0.0821, + "learning_rate": 2.7515017240572295e-05, + "loss": 0.0583, "step": 27545 }, { "epoch": 1.29, - "learning_rate": 1.7510665229009424e-05, - "loss": 0.1407, + "learning_rate": 2.7514549170736275e-05, + "loss": 0.1231, "step": 27550 }, { "epoch": 1.29, - "learning_rate": 1.7510196427734284e-05, - "loss": 0.1208, + "learning_rate": 2.7514081100900255e-05, + "loss": 0.1132, "step": 27555 }, { "epoch": 1.29, - "learning_rate": 1.7509727626459144e-05, - "loss": 0.1009, + "learning_rate": 2.7513613031064238e-05, + "loss": 0.155, "step": 27560 }, { "epoch": 1.29, - "learning_rate": 1.7509258825184007e-05, - "loss": 0.196, + "learning_rate": 2.7513144961228214e-05, + "loss": 0.1438, "step": 27565 }, { "epoch": 1.29, - "learning_rate": 1.7508790023908867e-05, - "loss": 0.17, + "learning_rate": 2.7512676891392194e-05, + "loss": 0.2467, "step": 27570 }, { "epoch": 1.29, - "learning_rate": 1.7508321222633727e-05, - "loss": 0.3356, + "learning_rate": 2.7512208821556177e-05, + "loss": 0.3728, "step": 27575 }, { "epoch": 1.29, - "learning_rate": 1.7507852421358587e-05, - "loss": 0.3552, + "learning_rate": 2.7511740751720157e-05, + "loss": 0.386, "step": 27580 }, { "epoch": 1.29, - "learning_rate": 1.750738362008345e-05, - "loss": 0.2562, + "learning_rate": 2.7511272681884137e-05, + "loss": 0.2282, "step": 27585 }, { "epoch": 1.29, - "learning_rate": 1.750691481880831e-05, - "loss": 0.0318, + "learning_rate": 2.7510804612048117e-05, + "loss": 0.0428, "step": 27590 }, { "epoch": 1.29, - "learning_rate": 1.750644601753317e-05, - "loss": 0.0508, + "learning_rate": 2.75103365422121e-05, + "loss": 0.0724, "step": 27595 }, { "epoch": 1.29, - "learning_rate": 1.750597721625803e-05, - "loss": 0.1133, + "learning_rate": 2.750986847237608e-05, + "loss": 0.0553, "step": 27600 }, { "epoch": 1.29, - "learning_rate": 1.750550841498289e-05, - "loss": 0.1333, + "learning_rate": 2.750940040254006e-05, + "loss": 0.1176, "step": 27605 }, { "epoch": 1.29, - "learning_rate": 1.750503961370775e-05, - "loss": 0.1305, + "learning_rate": 2.750893233270404e-05, + "loss": 0.1243, "step": 27610 }, { "epoch": 1.29, - "learning_rate": 1.750457081243261e-05, - "loss": 0.1097, + "learning_rate": 2.7508464262868023e-05, + "loss": 0.1583, "step": 27615 }, { "epoch": 1.29, - "learning_rate": 1.750410201115747e-05, - "loss": 0.1544, + "learning_rate": 2.7507996193032003e-05, + "loss": 0.1511, "step": 27620 }, { "epoch": 1.29, - "learning_rate": 1.750363320988233e-05, - "loss": 0.2137, + "learning_rate": 2.7507528123195982e-05, + "loss": 0.211, "step": 27625 }, { "epoch": 1.29, - "learning_rate": 1.7503164408607193e-05, - "loss": 0.4484, + "learning_rate": 2.7507060053359962e-05, + "loss": 0.3121, "step": 27630 }, { "epoch": 1.29, - "learning_rate": 1.7502695607332053e-05, - "loss": 0.2642, + "learning_rate": 2.7506591983523942e-05, + "loss": 0.2638, "step": 27635 }, { "epoch": 1.29, - "learning_rate": 1.7502226806056913e-05, - "loss": 0.0451, + "learning_rate": 2.7506123913687922e-05, + "loss": 0.0356, "step": 27640 }, { "epoch": 1.29, - "learning_rate": 1.7501758004781776e-05, - "loss": 0.0739, + "learning_rate": 2.75056558438519e-05, + "loss": 0.0703, "step": 27645 }, { "epoch": 1.29, - "learning_rate": 1.7501289203506636e-05, - "loss": 0.0869, + "learning_rate": 2.7505187774015885e-05, + "loss": 0.085, "step": 27650 }, { "epoch": 1.29, - "learning_rate": 1.7500820402231496e-05, - "loss": 0.0963, + "learning_rate": 2.7504719704179865e-05, + "loss": 0.0835, "step": 27655 }, { "epoch": 1.29, - "learning_rate": 1.7500351600956356e-05, - "loss": 0.0773, + "learning_rate": 2.7504251634343844e-05, + "loss": 0.0965, "step": 27660 }, { "epoch": 1.29, - "learning_rate": 1.7499882799681216e-05, - "loss": 0.1242, + "learning_rate": 2.7503783564507824e-05, + "loss": 0.1107, "step": 27665 }, { "epoch": 1.29, - "learning_rate": 1.7499413998406076e-05, - "loss": 0.1775, + "learning_rate": 2.7503315494671807e-05, + "loss": 0.1891, "step": 27670 }, { "epoch": 1.29, - "learning_rate": 1.7498945197130936e-05, - "loss": 0.3349, + "learning_rate": 2.7502847424835787e-05, + "loss": 0.3491, "step": 27675 }, { "epoch": 1.29, - "learning_rate": 1.74984763958558e-05, - "loss": 0.2802, + "learning_rate": 2.7502379354999767e-05, + "loss": 0.1841, "step": 27680 }, { "epoch": 1.29, - "learning_rate": 1.749800759458066e-05, - "loss": 0.3577, + "learning_rate": 2.7501911285163747e-05, + "loss": 0.2994, "step": 27685 }, { "epoch": 1.29, - "learning_rate": 1.749753879330552e-05, - "loss": 0.0712, + "learning_rate": 2.7501443215327727e-05, + "loss": 0.0297, "step": 27690 }, { "epoch": 1.29, - "learning_rate": 1.749706999203038e-05, - "loss": 0.0935, + "learning_rate": 2.7500975145491707e-05, + "loss": 0.1179, "step": 27695 }, { "epoch": 1.29, - "learning_rate": 1.749660119075524e-05, - "loss": 0.0822, + "learning_rate": 2.7500507075655686e-05, + "loss": 0.0643, "step": 27700 }, { "epoch": 1.29, - "learning_rate": 1.74961323894801e-05, - "loss": 0.1642, + "learning_rate": 2.750003900581967e-05, + "loss": 0.073, "step": 27705 }, { "epoch": 1.29, - "learning_rate": 1.7495663588204962e-05, - "loss": 0.2227, + "learning_rate": 2.749957093598365e-05, + "loss": 0.0744, "step": 27710 }, { "epoch": 1.29, - "learning_rate": 1.7495194786929822e-05, - "loss": 0.1858, + "learning_rate": 2.749910286614763e-05, + "loss": 0.1319, "step": 27715 }, { "epoch": 1.29, - "learning_rate": 1.7494725985654682e-05, - "loss": 0.199, + "learning_rate": 2.749863479631161e-05, + "loss": 0.3513, "step": 27720 }, { "epoch": 1.29, - "learning_rate": 1.7494257184379545e-05, - "loss": 0.2302, + "learning_rate": 2.7498166726475592e-05, + "loss": 0.1999, "step": 27725 }, { "epoch": 1.29, - "learning_rate": 1.7493788383104405e-05, - "loss": 0.3215, + "learning_rate": 2.7497698656639572e-05, + "loss": 0.5361, "step": 27730 }, { "epoch": 1.29, - "learning_rate": 1.7493319581829265e-05, - "loss": 0.2974, + "learning_rate": 2.7497230586803552e-05, + "loss": 0.2125, "step": 27735 }, { "epoch": 1.29, - "learning_rate": 1.7492850780554125e-05, - "loss": 0.0448, + "learning_rate": 2.7496762516967532e-05, + "loss": 0.0869, "step": 27740 }, { "epoch": 1.29, - "learning_rate": 1.7492381979278985e-05, - "loss": 0.1718, + "learning_rate": 2.7496294447131515e-05, + "loss": 0.0586, "step": 27745 }, { "epoch": 1.29, - "learning_rate": 1.7491913178003845e-05, - "loss": 0.1088, + "learning_rate": 2.7495826377295495e-05, + "loss": 0.0497, "step": 27750 }, { "epoch": 1.3, - "learning_rate": 1.7491444376728705e-05, - "loss": 0.1133, + "learning_rate": 2.749535830745947e-05, + "loss": 0.112, "step": 27755 }, { "epoch": 1.3, - "learning_rate": 1.7490975575453565e-05, - "loss": 0.1354, + "learning_rate": 2.7494890237623454e-05, + "loss": 0.135, "step": 27760 }, { "epoch": 1.3, - "learning_rate": 1.7490506774178425e-05, - "loss": 0.1955, + "learning_rate": 2.7494422167787434e-05, + "loss": 0.0718, "step": 27765 }, { "epoch": 1.3, - "learning_rate": 1.7490037972903288e-05, - "loss": 0.1759, + "learning_rate": 2.7493954097951414e-05, + "loss": 0.1493, "step": 27770 }, { "epoch": 1.3, - "learning_rate": 1.7489569171628148e-05, - "loss": 0.2415, + "learning_rate": 2.7493486028115394e-05, + "loss": 0.1885, "step": 27775 }, { "epoch": 1.3, - "learning_rate": 1.7489100370353008e-05, - "loss": 0.5153, + "learning_rate": 2.7493017958279377e-05, + "loss": 0.3673, "step": 27780 }, { "epoch": 1.3, - "learning_rate": 1.7488631569077868e-05, - "loss": 0.3479, + "learning_rate": 2.7492549888443357e-05, + "loss": 0.3758, "step": 27785 }, { "epoch": 1.3, - "learning_rate": 1.748816276780273e-05, - "loss": 0.0432, + "learning_rate": 2.7492081818607337e-05, + "loss": 0.0475, "step": 27790 }, { "epoch": 1.3, - "learning_rate": 1.748769396652759e-05, - "loss": 0.0697, + "learning_rate": 2.7491613748771316e-05, + "loss": 0.0395, "step": 27795 }, { "epoch": 1.3, - "learning_rate": 1.748722516525245e-05, - "loss": 0.084, + "learning_rate": 2.74911456789353e-05, + "loss": 0.137, "step": 27800 }, { "epoch": 1.3, - "learning_rate": 1.748675636397731e-05, - "loss": 0.1467, + "learning_rate": 2.749067760909928e-05, + "loss": 0.1077, "step": 27805 }, { "epoch": 1.3, - "learning_rate": 1.748628756270217e-05, - "loss": 0.1314, + "learning_rate": 2.749020953926326e-05, + "loss": 0.1306, "step": 27810 }, { "epoch": 1.3, - "learning_rate": 1.7485818761427034e-05, - "loss": 0.176, + "learning_rate": 2.7489741469427243e-05, + "loss": 0.0834, "step": 27815 }, { "epoch": 1.3, - "learning_rate": 1.7485349960151894e-05, - "loss": 0.1543, + "learning_rate": 2.748927339959122e-05, + "loss": 0.1719, "step": 27820 }, { "epoch": 1.3, - "learning_rate": 1.7484881158876754e-05, - "loss": 0.1403, + "learning_rate": 2.74888053297552e-05, + "loss": 0.2288, "step": 27825 }, { "epoch": 1.3, - "learning_rate": 1.7484412357601614e-05, - "loss": 0.2803, + "learning_rate": 2.748833725991918e-05, + "loss": 0.2699, "step": 27830 }, { "epoch": 1.3, - "learning_rate": 1.7483943556326474e-05, - "loss": 0.2421, + "learning_rate": 2.7487869190083162e-05, + "loss": 0.3177, "step": 27835 }, { "epoch": 1.3, - "learning_rate": 1.7483474755051334e-05, - "loss": 0.0877, + "learning_rate": 2.748740112024714e-05, + "loss": 0.0738, "step": 27840 }, { "epoch": 1.3, - "learning_rate": 1.7483005953776194e-05, - "loss": 0.034, + "learning_rate": 2.748693305041112e-05, + "loss": 0.0389, "step": 27845 }, { "epoch": 1.3, - "learning_rate": 1.7482537152501057e-05, - "loss": 0.1936, + "learning_rate": 2.74864649805751e-05, + "loss": 0.0625, "step": 27850 }, { "epoch": 1.3, - "learning_rate": 1.7482068351225917e-05, - "loss": 0.0793, + "learning_rate": 2.7485996910739084e-05, + "loss": 0.1434, "step": 27855 }, { "epoch": 1.3, - "learning_rate": 1.7481599549950777e-05, - "loss": 0.1592, + "learning_rate": 2.7485528840903064e-05, + "loss": 0.1452, "step": 27860 }, { "epoch": 1.3, - "learning_rate": 1.748113074867564e-05, - "loss": 0.1525, + "learning_rate": 2.7485060771067044e-05, + "loss": 0.1811, "step": 27865 }, { "epoch": 1.3, - "learning_rate": 1.74806619474005e-05, - "loss": 0.0891, + "learning_rate": 2.7484592701231024e-05, + "loss": 0.2638, "step": 27870 }, { "epoch": 1.3, - "learning_rate": 1.748019314612536e-05, - "loss": 0.3361, + "learning_rate": 2.7484124631395007e-05, + "loss": 0.34, "step": 27875 }, { "epoch": 1.3, - "learning_rate": 1.747972434485022e-05, - "loss": 0.3712, + "learning_rate": 2.7483656561558984e-05, + "loss": 0.328, "step": 27880 }, { "epoch": 1.3, - "learning_rate": 1.747925554357508e-05, - "loss": 0.256, + "learning_rate": 2.7483188491722963e-05, + "loss": 0.2302, "step": 27885 }, { "epoch": 1.3, - "learning_rate": 1.747878674229994e-05, - "loss": 0.0428, + "learning_rate": 2.7482720421886947e-05, + "loss": 0.0415, "step": 27890 }, { "epoch": 1.3, - "learning_rate": 1.74783179410248e-05, - "loss": 0.0539, + "learning_rate": 2.7482252352050926e-05, + "loss": 0.0742, "step": 27895 }, { "epoch": 1.3, - "learning_rate": 1.747784913974966e-05, - "loss": 0.0732, + "learning_rate": 2.7481784282214906e-05, + "loss": 0.1024, "step": 27900 }, { "epoch": 1.3, - "learning_rate": 1.747738033847452e-05, - "loss": 0.0836, + "learning_rate": 2.7481316212378886e-05, + "loss": 0.1901, "step": 27905 }, { "epoch": 1.3, - "learning_rate": 1.7476911537199383e-05, - "loss": 0.096, + "learning_rate": 2.748084814254287e-05, + "loss": 0.1882, "step": 27910 }, { "epoch": 1.3, - "learning_rate": 1.7476442735924243e-05, - "loss": 0.125, + "learning_rate": 2.748038007270685e-05, + "loss": 0.1124, "step": 27915 }, { "epoch": 1.3, - "learning_rate": 1.7475973934649103e-05, - "loss": 0.1961, + "learning_rate": 2.747991200287083e-05, + "loss": 0.2339, "step": 27920 }, { "epoch": 1.3, - "learning_rate": 1.7475505133373963e-05, - "loss": 0.2617, + "learning_rate": 2.747944393303481e-05, + "loss": 0.2302, "step": 27925 }, { "epoch": 1.3, - "learning_rate": 1.7475036332098826e-05, - "loss": 0.282, + "learning_rate": 2.7478975863198792e-05, + "loss": 0.2955, "step": 27930 }, { "epoch": 1.3, - "learning_rate": 1.7474567530823686e-05, - "loss": 0.2166, + "learning_rate": 2.7478507793362772e-05, + "loss": 0.2843, "step": 27935 }, { "epoch": 1.3, - "learning_rate": 1.7474098729548546e-05, - "loss": 0.0175, + "learning_rate": 2.747803972352675e-05, + "loss": 0.0291, "step": 27940 }, { "epoch": 1.3, - "learning_rate": 1.7473629928273406e-05, - "loss": 0.101, + "learning_rate": 2.747757165369073e-05, + "loss": 0.0432, "step": 27945 }, { "epoch": 1.3, - "learning_rate": 1.7473161126998266e-05, - "loss": 0.1078, + "learning_rate": 2.747710358385471e-05, + "loss": 0.066, "step": 27950 }, { "epoch": 1.3, - "learning_rate": 1.747269232572313e-05, - "loss": 0.1029, + "learning_rate": 2.747663551401869e-05, + "loss": 0.1124, "step": 27955 }, { "epoch": 1.3, - "learning_rate": 1.747222352444799e-05, - "loss": 0.1267, + "learning_rate": 2.747616744418267e-05, + "loss": 0.0902, "step": 27960 }, { "epoch": 1.3, - "learning_rate": 1.747175472317285e-05, - "loss": 0.1834, + "learning_rate": 2.7475699374346654e-05, + "loss": 0.0392, "step": 27965 }, { "epoch": 1.31, - "learning_rate": 1.747128592189771e-05, - "loss": 0.2201, + "learning_rate": 2.7475231304510634e-05, + "loss": 0.1585, "step": 27970 }, { "epoch": 1.31, - "learning_rate": 1.747081712062257e-05, - "loss": 0.233, + "learning_rate": 2.7474763234674614e-05, + "loss": 0.1677, "step": 27975 }, { "epoch": 1.31, - "learning_rate": 1.747034831934743e-05, - "loss": 0.3193, + "learning_rate": 2.7474295164838593e-05, + "loss": 0.2895, "step": 27980 }, { "epoch": 1.31, - "learning_rate": 1.746987951807229e-05, - "loss": 0.2488, + "learning_rate": 2.7473827095002577e-05, + "loss": 0.2902, "step": 27985 }, { "epoch": 1.31, - "learning_rate": 1.746941071679715e-05, - "loss": 0.101, + "learning_rate": 2.7473359025166556e-05, + "loss": 0.1316, "step": 27990 }, { "epoch": 1.31, - "learning_rate": 1.7468941915522012e-05, - "loss": 0.0682, + "learning_rate": 2.7472890955330536e-05, + "loss": 0.0687, "step": 27995 }, { "epoch": 1.31, - "learning_rate": 1.7468473114246872e-05, - "loss": 0.0767, + "learning_rate": 2.747242288549452e-05, + "loss": 0.1078, "step": 28000 }, { "epoch": 1.31, - "learning_rate": 1.7468004312971732e-05, - "loss": 0.1169, + "learning_rate": 2.74719548156585e-05, + "loss": 0.1177, "step": 28005 }, { "epoch": 1.31, - "learning_rate": 1.7467535511696595e-05, - "loss": 0.147, + "learning_rate": 2.7471486745822476e-05, + "loss": 0.1082, "step": 28010 }, { "epoch": 1.31, - "learning_rate": 1.7467066710421455e-05, - "loss": 0.1447, + "learning_rate": 2.7471018675986456e-05, + "loss": 0.089, "step": 28015 }, { "epoch": 1.31, - "learning_rate": 1.7466597909146315e-05, - "loss": 0.2328, + "learning_rate": 2.747055060615044e-05, + "loss": 0.249, "step": 28020 }, { "epoch": 1.31, - "learning_rate": 1.7466129107871175e-05, - "loss": 0.1731, + "learning_rate": 2.747008253631442e-05, + "loss": 0.1919, "step": 28025 }, { "epoch": 1.31, - "learning_rate": 1.7465660306596035e-05, - "loss": 0.4335, + "learning_rate": 2.74696144664784e-05, + "loss": 0.2358, "step": 28030 }, { "epoch": 1.31, - "learning_rate": 1.7465191505320895e-05, - "loss": 0.2766, + "learning_rate": 2.7469146396642378e-05, + "loss": 0.3767, "step": 28035 }, { "epoch": 1.31, - "learning_rate": 1.7464722704045755e-05, - "loss": 0.0367, + "learning_rate": 2.746867832680636e-05, + "loss": 0.099, "step": 28040 }, { "epoch": 1.31, - "learning_rate": 1.7464253902770615e-05, - "loss": 0.0953, + "learning_rate": 2.746821025697034e-05, + "loss": 0.0876, "step": 28045 }, { "epoch": 1.31, - "learning_rate": 1.746378510149548e-05, - "loss": 0.0909, + "learning_rate": 2.746774218713432e-05, + "loss": 0.1126, "step": 28050 }, { "epoch": 1.31, - "learning_rate": 1.7463316300220338e-05, - "loss": 0.087, + "learning_rate": 2.74672741172983e-05, + "loss": 0.1157, "step": 28055 }, { "epoch": 1.31, - "learning_rate": 1.7462847498945198e-05, - "loss": 0.0996, + "learning_rate": 2.7466806047462284e-05, + "loss": 0.084, "step": 28060 }, { "epoch": 1.31, - "learning_rate": 1.7462378697670058e-05, - "loss": 0.2102, + "learning_rate": 2.7466337977626264e-05, + "loss": 0.1668, "step": 28065 }, { "epoch": 1.31, - "learning_rate": 1.7461909896394918e-05, - "loss": 0.2142, + "learning_rate": 2.746586990779024e-05, + "loss": 0.1968, "step": 28070 }, { "epoch": 1.31, - "learning_rate": 1.746144109511978e-05, - "loss": 0.2751, + "learning_rate": 2.7465401837954224e-05, + "loss": 0.2437, "step": 28075 }, { "epoch": 1.31, - "learning_rate": 1.746097229384464e-05, - "loss": 0.2934, + "learning_rate": 2.7464933768118203e-05, + "loss": 0.3091, "step": 28080 }, { "epoch": 1.31, - "learning_rate": 1.74605034925695e-05, - "loss": 0.2695, + "learning_rate": 2.7464465698282183e-05, + "loss": 0.2536, "step": 28085 }, { "epoch": 1.31, - "learning_rate": 1.746003469129436e-05, - "loss": 0.0472, + "learning_rate": 2.7463997628446163e-05, + "loss": 0.0452, "step": 28090 }, { "epoch": 1.31, - "learning_rate": 1.7459565890019224e-05, - "loss": 0.0919, + "learning_rate": 2.7463529558610146e-05, + "loss": 0.0926, "step": 28095 }, { "epoch": 1.31, - "learning_rate": 1.7459097088744084e-05, - "loss": 0.1356, + "learning_rate": 2.7463061488774126e-05, + "loss": 0.0483, "step": 28100 }, { "epoch": 1.31, - "learning_rate": 1.7458628287468944e-05, - "loss": 0.1815, + "learning_rate": 2.7462593418938106e-05, + "loss": 0.0627, "step": 28105 }, { "epoch": 1.31, - "learning_rate": 1.7458159486193804e-05, - "loss": 0.1405, + "learning_rate": 2.7462125349102086e-05, + "loss": 0.1334, "step": 28110 }, { "epoch": 1.31, - "learning_rate": 1.7457690684918664e-05, - "loss": 0.1442, + "learning_rate": 2.746165727926607e-05, + "loss": 0.1587, "step": 28115 }, { "epoch": 1.31, - "learning_rate": 1.7457221883643524e-05, - "loss": 0.1353, + "learning_rate": 2.746118920943005e-05, + "loss": 0.1338, "step": 28120 }, { "epoch": 1.31, - "learning_rate": 1.7456753082368384e-05, - "loss": 0.3492, + "learning_rate": 2.746072113959403e-05, + "loss": 0.2125, "step": 28125 }, { "epoch": 1.31, - "learning_rate": 1.7456284281093244e-05, - "loss": 0.1896, + "learning_rate": 2.7460253069758012e-05, + "loss": 0.2883, "step": 28130 }, { "epoch": 1.31, - "learning_rate": 1.7455815479818104e-05, - "loss": 0.2213, + "learning_rate": 2.7459784999921988e-05, + "loss": 0.2835, "step": 28135 }, { "epoch": 1.31, - "learning_rate": 1.7455346678542967e-05, - "loss": 0.0696, + "learning_rate": 2.7459316930085968e-05, + "loss": 0.0499, "step": 28140 }, { "epoch": 1.31, - "learning_rate": 1.7454877877267827e-05, - "loss": 0.0769, + "learning_rate": 2.7458848860249948e-05, + "loss": 0.0252, "step": 28145 }, { "epoch": 1.31, - "learning_rate": 1.7454409075992687e-05, - "loss": 0.0635, + "learning_rate": 2.745838079041393e-05, + "loss": 0.1248, "step": 28150 }, { "epoch": 1.31, - "learning_rate": 1.745394027471755e-05, - "loss": 0.1047, + "learning_rate": 2.745791272057791e-05, + "loss": 0.1053, "step": 28155 }, { "epoch": 1.31, - "learning_rate": 1.745347147344241e-05, - "loss": 0.1078, + "learning_rate": 2.745744465074189e-05, + "loss": 0.1143, "step": 28160 }, { "epoch": 1.31, - "learning_rate": 1.745300267216727e-05, - "loss": 0.2376, + "learning_rate": 2.745697658090587e-05, + "loss": 0.2063, "step": 28165 }, { "epoch": 1.31, - "learning_rate": 1.745253387089213e-05, - "loss": 0.1726, + "learning_rate": 2.7456508511069854e-05, + "loss": 0.2292, "step": 28170 }, { "epoch": 1.31, - "learning_rate": 1.745206506961699e-05, - "loss": 0.2706, + "learning_rate": 2.7456040441233833e-05, + "loss": 0.2822, "step": 28175 }, { "epoch": 1.31, - "learning_rate": 1.745159626834185e-05, - "loss": 0.3923, + "learning_rate": 2.7455572371397813e-05, + "loss": 0.5754, "step": 28180 }, { "epoch": 1.32, - "learning_rate": 1.745112746706671e-05, - "loss": 0.2519, + "learning_rate": 2.7455104301561796e-05, + "loss": 0.2625, "step": 28185 }, { "epoch": 1.32, - "learning_rate": 1.7450658665791573e-05, - "loss": 0.0364, + "learning_rate": 2.7454636231725776e-05, + "loss": 0.0868, "step": 28190 }, { "epoch": 1.32, - "learning_rate": 1.7450189864516433e-05, - "loss": 0.1435, + "learning_rate": 2.7454168161889756e-05, + "loss": 0.0331, "step": 28195 }, { "epoch": 1.32, - "learning_rate": 1.7449721063241293e-05, - "loss": 0.0601, + "learning_rate": 2.7453700092053733e-05, + "loss": 0.097, "step": 28200 }, { "epoch": 1.32, - "learning_rate": 1.7449252261966153e-05, - "loss": 0.0721, + "learning_rate": 2.7453232022217716e-05, + "loss": 0.0772, "step": 28205 }, { "epoch": 1.32, - "learning_rate": 1.7448783460691013e-05, - "loss": 0.1386, + "learning_rate": 2.7452763952381696e-05, + "loss": 0.1231, "step": 28210 }, { "epoch": 1.32, - "learning_rate": 1.7448314659415876e-05, - "loss": 0.1472, + "learning_rate": 2.7452295882545675e-05, + "loss": 0.0905, "step": 28215 }, { "epoch": 1.32, - "learning_rate": 1.7447845858140736e-05, - "loss": 0.2471, + "learning_rate": 2.7451827812709655e-05, + "loss": 0.2014, "step": 28220 }, { "epoch": 1.32, - "learning_rate": 1.7447377056865596e-05, - "loss": 0.2434, + "learning_rate": 2.745135974287364e-05, + "loss": 0.2087, "step": 28225 }, { "epoch": 1.32, - "learning_rate": 1.7446908255590456e-05, - "loss": 0.3995, + "learning_rate": 2.7450891673037618e-05, + "loss": 0.3961, "step": 28230 }, { "epoch": 1.32, - "learning_rate": 1.744643945431532e-05, - "loss": 0.4138, + "learning_rate": 2.7450423603201598e-05, + "loss": 0.2582, "step": 28235 }, { "epoch": 1.32, - "learning_rate": 1.744597065304018e-05, - "loss": 0.091, + "learning_rate": 2.744995553336558e-05, + "loss": 0.0715, "step": 28240 }, { "epoch": 1.32, - "learning_rate": 1.744550185176504e-05, - "loss": 0.0882, + "learning_rate": 2.744948746352956e-05, + "loss": 0.083, "step": 28245 }, { "epoch": 1.32, - "learning_rate": 1.74450330504899e-05, - "loss": 0.0817, + "learning_rate": 2.744901939369354e-05, + "loss": 0.1078, "step": 28250 }, { "epoch": 1.32, - "learning_rate": 1.744456424921476e-05, - "loss": 0.1233, + "learning_rate": 2.744855132385752e-05, + "loss": 0.0949, "step": 28255 }, { "epoch": 1.32, - "learning_rate": 1.744409544793962e-05, - "loss": 0.0671, + "learning_rate": 2.74480832540215e-05, + "loss": 0.1244, "step": 28260 }, { "epoch": 1.32, - "learning_rate": 1.744362664666448e-05, - "loss": 0.1374, + "learning_rate": 2.744761518418548e-05, + "loss": 0.188, "step": 28265 }, { "epoch": 1.32, - "learning_rate": 1.744315784538934e-05, - "loss": 0.1614, + "learning_rate": 2.744714711434946e-05, + "loss": 0.2029, "step": 28270 }, { "epoch": 1.32, - "learning_rate": 1.74426890441142e-05, - "loss": 0.1907, + "learning_rate": 2.744667904451344e-05, + "loss": 0.2647, "step": 28275 }, { "epoch": 1.32, - "learning_rate": 1.7442220242839062e-05, - "loss": 0.385, + "learning_rate": 2.7446210974677423e-05, + "loss": 0.3943, "step": 28280 }, { "epoch": 1.32, - "learning_rate": 1.7441751441563922e-05, - "loss": 0.2444, + "learning_rate": 2.7445742904841403e-05, + "loss": 0.4138, "step": 28285 }, { "epoch": 1.32, - "learning_rate": 1.7441282640288782e-05, - "loss": 0.0702, + "learning_rate": 2.7445274835005383e-05, + "loss": 0.0202, "step": 28290 }, { "epoch": 1.32, - "learning_rate": 1.7440813839013646e-05, - "loss": 0.1009, + "learning_rate": 2.7444806765169363e-05, + "loss": 0.0484, "step": 28295 }, { "epoch": 1.32, - "learning_rate": 1.7440345037738505e-05, - "loss": 0.0929, + "learning_rate": 2.7444338695333346e-05, + "loss": 0.0667, "step": 28300 }, { "epoch": 1.32, - "learning_rate": 1.7439876236463365e-05, - "loss": 0.1733, + "learning_rate": 2.7443870625497326e-05, + "loss": 0.0925, "step": 28305 }, { "epoch": 1.32, - "learning_rate": 1.7439407435188225e-05, - "loss": 0.1582, + "learning_rate": 2.7443402555661305e-05, + "loss": 0.181, "step": 28310 }, { "epoch": 1.32, - "learning_rate": 1.7438938633913085e-05, - "loss": 0.1792, + "learning_rate": 2.744293448582529e-05, + "loss": 0.1262, "step": 28315 }, { "epoch": 1.32, - "learning_rate": 1.7438469832637945e-05, - "loss": 0.1222, + "learning_rate": 2.744246641598927e-05, + "loss": 0.2171, "step": 28320 }, { "epoch": 1.32, - "learning_rate": 1.7438001031362805e-05, - "loss": 0.3091, + "learning_rate": 2.7441998346153245e-05, + "loss": 0.1984, "step": 28325 }, { "epoch": 1.32, - "learning_rate": 1.743753223008767e-05, - "loss": 0.4052, + "learning_rate": 2.7441530276317225e-05, + "loss": 0.3275, "step": 28330 }, { "epoch": 1.32, - "learning_rate": 1.743706342881253e-05, - "loss": 0.198, + "learning_rate": 2.7441062206481208e-05, + "loss": 0.2852, "step": 28335 }, { "epoch": 1.32, - "learning_rate": 1.743659462753739e-05, - "loss": 0.0879, + "learning_rate": 2.7440594136645188e-05, + "loss": 0.062, "step": 28340 }, { "epoch": 1.32, - "learning_rate": 1.7436125826262248e-05, - "loss": 0.0845, + "learning_rate": 2.7440126066809168e-05, + "loss": 0.0677, "step": 28345 }, { "epoch": 1.32, - "learning_rate": 1.7435657024987108e-05, - "loss": 0.0569, + "learning_rate": 2.7439657996973147e-05, + "loss": 0.1333, "step": 28350 }, { "epoch": 1.32, - "learning_rate": 1.7435188223711968e-05, - "loss": 0.1203, + "learning_rate": 2.743918992713713e-05, + "loss": 0.128, "step": 28355 }, { "epoch": 1.32, - "learning_rate": 1.743471942243683e-05, - "loss": 0.1062, + "learning_rate": 2.743872185730111e-05, + "loss": 0.1954, "step": 28360 }, { "epoch": 1.32, - "learning_rate": 1.743425062116169e-05, - "loss": 0.2103, + "learning_rate": 2.743825378746509e-05, + "loss": 0.1572, "step": 28365 }, { "epoch": 1.32, - "learning_rate": 1.743378181988655e-05, - "loss": 0.2832, + "learning_rate": 2.7437785717629073e-05, + "loss": 0.134, "step": 28370 }, { "epoch": 1.32, - "learning_rate": 1.7433313018611415e-05, - "loss": 0.2313, + "learning_rate": 2.7437317647793053e-05, + "loss": 0.2102, "step": 28375 }, { "epoch": 1.32, - "learning_rate": 1.7432844217336275e-05, - "loss": 0.4534, + "learning_rate": 2.7436849577957033e-05, + "loss": 0.3999, "step": 28380 }, { "epoch": 1.32, - "learning_rate": 1.7432375416061135e-05, - "loss": 0.238, + "learning_rate": 2.743638150812101e-05, + "loss": 0.1921, "step": 28385 }, { "epoch": 1.32, - "learning_rate": 1.7431906614785994e-05, - "loss": 0.0604, + "learning_rate": 2.7435913438284993e-05, + "loss": 0.0599, "step": 28390 }, { "epoch": 1.32, - "learning_rate": 1.7431437813510854e-05, - "loss": 0.1129, + "learning_rate": 2.7435445368448973e-05, + "loss": 0.0365, "step": 28395 }, { "epoch": 1.33, - "learning_rate": 1.7430969012235714e-05, - "loss": 0.0935, + "learning_rate": 2.7434977298612952e-05, + "loss": 0.1112, "step": 28400 }, { "epoch": 1.33, - "learning_rate": 1.7430500210960574e-05, - "loss": 0.0649, + "learning_rate": 2.7434509228776932e-05, + "loss": 0.0971, "step": 28405 }, { "epoch": 1.33, - "learning_rate": 1.7430031409685434e-05, - "loss": 0.0958, + "learning_rate": 2.7434041158940915e-05, + "loss": 0.0944, "step": 28410 }, { "epoch": 1.33, - "learning_rate": 1.7429562608410294e-05, - "loss": 0.1529, + "learning_rate": 2.7433573089104895e-05, + "loss": 0.1911, "step": 28415 }, { "epoch": 1.33, - "learning_rate": 1.7429093807135157e-05, - "loss": 0.2254, + "learning_rate": 2.7433105019268875e-05, + "loss": 0.0776, "step": 28420 }, { "epoch": 1.33, - "learning_rate": 1.7428625005860017e-05, - "loss": 0.3171, + "learning_rate": 2.7432636949432858e-05, + "loss": 0.1998, "step": 28425 }, { "epoch": 1.33, - "learning_rate": 1.7428156204584877e-05, - "loss": 0.3572, + "learning_rate": 2.7432168879596838e-05, + "loss": 0.3238, "step": 28430 }, { "epoch": 1.33, - "learning_rate": 1.7427687403309737e-05, - "loss": 0.2508, + "learning_rate": 2.7431700809760818e-05, + "loss": 0.1711, "step": 28435 }, { "epoch": 1.33, - "learning_rate": 1.74272186020346e-05, - "loss": 0.0471, + "learning_rate": 2.7431232739924798e-05, + "loss": 0.0681, "step": 28440 }, { "epoch": 1.33, - "learning_rate": 1.742674980075946e-05, - "loss": 0.097, + "learning_rate": 2.743076467008878e-05, + "loss": 0.0965, "step": 28445 }, { "epoch": 1.33, - "learning_rate": 1.742628099948432e-05, - "loss": 0.039, + "learning_rate": 2.7430296600252757e-05, + "loss": 0.0806, "step": 28450 }, { "epoch": 1.33, - "learning_rate": 1.742581219820918e-05, - "loss": 0.1062, + "learning_rate": 2.7429828530416737e-05, + "loss": 0.0958, "step": 28455 }, { "epoch": 1.33, - "learning_rate": 1.742534339693404e-05, - "loss": 0.0994, + "learning_rate": 2.7429360460580717e-05, + "loss": 0.2168, "step": 28460 }, { "epoch": 1.33, - "learning_rate": 1.7424874595658904e-05, - "loss": 0.0935, + "learning_rate": 2.74288923907447e-05, + "loss": 0.1312, "step": 28465 }, { "epoch": 1.33, - "learning_rate": 1.7424405794383764e-05, - "loss": 0.1496, + "learning_rate": 2.742842432090868e-05, + "loss": 0.2123, "step": 28470 }, { "epoch": 1.33, - "learning_rate": 1.7423936993108623e-05, - "loss": 0.2379, + "learning_rate": 2.742795625107266e-05, + "loss": 0.2767, "step": 28475 }, { "epoch": 1.33, - "learning_rate": 1.7423468191833483e-05, - "loss": 0.1887, + "learning_rate": 2.742748818123664e-05, + "loss": 0.3819, "step": 28480 }, { "epoch": 1.33, - "learning_rate": 1.7422999390558343e-05, - "loss": 0.1909, + "learning_rate": 2.7427020111400623e-05, + "loss": 0.2376, "step": 28485 }, { "epoch": 1.33, - "learning_rate": 1.7422530589283203e-05, - "loss": 0.0401, + "learning_rate": 2.7426552041564603e-05, + "loss": 0.0465, "step": 28490 }, { "epoch": 1.33, - "learning_rate": 1.7422061788008063e-05, - "loss": 0.0426, + "learning_rate": 2.7426083971728582e-05, + "loss": 0.0668, "step": 28495 }, { "epoch": 1.33, - "learning_rate": 1.7421592986732923e-05, - "loss": 0.0825, + "learning_rate": 2.7425615901892566e-05, + "loss": 0.1181, "step": 28500 }, { "epoch": 1.33, - "learning_rate": 1.7421124185457786e-05, - "loss": 0.1346, + "learning_rate": 2.7425147832056545e-05, + "loss": 0.0504, "step": 28505 }, { "epoch": 1.33, - "learning_rate": 1.7420655384182646e-05, - "loss": 0.1569, + "learning_rate": 2.7424679762220525e-05, + "loss": 0.1084, "step": 28510 }, { "epoch": 1.33, - "learning_rate": 1.7420186582907506e-05, - "loss": 0.1373, + "learning_rate": 2.7424211692384502e-05, + "loss": 0.1506, "step": 28515 }, { "epoch": 1.33, - "learning_rate": 1.741971778163237e-05, - "loss": 0.1554, + "learning_rate": 2.7423743622548485e-05, + "loss": 0.1575, "step": 28520 }, { "epoch": 1.33, - "learning_rate": 1.741924898035723e-05, - "loss": 0.223, + "learning_rate": 2.7423275552712465e-05, + "loss": 0.2008, "step": 28525 }, { "epoch": 1.33, - "learning_rate": 1.741878017908209e-05, - "loss": 0.5315, + "learning_rate": 2.7422807482876445e-05, + "loss": 0.3201, "step": 28530 }, { "epoch": 1.33, - "learning_rate": 1.741831137780695e-05, - "loss": 0.2349, + "learning_rate": 2.7422339413040424e-05, + "loss": 0.2395, "step": 28535 }, { "epoch": 1.33, - "learning_rate": 1.741784257653181e-05, - "loss": 0.0541, + "learning_rate": 2.7421871343204408e-05, + "loss": 0.1258, "step": 28540 }, { "epoch": 1.33, - "learning_rate": 1.741737377525667e-05, - "loss": 0.086, + "learning_rate": 2.7421403273368387e-05, + "loss": 0.0798, "step": 28545 }, { "epoch": 1.33, - "learning_rate": 1.741690497398153e-05, - "loss": 0.08, + "learning_rate": 2.7420935203532367e-05, + "loss": 0.0968, "step": 28550 }, { "epoch": 1.33, - "learning_rate": 1.741643617270639e-05, - "loss": 0.0762, + "learning_rate": 2.742046713369635e-05, + "loss": 0.0886, "step": 28555 }, { "epoch": 1.33, - "learning_rate": 1.7415967371431253e-05, - "loss": 0.1393, + "learning_rate": 2.741999906386033e-05, + "loss": 0.2289, "step": 28560 }, { "epoch": 1.33, - "learning_rate": 1.7415498570156112e-05, - "loss": 0.169, + "learning_rate": 2.741953099402431e-05, + "loss": 0.2077, "step": 28565 }, { "epoch": 1.33, - "learning_rate": 1.7415029768880972e-05, - "loss": 0.1328, + "learning_rate": 2.741906292418829e-05, + "loss": 0.163, "step": 28570 }, { "epoch": 1.33, - "learning_rate": 1.7414560967605832e-05, - "loss": 0.2354, + "learning_rate": 2.741859485435227e-05, + "loss": 0.217, "step": 28575 }, { "epoch": 1.33, - "learning_rate": 1.7414092166330692e-05, - "loss": 0.4134, + "learning_rate": 2.741812678451625e-05, + "loss": 0.3247, "step": 28580 }, { "epoch": 1.33, - "learning_rate": 1.7413623365055556e-05, - "loss": 0.3811, + "learning_rate": 2.741765871468023e-05, + "loss": 0.3186, "step": 28585 }, { "epoch": 1.33, - "learning_rate": 1.7413154563780416e-05, - "loss": 0.0618, + "learning_rate": 2.741719064484421e-05, + "loss": 0.0384, "step": 28590 }, { "epoch": 1.33, - "learning_rate": 1.7412685762505275e-05, - "loss": 0.0739, + "learning_rate": 2.7416722575008192e-05, + "loss": 0.0504, "step": 28595 }, { "epoch": 1.33, - "learning_rate": 1.7412216961230135e-05, - "loss": 0.1536, + "learning_rate": 2.7416254505172172e-05, + "loss": 0.0692, "step": 28600 }, { "epoch": 1.33, - "learning_rate": 1.7411748159955e-05, - "loss": 0.1513, + "learning_rate": 2.7415786435336152e-05, + "loss": 0.1006, "step": 28605 }, { "epoch": 1.33, - "learning_rate": 1.741127935867986e-05, - "loss": 0.1786, + "learning_rate": 2.7415318365500135e-05, + "loss": 0.1761, "step": 28610 }, { "epoch": 1.34, - "learning_rate": 1.741081055740472e-05, - "loss": 0.1871, + "learning_rate": 2.7414850295664115e-05, + "loss": 0.1248, "step": 28615 }, { "epoch": 1.34, - "learning_rate": 1.741034175612958e-05, - "loss": 0.1789, + "learning_rate": 2.7414382225828095e-05, + "loss": 0.19, "step": 28620 }, { "epoch": 1.34, - "learning_rate": 1.740987295485444e-05, - "loss": 0.247, + "learning_rate": 2.7413914155992075e-05, + "loss": 0.1107, "step": 28625 }, { "epoch": 1.34, - "learning_rate": 1.74094041535793e-05, - "loss": 0.2658, + "learning_rate": 2.7413446086156058e-05, + "loss": 0.293, "step": 28630 }, { "epoch": 1.34, - "learning_rate": 1.740893535230416e-05, - "loss": 0.2673, + "learning_rate": 2.7412978016320038e-05, + "loss": 0.2144, "step": 28635 }, { "epoch": 1.34, - "learning_rate": 1.7408466551029018e-05, - "loss": 0.0536, + "learning_rate": 2.7412509946484014e-05, + "loss": 0.0996, "step": 28640 }, { "epoch": 1.34, - "learning_rate": 1.740799774975388e-05, - "loss": 0.0496, + "learning_rate": 2.7412041876647994e-05, + "loss": 0.0689, "step": 28645 }, { "epoch": 1.34, - "learning_rate": 1.740752894847874e-05, - "loss": 0.1276, + "learning_rate": 2.7411573806811977e-05, + "loss": 0.0643, "step": 28650 }, { "epoch": 1.34, - "learning_rate": 1.74070601472036e-05, - "loss": 0.0875, + "learning_rate": 2.7411105736975957e-05, + "loss": 0.0736, "step": 28655 }, { "epoch": 1.34, - "learning_rate": 1.740659134592846e-05, - "loss": 0.1346, + "learning_rate": 2.7410637667139937e-05, + "loss": 0.1032, "step": 28660 }, { "epoch": 1.34, - "learning_rate": 1.7406122544653325e-05, - "loss": 0.1021, + "learning_rate": 2.7410169597303917e-05, + "loss": 0.1954, "step": 28665 }, { "epoch": 1.34, - "learning_rate": 1.7405653743378185e-05, - "loss": 0.1522, + "learning_rate": 2.74097015274679e-05, + "loss": 0.1054, "step": 28670 }, { "epoch": 1.34, - "learning_rate": 1.7405184942103045e-05, - "loss": 0.3115, + "learning_rate": 2.740923345763188e-05, + "loss": 0.3436, "step": 28675 }, { "epoch": 1.34, - "learning_rate": 1.7404716140827904e-05, - "loss": 0.3448, + "learning_rate": 2.740876538779586e-05, + "loss": 0.2841, "step": 28680 }, { "epoch": 1.34, - "learning_rate": 1.7404247339552764e-05, - "loss": 0.3397, + "learning_rate": 2.7408297317959843e-05, + "loss": 0.2925, "step": 28685 }, { "epoch": 1.34, - "learning_rate": 1.7403778538277624e-05, - "loss": 0.1061, + "learning_rate": 2.7407829248123822e-05, + "loss": 0.0757, "step": 28690 }, { "epoch": 1.34, - "learning_rate": 1.7403309737002484e-05, - "loss": 0.0856, + "learning_rate": 2.7407361178287802e-05, + "loss": 0.0521, "step": 28695 }, { "epoch": 1.34, - "learning_rate": 1.7402840935727348e-05, - "loss": 0.0919, + "learning_rate": 2.7406893108451782e-05, + "loss": 0.0722, "step": 28700 }, { "epoch": 1.34, - "learning_rate": 1.7402372134452208e-05, - "loss": 0.2105, + "learning_rate": 2.7406425038615762e-05, + "loss": 0.1159, "step": 28705 }, { "epoch": 1.34, - "learning_rate": 1.7401903333177067e-05, - "loss": 0.1149, + "learning_rate": 2.7405956968779742e-05, + "loss": 0.1192, "step": 28710 }, { "epoch": 1.34, - "learning_rate": 1.7401434531901927e-05, - "loss": 0.1549, + "learning_rate": 2.740548889894372e-05, + "loss": 0.1786, "step": 28715 }, { "epoch": 1.34, - "learning_rate": 1.7400965730626787e-05, - "loss": 0.201, + "learning_rate": 2.74050208291077e-05, + "loss": 0.2406, "step": 28720 }, { "epoch": 1.34, - "learning_rate": 1.740049692935165e-05, - "loss": 0.235, + "learning_rate": 2.7404552759271685e-05, + "loss": 0.202, "step": 28725 }, { "epoch": 1.34, - "learning_rate": 1.740002812807651e-05, - "loss": 0.2196, + "learning_rate": 2.7404084689435664e-05, + "loss": 0.2896, "step": 28730 }, { "epoch": 1.34, - "learning_rate": 1.739955932680137e-05, - "loss": 0.2805, + "learning_rate": 2.7403616619599644e-05, + "loss": 0.2428, "step": 28735 }, { "epoch": 1.34, - "learning_rate": 1.739909052552623e-05, - "loss": 0.0878, + "learning_rate": 2.7403148549763627e-05, + "loss": 0.0399, "step": 28740 }, { "epoch": 1.34, - "learning_rate": 1.7398621724251094e-05, - "loss": 0.1229, + "learning_rate": 2.7402680479927607e-05, + "loss": 0.0714, "step": 28745 }, { "epoch": 1.34, - "learning_rate": 1.7398152922975954e-05, - "loss": 0.0782, + "learning_rate": 2.7402212410091587e-05, + "loss": 0.047, "step": 28750 }, { "epoch": 1.34, - "learning_rate": 1.7397684121700814e-05, - "loss": 0.0866, + "learning_rate": 2.7401744340255567e-05, + "loss": 0.0778, "step": 28755 }, { "epoch": 1.34, - "learning_rate": 1.7397215320425674e-05, - "loss": 0.151, + "learning_rate": 2.740127627041955e-05, + "loss": 0.0955, "step": 28760 }, { "epoch": 1.34, - "learning_rate": 1.7396746519150534e-05, - "loss": 0.1959, + "learning_rate": 2.7400808200583526e-05, + "loss": 0.099, "step": 28765 }, { "epoch": 1.34, - "learning_rate": 1.7396277717875393e-05, - "loss": 0.2202, + "learning_rate": 2.7400340130747506e-05, + "loss": 0.1675, "step": 28770 }, { "epoch": 1.34, - "learning_rate": 1.7395808916600253e-05, - "loss": 0.1692, + "learning_rate": 2.7399872060911486e-05, + "loss": 0.1985, "step": 28775 }, { "epoch": 1.34, - "learning_rate": 1.7395340115325113e-05, - "loss": 0.4181, + "learning_rate": 2.739940399107547e-05, + "loss": 0.4462, "step": 28780 }, { "epoch": 1.34, - "learning_rate": 1.7394871314049973e-05, - "loss": 0.3173, + "learning_rate": 2.739893592123945e-05, + "loss": 0.2544, "step": 28785 }, { "epoch": 1.34, - "learning_rate": 1.7394402512774837e-05, - "loss": 0.0864, + "learning_rate": 2.739846785140343e-05, + "loss": 0.043, "step": 28790 }, { "epoch": 1.34, - "learning_rate": 1.7393933711499697e-05, - "loss": 0.0677, + "learning_rate": 2.7397999781567412e-05, + "loss": 0.0404, "step": 28795 }, { "epoch": 1.34, - "learning_rate": 1.7393464910224556e-05, - "loss": 0.0952, + "learning_rate": 2.7397531711731392e-05, + "loss": 0.073, "step": 28800 }, { "epoch": 1.34, - "learning_rate": 1.739299610894942e-05, - "loss": 0.0573, + "learning_rate": 2.7397063641895372e-05, + "loss": 0.0875, "step": 28805 }, { "epoch": 1.34, - "learning_rate": 1.739252730767428e-05, - "loss": 0.2943, + "learning_rate": 2.739659557205935e-05, + "loss": 0.0974, "step": 28810 }, { "epoch": 1.34, - "learning_rate": 1.739205850639914e-05, - "loss": 0.142, + "learning_rate": 2.7396127502223335e-05, + "loss": 0.2016, "step": 28815 }, { "epoch": 1.34, - "learning_rate": 1.7391589705124e-05, - "loss": 0.0782, + "learning_rate": 2.7395659432387315e-05, + "loss": 0.2177, "step": 28820 }, { "epoch": 1.35, - "learning_rate": 1.739112090384886e-05, - "loss": 0.2885, + "learning_rate": 2.7395191362551294e-05, + "loss": 0.2592, "step": 28825 }, { "epoch": 1.35, - "learning_rate": 1.739065210257372e-05, - "loss": 0.3619, + "learning_rate": 2.739472329271527e-05, + "loss": 0.4144, "step": 28830 }, { "epoch": 1.35, - "learning_rate": 1.739018330129858e-05, - "loss": 0.3443, + "learning_rate": 2.7394255222879254e-05, + "loss": 0.2577, "step": 28835 }, { "epoch": 1.35, - "learning_rate": 1.7389714500023443e-05, - "loss": 0.0266, + "learning_rate": 2.7393787153043234e-05, + "loss": 0.1039, "step": 28840 }, { "epoch": 1.35, - "learning_rate": 1.7389245698748303e-05, - "loss": 0.0487, + "learning_rate": 2.7393319083207214e-05, + "loss": 0.0974, "step": 28845 }, { "epoch": 1.35, - "learning_rate": 1.7388776897473163e-05, - "loss": 0.0475, + "learning_rate": 2.7392851013371194e-05, + "loss": 0.075, "step": 28850 }, { "epoch": 1.35, - "learning_rate": 1.7388308096198023e-05, - "loss": 0.1119, + "learning_rate": 2.7392382943535177e-05, + "loss": 0.1481, "step": 28855 }, { "epoch": 1.35, - "learning_rate": 1.7387839294922882e-05, - "loss": 0.1051, + "learning_rate": 2.7391914873699157e-05, + "loss": 0.1497, "step": 28860 }, { "epoch": 1.35, - "learning_rate": 1.7387370493647742e-05, - "loss": 0.1066, + "learning_rate": 2.7391446803863136e-05, + "loss": 0.114, "step": 28865 }, { "epoch": 1.35, - "learning_rate": 1.7386901692372606e-05, - "loss": 0.1715, + "learning_rate": 2.739097873402712e-05, + "loss": 0.1186, "step": 28870 }, { "epoch": 1.35, - "learning_rate": 1.7386432891097466e-05, - "loss": 0.1286, + "learning_rate": 2.73905106641911e-05, + "loss": 0.218, "step": 28875 }, { "epoch": 1.35, - "learning_rate": 1.7385964089822326e-05, - "loss": 0.2792, + "learning_rate": 2.739004259435508e-05, + "loss": 0.4271, "step": 28880 }, { "epoch": 1.35, - "learning_rate": 1.738549528854719e-05, - "loss": 0.2294, + "learning_rate": 2.738957452451906e-05, + "loss": 0.3903, "step": 28885 }, { "epoch": 1.35, - "learning_rate": 1.738502648727205e-05, - "loss": 0.0443, + "learning_rate": 2.738910645468304e-05, + "loss": 0.0907, "step": 28890 }, { "epoch": 1.35, - "learning_rate": 1.738455768599691e-05, - "loss": 0.0679, + "learning_rate": 2.738863838484702e-05, + "loss": 0.0537, "step": 28895 }, { "epoch": 1.35, - "learning_rate": 1.738408888472177e-05, - "loss": 0.0798, + "learning_rate": 2.7388170315011e-05, + "loss": 0.0873, "step": 28900 }, { "epoch": 1.35, - "learning_rate": 1.738362008344663e-05, - "loss": 0.0693, + "learning_rate": 2.738770224517498e-05, + "loss": 0.0824, "step": 28905 }, { "epoch": 1.35, - "learning_rate": 1.738315128217149e-05, - "loss": 0.129, + "learning_rate": 2.738723417533896e-05, + "loss": 0.2234, "step": 28910 }, { "epoch": 1.35, - "learning_rate": 1.738268248089635e-05, - "loss": 0.1704, + "learning_rate": 2.738676610550294e-05, + "loss": 0.1144, "step": 28915 }, { "epoch": 1.35, - "learning_rate": 1.738221367962121e-05, - "loss": 0.1876, + "learning_rate": 2.738629803566692e-05, + "loss": 0.1043, "step": 28920 }, { "epoch": 1.35, - "learning_rate": 1.738174487834607e-05, - "loss": 0.3406, + "learning_rate": 2.7385829965830904e-05, + "loss": 0.3015, "step": 28925 }, { "epoch": 1.35, - "learning_rate": 1.738127607707093e-05, - "loss": 0.3488, + "learning_rate": 2.7385361895994884e-05, + "loss": 0.414, "step": 28930 }, { "epoch": 1.35, - "learning_rate": 1.738080727579579e-05, - "loss": 0.3817, + "learning_rate": 2.7384893826158864e-05, + "loss": 0.2836, "step": 28935 }, { "epoch": 1.35, - "learning_rate": 1.738033847452065e-05, - "loss": 0.0798, + "learning_rate": 2.7384425756322844e-05, + "loss": 0.0647, "step": 28940 }, { "epoch": 1.35, - "learning_rate": 1.737986967324551e-05, - "loss": 0.1087, + "learning_rate": 2.7383957686486827e-05, + "loss": 0.1362, "step": 28945 }, { "epoch": 1.35, - "learning_rate": 1.7379400871970375e-05, - "loss": 0.0793, + "learning_rate": 2.7383489616650807e-05, + "loss": 0.0785, "step": 28950 }, { "epoch": 1.35, - "learning_rate": 1.7378932070695235e-05, - "loss": 0.1104, + "learning_rate": 2.7383021546814783e-05, + "loss": 0.1368, "step": 28955 }, { "epoch": 1.35, - "learning_rate": 1.7378463269420095e-05, - "loss": 0.1382, + "learning_rate": 2.7382553476978763e-05, + "loss": 0.0939, "step": 28960 }, { "epoch": 1.35, - "learning_rate": 1.7377994468144955e-05, - "loss": 0.207, + "learning_rate": 2.7382085407142746e-05, + "loss": 0.1351, "step": 28965 }, { "epoch": 1.35, - "learning_rate": 1.7377525666869815e-05, - "loss": 0.1813, + "learning_rate": 2.7381617337306726e-05, + "loss": 0.1164, "step": 28970 }, { "epoch": 1.35, - "learning_rate": 1.7377056865594674e-05, - "loss": 0.2456, + "learning_rate": 2.7381149267470706e-05, + "loss": 0.2935, "step": 28975 }, { "epoch": 1.35, - "learning_rate": 1.7376588064319538e-05, - "loss": 0.2929, + "learning_rate": 2.738068119763469e-05, + "loss": 0.3958, "step": 28980 }, { "epoch": 1.35, - "learning_rate": 1.7376119263044398e-05, - "loss": 0.2645, + "learning_rate": 2.738021312779867e-05, + "loss": 0.2515, "step": 28985 }, { "epoch": 1.35, - "learning_rate": 1.7375650461769258e-05, - "loss": 0.0423, + "learning_rate": 2.737974505796265e-05, + "loss": 0.05, "step": 28990 }, { "epoch": 1.35, - "learning_rate": 1.7375181660494118e-05, - "loss": 0.1462, + "learning_rate": 2.737927698812663e-05, + "loss": 0.0642, "step": 28995 }, { "epoch": 1.35, - "learning_rate": 1.7374712859218978e-05, - "loss": 0.0766, + "learning_rate": 2.7378808918290612e-05, + "loss": 0.1265, "step": 29000 }, { "epoch": 1.35, - "learning_rate": 1.7374244057943837e-05, - "loss": 0.0594, + "learning_rate": 2.737834084845459e-05, + "loss": 0.2096, "step": 29005 }, { "epoch": 1.35, - "learning_rate": 1.7373775256668697e-05, - "loss": 0.1614, + "learning_rate": 2.737787277861857e-05, + "loss": 0.0948, "step": 29010 }, { "epoch": 1.35, - "learning_rate": 1.737330645539356e-05, - "loss": 0.0778, + "learning_rate": 2.737740470878255e-05, + "loss": 0.229, "step": 29015 }, { "epoch": 1.35, - "learning_rate": 1.737283765411842e-05, - "loss": 0.1516, + "learning_rate": 2.737693663894653e-05, + "loss": 0.1108, "step": 29020 }, { "epoch": 1.35, - "learning_rate": 1.737236885284328e-05, - "loss": 0.3276, + "learning_rate": 2.737646856911051e-05, + "loss": 0.1859, "step": 29025 }, { "epoch": 1.35, - "learning_rate": 1.7371900051568144e-05, - "loss": 0.3269, + "learning_rate": 2.737600049927449e-05, + "loss": 0.2927, "step": 29030 }, { "epoch": 1.35, - "learning_rate": 1.7371431250293004e-05, - "loss": 0.3945, + "learning_rate": 2.7375532429438474e-05, + "loss": 0.3511, "step": 29035 }, { "epoch": 1.36, - "learning_rate": 1.7370962449017864e-05, - "loss": 0.0705, + "learning_rate": 2.7375064359602454e-05, + "loss": 0.0414, "step": 29040 }, { "epoch": 1.36, - "learning_rate": 1.7370493647742724e-05, - "loss": 0.1158, + "learning_rate": 2.7374596289766434e-05, + "loss": 0.0578, "step": 29045 }, { "epoch": 1.36, - "learning_rate": 1.7370024846467584e-05, - "loss": 0.0786, + "learning_rate": 2.7374128219930413e-05, + "loss": 0.0689, "step": 29050 }, { "epoch": 1.36, - "learning_rate": 1.7369556045192444e-05, - "loss": 0.1491, + "learning_rate": 2.7373660150094397e-05, + "loss": 0.1956, "step": 29055 }, { "epoch": 1.36, - "learning_rate": 1.7369087243917303e-05, - "loss": 0.1254, + "learning_rate": 2.7373192080258376e-05, + "loss": 0.1061, "step": 29060 }, { "epoch": 1.36, - "learning_rate": 1.7368618442642163e-05, - "loss": 0.0802, + "learning_rate": 2.7372724010422356e-05, + "loss": 0.0428, "step": 29065 }, { "epoch": 1.36, - "learning_rate": 1.7368149641367027e-05, - "loss": 0.2187, + "learning_rate": 2.7372255940586336e-05, + "loss": 0.2066, "step": 29070 }, { "epoch": 1.36, - "learning_rate": 1.7367680840091887e-05, - "loss": 0.1845, + "learning_rate": 2.737178787075032e-05, + "loss": 0.1464, "step": 29075 }, { "epoch": 1.36, - "learning_rate": 1.7367212038816747e-05, - "loss": 0.4131, + "learning_rate": 2.7371319800914296e-05, + "loss": 0.2196, "step": 29080 }, { "epoch": 1.36, - "learning_rate": 1.7366743237541607e-05, - "loss": 0.2237, + "learning_rate": 2.7370851731078275e-05, + "loss": 0.3011, "step": 29085 }, { "epoch": 1.36, - "learning_rate": 1.7366274436266466e-05, - "loss": 0.0269, + "learning_rate": 2.7370383661242255e-05, + "loss": 0.0939, "step": 29090 }, { "epoch": 1.36, - "learning_rate": 1.736580563499133e-05, - "loss": 0.1229, + "learning_rate": 2.736991559140624e-05, + "loss": 0.0834, "step": 29095 }, { "epoch": 1.36, - "learning_rate": 1.736533683371619e-05, - "loss": 0.0902, + "learning_rate": 2.736944752157022e-05, + "loss": 0.0629, "step": 29100 }, { "epoch": 1.36, - "learning_rate": 1.736486803244105e-05, - "loss": 0.148, + "learning_rate": 2.7368979451734198e-05, + "loss": 0.0954, "step": 29105 }, { "epoch": 1.36, - "learning_rate": 1.736439923116591e-05, - "loss": 0.109, + "learning_rate": 2.736851138189818e-05, + "loss": 0.1095, "step": 29110 }, { "epoch": 1.36, - "learning_rate": 1.7363930429890773e-05, - "loss": 0.1508, + "learning_rate": 2.736804331206216e-05, + "loss": 0.1453, "step": 29115 }, { "epoch": 1.36, - "learning_rate": 1.7363461628615633e-05, - "loss": 0.1622, + "learning_rate": 2.736757524222614e-05, + "loss": 0.2119, "step": 29120 }, { "epoch": 1.36, - "learning_rate": 1.7362992827340493e-05, - "loss": 0.2647, + "learning_rate": 2.736710717239012e-05, + "loss": 0.278, "step": 29125 }, { "epoch": 1.36, - "learning_rate": 1.7362524026065353e-05, - "loss": 0.2541, + "learning_rate": 2.7366639102554104e-05, + "loss": 0.2615, "step": 29130 }, { "epoch": 1.36, - "learning_rate": 1.7362055224790213e-05, - "loss": 0.2231, + "learning_rate": 2.7366171032718084e-05, + "loss": 0.3442, "step": 29135 }, { "epoch": 1.36, - "learning_rate": 1.7361586423515073e-05, - "loss": 0.0197, + "learning_rate": 2.7365702962882064e-05, + "loss": 0.0458, "step": 29140 }, { "epoch": 1.36, - "learning_rate": 1.7361117622239933e-05, - "loss": 0.0373, + "learning_rate": 2.736523489304604e-05, + "loss": 0.0862, "step": 29145 }, { "epoch": 1.36, - "learning_rate": 1.7360648820964792e-05, - "loss": 0.0942, + "learning_rate": 2.7364766823210023e-05, + "loss": 0.0631, "step": 29150 }, { "epoch": 1.36, - "learning_rate": 1.7360180019689656e-05, - "loss": 0.0481, + "learning_rate": 2.7364298753374003e-05, + "loss": 0.033, "step": 29155 }, { "epoch": 1.36, - "learning_rate": 1.7359711218414516e-05, - "loss": 0.1787, + "learning_rate": 2.7363830683537983e-05, + "loss": 0.0917, "step": 29160 }, { "epoch": 1.36, - "learning_rate": 1.7359242417139376e-05, - "loss": 0.1285, + "learning_rate": 2.7363362613701966e-05, + "loss": 0.1551, "step": 29165 }, { "epoch": 1.36, - "learning_rate": 1.7358773615864236e-05, - "loss": 0.2024, + "learning_rate": 2.7362894543865946e-05, + "loss": 0.1307, "step": 29170 }, { "epoch": 1.36, - "learning_rate": 1.73583048145891e-05, - "loss": 0.2614, + "learning_rate": 2.7362426474029926e-05, + "loss": 0.2624, "step": 29175 }, { "epoch": 1.36, - "learning_rate": 1.735783601331396e-05, - "loss": 0.2649, + "learning_rate": 2.7361958404193906e-05, + "loss": 0.3407, "step": 29180 }, { "epoch": 1.36, - "learning_rate": 1.735736721203882e-05, - "loss": 0.2944, + "learning_rate": 2.736149033435789e-05, + "loss": 0.2735, "step": 29185 }, { "epoch": 1.36, - "learning_rate": 1.735689841076368e-05, - "loss": 0.0346, + "learning_rate": 2.736102226452187e-05, + "loss": 0.0644, "step": 29190 }, { "epoch": 1.36, - "learning_rate": 1.735642960948854e-05, - "loss": 0.1004, + "learning_rate": 2.736055419468585e-05, + "loss": 0.0732, "step": 29195 }, { "epoch": 1.36, - "learning_rate": 1.73559608082134e-05, - "loss": 0.1003, + "learning_rate": 2.7360086124849828e-05, + "loss": 0.0568, "step": 29200 }, { "epoch": 1.36, - "learning_rate": 1.735549200693826e-05, - "loss": 0.1607, + "learning_rate": 2.735961805501381e-05, + "loss": 0.0936, "step": 29205 }, { "epoch": 1.36, - "learning_rate": 1.7355023205663122e-05, - "loss": 0.153, + "learning_rate": 2.7359149985177788e-05, + "loss": 0.0682, "step": 29210 }, { "epoch": 1.36, - "learning_rate": 1.7354554404387982e-05, - "loss": 0.122, + "learning_rate": 2.7358681915341768e-05, + "loss": 0.2085, "step": 29215 }, { "epoch": 1.36, - "learning_rate": 1.7354085603112842e-05, - "loss": 0.2213, + "learning_rate": 2.735821384550575e-05, + "loss": 0.1262, "step": 29220 }, { "epoch": 1.36, - "learning_rate": 1.73536168018377e-05, - "loss": 0.2523, + "learning_rate": 2.735774577566973e-05, + "loss": 0.1878, "step": 29225 }, { "epoch": 1.36, - "learning_rate": 1.735314800056256e-05, - "loss": 0.4636, + "learning_rate": 2.735727770583371e-05, + "loss": 0.3107, "step": 29230 }, { "epoch": 1.36, - "learning_rate": 1.7352679199287425e-05, - "loss": 0.3613, + "learning_rate": 2.735680963599769e-05, + "loss": 0.3274, "step": 29235 }, { "epoch": 1.36, - "learning_rate": 1.7352210398012285e-05, - "loss": 0.0464, + "learning_rate": 2.7356341566161674e-05, + "loss": 0.0723, "step": 29240 }, { "epoch": 1.36, - "learning_rate": 1.7351741596737145e-05, - "loss": 0.0687, + "learning_rate": 2.7355873496325653e-05, + "loss": 0.0393, "step": 29245 }, { "epoch": 1.36, - "learning_rate": 1.7351272795462005e-05, - "loss": 0.0908, + "learning_rate": 2.7355405426489633e-05, + "loss": 0.0706, "step": 29250 }, { "epoch": 1.37, - "learning_rate": 1.7350803994186868e-05, - "loss": 0.1691, + "learning_rate": 2.7354937356653613e-05, + "loss": 0.1295, "step": 29255 }, { "epoch": 1.37, - "learning_rate": 1.7350335192911728e-05, - "loss": 0.1535, + "learning_rate": 2.7354469286817596e-05, + "loss": 0.1177, "step": 29260 }, { "epoch": 1.37, - "learning_rate": 1.7349866391636588e-05, - "loss": 0.205, + "learning_rate": 2.7354001216981576e-05, + "loss": 0.1223, "step": 29265 }, { "epoch": 1.37, - "learning_rate": 1.7349397590361448e-05, - "loss": 0.1682, + "learning_rate": 2.7353533147145552e-05, + "loss": 0.2594, "step": 29270 }, { "epoch": 1.37, - "learning_rate": 1.7348928789086308e-05, - "loss": 0.3615, + "learning_rate": 2.7353065077309532e-05, + "loss": 0.1623, "step": 29275 }, { "epoch": 1.37, - "learning_rate": 1.7348459987811168e-05, - "loss": 0.3168, + "learning_rate": 2.7352597007473515e-05, + "loss": 0.3219, "step": 29280 }, { "epoch": 1.37, - "learning_rate": 1.7347991186536028e-05, - "loss": 0.3735, + "learning_rate": 2.7352128937637495e-05, + "loss": 0.2786, "step": 29285 }, { "epoch": 1.37, - "learning_rate": 1.7347522385260888e-05, - "loss": 0.0129, + "learning_rate": 2.7351660867801475e-05, + "loss": 0.0947, "step": 29290 }, { "epoch": 1.37, - "learning_rate": 1.7347053583985747e-05, - "loss": 0.0673, + "learning_rate": 2.7351192797965458e-05, + "loss": 0.05, "step": 29295 }, { "epoch": 1.37, - "learning_rate": 1.734658478271061e-05, - "loss": 0.0334, + "learning_rate": 2.7350724728129438e-05, + "loss": 0.0553, "step": 29300 }, { "epoch": 1.37, - "learning_rate": 1.734611598143547e-05, - "loss": 0.1941, + "learning_rate": 2.7350256658293418e-05, + "loss": 0.1859, "step": 29305 }, { "epoch": 1.37, - "learning_rate": 1.734564718016033e-05, - "loss": 0.1482, + "learning_rate": 2.7349788588457398e-05, + "loss": 0.1293, "step": 29310 }, { "epoch": 1.37, - "learning_rate": 1.7345178378885194e-05, - "loss": 0.1165, + "learning_rate": 2.734932051862138e-05, + "loss": 0.1753, "step": 29315 }, { "epoch": 1.37, - "learning_rate": 1.7344709577610054e-05, - "loss": 0.245, + "learning_rate": 2.734885244878536e-05, + "loss": 0.2847, "step": 29320 }, { "epoch": 1.37, - "learning_rate": 1.7344240776334914e-05, - "loss": 0.2832, + "learning_rate": 2.734838437894934e-05, + "loss": 0.3092, "step": 29325 }, { "epoch": 1.37, - "learning_rate": 1.7343771975059774e-05, - "loss": 0.6608, + "learning_rate": 2.734791630911332e-05, + "loss": 0.2419, "step": 29330 }, { "epoch": 1.37, - "learning_rate": 1.7343303173784634e-05, - "loss": 0.5553, + "learning_rate": 2.73474482392773e-05, + "loss": 0.3635, "step": 29335 }, { "epoch": 1.37, - "learning_rate": 1.7342834372509494e-05, - "loss": 0.0925, + "learning_rate": 2.734698016944128e-05, + "loss": 0.0397, "step": 29340 }, { "epoch": 1.37, - "learning_rate": 1.7342365571234354e-05, - "loss": 0.1114, + "learning_rate": 2.734651209960526e-05, + "loss": 0.0611, "step": 29345 }, { "epoch": 1.37, - "learning_rate": 1.7341896769959217e-05, - "loss": 0.0405, + "learning_rate": 2.7346044029769243e-05, + "loss": 0.074, "step": 29350 }, { "epoch": 1.37, - "learning_rate": 1.7341427968684077e-05, - "loss": 0.1301, + "learning_rate": 2.7345575959933223e-05, + "loss": 0.0998, "step": 29355 }, { "epoch": 1.37, - "learning_rate": 1.7340959167408937e-05, - "loss": 0.1224, + "learning_rate": 2.7345107890097203e-05, + "loss": 0.1043, "step": 29360 }, { "epoch": 1.37, - "learning_rate": 1.7340490366133797e-05, - "loss": 0.1806, + "learning_rate": 2.7344639820261183e-05, + "loss": 0.1502, "step": 29365 }, { "epoch": 1.37, - "learning_rate": 1.7340021564858657e-05, - "loss": 0.1208, + "learning_rate": 2.7344171750425166e-05, + "loss": 0.1774, "step": 29370 }, { "epoch": 1.37, - "learning_rate": 1.7339552763583517e-05, - "loss": 0.2126, + "learning_rate": 2.7343703680589146e-05, + "loss": 0.2189, "step": 29375 }, { "epoch": 1.37, - "learning_rate": 1.733908396230838e-05, - "loss": 0.2936, + "learning_rate": 2.7343235610753125e-05, + "loss": 0.3716, "step": 29380 }, { "epoch": 1.37, - "learning_rate": 1.733861516103324e-05, - "loss": 0.3515, + "learning_rate": 2.7342767540917105e-05, + "loss": 0.2224, "step": 29385 }, { "epoch": 1.37, - "learning_rate": 1.73381463597581e-05, - "loss": 0.1816, + "learning_rate": 2.734229947108109e-05, + "loss": 0.0402, "step": 29390 }, { "epoch": 1.37, - "learning_rate": 1.7337677558482963e-05, - "loss": 0.065, + "learning_rate": 2.7341831401245068e-05, + "loss": 0.127, "step": 29395 }, { "epoch": 1.37, - "learning_rate": 1.7337208757207823e-05, - "loss": 0.089, + "learning_rate": 2.7341363331409045e-05, + "loss": 0.1143, "step": 29400 }, { "epoch": 1.37, - "learning_rate": 1.7336739955932683e-05, - "loss": 0.0681, + "learning_rate": 2.7340895261573028e-05, + "loss": 0.1461, "step": 29405 }, { "epoch": 1.37, - "learning_rate": 1.7336271154657543e-05, - "loss": 0.1273, + "learning_rate": 2.7340427191737008e-05, + "loss": 0.1332, "step": 29410 }, { "epoch": 1.37, - "learning_rate": 1.7335802353382403e-05, - "loss": 0.1598, + "learning_rate": 2.7339959121900987e-05, + "loss": 0.1332, "step": 29415 }, { "epoch": 1.37, - "learning_rate": 1.7335333552107263e-05, - "loss": 0.1456, + "learning_rate": 2.7339491052064967e-05, + "loss": 0.0964, "step": 29420 }, { "epoch": 1.37, - "learning_rate": 1.7334864750832123e-05, - "loss": 0.275, + "learning_rate": 2.733902298222895e-05, + "loss": 0.2352, "step": 29425 }, { "epoch": 1.37, - "learning_rate": 1.7334395949556983e-05, - "loss": 0.314, + "learning_rate": 2.733855491239293e-05, + "loss": 0.3506, "step": 29430 }, { "epoch": 1.37, - "learning_rate": 1.7333927148281843e-05, - "loss": 0.298, + "learning_rate": 2.733808684255691e-05, + "loss": 0.2323, "step": 29435 }, { "epoch": 1.37, - "learning_rate": 1.7333458347006706e-05, - "loss": 0.0133, + "learning_rate": 2.733761877272089e-05, + "loss": 0.0665, "step": 29440 }, { "epoch": 1.37, - "learning_rate": 1.7332989545731566e-05, - "loss": 0.0531, + "learning_rate": 2.7337150702884873e-05, + "loss": 0.0533, "step": 29445 }, { "epoch": 1.37, - "learning_rate": 1.7332520744456426e-05, - "loss": 0.0883, + "learning_rate": 2.7336682633048853e-05, + "loss": 0.1063, "step": 29450 }, { "epoch": 1.37, - "learning_rate": 1.7332051943181286e-05, - "loss": 0.0872, + "learning_rate": 2.7336214563212833e-05, + "loss": 0.1773, "step": 29455 }, { "epoch": 1.37, - "learning_rate": 1.733158314190615e-05, - "loss": 0.085, + "learning_rate": 2.733574649337681e-05, + "loss": 0.0982, "step": 29460 }, { "epoch": 1.37, - "learning_rate": 1.733111434063101e-05, - "loss": 0.1929, + "learning_rate": 2.7335278423540792e-05, + "loss": 0.112, "step": 29465 }, { "epoch": 1.38, - "learning_rate": 1.733064553935587e-05, - "loss": 0.1555, + "learning_rate": 2.7334810353704772e-05, + "loss": 0.2218, "step": 29470 }, { "epoch": 1.38, - "learning_rate": 1.733017673808073e-05, - "loss": 0.1887, + "learning_rate": 2.7334342283868752e-05, + "loss": 0.1661, "step": 29475 }, { "epoch": 1.38, - "learning_rate": 1.732970793680559e-05, - "loss": 0.241, + "learning_rate": 2.7333874214032735e-05, + "loss": 0.2736, "step": 29480 }, { "epoch": 1.38, - "learning_rate": 1.732923913553045e-05, - "loss": 0.2993, + "learning_rate": 2.7333406144196715e-05, + "loss": 0.2688, "step": 29485 }, { "epoch": 1.38, - "learning_rate": 1.7328770334255312e-05, - "loss": 0.0399, + "learning_rate": 2.7332938074360695e-05, + "loss": 0.0811, "step": 29490 }, { "epoch": 1.38, - "learning_rate": 1.7328301532980172e-05, - "loss": 0.0674, + "learning_rate": 2.7332470004524675e-05, + "loss": 0.0763, "step": 29495 }, { "epoch": 1.38, - "learning_rate": 1.7327832731705032e-05, - "loss": 0.068, + "learning_rate": 2.7332001934688658e-05, + "loss": 0.0626, "step": 29500 }, { "epoch": 1.38, - "learning_rate": 1.7327363930429892e-05, - "loss": 0.1605, + "learning_rate": 2.7331533864852638e-05, + "loss": 0.1125, "step": 29505 }, { "epoch": 1.38, - "learning_rate": 1.7326895129154752e-05, - "loss": 0.095, + "learning_rate": 2.7331065795016618e-05, + "loss": 0.1497, "step": 29510 }, { "epoch": 1.38, - "learning_rate": 1.732642632787961e-05, - "loss": 0.1541, + "learning_rate": 2.7330597725180597e-05, + "loss": 0.1313, "step": 29515 }, { "epoch": 1.38, - "learning_rate": 1.732595752660447e-05, - "loss": 0.197, + "learning_rate": 2.733012965534458e-05, + "loss": 0.1472, "step": 29520 }, { "epoch": 1.38, - "learning_rate": 1.7325488725329335e-05, - "loss": 0.1531, + "learning_rate": 2.7329661585508557e-05, + "loss": 0.2164, "step": 29525 }, { "epoch": 1.38, - "learning_rate": 1.7325019924054195e-05, - "loss": 0.206, + "learning_rate": 2.7329193515672537e-05, + "loss": 0.3979, "step": 29530 }, { "epoch": 1.38, - "learning_rate": 1.7324551122779055e-05, - "loss": 0.2874, + "learning_rate": 2.732872544583652e-05, + "loss": 0.3194, "step": 29535 }, { "epoch": 1.38, - "learning_rate": 1.7324082321503918e-05, - "loss": 0.0179, + "learning_rate": 2.73282573760005e-05, + "loss": 0.0541, "step": 29540 }, { "epoch": 1.38, - "learning_rate": 1.7323613520228778e-05, - "loss": 0.0721, + "learning_rate": 2.732778930616448e-05, + "loss": 0.0625, "step": 29545 }, { "epoch": 1.38, - "learning_rate": 1.7323144718953638e-05, - "loss": 0.0472, + "learning_rate": 2.732732123632846e-05, + "loss": 0.0817, "step": 29550 }, { "epoch": 1.38, - "learning_rate": 1.7322675917678498e-05, - "loss": 0.0831, + "learning_rate": 2.7326853166492443e-05, + "loss": 0.0807, "step": 29555 }, { "epoch": 1.38, - "learning_rate": 1.7322207116403358e-05, - "loss": 0.0901, + "learning_rate": 2.7326385096656423e-05, + "loss": 0.1262, "step": 29560 }, { "epoch": 1.38, - "learning_rate": 1.7321738315128218e-05, - "loss": 0.1381, + "learning_rate": 2.7325917026820402e-05, + "loss": 0.234, "step": 29565 }, { "epoch": 1.38, - "learning_rate": 1.7321269513853078e-05, - "loss": 0.2179, + "learning_rate": 2.7325448956984382e-05, + "loss": 0.1336, "step": 29570 }, { "epoch": 1.38, - "learning_rate": 1.7320800712577938e-05, - "loss": 0.298, + "learning_rate": 2.7324980887148365e-05, + "loss": 0.2085, "step": 29575 }, { "epoch": 1.38, - "learning_rate": 1.73203319113028e-05, - "loss": 0.3747, + "learning_rate": 2.7324512817312345e-05, + "loss": 0.4152, "step": 29580 }, { "epoch": 1.38, - "learning_rate": 1.731986311002766e-05, - "loss": 0.2943, + "learning_rate": 2.7324044747476325e-05, + "loss": 0.2356, "step": 29585 }, { "epoch": 1.38, - "learning_rate": 1.731939430875252e-05, - "loss": 0.0136, + "learning_rate": 2.7323576677640305e-05, + "loss": 0.0428, "step": 29590 }, { "epoch": 1.38, - "learning_rate": 1.731892550747738e-05, - "loss": 0.1105, + "learning_rate": 2.7323108607804285e-05, + "loss": 0.0449, "step": 29595 }, { "epoch": 1.38, - "learning_rate": 1.731845670620224e-05, - "loss": 0.104, + "learning_rate": 2.7322640537968264e-05, + "loss": 0.067, "step": 29600 }, { "epoch": 1.38, - "learning_rate": 1.7317987904927104e-05, - "loss": 0.0937, + "learning_rate": 2.7322172468132244e-05, + "loss": 0.0813, "step": 29605 }, { "epoch": 1.38, - "learning_rate": 1.7317519103651964e-05, - "loss": 0.0857, + "learning_rate": 2.7321704398296227e-05, + "loss": 0.1189, "step": 29610 }, { "epoch": 1.38, - "learning_rate": 1.7317050302376824e-05, - "loss": 0.2497, + "learning_rate": 2.7321236328460207e-05, + "loss": 0.1537, "step": 29615 }, { "epoch": 1.38, - "learning_rate": 1.7316581501101684e-05, - "loss": 0.2124, + "learning_rate": 2.7320768258624187e-05, + "loss": 0.1681, "step": 29620 }, { "epoch": 1.38, - "learning_rate": 1.7316112699826544e-05, - "loss": 0.2004, + "learning_rate": 2.7320300188788167e-05, + "loss": 0.1898, "step": 29625 }, { "epoch": 1.38, - "learning_rate": 1.7315643898551407e-05, - "loss": 0.5103, + "learning_rate": 2.731983211895215e-05, + "loss": 0.3343, "step": 29630 }, { "epoch": 1.38, - "learning_rate": 1.7315175097276267e-05, - "loss": 0.2454, + "learning_rate": 2.731936404911613e-05, + "loss": 0.2255, "step": 29635 }, { "epoch": 1.38, - "learning_rate": 1.7314706296001127e-05, - "loss": 0.0501, + "learning_rate": 2.731889597928011e-05, + "loss": 0.0407, "step": 29640 }, { "epoch": 1.38, - "learning_rate": 1.7314237494725987e-05, - "loss": 0.0609, + "learning_rate": 2.7318427909444093e-05, + "loss": 0.0766, "step": 29645 }, { "epoch": 1.38, - "learning_rate": 1.7313768693450847e-05, - "loss": 0.1009, + "learning_rate": 2.731795983960807e-05, + "loss": 0.089, "step": 29650 }, { "epoch": 1.38, - "learning_rate": 1.7313299892175707e-05, - "loss": 0.1467, + "learning_rate": 2.731749176977205e-05, + "loss": 0.0971, "step": 29655 }, { "epoch": 1.38, - "learning_rate": 1.7312831090900567e-05, - "loss": 0.1255, + "learning_rate": 2.731702369993603e-05, + "loss": 0.0827, "step": 29660 }, { "epoch": 1.38, - "learning_rate": 1.731236228962543e-05, - "loss": 0.2066, + "learning_rate": 2.7316555630100012e-05, + "loss": 0.1651, "step": 29665 }, { "epoch": 1.38, - "learning_rate": 1.731189348835029e-05, - "loss": 0.1712, + "learning_rate": 2.7316087560263992e-05, + "loss": 0.2248, "step": 29670 }, { "epoch": 1.38, - "learning_rate": 1.731142468707515e-05, - "loss": 0.2046, + "learning_rate": 2.7315619490427972e-05, + "loss": 0.1914, "step": 29675 }, { "epoch": 1.38, - "learning_rate": 1.7310955885800013e-05, - "loss": 0.2975, + "learning_rate": 2.7315151420591952e-05, + "loss": 0.3703, "step": 29680 }, { "epoch": 1.39, - "learning_rate": 1.7310487084524873e-05, - "loss": 0.2379, + "learning_rate": 2.7314683350755935e-05, + "loss": 0.2352, "step": 29685 }, { "epoch": 1.39, - "learning_rate": 1.7310018283249733e-05, - "loss": 0.0619, + "learning_rate": 2.7314215280919915e-05, + "loss": 0.0698, "step": 29690 }, { "epoch": 1.39, - "learning_rate": 1.7309549481974593e-05, - "loss": 0.0921, + "learning_rate": 2.7313747211083895e-05, + "loss": 0.0649, "step": 29695 }, { "epoch": 1.39, - "learning_rate": 1.7309080680699453e-05, - "loss": 0.0904, + "learning_rate": 2.7313279141247874e-05, + "loss": 0.1088, "step": 29700 }, { "epoch": 1.39, - "learning_rate": 1.7308611879424313e-05, - "loss": 0.1603, + "learning_rate": 2.7312811071411858e-05, + "loss": 0.117, "step": 29705 }, { "epoch": 1.39, - "learning_rate": 1.7308143078149173e-05, - "loss": 0.1683, + "learning_rate": 2.7312343001575837e-05, + "loss": 0.0865, "step": 29710 }, { "epoch": 1.39, - "learning_rate": 1.7307674276874033e-05, - "loss": 0.2013, + "learning_rate": 2.7311874931739814e-05, + "loss": 0.19, "step": 29715 }, { "epoch": 1.39, - "learning_rate": 1.7307205475598896e-05, - "loss": 0.193, + "learning_rate": 2.7311406861903797e-05, + "loss": 0.1368, "step": 29720 }, { "epoch": 1.39, - "learning_rate": 1.7306736674323756e-05, - "loss": 0.2606, + "learning_rate": 2.7310938792067777e-05, + "loss": 0.2456, "step": 29725 }, { "epoch": 1.39, - "learning_rate": 1.7306267873048616e-05, - "loss": 0.4187, + "learning_rate": 2.7310470722231757e-05, + "loss": 0.2816, "step": 29730 }, { "epoch": 1.39, - "learning_rate": 1.7305799071773476e-05, - "loss": 0.2754, + "learning_rate": 2.7310002652395736e-05, + "loss": 0.2871, "step": 29735 }, { "epoch": 1.39, - "learning_rate": 1.7305330270498336e-05, - "loss": 0.0698, + "learning_rate": 2.730953458255972e-05, + "loss": 0.0209, "step": 29740 }, { "epoch": 1.39, - "learning_rate": 1.73048614692232e-05, - "loss": 0.053, + "learning_rate": 2.73090665127237e-05, + "loss": 0.0584, "step": 29745 }, { "epoch": 1.39, - "learning_rate": 1.730439266794806e-05, - "loss": 0.0753, + "learning_rate": 2.730859844288768e-05, + "loss": 0.0264, "step": 29750 }, { "epoch": 1.39, - "learning_rate": 1.730392386667292e-05, - "loss": 0.1207, + "learning_rate": 2.730813037305166e-05, + "loss": 0.0675, "step": 29755 }, { "epoch": 1.39, - "learning_rate": 1.730345506539778e-05, - "loss": 0.1284, + "learning_rate": 2.7307662303215642e-05, + "loss": 0.1525, "step": 29760 }, { "epoch": 1.39, - "learning_rate": 1.7302986264122642e-05, - "loss": 0.1345, + "learning_rate": 2.7307194233379622e-05, + "loss": 0.1282, "step": 29765 }, { "epoch": 1.39, - "learning_rate": 1.7302517462847502e-05, - "loss": 0.1827, + "learning_rate": 2.7306726163543602e-05, + "loss": 0.1607, "step": 29770 }, { "epoch": 1.39, - "learning_rate": 1.7302048661572362e-05, - "loss": 0.1537, + "learning_rate": 2.7306258093707582e-05, + "loss": 0.1726, "step": 29775 }, { "epoch": 1.39, - "learning_rate": 1.7301579860297222e-05, - "loss": 0.3358, + "learning_rate": 2.730579002387156e-05, + "loss": 0.3153, "step": 29780 }, { "epoch": 1.39, - "learning_rate": 1.7301111059022082e-05, - "loss": 0.2873, + "learning_rate": 2.730532195403554e-05, + "loss": 0.3799, "step": 29785 }, { "epoch": 1.39, - "learning_rate": 1.7300642257746942e-05, - "loss": 0.0583, + "learning_rate": 2.730485388419952e-05, + "loss": 0.0519, "step": 29790 }, { "epoch": 1.39, - "learning_rate": 1.7300173456471802e-05, - "loss": 0.0595, + "learning_rate": 2.7304385814363504e-05, + "loss": 0.0807, "step": 29795 }, { "epoch": 1.39, - "learning_rate": 1.7299704655196662e-05, - "loss": 0.0834, + "learning_rate": 2.7303917744527484e-05, + "loss": 0.0703, "step": 29800 }, { "epoch": 1.39, - "learning_rate": 1.7299235853921522e-05, - "loss": 0.0797, + "learning_rate": 2.7303449674691464e-05, + "loss": 0.1323, "step": 29805 }, { "epoch": 1.39, - "learning_rate": 1.7298767052646385e-05, - "loss": 0.0914, + "learning_rate": 2.7302981604855444e-05, + "loss": 0.1327, "step": 29810 }, { "epoch": 1.39, - "learning_rate": 1.7298298251371245e-05, - "loss": 0.1655, + "learning_rate": 2.7302513535019427e-05, + "loss": 0.1007, "step": 29815 }, { "epoch": 1.39, - "learning_rate": 1.7297829450096105e-05, - "loss": 0.383, + "learning_rate": 2.7302045465183407e-05, + "loss": 0.1576, "step": 29820 }, { "epoch": 1.39, - "learning_rate": 1.7297360648820968e-05, - "loss": 0.2981, + "learning_rate": 2.7301577395347387e-05, + "loss": 0.303, "step": 29825 }, { "epoch": 1.39, - "learning_rate": 1.7296891847545828e-05, - "loss": 0.3188, + "learning_rate": 2.730110932551137e-05, + "loss": 0.4647, "step": 29830 }, { "epoch": 1.39, - "learning_rate": 1.7296423046270688e-05, - "loss": 0.3305, + "learning_rate": 2.730064125567535e-05, + "loss": 0.2118, "step": 29835 }, { "epoch": 1.39, - "learning_rate": 1.7295954244995548e-05, - "loss": 0.0652, + "learning_rate": 2.7300173185839326e-05, + "loss": 0.0858, "step": 29840 }, { "epoch": 1.39, - "learning_rate": 1.7295485443720408e-05, - "loss": 0.0954, + "learning_rate": 2.7299705116003306e-05, + "loss": 0.1213, "step": 29845 }, { "epoch": 1.39, - "learning_rate": 1.7295016642445268e-05, - "loss": 0.1109, + "learning_rate": 2.729923704616729e-05, + "loss": 0.067, "step": 29850 }, { "epoch": 1.39, - "learning_rate": 1.7294547841170128e-05, - "loss": 0.091, + "learning_rate": 2.729876897633127e-05, + "loss": 0.1014, "step": 29855 }, { "epoch": 1.39, - "learning_rate": 1.729407903989499e-05, - "loss": 0.1538, + "learning_rate": 2.729830090649525e-05, + "loss": 0.0869, "step": 29860 }, { "epoch": 1.39, - "learning_rate": 1.729361023861985e-05, - "loss": 0.1593, + "learning_rate": 2.729783283665923e-05, + "loss": 0.0445, "step": 29865 }, { "epoch": 1.39, - "learning_rate": 1.729314143734471e-05, - "loss": 0.1569, + "learning_rate": 2.7297364766823212e-05, + "loss": 0.1358, "step": 29870 }, { "epoch": 1.39, - "learning_rate": 1.729267263606957e-05, - "loss": 0.2395, + "learning_rate": 2.729689669698719e-05, + "loss": 0.1666, "step": 29875 }, { "epoch": 1.39, - "learning_rate": 1.729220383479443e-05, - "loss": 0.3388, + "learning_rate": 2.729642862715117e-05, + "loss": 0.2982, "step": 29880 }, { "epoch": 1.39, - "learning_rate": 1.729173503351929e-05, - "loss": 0.3125, + "learning_rate": 2.729596055731515e-05, + "loss": 0.2161, "step": 29885 }, { "epoch": 1.39, - "learning_rate": 1.7291266232244154e-05, - "loss": 0.0445, + "learning_rate": 2.7295492487479135e-05, + "loss": 0.0664, "step": 29890 }, { "epoch": 1.39, - "learning_rate": 1.7290797430969014e-05, - "loss": 0.0567, + "learning_rate": 2.7295024417643114e-05, + "loss": 0.0384, "step": 29895 }, { "epoch": 1.4, - "learning_rate": 1.7290328629693874e-05, - "loss": 0.0666, + "learning_rate": 2.7294556347807094e-05, + "loss": 0.0762, "step": 29900 }, { "epoch": 1.4, - "learning_rate": 1.7289859828418737e-05, - "loss": 0.1269, + "learning_rate": 2.7294088277971074e-05, + "loss": 0.0866, "step": 29905 }, { "epoch": 1.4, - "learning_rate": 1.7289391027143597e-05, - "loss": 0.0666, + "learning_rate": 2.7293620208135054e-05, + "loss": 0.103, "step": 29910 }, { "epoch": 1.4, - "learning_rate": 1.7288922225868457e-05, - "loss": 0.1662, + "learning_rate": 2.7293152138299034e-05, + "loss": 0.1467, "step": 29915 }, { "epoch": 1.4, - "learning_rate": 1.7288453424593317e-05, - "loss": 0.2673, + "learning_rate": 2.7292684068463013e-05, + "loss": 0.142, "step": 29920 }, { "epoch": 1.4, - "learning_rate": 1.7287984623318177e-05, - "loss": 0.2463, + "learning_rate": 2.7292215998626997e-05, + "loss": 0.1852, "step": 29925 }, { "epoch": 1.4, - "learning_rate": 1.7287515822043037e-05, - "loss": 0.4108, + "learning_rate": 2.7291747928790976e-05, + "loss": 0.3194, "step": 29930 }, { "epoch": 1.4, - "learning_rate": 1.7287047020767897e-05, - "loss": 0.3232, + "learning_rate": 2.7291279858954956e-05, + "loss": 0.2362, "step": 29935 }, { "epoch": 1.4, - "learning_rate": 1.7286578219492757e-05, - "loss": 0.057, + "learning_rate": 2.7290811789118936e-05, + "loss": 0.0778, "step": 29940 }, { "epoch": 1.4, - "learning_rate": 1.7286109418217617e-05, - "loss": 0.0386, + "learning_rate": 2.729034371928292e-05, + "loss": 0.0882, "step": 29945 }, { "epoch": 1.4, - "learning_rate": 1.728564061694248e-05, - "loss": 0.0733, + "learning_rate": 2.72898756494469e-05, + "loss": 0.046, "step": 29950 }, { "epoch": 1.4, - "learning_rate": 1.728517181566734e-05, - "loss": 0.0618, + "learning_rate": 2.728940757961088e-05, + "loss": 0.1595, "step": 29955 }, { "epoch": 1.4, - "learning_rate": 1.72847030143922e-05, - "loss": 0.1045, + "learning_rate": 2.7288939509774862e-05, + "loss": 0.1162, "step": 29960 }, { "epoch": 1.4, - "learning_rate": 1.728423421311706e-05, - "loss": 0.1213, + "learning_rate": 2.728847143993884e-05, + "loss": 0.1399, "step": 29965 }, { "epoch": 1.4, - "learning_rate": 1.7283765411841923e-05, - "loss": 0.1967, + "learning_rate": 2.728800337010282e-05, + "loss": 0.1863, "step": 29970 }, { "epoch": 1.4, - "learning_rate": 1.7283296610566783e-05, - "loss": 0.1985, + "learning_rate": 2.7287535300266798e-05, + "loss": 0.1735, "step": 29975 }, { "epoch": 1.4, - "learning_rate": 1.7282827809291643e-05, - "loss": 0.2068, + "learning_rate": 2.728706723043078e-05, + "loss": 0.4845, "step": 29980 }, { "epoch": 1.4, - "learning_rate": 1.7282359008016503e-05, - "loss": 0.3445, + "learning_rate": 2.728659916059476e-05, + "loss": 0.3567, "step": 29985 }, { "epoch": 1.4, - "learning_rate": 1.7281890206741363e-05, - "loss": 0.0561, + "learning_rate": 2.728613109075874e-05, + "loss": 0.054, "step": 29990 }, { "epoch": 1.4, - "learning_rate": 1.7281421405466223e-05, - "loss": 0.049, + "learning_rate": 2.728566302092272e-05, + "loss": 0.0953, "step": 29995 }, { "epoch": 1.4, - "learning_rate": 1.7280952604191086e-05, - "loss": 0.0819, + "learning_rate": 2.7285194951086704e-05, + "loss": 0.111, "step": 30000 }, { "epoch": 1.4, - "learning_rate": 1.7280483802915946e-05, - "loss": 0.0887, + "learning_rate": 2.7284726881250684e-05, + "loss": 0.0884, "step": 30005 }, { "epoch": 1.4, - "learning_rate": 1.7280015001640806e-05, - "loss": 0.0983, + "learning_rate": 2.7284258811414664e-05, + "loss": 0.1144, "step": 30010 }, { "epoch": 1.4, - "learning_rate": 1.7279546200365666e-05, - "loss": 0.1115, + "learning_rate": 2.7283790741578647e-05, + "loss": 0.1874, "step": 30015 }, { "epoch": 1.4, - "learning_rate": 1.7279077399090526e-05, - "loss": 0.1199, + "learning_rate": 2.7283322671742627e-05, + "loss": 0.181, "step": 30020 }, { "epoch": 1.4, - "learning_rate": 1.7278608597815386e-05, - "loss": 0.277, + "learning_rate": 2.7282854601906607e-05, + "loss": 0.2215, "step": 30025 }, { "epoch": 1.4, - "learning_rate": 1.727813979654025e-05, - "loss": 0.2758, + "learning_rate": 2.7282386532070583e-05, + "loss": 0.23, "step": 30030 }, { "epoch": 1.4, - "learning_rate": 1.727767099526511e-05, - "loss": 0.3149, + "learning_rate": 2.7281918462234566e-05, + "loss": 0.2539, "step": 30035 }, { "epoch": 1.4, - "learning_rate": 1.727720219398997e-05, - "loss": 0.0286, + "learning_rate": 2.7281450392398546e-05, + "loss": 0.0296, "step": 30040 }, { "epoch": 1.4, - "learning_rate": 1.727673339271483e-05, - "loss": 0.0506, + "learning_rate": 2.7280982322562526e-05, + "loss": 0.1145, "step": 30045 }, { "epoch": 1.4, - "learning_rate": 1.7276264591439692e-05, - "loss": 0.0629, + "learning_rate": 2.7280514252726506e-05, + "loss": 0.052, "step": 30050 }, { "epoch": 1.4, - "learning_rate": 1.7275795790164552e-05, - "loss": 0.1089, + "learning_rate": 2.728004618289049e-05, + "loss": 0.0763, "step": 30055 }, { "epoch": 1.4, - "learning_rate": 1.7275326988889412e-05, - "loss": 0.0838, + "learning_rate": 2.727957811305447e-05, + "loss": 0.093, "step": 30060 }, { "epoch": 1.4, - "learning_rate": 1.7274858187614272e-05, - "loss": 0.1634, + "learning_rate": 2.727911004321845e-05, + "loss": 0.132, "step": 30065 }, { "epoch": 1.4, - "learning_rate": 1.7274389386339132e-05, - "loss": 0.2009, + "learning_rate": 2.7278641973382428e-05, + "loss": 0.1771, "step": 30070 }, { "epoch": 1.4, - "learning_rate": 1.7273920585063992e-05, - "loss": 0.1953, + "learning_rate": 2.727817390354641e-05, + "loss": 0.2489, "step": 30075 }, { "epoch": 1.4, - "learning_rate": 1.7273451783788852e-05, - "loss": 0.3996, + "learning_rate": 2.727770583371039e-05, + "loss": 0.4328, "step": 30080 }, { "epoch": 1.4, - "learning_rate": 1.7272982982513712e-05, - "loss": 0.4205, + "learning_rate": 2.727723776387437e-05, + "loss": 0.278, "step": 30085 }, { "epoch": 1.4, - "learning_rate": 1.7272514181238575e-05, - "loss": 0.0856, + "learning_rate": 2.727676969403835e-05, + "loss": 0.1109, "step": 30090 }, { "epoch": 1.4, - "learning_rate": 1.7272045379963435e-05, - "loss": 0.1023, + "learning_rate": 2.727630162420233e-05, + "loss": 0.1077, "step": 30095 }, { "epoch": 1.4, - "learning_rate": 1.7271576578688295e-05, - "loss": 0.0996, + "learning_rate": 2.727583355436631e-05, + "loss": 0.0888, "step": 30100 }, { "epoch": 1.4, - "learning_rate": 1.7271107777413155e-05, - "loss": 0.131, + "learning_rate": 2.727536548453029e-05, + "loss": 0.0535, "step": 30105 }, { "epoch": 1.4, - "learning_rate": 1.727063897613802e-05, - "loss": 0.1035, + "learning_rate": 2.7274897414694274e-05, + "loss": 0.1247, "step": 30110 }, { "epoch": 1.41, - "learning_rate": 1.7270170174862878e-05, - "loss": 0.15, + "learning_rate": 2.7274429344858253e-05, + "loss": 0.2781, "step": 30115 }, { "epoch": 1.41, - "learning_rate": 1.7269701373587738e-05, - "loss": 0.144, + "learning_rate": 2.7273961275022233e-05, + "loss": 0.1777, "step": 30120 }, { "epoch": 1.41, - "learning_rate": 1.7269232572312598e-05, - "loss": 0.1548, + "learning_rate": 2.7273493205186213e-05, + "loss": 0.1703, "step": 30125 }, { "epoch": 1.41, - "learning_rate": 1.7268763771037458e-05, - "loss": 0.3646, + "learning_rate": 2.7273025135350196e-05, + "loss": 0.2506, "step": 30130 }, { "epoch": 1.41, - "learning_rate": 1.7268294969762318e-05, - "loss": 0.4213, + "learning_rate": 2.7272557065514176e-05, + "loss": 0.2557, "step": 30135 }, { "epoch": 1.41, - "learning_rate": 1.726782616848718e-05, - "loss": 0.0461, + "learning_rate": 2.7272088995678156e-05, + "loss": 0.057, "step": 30140 }, { "epoch": 1.41, - "learning_rate": 1.726735736721204e-05, - "loss": 0.0972, + "learning_rate": 2.727162092584214e-05, + "loss": 0.0928, "step": 30145 }, { "epoch": 1.41, - "learning_rate": 1.72668885659369e-05, - "loss": 0.0614, + "learning_rate": 2.727115285600612e-05, + "loss": 0.1044, "step": 30150 }, { "epoch": 1.41, - "learning_rate": 1.726641976466176e-05, - "loss": 0.1055, + "learning_rate": 2.7270684786170095e-05, + "loss": 0.0798, "step": 30155 }, { "epoch": 1.41, - "learning_rate": 1.726595096338662e-05, - "loss": 0.1407, + "learning_rate": 2.7270216716334075e-05, + "loss": 0.0734, "step": 30160 }, { "epoch": 1.41, - "learning_rate": 1.726548216211148e-05, - "loss": 0.1042, + "learning_rate": 2.726974864649806e-05, + "loss": 0.1477, "step": 30165 }, { "epoch": 1.41, - "learning_rate": 1.726501336083634e-05, - "loss": 0.23, + "learning_rate": 2.7269280576662038e-05, + "loss": 0.11, "step": 30170 }, { "epoch": 1.41, - "learning_rate": 1.7264544559561204e-05, - "loss": 0.2761, + "learning_rate": 2.7268812506826018e-05, + "loss": 0.3176, "step": 30175 }, { "epoch": 1.41, - "learning_rate": 1.7264075758286064e-05, - "loss": 0.2038, + "learning_rate": 2.7268344436989998e-05, + "loss": 0.4192, "step": 30180 }, { "epoch": 1.41, - "learning_rate": 1.7263606957010924e-05, - "loss": 0.3806, + "learning_rate": 2.726787636715398e-05, + "loss": 0.3313, "step": 30185 }, { "epoch": 1.41, - "learning_rate": 1.7263138155735787e-05, - "loss": 0.0514, + "learning_rate": 2.726740829731796e-05, + "loss": 0.0438, "step": 30190 }, { "epoch": 1.41, - "learning_rate": 1.7262669354460647e-05, - "loss": 0.0675, + "learning_rate": 2.726694022748194e-05, + "loss": 0.0737, "step": 30195 }, { "epoch": 1.41, - "learning_rate": 1.7262200553185507e-05, - "loss": 0.0407, + "learning_rate": 2.7266472157645924e-05, + "loss": 0.084, "step": 30200 }, { "epoch": 1.41, - "learning_rate": 1.7261731751910367e-05, - "loss": 0.1628, + "learning_rate": 2.7266004087809904e-05, + "loss": 0.0911, "step": 30205 }, { "epoch": 1.41, - "learning_rate": 1.7261262950635227e-05, - "loss": 0.1318, + "learning_rate": 2.7265536017973884e-05, + "loss": 0.0846, "step": 30210 }, { "epoch": 1.41, - "learning_rate": 1.7260794149360087e-05, - "loss": 0.1692, + "learning_rate": 2.7265067948137863e-05, + "loss": 0.237, "step": 30215 }, { "epoch": 1.41, - "learning_rate": 1.7260325348084947e-05, - "loss": 0.1106, + "learning_rate": 2.7264599878301843e-05, + "loss": 0.1959, "step": 30220 }, { "epoch": 1.41, - "learning_rate": 1.7259856546809807e-05, - "loss": 0.2392, + "learning_rate": 2.7264131808465823e-05, + "loss": 0.2003, "step": 30225 }, { "epoch": 1.41, - "learning_rate": 1.725938774553467e-05, - "loss": 0.3086, + "learning_rate": 2.7263663738629803e-05, + "loss": 0.3959, "step": 30230 }, { "epoch": 1.41, - "learning_rate": 1.725891894425953e-05, - "loss": 0.3793, + "learning_rate": 2.7263195668793783e-05, + "loss": 0.2974, "step": 30235 }, { "epoch": 1.41, - "learning_rate": 1.725845014298439e-05, - "loss": 0.0984, + "learning_rate": 2.7262727598957766e-05, + "loss": 0.0595, "step": 30240 }, { "epoch": 1.41, - "learning_rate": 1.725798134170925e-05, - "loss": 0.0687, + "learning_rate": 2.7262259529121746e-05, + "loss": 0.0366, "step": 30245 }, { "epoch": 1.41, - "learning_rate": 1.725751254043411e-05, - "loss": 0.0722, + "learning_rate": 2.7261791459285725e-05, + "loss": 0.0932, "step": 30250 }, { "epoch": 1.41, - "learning_rate": 1.7257043739158973e-05, - "loss": 0.1206, + "learning_rate": 2.7261323389449705e-05, + "loss": 0.1064, "step": 30255 }, { "epoch": 1.41, - "learning_rate": 1.7256574937883833e-05, - "loss": 0.0961, + "learning_rate": 2.726085531961369e-05, + "loss": 0.0823, "step": 30260 }, { "epoch": 1.41, - "learning_rate": 1.7256106136608693e-05, - "loss": 0.1549, + "learning_rate": 2.7260387249777668e-05, + "loss": 0.169, "step": 30265 }, { "epoch": 1.41, - "learning_rate": 1.7255637335333553e-05, - "loss": 0.1804, + "learning_rate": 2.7259919179941648e-05, + "loss": 0.11, "step": 30270 }, { "epoch": 1.41, - "learning_rate": 1.7255168534058413e-05, - "loss": 0.2152, + "learning_rate": 2.725945111010563e-05, + "loss": 0.287, "step": 30275 }, { "epoch": 1.41, - "learning_rate": 1.7254699732783276e-05, - "loss": 0.3638, + "learning_rate": 2.7258983040269608e-05, + "loss": 0.4121, "step": 30280 }, { "epoch": 1.41, - "learning_rate": 1.7254230931508136e-05, - "loss": 0.2409, + "learning_rate": 2.7258514970433588e-05, + "loss": 0.296, "step": 30285 }, { "epoch": 1.41, - "learning_rate": 1.7253762130232996e-05, - "loss": 0.0768, + "learning_rate": 2.7258046900597567e-05, + "loss": 0.1055, "step": 30290 }, { "epoch": 1.41, - "learning_rate": 1.7253293328957856e-05, - "loss": 0.0842, + "learning_rate": 2.725757883076155e-05, + "loss": 0.085, "step": 30295 }, { "epoch": 1.41, - "learning_rate": 1.7252824527682716e-05, - "loss": 0.0726, + "learning_rate": 2.725711076092553e-05, + "loss": 0.1009, "step": 30300 }, { "epoch": 1.41, - "learning_rate": 1.7252355726407576e-05, - "loss": 0.0589, + "learning_rate": 2.725664269108951e-05, + "loss": 0.1298, "step": 30305 }, { "epoch": 1.41, - "learning_rate": 1.7251886925132436e-05, - "loss": 0.2003, + "learning_rate": 2.725617462125349e-05, + "loss": 0.1291, "step": 30310 }, { "epoch": 1.41, - "learning_rate": 1.7251418123857296e-05, - "loss": 0.1153, + "learning_rate": 2.7255706551417473e-05, + "loss": 0.0958, "step": 30315 }, { "epoch": 1.41, - "learning_rate": 1.725094932258216e-05, - "loss": 0.1932, + "learning_rate": 2.7255238481581453e-05, + "loss": 0.2243, "step": 30320 }, { "epoch": 1.42, - "learning_rate": 1.725048052130702e-05, - "loss": 0.2228, + "learning_rate": 2.7254770411745433e-05, + "loss": 0.2349, "step": 30325 }, { "epoch": 1.42, - "learning_rate": 1.725001172003188e-05, - "loss": 0.4502, + "learning_rate": 2.7254302341909416e-05, + "loss": 0.3058, "step": 30330 }, { "epoch": 1.42, - "learning_rate": 1.7249542918756742e-05, - "loss": 0.2931, + "learning_rate": 2.7253834272073396e-05, + "loss": 0.2994, "step": 30335 }, { "epoch": 1.42, - "learning_rate": 1.7249074117481602e-05, - "loss": 0.0577, + "learning_rate": 2.7253366202237376e-05, + "loss": 0.0751, "step": 30340 }, { "epoch": 1.42, - "learning_rate": 1.7248605316206462e-05, - "loss": 0.0496, + "learning_rate": 2.7252898132401352e-05, + "loss": 0.0397, "step": 30345 }, { "epoch": 1.42, - "learning_rate": 1.7248136514931322e-05, - "loss": 0.1063, + "learning_rate": 2.7252430062565335e-05, + "loss": 0.0997, "step": 30350 }, { "epoch": 1.42, - "learning_rate": 1.7247667713656182e-05, - "loss": 0.0715, + "learning_rate": 2.7251961992729315e-05, + "loss": 0.125, "step": 30355 }, { "epoch": 1.42, - "learning_rate": 1.7247198912381042e-05, - "loss": 0.1267, + "learning_rate": 2.7251493922893295e-05, + "loss": 0.148, "step": 30360 }, { "epoch": 1.42, - "learning_rate": 1.7246730111105902e-05, - "loss": 0.1406, + "learning_rate": 2.7251025853057275e-05, + "loss": 0.1239, "step": 30365 }, { "epoch": 1.42, - "learning_rate": 1.7246261309830765e-05, - "loss": 0.1855, + "learning_rate": 2.7250557783221258e-05, + "loss": 0.1708, "step": 30370 }, { "epoch": 1.42, - "learning_rate": 1.7245792508555625e-05, - "loss": 0.2098, + "learning_rate": 2.7250089713385238e-05, + "loss": 0.2306, "step": 30375 }, { "epoch": 1.42, - "learning_rate": 1.7245323707280485e-05, - "loss": 0.294, + "learning_rate": 2.7249621643549218e-05, + "loss": 0.3325, "step": 30380 }, { "epoch": 1.42, - "learning_rate": 1.7244854906005345e-05, - "loss": 0.3826, + "learning_rate": 2.72491535737132e-05, + "loss": 0.2819, "step": 30385 }, { "epoch": 1.42, - "learning_rate": 1.7244386104730205e-05, - "loss": 0.0421, + "learning_rate": 2.724868550387718e-05, + "loss": 0.0532, "step": 30390 }, { "epoch": 1.42, - "learning_rate": 1.7243917303455065e-05, - "loss": 0.0705, + "learning_rate": 2.724821743404116e-05, + "loss": 0.0423, "step": 30395 }, { "epoch": 1.42, - "learning_rate": 1.724344850217993e-05, - "loss": 0.0582, + "learning_rate": 2.724774936420514e-05, + "loss": 0.1486, "step": 30400 }, { "epoch": 1.42, - "learning_rate": 1.7242979700904788e-05, - "loss": 0.0843, + "learning_rate": 2.7247281294369124e-05, + "loss": 0.1546, "step": 30405 }, { "epoch": 1.42, - "learning_rate": 1.7242510899629648e-05, - "loss": 0.0893, + "learning_rate": 2.72468132245331e-05, + "loss": 0.067, "step": 30410 }, { "epoch": 1.42, - "learning_rate": 1.724204209835451e-05, - "loss": 0.0917, + "learning_rate": 2.724634515469708e-05, + "loss": 0.1597, "step": 30415 }, { "epoch": 1.42, - "learning_rate": 1.724157329707937e-05, - "loss": 0.1858, + "learning_rate": 2.724587708486106e-05, + "loss": 0.2715, "step": 30420 }, { "epoch": 1.42, - "learning_rate": 1.724110449580423e-05, - "loss": 0.1572, + "learning_rate": 2.7245409015025043e-05, + "loss": 0.2488, "step": 30425 }, { "epoch": 1.42, - "learning_rate": 1.724063569452909e-05, - "loss": 0.3922, + "learning_rate": 2.7244940945189023e-05, + "loss": 0.4374, "step": 30430 }, { "epoch": 1.42, - "learning_rate": 1.724016689325395e-05, - "loss": 0.2831, + "learning_rate": 2.7244472875353002e-05, + "loss": 0.2353, "step": 30435 }, { "epoch": 1.42, - "learning_rate": 1.723969809197881e-05, - "loss": 0.0452, + "learning_rate": 2.7244004805516986e-05, + "loss": 0.1057, "step": 30440 }, { "epoch": 1.42, - "learning_rate": 1.723922929070367e-05, - "loss": 0.0772, + "learning_rate": 2.7243536735680965e-05, + "loss": 0.0851, "step": 30445 }, { "epoch": 1.42, - "learning_rate": 1.723876048942853e-05, - "loss": 0.0977, + "learning_rate": 2.7243068665844945e-05, + "loss": 0.0867, "step": 30450 }, { "epoch": 1.42, - "learning_rate": 1.723829168815339e-05, - "loss": 0.1266, + "learning_rate": 2.7242600596008925e-05, + "loss": 0.1265, "step": 30455 }, { "epoch": 1.42, - "learning_rate": 1.7237822886878254e-05, - "loss": 0.1233, + "learning_rate": 2.7242132526172908e-05, + "loss": 0.0953, "step": 30460 }, { "epoch": 1.42, - "learning_rate": 1.7237354085603114e-05, - "loss": 0.1393, + "learning_rate": 2.7241664456336888e-05, + "loss": 0.0935, "step": 30465 }, { "epoch": 1.42, - "learning_rate": 1.7236885284327974e-05, - "loss": 0.2311, + "learning_rate": 2.7241196386500865e-05, + "loss": 0.1259, "step": 30470 }, { "epoch": 1.42, - "learning_rate": 1.7236416483052834e-05, - "loss": 0.2408, + "learning_rate": 2.7240728316664844e-05, + "loss": 0.1751, "step": 30475 }, { "epoch": 1.42, - "learning_rate": 1.7235947681777697e-05, - "loss": 0.3132, + "learning_rate": 2.7240260246828828e-05, + "loss": 0.2389, "step": 30480 }, { "epoch": 1.42, - "learning_rate": 1.7235478880502557e-05, - "loss": 0.3054, + "learning_rate": 2.7239792176992807e-05, + "loss": 0.307, "step": 30485 }, { "epoch": 1.42, - "learning_rate": 1.7235010079227417e-05, - "loss": 0.0488, + "learning_rate": 2.7239324107156787e-05, + "loss": 0.0281, "step": 30490 }, { "epoch": 1.42, - "learning_rate": 1.7234541277952277e-05, - "loss": 0.0335, + "learning_rate": 2.7238856037320767e-05, + "loss": 0.0688, "step": 30495 }, { "epoch": 1.42, - "learning_rate": 1.7234072476677137e-05, - "loss": 0.1066, + "learning_rate": 2.723838796748475e-05, + "loss": 0.115, "step": 30500 }, { "epoch": 1.42, - "learning_rate": 1.7233603675401997e-05, - "loss": 0.1301, + "learning_rate": 2.723791989764873e-05, + "loss": 0.0956, "step": 30505 }, { "epoch": 1.42, - "learning_rate": 1.723313487412686e-05, - "loss": 0.1551, + "learning_rate": 2.723745182781271e-05, + "loss": 0.1199, "step": 30510 }, { "epoch": 1.42, - "learning_rate": 1.723266607285172e-05, - "loss": 0.2577, + "learning_rate": 2.7236983757976693e-05, + "loss": 0.1294, "step": 30515 }, { "epoch": 1.42, - "learning_rate": 1.723219727157658e-05, - "loss": 0.2499, + "learning_rate": 2.7236515688140673e-05, + "loss": 0.2323, "step": 30520 }, { "epoch": 1.42, - "learning_rate": 1.723172847030144e-05, - "loss": 0.156, + "learning_rate": 2.7236047618304653e-05, + "loss": 0.1501, "step": 30525 }, { "epoch": 1.42, - "learning_rate": 1.72312596690263e-05, - "loss": 0.4003, + "learning_rate": 2.7235579548468633e-05, + "loss": 0.2528, "step": 30530 }, { "epoch": 1.42, - "learning_rate": 1.723079086775116e-05, - "loss": 0.2101, + "learning_rate": 2.7235111478632612e-05, + "loss": 0.2352, "step": 30535 }, { "epoch": 1.43, - "learning_rate": 1.7230322066476023e-05, - "loss": 0.0443, + "learning_rate": 2.7234643408796592e-05, + "loss": 0.0412, "step": 30540 }, { "epoch": 1.43, - "learning_rate": 1.7229853265200883e-05, - "loss": 0.0775, + "learning_rate": 2.7234175338960572e-05, + "loss": 0.0744, "step": 30545 }, { "epoch": 1.43, - "learning_rate": 1.7229384463925743e-05, - "loss": 0.0966, + "learning_rate": 2.7233707269124552e-05, + "loss": 0.0957, "step": 30550 }, { "epoch": 1.43, - "learning_rate": 1.7228915662650603e-05, - "loss": 0.1195, + "learning_rate": 2.7233239199288535e-05, + "loss": 0.1033, "step": 30555 }, { "epoch": 1.43, - "learning_rate": 1.7228446861375467e-05, - "loss": 0.1571, + "learning_rate": 2.7232771129452515e-05, + "loss": 0.1381, "step": 30560 }, { "epoch": 1.43, - "learning_rate": 1.7227978060100326e-05, - "loss": 0.0814, + "learning_rate": 2.7232303059616495e-05, + "loss": 0.1634, "step": 30565 }, { "epoch": 1.43, - "learning_rate": 1.7227509258825186e-05, - "loss": 0.2036, + "learning_rate": 2.7231834989780478e-05, + "loss": 0.2235, "step": 30570 }, { "epoch": 1.43, - "learning_rate": 1.7227040457550046e-05, - "loss": 0.2727, + "learning_rate": 2.7231366919944458e-05, + "loss": 0.244, "step": 30575 }, { "epoch": 1.43, - "learning_rate": 1.7226571656274906e-05, - "loss": 0.4466, + "learning_rate": 2.7230898850108437e-05, + "loss": 0.5258, "step": 30580 }, { "epoch": 1.43, - "learning_rate": 1.7226102854999766e-05, - "loss": 0.209, + "learning_rate": 2.7230430780272417e-05, + "loss": 0.3767, "step": 30585 }, { "epoch": 1.43, - "learning_rate": 1.7225634053724626e-05, + "learning_rate": 2.72299627104364e-05, "loss": 0.0356, "step": 30590 }, { "epoch": 1.43, - "learning_rate": 1.7225165252449486e-05, - "loss": 0.0369, + "learning_rate": 2.722949464060038e-05, + "loss": 0.0709, "step": 30595 }, { "epoch": 1.43, - "learning_rate": 1.7224696451174346e-05, - "loss": 0.1753, + "learning_rate": 2.7229026570764357e-05, + "loss": 0.0632, "step": 30600 }, { "epoch": 1.43, - "learning_rate": 1.722422764989921e-05, - "loss": 0.1393, + "learning_rate": 2.7228558500928337e-05, + "loss": 0.1599, "step": 30605 }, { "epoch": 1.43, - "learning_rate": 1.722375884862407e-05, - "loss": 0.1501, + "learning_rate": 2.722809043109232e-05, + "loss": 0.1475, "step": 30610 }, { "epoch": 1.43, - "learning_rate": 1.722329004734893e-05, - "loss": 0.0778, + "learning_rate": 2.72276223612563e-05, + "loss": 0.1541, "step": 30615 }, { "epoch": 1.43, - "learning_rate": 1.7222821246073793e-05, - "loss": 0.2176, + "learning_rate": 2.722715429142028e-05, + "loss": 0.0917, "step": 30620 }, { "epoch": 1.43, - "learning_rate": 1.7222352444798652e-05, - "loss": 0.2829, + "learning_rate": 2.7226686221584263e-05, + "loss": 0.2968, "step": 30625 }, { "epoch": 1.43, - "learning_rate": 1.7221883643523512e-05, - "loss": 0.4749, + "learning_rate": 2.7226218151748242e-05, + "loss": 0.3078, "step": 30630 }, { "epoch": 1.43, - "learning_rate": 1.7221414842248372e-05, - "loss": 0.1959, + "learning_rate": 2.7225750081912222e-05, + "loss": 0.1851, "step": 30635 }, { "epoch": 1.43, - "learning_rate": 1.7220946040973232e-05, - "loss": 0.074, + "learning_rate": 2.7225282012076202e-05, + "loss": 0.0619, "step": 30640 }, { "epoch": 1.43, - "learning_rate": 1.7220477239698092e-05, - "loss": 0.1144, + "learning_rate": 2.7224813942240185e-05, + "loss": 0.1107, "step": 30645 }, { "epoch": 1.43, - "learning_rate": 1.7220008438422956e-05, - "loss": 0.1019, + "learning_rate": 2.7224345872404165e-05, + "loss": 0.0636, "step": 30650 }, { "epoch": 1.43, - "learning_rate": 1.7219539637147815e-05, - "loss": 0.0822, + "learning_rate": 2.7223877802568145e-05, + "loss": 0.1222, "step": 30655 }, { "epoch": 1.43, - "learning_rate": 1.7219070835872675e-05, - "loss": 0.1619, + "learning_rate": 2.722340973273212e-05, + "loss": 0.072, "step": 30660 }, { "epoch": 1.43, - "learning_rate": 1.7218602034597535e-05, - "loss": 0.1428, + "learning_rate": 2.7222941662896105e-05, + "loss": 0.2491, "step": 30665 }, { "epoch": 1.43, - "learning_rate": 1.7218133233322395e-05, - "loss": 0.1916, + "learning_rate": 2.7222473593060084e-05, + "loss": 0.1339, "step": 30670 }, { "epoch": 1.43, - "learning_rate": 1.7217664432047255e-05, - "loss": 0.1891, + "learning_rate": 2.7222005523224064e-05, + "loss": 0.1526, "step": 30675 }, { "epoch": 1.43, - "learning_rate": 1.7217195630772115e-05, - "loss": 0.2762, + "learning_rate": 2.7221537453388044e-05, + "loss": 0.4271, "step": 30680 }, { "epoch": 1.43, - "learning_rate": 1.721672682949698e-05, - "loss": 0.1885, + "learning_rate": 2.7221069383552027e-05, + "loss": 0.1767, "step": 30685 }, { "epoch": 1.43, - "learning_rate": 1.721625802822184e-05, - "loss": 0.06, + "learning_rate": 2.7220601313716007e-05, + "loss": 0.0591, "step": 30690 }, { "epoch": 1.43, - "learning_rate": 1.72157892269467e-05, - "loss": 0.098, + "learning_rate": 2.7220133243879987e-05, + "loss": 0.0743, "step": 30695 }, { "epoch": 1.43, - "learning_rate": 1.721532042567156e-05, - "loss": 0.0611, + "learning_rate": 2.721966517404397e-05, + "loss": 0.1219, "step": 30700 }, { "epoch": 1.43, - "learning_rate": 1.721485162439642e-05, - "loss": 0.0604, + "learning_rate": 2.721919710420795e-05, + "loss": 0.1162, "step": 30705 }, { "epoch": 1.43, - "learning_rate": 1.721438282312128e-05, - "loss": 0.1263, + "learning_rate": 2.721872903437193e-05, + "loss": 0.0511, "step": 30710 }, { "epoch": 1.43, - "learning_rate": 1.721391402184614e-05, - "loss": 0.1096, + "learning_rate": 2.721826096453591e-05, + "loss": 0.2201, "step": 30715 }, { "epoch": 1.43, - "learning_rate": 1.7213445220571e-05, - "loss": 0.1963, + "learning_rate": 2.7217792894699893e-05, + "loss": 0.1748, "step": 30720 }, { "epoch": 1.43, - "learning_rate": 1.721297641929586e-05, - "loss": 0.2184, + "learning_rate": 2.721732482486387e-05, + "loss": 0.2107, "step": 30725 }, { "epoch": 1.43, - "learning_rate": 1.721250761802072e-05, - "loss": 0.3873, + "learning_rate": 2.721685675502785e-05, + "loss": 0.1747, "step": 30730 }, { "epoch": 1.43, - "learning_rate": 1.721203881674558e-05, - "loss": 0.2638, + "learning_rate": 2.721638868519183e-05, + "loss": 0.2163, "step": 30735 }, { "epoch": 1.43, - "learning_rate": 1.7211570015470444e-05, - "loss": 0.0434, + "learning_rate": 2.7215920615355812e-05, + "loss": 0.0622, "step": 30740 }, { "epoch": 1.43, - "learning_rate": 1.7211101214195304e-05, - "loss": 0.0737, + "learning_rate": 2.7215452545519792e-05, + "loss": 0.0323, "step": 30745 }, { "epoch": 1.43, - "learning_rate": 1.7210632412920164e-05, - "loss": 0.0951, + "learning_rate": 2.721498447568377e-05, + "loss": 0.0959, "step": 30750 }, { "epoch": 1.44, - "learning_rate": 1.7210163611645024e-05, - "loss": 0.1319, + "learning_rate": 2.7214516405847755e-05, + "loss": 0.1716, "step": 30755 }, { "epoch": 1.44, - "learning_rate": 1.7209694810369884e-05, - "loss": 0.1413, + "learning_rate": 2.7214048336011735e-05, + "loss": 0.1084, "step": 30760 }, { "epoch": 1.44, - "learning_rate": 1.7209226009094748e-05, - "loss": 0.112, + "learning_rate": 2.7213580266175714e-05, + "loss": 0.2736, "step": 30765 }, { "epoch": 1.44, - "learning_rate": 1.7208757207819607e-05, - "loss": 0.1886, + "learning_rate": 2.7213112196339694e-05, + "loss": 0.1093, "step": 30770 }, { "epoch": 1.44, - "learning_rate": 1.7208288406544467e-05, - "loss": 0.3033, + "learning_rate": 2.7212644126503677e-05, + "loss": 0.1853, "step": 30775 }, { "epoch": 1.44, - "learning_rate": 1.7207819605269327e-05, - "loss": 0.3538, + "learning_rate": 2.7212176056667657e-05, + "loss": 0.3013, "step": 30780 }, { "epoch": 1.44, - "learning_rate": 1.7207350803994187e-05, - "loss": 0.3352, + "learning_rate": 2.7211707986831637e-05, + "loss": 0.2163, "step": 30785 }, { "epoch": 1.44, - "learning_rate": 1.720688200271905e-05, - "loss": 0.0869, + "learning_rate": 2.7211239916995614e-05, + "loss": 0.0327, "step": 30790 }, { "epoch": 1.44, - "learning_rate": 1.720641320144391e-05, - "loss": 0.034, + "learning_rate": 2.7210771847159597e-05, + "loss": 0.079, "step": 30795 }, { "epoch": 1.44, - "learning_rate": 1.720594440016877e-05, - "loss": 0.1027, + "learning_rate": 2.7210303777323577e-05, + "loss": 0.1031, "step": 30800 }, { "epoch": 1.44, - "learning_rate": 1.720547559889363e-05, - "loss": 0.1315, + "learning_rate": 2.7209835707487556e-05, + "loss": 0.0917, "step": 30805 }, { "epoch": 1.44, - "learning_rate": 1.720500679761849e-05, - "loss": 0.1198, + "learning_rate": 2.720936763765154e-05, + "loss": 0.1494, "step": 30810 }, { "epoch": 1.44, - "learning_rate": 1.720453799634335e-05, - "loss": 0.1869, + "learning_rate": 2.720889956781552e-05, + "loss": 0.1752, "step": 30815 }, { "epoch": 1.44, - "learning_rate": 1.720406919506821e-05, - "loss": 0.1343, + "learning_rate": 2.72084314979795e-05, + "loss": 0.2704, "step": 30820 }, { "epoch": 1.44, - "learning_rate": 1.720360039379307e-05, - "loss": 0.2136, + "learning_rate": 2.720796342814348e-05, + "loss": 0.2556, "step": 30825 }, { "epoch": 1.44, - "learning_rate": 1.7203131592517933e-05, - "loss": 0.3957, + "learning_rate": 2.7207495358307462e-05, + "loss": 0.3796, "step": 30830 }, { "epoch": 1.44, - "learning_rate": 1.7202662791242793e-05, - "loss": 0.291, + "learning_rate": 2.7207027288471442e-05, + "loss": 0.2472, "step": 30835 }, { "epoch": 1.44, - "learning_rate": 1.7202193989967653e-05, - "loss": 0.0897, + "learning_rate": 2.7206559218635422e-05, + "loss": 0.0347, "step": 30840 }, { "epoch": 1.44, - "learning_rate": 1.7201725188692517e-05, - "loss": 0.0911, + "learning_rate": 2.72060911487994e-05, + "loss": 0.0353, "step": 30845 }, { "epoch": 1.44, - "learning_rate": 1.7201256387417377e-05, - "loss": 0.1066, + "learning_rate": 2.720562307896338e-05, + "loss": 0.1022, "step": 30850 }, { "epoch": 1.44, - "learning_rate": 1.7200787586142237e-05, - "loss": 0.1329, + "learning_rate": 2.720515500912736e-05, + "loss": 0.1397, "step": 30855 }, { "epoch": 1.44, - "learning_rate": 1.7200318784867096e-05, - "loss": 0.0502, + "learning_rate": 2.720468693929134e-05, + "loss": 0.2229, "step": 30860 }, { "epoch": 1.44, - "learning_rate": 1.7199849983591956e-05, - "loss": 0.0901, + "learning_rate": 2.720421886945532e-05, + "loss": 0.1854, "step": 30865 }, { "epoch": 1.44, - "learning_rate": 1.7199381182316816e-05, - "loss": 0.145, + "learning_rate": 2.7203750799619304e-05, + "loss": 0.2439, "step": 30870 }, { "epoch": 1.44, - "learning_rate": 1.7198912381041676e-05, - "loss": 0.2188, + "learning_rate": 2.7203282729783284e-05, + "loss": 0.173, "step": 30875 }, { "epoch": 1.44, - "learning_rate": 1.719844357976654e-05, - "loss": 0.3924, + "learning_rate": 2.7202814659947264e-05, + "loss": 0.3904, "step": 30880 }, { "epoch": 1.44, - "learning_rate": 1.71979747784914e-05, - "loss": 0.2868, + "learning_rate": 2.7202346590111247e-05, + "loss": 0.2966, "step": 30885 }, { "epoch": 1.44, - "learning_rate": 1.719750597721626e-05, - "loss": 0.0285, + "learning_rate": 2.7201878520275227e-05, + "loss": 0.0616, "step": 30890 }, { "epoch": 1.44, - "learning_rate": 1.719703717594112e-05, - "loss": 0.0456, + "learning_rate": 2.7201410450439207e-05, + "loss": 0.0956, "step": 30895 }, { "epoch": 1.44, - "learning_rate": 1.719656837466598e-05, - "loss": 0.0869, + "learning_rate": 2.7200942380603186e-05, + "loss": 0.0862, "step": 30900 }, { "epoch": 1.44, - "learning_rate": 1.719609957339084e-05, - "loss": 0.1951, + "learning_rate": 2.720047431076717e-05, + "loss": 0.0854, "step": 30905 }, { "epoch": 1.44, - "learning_rate": 1.7195630772115703e-05, - "loss": 0.1306, + "learning_rate": 2.720000624093115e-05, + "loss": 0.125, "step": 30910 }, { "epoch": 1.44, - "learning_rate": 1.7195161970840562e-05, - "loss": 0.1019, + "learning_rate": 2.7199538171095126e-05, + "loss": 0.148, "step": 30915 }, { "epoch": 1.44, - "learning_rate": 1.7194693169565422e-05, - "loss": 0.1409, + "learning_rate": 2.7199070101259106e-05, + "loss": 0.1018, "step": 30920 }, { "epoch": 1.44, - "learning_rate": 1.7194224368290282e-05, - "loss": 0.3232, + "learning_rate": 2.719860203142309e-05, + "loss": 0.1813, "step": 30925 }, { "epoch": 1.44, - "learning_rate": 1.7193755567015146e-05, - "loss": 0.2785, + "learning_rate": 2.719813396158707e-05, + "loss": 0.3915, "step": 30930 }, { "epoch": 1.44, - "learning_rate": 1.7193286765740006e-05, - "loss": 0.1941, + "learning_rate": 2.719766589175105e-05, + "loss": 0.3807, "step": 30935 }, { "epoch": 1.44, - "learning_rate": 1.7192817964464866e-05, - "loss": 0.0544, + "learning_rate": 2.7197197821915032e-05, + "loss": 0.0512, "step": 30940 }, { "epoch": 1.44, - "learning_rate": 1.7192349163189725e-05, - "loss": 0.0817, + "learning_rate": 2.719672975207901e-05, + "loss": 0.0802, "step": 30945 }, { "epoch": 1.44, - "learning_rate": 1.7191880361914585e-05, - "loss": 0.1332, + "learning_rate": 2.719626168224299e-05, + "loss": 0.0591, "step": 30950 }, { "epoch": 1.44, - "learning_rate": 1.7191411560639445e-05, - "loss": 0.0841, + "learning_rate": 2.719579361240697e-05, + "loss": 0.1057, "step": 30955 }, { "epoch": 1.44, - "learning_rate": 1.7190942759364305e-05, - "loss": 0.148, + "learning_rate": 2.7195325542570954e-05, + "loss": 0.0571, "step": 30960 }, { "epoch": 1.44, - "learning_rate": 1.7190473958089165e-05, - "loss": 0.2526, + "learning_rate": 2.7194857472734934e-05, + "loss": 0.1816, "step": 30965 }, { "epoch": 1.45, - "learning_rate": 1.719000515681403e-05, - "loss": 0.1986, + "learning_rate": 2.7194389402898914e-05, + "loss": 0.1923, "step": 30970 }, { "epoch": 1.45, - "learning_rate": 1.718953635553889e-05, - "loss": 0.1943, + "learning_rate": 2.719392133306289e-05, + "loss": 0.2293, "step": 30975 }, { "epoch": 1.45, - "learning_rate": 1.718906755426375e-05, - "loss": 0.361, + "learning_rate": 2.7193453263226874e-05, + "loss": 0.2999, "step": 30980 }, { "epoch": 1.45, - "learning_rate": 1.718859875298861e-05, - "loss": 0.1899, + "learning_rate": 2.7192985193390854e-05, + "loss": 0.2714, "step": 30985 }, { "epoch": 1.45, - "learning_rate": 1.718812995171347e-05, - "loss": 0.0256, + "learning_rate": 2.7192517123554833e-05, + "loss": 0.0879, "step": 30990 }, { "epoch": 1.45, - "learning_rate": 1.718766115043833e-05, - "loss": 0.0823, + "learning_rate": 2.7192049053718817e-05, + "loss": 0.0799, "step": 30995 }, { "epoch": 1.45, - "learning_rate": 1.718719234916319e-05, - "loss": 0.1232, + "learning_rate": 2.7191580983882796e-05, + "loss": 0.1103, "step": 31000 }, { "epoch": 1.45, - "learning_rate": 1.718672354788805e-05, - "loss": 0.0927, + "learning_rate": 2.7191112914046776e-05, + "loss": 0.1332, "step": 31005 }, { "epoch": 1.45, - "learning_rate": 1.718625474661291e-05, - "loss": 0.112, + "learning_rate": 2.7190644844210756e-05, + "loss": 0.1342, "step": 31010 }, { "epoch": 1.45, - "learning_rate": 1.718578594533777e-05, - "loss": 0.1358, + "learning_rate": 2.719017677437474e-05, + "loss": 0.1187, "step": 31015 }, { "epoch": 1.45, - "learning_rate": 1.7185317144062635e-05, - "loss": 0.1677, + "learning_rate": 2.718970870453872e-05, + "loss": 0.1804, "step": 31020 }, { "epoch": 1.45, - "learning_rate": 1.7184848342787495e-05, - "loss": 0.2751, + "learning_rate": 2.71892406347027e-05, + "loss": 0.2227, "step": 31025 }, { "epoch": 1.45, - "learning_rate": 1.7184379541512355e-05, - "loss": 0.2994, + "learning_rate": 2.718877256486668e-05, + "loss": 0.329, "step": 31030 }, { "epoch": 1.45, - "learning_rate": 1.7183910740237214e-05, - "loss": 0.2568, + "learning_rate": 2.7188304495030662e-05, + "loss": 0.2625, "step": 31035 }, { "epoch": 1.45, - "learning_rate": 1.7183441938962074e-05, - "loss": 0.0416, + "learning_rate": 2.7187836425194638e-05, + "loss": 0.0191, "step": 31040 }, { "epoch": 1.45, - "learning_rate": 1.7182973137686934e-05, - "loss": 0.087, + "learning_rate": 2.7187368355358618e-05, + "loss": 0.0709, "step": 31045 }, { "epoch": 1.45, - "learning_rate": 1.7182504336411798e-05, - "loss": 0.084, + "learning_rate": 2.71869002855226e-05, + "loss": 0.078, "step": 31050 }, { "epoch": 1.45, - "learning_rate": 1.7182035535136658e-05, - "loss": 0.1638, + "learning_rate": 2.718643221568658e-05, + "loss": 0.1497, "step": 31055 }, { "epoch": 1.45, - "learning_rate": 1.7181566733861518e-05, - "loss": 0.1059, + "learning_rate": 2.718596414585056e-05, + "loss": 0.0965, "step": 31060 }, { "epoch": 1.45, - "learning_rate": 1.718109793258638e-05, - "loss": 0.0665, + "learning_rate": 2.718549607601454e-05, + "loss": 0.1385, "step": 31065 }, { "epoch": 1.45, - "learning_rate": 1.718062913131124e-05, - "loss": 0.0996, + "learning_rate": 2.7185028006178524e-05, + "loss": 0.197, "step": 31070 }, { "epoch": 1.45, - "learning_rate": 1.71801603300361e-05, - "loss": 0.1804, + "learning_rate": 2.7184559936342504e-05, + "loss": 0.2058, "step": 31075 }, { "epoch": 1.45, - "learning_rate": 1.717969152876096e-05, - "loss": 0.3874, + "learning_rate": 2.7184091866506484e-05, + "loss": 0.3558, "step": 31080 }, { "epoch": 1.45, - "learning_rate": 1.717922272748582e-05, - "loss": 0.2361, + "learning_rate": 2.7183623796670463e-05, + "loss": 0.1702, "step": 31085 }, { "epoch": 1.45, - "learning_rate": 1.717875392621068e-05, - "loss": 0.0242, + "learning_rate": 2.7183155726834447e-05, + "loss": 0.062, "step": 31090 }, { "epoch": 1.45, - "learning_rate": 1.717828512493554e-05, - "loss": 0.0469, + "learning_rate": 2.7182687656998426e-05, + "loss": 0.0479, "step": 31095 }, { "epoch": 1.45, - "learning_rate": 1.71778163236604e-05, - "loss": 0.0775, + "learning_rate": 2.7182219587162406e-05, + "loss": 0.0988, "step": 31100 }, { "epoch": 1.45, - "learning_rate": 1.717734752238526e-05, - "loss": 0.1437, + "learning_rate": 2.7181751517326383e-05, + "loss": 0.1179, "step": 31105 }, { "epoch": 1.45, - "learning_rate": 1.717687872111012e-05, - "loss": 0.1462, + "learning_rate": 2.7181283447490366e-05, + "loss": 0.1696, "step": 31110 }, { "epoch": 1.45, - "learning_rate": 1.7176409919834984e-05, - "loss": 0.1569, + "learning_rate": 2.7180815377654346e-05, + "loss": 0.176, "step": 31115 }, { "epoch": 1.45, - "learning_rate": 1.7175941118559843e-05, - "loss": 0.1341, + "learning_rate": 2.7180347307818326e-05, + "loss": 0.1533, "step": 31120 }, { "epoch": 1.45, - "learning_rate": 1.7175472317284703e-05, - "loss": 0.2074, + "learning_rate": 2.717987923798231e-05, + "loss": 0.1897, "step": 31125 }, { "epoch": 1.45, - "learning_rate": 1.7175003516009567e-05, - "loss": 0.3704, + "learning_rate": 2.717941116814629e-05, + "loss": 0.3079, "step": 31130 }, { "epoch": 1.45, - "learning_rate": 1.7174534714734427e-05, - "loss": 0.3183, + "learning_rate": 2.717894309831027e-05, + "loss": 0.2082, "step": 31135 }, { "epoch": 1.45, - "learning_rate": 1.7174065913459287e-05, - "loss": 0.0238, + "learning_rate": 2.7178475028474248e-05, + "loss": 0.0371, "step": 31140 }, { "epoch": 1.45, - "learning_rate": 1.7173597112184147e-05, - "loss": 0.0825, + "learning_rate": 2.717800695863823e-05, + "loss": 0.0378, "step": 31145 }, { "epoch": 1.45, - "learning_rate": 1.7173128310909006e-05, - "loss": 0.0863, + "learning_rate": 2.717753888880221e-05, + "loss": 0.063, "step": 31150 }, { "epoch": 1.45, - "learning_rate": 1.7172659509633866e-05, - "loss": 0.1149, + "learning_rate": 2.717707081896619e-05, + "loss": 0.1161, "step": 31155 }, { "epoch": 1.45, - "learning_rate": 1.717219070835873e-05, - "loss": 0.1763, + "learning_rate": 2.717660274913017e-05, + "loss": 0.2054, "step": 31160 }, { "epoch": 1.45, - "learning_rate": 1.717172190708359e-05, - "loss": 0.1381, + "learning_rate": 2.717613467929415e-05, + "loss": 0.2005, "step": 31165 }, { "epoch": 1.45, - "learning_rate": 1.717125310580845e-05, - "loss": 0.1784, + "learning_rate": 2.717566660945813e-05, + "loss": 0.1581, "step": 31170 }, { "epoch": 1.45, - "learning_rate": 1.717078430453331e-05, - "loss": 0.272, + "learning_rate": 2.717519853962211e-05, + "loss": 0.144, "step": 31175 }, { "epoch": 1.45, - "learning_rate": 1.717031550325817e-05, - "loss": 0.4498, + "learning_rate": 2.7174730469786094e-05, + "loss": 0.2286, "step": 31180 }, { "epoch": 1.46, - "learning_rate": 1.716984670198303e-05, - "loss": 0.4483, + "learning_rate": 2.7174262399950073e-05, + "loss": 0.3269, "step": 31185 }, { "epoch": 1.46, - "learning_rate": 1.716937790070789e-05, - "loss": 0.0412, + "learning_rate": 2.7173794330114053e-05, + "loss": 0.1078, "step": 31190 }, { "epoch": 1.46, - "learning_rate": 1.7168909099432753e-05, - "loss": 0.0857, + "learning_rate": 2.7173326260278033e-05, + "loss": 0.0554, "step": 31195 }, { "epoch": 1.46, - "learning_rate": 1.7168440298157613e-05, - "loss": 0.1485, + "learning_rate": 2.7172858190442016e-05, + "loss": 0.0881, "step": 31200 }, { "epoch": 1.46, - "learning_rate": 1.7167971496882473e-05, - "loss": 0.1242, + "learning_rate": 2.7172390120605996e-05, + "loss": 0.1346, "step": 31205 }, { "epoch": 1.46, - "learning_rate": 1.7167502695607336e-05, - "loss": 0.0397, + "learning_rate": 2.7171922050769976e-05, + "loss": 0.1582, "step": 31210 }, { "epoch": 1.46, - "learning_rate": 1.7167033894332196e-05, - "loss": 0.1162, + "learning_rate": 2.7171453980933956e-05, + "loss": 0.2103, "step": 31215 }, { "epoch": 1.46, - "learning_rate": 1.7166565093057056e-05, - "loss": 0.0797, + "learning_rate": 2.717098591109794e-05, + "loss": 0.1397, "step": 31220 }, { "epoch": 1.46, - "learning_rate": 1.7166096291781916e-05, - "loss": 0.2146, + "learning_rate": 2.717051784126192e-05, + "loss": 0.3329, "step": 31225 }, { "epoch": 1.46, - "learning_rate": 1.7165627490506776e-05, - "loss": 0.4251, + "learning_rate": 2.7170049771425895e-05, + "loss": 0.2672, "step": 31230 }, { "epoch": 1.46, - "learning_rate": 1.7165158689231636e-05, - "loss": 0.2648, + "learning_rate": 2.7169581701589878e-05, + "loss": 0.2304, "step": 31235 }, { "epoch": 1.46, - "learning_rate": 1.7164689887956495e-05, - "loss": 0.0691, + "learning_rate": 2.7169113631753858e-05, + "loss": 0.072, "step": 31240 }, { "epoch": 1.46, - "learning_rate": 1.7164221086681355e-05, - "loss": 0.0339, + "learning_rate": 2.7168645561917838e-05, + "loss": 0.0674, "step": 31245 }, { "epoch": 1.46, - "learning_rate": 1.7163752285406215e-05, - "loss": 0.0822, + "learning_rate": 2.7168177492081818e-05, + "loss": 0.0143, "step": 31250 }, { "epoch": 1.46, - "learning_rate": 1.716328348413108e-05, - "loss": 0.1465, + "learning_rate": 2.71677094222458e-05, + "loss": 0.078, "step": 31255 }, { "epoch": 1.46, - "learning_rate": 1.716281468285594e-05, - "loss": 0.0895, + "learning_rate": 2.716724135240978e-05, + "loss": 0.1475, "step": 31260 }, { "epoch": 1.46, - "learning_rate": 1.71623458815808e-05, - "loss": 0.093, + "learning_rate": 2.716677328257376e-05, + "loss": 0.1014, "step": 31265 }, { "epoch": 1.46, - "learning_rate": 1.716187708030566e-05, - "loss": 0.1726, + "learning_rate": 2.716630521273774e-05, + "loss": 0.1701, "step": 31270 }, { "epoch": 1.46, - "learning_rate": 1.7161408279030522e-05, - "loss": 0.2715, + "learning_rate": 2.7165837142901724e-05, + "loss": 0.2333, "step": 31275 }, { "epoch": 1.46, - "learning_rate": 1.7160939477755382e-05, - "loss": 0.2339, + "learning_rate": 2.7165369073065703e-05, + "loss": 0.3492, "step": 31280 }, { "epoch": 1.46, - "learning_rate": 1.716047067648024e-05, - "loss": 0.424, + "learning_rate": 2.7164901003229683e-05, + "loss": 0.2331, "step": 31285 }, { "epoch": 1.46, - "learning_rate": 1.71600018752051e-05, - "loss": 0.0541, + "learning_rate": 2.7164432933393663e-05, + "loss": 0.0265, "step": 31290 }, { "epoch": 1.46, - "learning_rate": 1.715953307392996e-05, - "loss": 0.076, + "learning_rate": 2.7163964863557643e-05, + "loss": 0.0504, "step": 31295 }, { "epoch": 1.46, - "learning_rate": 1.7159064272654825e-05, - "loss": 0.0524, + "learning_rate": 2.7163496793721623e-05, + "loss": 0.0923, "step": 31300 }, { "epoch": 1.46, - "learning_rate": 1.7158595471379685e-05, - "loss": 0.142, + "learning_rate": 2.7163028723885603e-05, + "loss": 0.0899, "step": 31305 }, { "epoch": 1.46, - "learning_rate": 1.7158126670104545e-05, - "loss": 0.1337, + "learning_rate": 2.7162560654049586e-05, + "loss": 0.1123, "step": 31310 }, { "epoch": 1.46, - "learning_rate": 1.7157657868829405e-05, - "loss": 0.171, + "learning_rate": 2.7162092584213566e-05, + "loss": 0.1174, "step": 31315 }, { "epoch": 1.46, - "learning_rate": 1.7157189067554265e-05, - "loss": 0.1233, + "learning_rate": 2.7161624514377545e-05, + "loss": 0.1602, "step": 31320 }, { "epoch": 1.46, - "learning_rate": 1.7156720266279124e-05, - "loss": 0.2044, + "learning_rate": 2.7161156444541525e-05, + "loss": 0.3227, "step": 31325 }, { "epoch": 1.46, - "learning_rate": 1.7156251465003984e-05, - "loss": 0.3685, + "learning_rate": 2.716068837470551e-05, + "loss": 0.2324, "step": 31330 }, { "epoch": 1.46, - "learning_rate": 1.7155782663728844e-05, - "loss": 0.2695, + "learning_rate": 2.7160220304869488e-05, + "loss": 0.3153, "step": 31335 }, { "epoch": 1.46, - "learning_rate": 1.7155313862453708e-05, - "loss": 0.0894, + "learning_rate": 2.7159752235033468e-05, + "loss": 0.0325, "step": 31340 }, { "epoch": 1.46, - "learning_rate": 1.7154845061178568e-05, - "loss": 0.0491, + "learning_rate": 2.7159284165197448e-05, + "loss": 0.0524, "step": 31345 }, { "epoch": 1.46, - "learning_rate": 1.7154376259903428e-05, - "loss": 0.0855, + "learning_rate": 2.715881609536143e-05, + "loss": 0.0863, "step": 31350 }, { "epoch": 1.46, - "learning_rate": 1.715390745862829e-05, - "loss": 0.0858, + "learning_rate": 2.7158348025525407e-05, + "loss": 0.0851, "step": 31355 }, { "epoch": 1.46, - "learning_rate": 1.715343865735315e-05, - "loss": 0.1007, + "learning_rate": 2.7157879955689387e-05, + "loss": 0.1045, "step": 31360 }, { "epoch": 1.46, - "learning_rate": 1.715296985607801e-05, - "loss": 0.247, + "learning_rate": 2.715741188585337e-05, + "loss": 0.1, "step": 31365 }, { "epoch": 1.46, - "learning_rate": 1.715250105480287e-05, - "loss": 0.1082, + "learning_rate": 2.715694381601735e-05, + "loss": 0.1667, "step": 31370 }, { "epoch": 1.46, - "learning_rate": 1.715203225352773e-05, - "loss": 0.2143, + "learning_rate": 2.715647574618133e-05, + "loss": 0.1546, "step": 31375 }, { "epoch": 1.46, - "learning_rate": 1.715156345225259e-05, - "loss": 0.4156, + "learning_rate": 2.715600767634531e-05, + "loss": 0.3749, "step": 31380 }, { "epoch": 1.46, - "learning_rate": 1.715109465097745e-05, - "loss": 0.3131, + "learning_rate": 2.7155539606509293e-05, + "loss": 0.3595, "step": 31385 }, { "epoch": 1.46, - "learning_rate": 1.7150625849702314e-05, - "loss": 0.038, + "learning_rate": 2.7155071536673273e-05, + "loss": 0.0289, "step": 31390 }, { "epoch": 1.46, - "learning_rate": 1.7150157048427174e-05, - "loss": 0.0726, + "learning_rate": 2.7154603466837253e-05, + "loss": 0.0314, "step": 31395 }, { "epoch": 1.47, - "learning_rate": 1.7149688247152034e-05, - "loss": 0.0399, + "learning_rate": 2.7154135397001233e-05, + "loss": 0.0526, "step": 31400 }, { "epoch": 1.47, - "learning_rate": 1.7149219445876894e-05, - "loss": 0.1019, + "learning_rate": 2.7153667327165216e-05, + "loss": 0.0726, "step": 31405 }, { "epoch": 1.47, - "learning_rate": 1.7148750644601754e-05, - "loss": 0.0969, + "learning_rate": 2.7153199257329196e-05, + "loss": 0.1525, "step": 31410 }, { "epoch": 1.47, - "learning_rate": 1.7148281843326617e-05, - "loss": 0.1317, + "learning_rate": 2.7152731187493175e-05, + "loss": 0.0999, "step": 31415 }, { "epoch": 1.47, - "learning_rate": 1.7147813042051477e-05, - "loss": 0.1599, + "learning_rate": 2.7152263117657155e-05, + "loss": 0.1569, "step": 31420 }, { "epoch": 1.47, - "learning_rate": 1.7147344240776337e-05, - "loss": 0.1803, + "learning_rate": 2.7151795047821135e-05, + "loss": 0.2005, "step": 31425 }, { "epoch": 1.47, - "learning_rate": 1.7146875439501197e-05, - "loss": 0.2713, + "learning_rate": 2.7151326977985115e-05, + "loss": 0.3146, "step": 31430 }, { "epoch": 1.47, - "learning_rate": 1.7146406638226057e-05, - "loss": 0.348, + "learning_rate": 2.7150858908149095e-05, + "loss": 0.2056, "step": 31435 }, { "epoch": 1.47, - "learning_rate": 1.714593783695092e-05, - "loss": 0.0418, + "learning_rate": 2.7150390838313078e-05, + "loss": 0.0459, "step": 31440 }, { "epoch": 1.47, - "learning_rate": 1.714546903567578e-05, - "loss": 0.0582, + "learning_rate": 2.7149922768477058e-05, + "loss": 0.0732, "step": 31445 }, { "epoch": 1.47, - "learning_rate": 1.714500023440064e-05, - "loss": 0.0857, + "learning_rate": 2.7149454698641038e-05, + "loss": 0.1148, "step": 31450 }, { "epoch": 1.47, - "learning_rate": 1.71445314331255e-05, - "loss": 0.1093, + "learning_rate": 2.7148986628805017e-05, + "loss": 0.089, "step": 31455 }, { "epoch": 1.47, - "learning_rate": 1.714406263185036e-05, - "loss": 0.143, + "learning_rate": 2.7148518558969e-05, + "loss": 0.1015, "step": 31460 }, { "epoch": 1.47, - "learning_rate": 1.714359383057522e-05, - "loss": 0.2097, + "learning_rate": 2.714805048913298e-05, + "loss": 0.1189, "step": 31465 }, { "epoch": 1.47, - "learning_rate": 1.714312502930008e-05, - "loss": 0.2391, + "learning_rate": 2.714758241929696e-05, + "loss": 0.1489, "step": 31470 }, { "epoch": 1.47, - "learning_rate": 1.714265622802494e-05, - "loss": 0.2575, + "learning_rate": 2.714711434946094e-05, + "loss": 0.2937, "step": 31475 }, { "epoch": 1.47, - "learning_rate": 1.7142187426749803e-05, - "loss": 0.3507, + "learning_rate": 2.714664627962492e-05, + "loss": 0.4018, "step": 31480 }, { "epoch": 1.47, - "learning_rate": 1.7141718625474663e-05, - "loss": 0.2108, + "learning_rate": 2.71461782097889e-05, + "loss": 0.2134, "step": 31485 }, { "epoch": 1.47, - "learning_rate": 1.7141249824199523e-05, - "loss": 0.1031, + "learning_rate": 2.714571013995288e-05, + "loss": 0.0386, "step": 31490 }, { "epoch": 1.47, - "learning_rate": 1.7140781022924386e-05, - "loss": 0.0743, + "learning_rate": 2.7145242070116863e-05, + "loss": 0.0726, "step": 31495 }, { "epoch": 1.47, - "learning_rate": 1.7140312221649246e-05, - "loss": 0.1272, + "learning_rate": 2.7144774000280843e-05, + "loss": 0.0902, "step": 31500 }, { "epoch": 1.47, - "learning_rate": 1.7139843420374106e-05, - "loss": 0.1309, + "learning_rate": 2.7144305930444822e-05, + "loss": 0.0905, "step": 31505 }, { "epoch": 1.47, - "learning_rate": 1.7139374619098966e-05, - "loss": 0.1227, + "learning_rate": 2.7143837860608802e-05, + "loss": 0.128, "step": 31510 }, { "epoch": 1.47, - "learning_rate": 1.7138905817823826e-05, - "loss": 0.125, + "learning_rate": 2.7143369790772785e-05, + "loss": 0.1366, "step": 31515 }, { "epoch": 1.47, - "learning_rate": 1.7138437016548686e-05, - "loss": 0.1504, + "learning_rate": 2.7142901720936765e-05, + "loss": 0.1131, "step": 31520 }, { "epoch": 1.47, - "learning_rate": 1.7137968215273546e-05, - "loss": 0.1449, + "learning_rate": 2.7142433651100745e-05, + "loss": 0.1289, "step": 31525 }, { "epoch": 1.47, - "learning_rate": 1.713749941399841e-05, - "loss": 0.338, + "learning_rate": 2.7141965581264725e-05, + "loss": 0.2813, "step": 31530 }, { "epoch": 1.47, - "learning_rate": 1.713703061272327e-05, - "loss": 0.2274, + "learning_rate": 2.7141497511428708e-05, + "loss": 0.2638, "step": 31535 }, { "epoch": 1.47, - "learning_rate": 1.713656181144813e-05, - "loss": 0.3505, + "learning_rate": 2.7141029441592688e-05, + "loss": 0.06, "step": 31540 }, { "epoch": 1.47, - "learning_rate": 1.713609301017299e-05, - "loss": 0.0287, + "learning_rate": 2.7140561371756664e-05, + "loss": 0.0568, "step": 31545 }, { "epoch": 1.47, - "learning_rate": 1.713562420889785e-05, - "loss": 0.0951, + "learning_rate": 2.7140093301920647e-05, + "loss": 0.0778, "step": 31550 }, { "epoch": 1.47, - "learning_rate": 1.713515540762271e-05, - "loss": 0.0786, + "learning_rate": 2.7139625232084627e-05, + "loss": 0.0678, "step": 31555 }, { "epoch": 1.47, - "learning_rate": 1.7134686606347572e-05, - "loss": 0.0759, + "learning_rate": 2.7139157162248607e-05, + "loss": 0.0792, "step": 31560 }, { "epoch": 1.47, - "learning_rate": 1.7134217805072432e-05, - "loss": 0.1229, + "learning_rate": 2.7138689092412587e-05, + "loss": 0.1343, "step": 31565 }, { "epoch": 1.47, - "learning_rate": 1.7133749003797292e-05, - "loss": 0.2114, + "learning_rate": 2.713822102257657e-05, + "loss": 0.158, "step": 31570 }, { "epoch": 1.47, - "learning_rate": 1.713328020252215e-05, - "loss": 0.1955, + "learning_rate": 2.713775295274055e-05, + "loss": 0.2188, "step": 31575 }, { "epoch": 1.47, - "learning_rate": 1.7132811401247015e-05, - "loss": 0.2346, + "learning_rate": 2.713728488290453e-05, + "loss": 0.2547, "step": 31580 }, { "epoch": 1.47, - "learning_rate": 1.7132342599971875e-05, - "loss": 0.1724, + "learning_rate": 2.713681681306851e-05, + "loss": 0.3107, "step": 31585 }, { "epoch": 1.47, - "learning_rate": 1.7131873798696735e-05, - "loss": 0.0742, + "learning_rate": 2.7136348743232493e-05, + "loss": 0.0808, "step": 31590 }, { "epoch": 1.47, - "learning_rate": 1.7131404997421595e-05, - "loss": 0.0828, + "learning_rate": 2.7135880673396473e-05, + "loss": 0.1067, "step": 31595 }, { "epoch": 1.47, - "learning_rate": 1.7130936196146455e-05, - "loss": 0.0835, + "learning_rate": 2.7135412603560452e-05, + "loss": 0.0573, "step": 31600 }, { "epoch": 1.47, - "learning_rate": 1.7130467394871315e-05, - "loss": 0.1199, + "learning_rate": 2.7134944533724436e-05, + "loss": 0.0894, "step": 31605 }, { "epoch": 1.47, - "learning_rate": 1.7129998593596175e-05, - "loss": 0.1261, + "learning_rate": 2.7134476463888412e-05, + "loss": 0.096, "step": 31610 }, { "epoch": 1.48, - "learning_rate": 1.7129529792321035e-05, - "loss": 0.096, + "learning_rate": 2.7134008394052392e-05, + "loss": 0.1275, "step": 31615 }, { "epoch": 1.48, - "learning_rate": 1.7129060991045894e-05, - "loss": 0.2419, + "learning_rate": 2.713354032421637e-05, + "loss": 0.2443, "step": 31620 }, { "epoch": 1.48, - "learning_rate": 1.7128592189770758e-05, - "loss": 0.225, + "learning_rate": 2.7133072254380355e-05, + "loss": 0.1851, "step": 31625 }, { "epoch": 1.48, - "learning_rate": 1.7128123388495618e-05, - "loss": 0.286, + "learning_rate": 2.7132604184544335e-05, + "loss": 0.3059, "step": 31630 }, { "epoch": 1.48, - "learning_rate": 1.7127654587220478e-05, - "loss": 0.2153, + "learning_rate": 2.7132136114708315e-05, + "loss": 0.2894, "step": 31635 }, { "epoch": 1.48, - "learning_rate": 1.712718578594534e-05, - "loss": 0.0429, + "learning_rate": 2.7131668044872294e-05, + "loss": 0.0236, "step": 31640 }, { "epoch": 1.48, - "learning_rate": 1.71267169846702e-05, - "loss": 0.0757, + "learning_rate": 2.7131199975036278e-05, + "loss": 0.044, "step": 31645 }, { "epoch": 1.48, - "learning_rate": 1.712624818339506e-05, - "loss": 0.0863, + "learning_rate": 2.7130731905200257e-05, + "loss": 0.1022, "step": 31650 }, { "epoch": 1.48, - "learning_rate": 1.712577938211992e-05, - "loss": 0.0917, + "learning_rate": 2.7130263835364237e-05, + "loss": 0.0992, "step": 31655 }, { "epoch": 1.48, - "learning_rate": 1.712531058084478e-05, - "loss": 0.104, + "learning_rate": 2.7129795765528217e-05, + "loss": 0.135, "step": 31660 }, { "epoch": 1.48, - "learning_rate": 1.712484177956964e-05, - "loss": 0.1161, + "learning_rate": 2.71293276956922e-05, + "loss": 0.2547, "step": 31665 }, { "epoch": 1.48, - "learning_rate": 1.7124372978294504e-05, - "loss": 0.1551, + "learning_rate": 2.7128859625856177e-05, + "loss": 0.1903, "step": 31670 }, { "epoch": 1.48, - "learning_rate": 1.7123904177019364e-05, - "loss": 0.2739, + "learning_rate": 2.7128391556020156e-05, + "loss": 0.3072, "step": 31675 }, { "epoch": 1.48, - "learning_rate": 1.7123435375744224e-05, - "loss": 0.4039, + "learning_rate": 2.712792348618414e-05, + "loss": 0.461, "step": 31680 }, { "epoch": 1.48, - "learning_rate": 1.7122966574469084e-05, - "loss": 0.2652, + "learning_rate": 2.712745541634812e-05, + "loss": 0.3273, "step": 31685 }, { "epoch": 1.48, - "learning_rate": 1.7122497773193944e-05, - "loss": 0.0578, + "learning_rate": 2.71269873465121e-05, + "loss": 0.0519, "step": 31690 }, { "epoch": 1.48, - "learning_rate": 1.7122028971918804e-05, - "loss": 0.0959, + "learning_rate": 2.712651927667608e-05, + "loss": 0.0846, "step": 31695 }, { "epoch": 1.48, - "learning_rate": 1.7121560170643664e-05, - "loss": 0.1124, + "learning_rate": 2.7126051206840062e-05, + "loss": 0.12, "step": 31700 }, { "epoch": 1.48, - "learning_rate": 1.7121091369368527e-05, - "loss": 0.0831, + "learning_rate": 2.7125583137004042e-05, + "loss": 0.0657, "step": 31705 }, { "epoch": 1.48, - "learning_rate": 1.7120622568093387e-05, - "loss": 0.1715, + "learning_rate": 2.7125115067168022e-05, + "loss": 0.0977, "step": 31710 }, { "epoch": 1.48, - "learning_rate": 1.7120153766818247e-05, - "loss": 0.1528, + "learning_rate": 2.7124646997332002e-05, + "loss": 0.1374, "step": 31715 }, { "epoch": 1.48, - "learning_rate": 1.711968496554311e-05, - "loss": 0.1222, + "learning_rate": 2.7124178927495985e-05, + "loss": 0.1501, "step": 31720 }, { "epoch": 1.48, - "learning_rate": 1.711921616426797e-05, - "loss": 0.3348, + "learning_rate": 2.7123710857659965e-05, + "loss": 0.2465, "step": 31725 }, { "epoch": 1.48, - "learning_rate": 1.711874736299283e-05, - "loss": 0.2315, + "learning_rate": 2.7123242787823945e-05, + "loss": 0.1671, "step": 31730 }, { "epoch": 1.48, - "learning_rate": 1.711827856171769e-05, - "loss": 0.3509, + "learning_rate": 2.7122774717987924e-05, + "loss": 0.2262, "step": 31735 }, { "epoch": 1.48, - "learning_rate": 1.711780976044255e-05, - "loss": 0.025, + "learning_rate": 2.7122306648151904e-05, + "loss": 0.0625, "step": 31740 }, { "epoch": 1.48, - "learning_rate": 1.711734095916741e-05, - "loss": 0.0601, + "learning_rate": 2.7121838578315884e-05, + "loss": 0.0483, "step": 31745 }, { "epoch": 1.48, - "learning_rate": 1.711687215789227e-05, - "loss": 0.0986, + "learning_rate": 2.7121370508479864e-05, + "loss": 0.0927, "step": 31750 }, { "epoch": 1.48, - "learning_rate": 1.711640335661713e-05, - "loss": 0.117, + "learning_rate": 2.7120902438643847e-05, + "loss": 0.079, "step": 31755 }, { "epoch": 1.48, - "learning_rate": 1.711593455534199e-05, - "loss": 0.0785, + "learning_rate": 2.7120434368807827e-05, + "loss": 0.1691, "step": 31760 }, { "epoch": 1.48, - "learning_rate": 1.7115465754066853e-05, - "loss": 0.1231, + "learning_rate": 2.7119966298971807e-05, + "loss": 0.1397, "step": 31765 }, { "epoch": 1.48, - "learning_rate": 1.7114996952791713e-05, - "loss": 0.1392, + "learning_rate": 2.7119498229135787e-05, + "loss": 0.1657, "step": 31770 }, { "epoch": 1.48, - "learning_rate": 1.7114528151516573e-05, - "loss": 0.1812, + "learning_rate": 2.711903015929977e-05, + "loss": 0.2364, "step": 31775 }, { "epoch": 1.48, - "learning_rate": 1.7114059350241433e-05, - "loss": 0.2136, + "learning_rate": 2.711856208946375e-05, + "loss": 0.2493, "step": 31780 }, { "epoch": 1.48, - "learning_rate": 1.7113590548966296e-05, - "loss": 0.257, + "learning_rate": 2.711809401962773e-05, + "loss": 0.2651, "step": 31785 }, { "epoch": 1.48, - "learning_rate": 1.7113121747691156e-05, - "loss": 0.025, + "learning_rate": 2.7117625949791713e-05, + "loss": 0.0424, "step": 31790 }, { "epoch": 1.48, - "learning_rate": 1.7112652946416016e-05, - "loss": 0.0484, + "learning_rate": 2.7117157879955692e-05, + "loss": 0.0796, "step": 31795 }, { "epoch": 1.48, - "learning_rate": 1.7112184145140876e-05, - "loss": 0.0609, + "learning_rate": 2.711668981011967e-05, + "loss": 0.1075, "step": 31800 }, { "epoch": 1.48, - "learning_rate": 1.7111715343865736e-05, - "loss": 0.1174, + "learning_rate": 2.711622174028365e-05, + "loss": 0.1335, "step": 31805 }, { "epoch": 1.48, - "learning_rate": 1.71112465425906e-05, - "loss": 0.0714, + "learning_rate": 2.7115753670447632e-05, + "loss": 0.4187, "step": 31810 }, { "epoch": 1.48, - "learning_rate": 1.711077774131546e-05, - "loss": 0.111, + "learning_rate": 2.711528560061161e-05, + "loss": 0.1783, "step": 31815 }, { "epoch": 1.48, - "learning_rate": 1.711030894004032e-05, - "loss": 0.1114, + "learning_rate": 2.711481753077559e-05, + "loss": 0.1089, "step": 31820 }, { "epoch": 1.48, - "learning_rate": 1.710984013876518e-05, - "loss": 0.207, + "learning_rate": 2.711434946093957e-05, + "loss": 0.2229, "step": 31825 }, { "epoch": 1.49, - "learning_rate": 1.710937133749004e-05, - "loss": 0.299, + "learning_rate": 2.7113881391103555e-05, + "loss": 0.3265, "step": 31830 }, { "epoch": 1.49, - "learning_rate": 1.71089025362149e-05, - "loss": 0.4295, + "learning_rate": 2.7113413321267534e-05, + "loss": 0.241, "step": 31835 }, { "epoch": 1.49, - "learning_rate": 1.710843373493976e-05, - "loss": 0.1151, + "learning_rate": 2.7112945251431514e-05, + "loss": 0.0489, "step": 31840 }, { "epoch": 1.49, - "learning_rate": 1.7107964933664622e-05, - "loss": 0.1369, + "learning_rate": 2.7112477181595497e-05, + "loss": 0.0377, "step": 31845 }, { "epoch": 1.49, - "learning_rate": 1.7107496132389482e-05, - "loss": 0.0604, + "learning_rate": 2.7112009111759477e-05, + "loss": 0.0665, "step": 31850 }, { "epoch": 1.49, - "learning_rate": 1.7107027331114342e-05, - "loss": 0.1176, + "learning_rate": 2.7111541041923457e-05, + "loss": 0.0568, "step": 31855 }, { "epoch": 1.49, - "learning_rate": 1.7106558529839202e-05, - "loss": 0.0861, + "learning_rate": 2.7111072972087433e-05, + "loss": 0.1248, "step": 31860 }, { "epoch": 1.49, - "learning_rate": 1.7106089728564065e-05, - "loss": 0.1066, + "learning_rate": 2.7110604902251417e-05, + "loss": 0.109, "step": 31865 }, { "epoch": 1.49, - "learning_rate": 1.7105620927288925e-05, - "loss": 0.1939, + "learning_rate": 2.7110136832415396e-05, + "loss": 0.217, "step": 31870 }, { "epoch": 1.49, - "learning_rate": 1.7105152126013785e-05, - "loss": 0.1461, + "learning_rate": 2.7109668762579376e-05, + "loss": 0.2257, "step": 31875 }, { "epoch": 1.49, - "learning_rate": 1.7104683324738645e-05, - "loss": 0.2631, + "learning_rate": 2.7109200692743356e-05, + "loss": 0.3889, "step": 31880 }, { "epoch": 1.49, - "learning_rate": 1.7104214523463505e-05, - "loss": 0.2348, + "learning_rate": 2.710873262290734e-05, + "loss": 0.3604, "step": 31885 }, { "epoch": 1.49, - "learning_rate": 1.7103745722188365e-05, - "loss": 0.0548, + "learning_rate": 2.710826455307132e-05, + "loss": 0.0339, "step": 31890 }, { "epoch": 1.49, - "learning_rate": 1.7103276920913225e-05, - "loss": 0.085, + "learning_rate": 2.71077964832353e-05, + "loss": 0.0946, "step": 31895 }, { "epoch": 1.49, - "learning_rate": 1.7102808119638085e-05, - "loss": 0.0418, + "learning_rate": 2.710732841339928e-05, + "loss": 0.1173, "step": 31900 }, { "epoch": 1.49, - "learning_rate": 1.7102339318362948e-05, - "loss": 0.1059, + "learning_rate": 2.7106860343563262e-05, + "loss": 0.091, "step": 31905 }, { "epoch": 1.49, - "learning_rate": 1.7101870517087808e-05, - "loss": 0.1142, + "learning_rate": 2.7106392273727242e-05, + "loss": 0.1903, "step": 31910 }, { "epoch": 1.49, - "learning_rate": 1.7101401715812668e-05, - "loss": 0.1028, + "learning_rate": 2.710592420389122e-05, + "loss": 0.1138, "step": 31915 }, { "epoch": 1.49, - "learning_rate": 1.7100932914537528e-05, - "loss": 0.1925, + "learning_rate": 2.7105456134055205e-05, + "loss": 0.1817, "step": 31920 }, { "epoch": 1.49, - "learning_rate": 1.710046411326239e-05, - "loss": 0.1953, + "learning_rate": 2.710498806421918e-05, + "loss": 0.1822, "step": 31925 }, { "epoch": 1.49, - "learning_rate": 1.709999531198725e-05, - "loss": 0.3952, + "learning_rate": 2.710451999438316e-05, + "loss": 0.2536, "step": 31930 }, { "epoch": 1.49, - "learning_rate": 1.709952651071211e-05, - "loss": 0.316, + "learning_rate": 2.710405192454714e-05, + "loss": 0.2957, "step": 31935 }, { "epoch": 1.49, - "learning_rate": 1.709905770943697e-05, - "loss": 0.0727, + "learning_rate": 2.7103583854711124e-05, + "loss": 0.0512, "step": 31940 }, { "epoch": 1.49, - "learning_rate": 1.709858890816183e-05, - "loss": 0.0474, + "learning_rate": 2.7103115784875104e-05, + "loss": 0.0712, "step": 31945 }, { "epoch": 1.49, - "learning_rate": 1.7098120106886694e-05, - "loss": 0.0985, + "learning_rate": 2.7102647715039084e-05, + "loss": 0.0949, "step": 31950 }, { "epoch": 1.49, - "learning_rate": 1.7097651305611554e-05, - "loss": 0.1191, + "learning_rate": 2.7102179645203064e-05, + "loss": 0.1251, "step": 31955 }, { "epoch": 1.49, - "learning_rate": 1.7097182504336414e-05, - "loss": 0.1053, + "learning_rate": 2.7101711575367047e-05, + "loss": 0.0888, "step": 31960 }, { "epoch": 1.49, - "learning_rate": 1.7096713703061274e-05, - "loss": 0.1443, + "learning_rate": 2.7101243505531027e-05, + "loss": 0.0918, "step": 31965 }, { "epoch": 1.49, - "learning_rate": 1.7096244901786134e-05, - "loss": 0.1016, + "learning_rate": 2.7100775435695006e-05, + "loss": 0.1947, "step": 31970 }, { "epoch": 1.49, - "learning_rate": 1.7095776100510994e-05, - "loss": 0.2535, + "learning_rate": 2.710030736585899e-05, + "loss": 0.293, "step": 31975 }, { "epoch": 1.49, - "learning_rate": 1.7095307299235854e-05, - "loss": 0.2324, + "learning_rate": 2.709983929602297e-05, + "loss": 0.2794, "step": 31980 }, { "epoch": 1.49, - "learning_rate": 1.7094838497960714e-05, - "loss": 0.3434, + "learning_rate": 2.709937122618695e-05, + "loss": 0.2422, "step": 31985 }, { "epoch": 1.49, - "learning_rate": 1.7094369696685577e-05, - "loss": 0.057, + "learning_rate": 2.7098903156350926e-05, + "loss": 0.0326, "step": 31990 }, { "epoch": 1.49, - "learning_rate": 1.7093900895410437e-05, - "loss": 0.1534, + "learning_rate": 2.709843508651491e-05, + "loss": 0.0456, "step": 31995 }, { "epoch": 1.49, - "learning_rate": 1.7093432094135297e-05, - "loss": 0.0607, + "learning_rate": 2.709796701667889e-05, + "loss": 0.124, "step": 32000 }, { "epoch": 1.49, - "learning_rate": 1.709296329286016e-05, - "loss": 0.0599, + "learning_rate": 2.709749894684287e-05, + "loss": 0.1717, "step": 32005 }, { "epoch": 1.49, - "learning_rate": 1.709249449158502e-05, - "loss": 0.1019, + "learning_rate": 2.7097030877006848e-05, + "loss": 0.0744, "step": 32010 }, { "epoch": 1.49, - "learning_rate": 1.709202569030988e-05, - "loss": 0.0972, + "learning_rate": 2.709656280717083e-05, + "loss": 0.154, "step": 32015 }, { "epoch": 1.49, - "learning_rate": 1.709155688903474e-05, - "loss": 0.1218, + "learning_rate": 2.709609473733481e-05, + "loss": 0.0861, "step": 32020 }, { "epoch": 1.49, - "learning_rate": 1.70910880877596e-05, - "loss": 0.234, + "learning_rate": 2.709562666749879e-05, + "loss": 0.2772, "step": 32025 }, { "epoch": 1.49, - "learning_rate": 1.709061928648446e-05, - "loss": 0.3776, + "learning_rate": 2.7095158597662774e-05, + "loss": 0.3768, "step": 32030 }, { "epoch": 1.49, - "learning_rate": 1.709015048520932e-05, - "loss": 0.2142, + "learning_rate": 2.7094690527826754e-05, + "loss": 0.2509, "step": 32035 }, { "epoch": 1.5, - "learning_rate": 1.7089681683934183e-05, - "loss": 0.0744, + "learning_rate": 2.7094222457990734e-05, + "loss": 0.0797, "step": 32040 }, { "epoch": 1.5, - "learning_rate": 1.7089212882659043e-05, - "loss": 0.0301, + "learning_rate": 2.7093754388154714e-05, + "loss": 0.0646, "step": 32045 }, { "epoch": 1.5, - "learning_rate": 1.7088744081383903e-05, - "loss": 0.1003, + "learning_rate": 2.7093286318318694e-05, + "loss": 0.1134, "step": 32050 }, { "epoch": 1.5, - "learning_rate": 1.7088275280108763e-05, - "loss": 0.0953, + "learning_rate": 2.7092818248482673e-05, + "loss": 0.0643, "step": 32055 }, { "epoch": 1.5, - "learning_rate": 1.7087806478833623e-05, - "loss": 0.1135, + "learning_rate": 2.7092350178646653e-05, + "loss": 0.111, "step": 32060 }, { "epoch": 1.5, - "learning_rate": 1.7087337677558483e-05, - "loss": 0.097, + "learning_rate": 2.7091882108810633e-05, + "loss": 0.1633, "step": 32065 }, { "epoch": 1.5, - "learning_rate": 1.7086868876283346e-05, - "loss": 0.2052, + "learning_rate": 2.7091414038974616e-05, + "loss": 0.1374, "step": 32070 }, { "epoch": 1.5, - "learning_rate": 1.7086400075008206e-05, - "loss": 0.1643, + "learning_rate": 2.7090945969138596e-05, + "loss": 0.2281, "step": 32075 }, { "epoch": 1.5, - "learning_rate": 1.7085931273733066e-05, - "loss": 0.301, + "learning_rate": 2.7090477899302576e-05, + "loss": 0.3332, "step": 32080 }, { "epoch": 1.5, - "learning_rate": 1.7085462472457926e-05, - "loss": 0.389, + "learning_rate": 2.7090009829466556e-05, + "loss": 0.2668, "step": 32085 }, { "epoch": 1.5, - "learning_rate": 1.708499367118279e-05, - "loss": 0.0549, + "learning_rate": 2.708954175963054e-05, + "loss": 0.0363, "step": 32090 }, { "epoch": 1.5, - "learning_rate": 1.708452486990765e-05, - "loss": 0.0714, + "learning_rate": 2.708907368979452e-05, + "loss": 0.0522, "step": 32095 }, { "epoch": 1.5, - "learning_rate": 1.708405606863251e-05, - "loss": 0.1293, + "learning_rate": 2.70886056199585e-05, + "loss": 0.1357, "step": 32100 }, { "epoch": 1.5, - "learning_rate": 1.708358726735737e-05, - "loss": 0.0897, + "learning_rate": 2.7088137550122482e-05, + "loss": 0.1301, "step": 32105 }, { "epoch": 1.5, - "learning_rate": 1.708311846608223e-05, - "loss": 0.162, + "learning_rate": 2.708766948028646e-05, + "loss": 0.1362, "step": 32110 }, { "epoch": 1.5, - "learning_rate": 1.708264966480709e-05, - "loss": 0.1221, + "learning_rate": 2.7087201410450438e-05, + "loss": 0.0767, "step": 32115 }, { "epoch": 1.5, - "learning_rate": 1.708218086353195e-05, - "loss": 0.1646, + "learning_rate": 2.7086733340614418e-05, + "loss": 0.171, "step": 32120 }, { "epoch": 1.5, - "learning_rate": 1.708171206225681e-05, - "loss": 0.271, + "learning_rate": 2.70862652707784e-05, + "loss": 0.1663, "step": 32125 }, { "epoch": 1.5, - "learning_rate": 1.708124326098167e-05, - "loss": 0.3324, + "learning_rate": 2.708579720094238e-05, + "loss": 0.3131, "step": 32130 }, { "epoch": 1.5, - "learning_rate": 1.7080774459706532e-05, - "loss": 0.295, + "learning_rate": 2.708532913110636e-05, + "loss": 0.2447, "step": 32135 }, { "epoch": 1.5, - "learning_rate": 1.7080305658431392e-05, - "loss": 0.0784, + "learning_rate": 2.708486106127034e-05, + "loss": 0.0599, "step": 32140 }, { "epoch": 1.5, - "learning_rate": 1.7079836857156252e-05, - "loss": 0.0895, + "learning_rate": 2.7084392991434324e-05, + "loss": 0.0719, "step": 32145 }, { "epoch": 1.5, - "learning_rate": 1.7079368055881115e-05, - "loss": 0.0987, + "learning_rate": 2.7083924921598304e-05, + "loss": 0.0617, "step": 32150 }, { "epoch": 1.5, - "learning_rate": 1.7078899254605975e-05, - "loss": 0.1247, + "learning_rate": 2.7083456851762283e-05, + "loss": 0.1061, "step": 32155 }, { "epoch": 1.5, - "learning_rate": 1.7078430453330835e-05, - "loss": 0.145, + "learning_rate": 2.7082988781926267e-05, + "loss": 0.1063, "step": 32160 }, { "epoch": 1.5, - "learning_rate": 1.7077961652055695e-05, - "loss": 0.0755, + "learning_rate": 2.7082520712090246e-05, + "loss": 0.1159, "step": 32165 }, { "epoch": 1.5, - "learning_rate": 1.7077492850780555e-05, - "loss": 0.1894, + "learning_rate": 2.7082052642254226e-05, + "loss": 0.2318, "step": 32170 }, { "epoch": 1.5, - "learning_rate": 1.7077024049505415e-05, - "loss": 0.2969, + "learning_rate": 2.7081584572418206e-05, + "loss": 0.2681, "step": 32175 }, { "epoch": 1.5, - "learning_rate": 1.7076555248230278e-05, - "loss": 0.3833, + "learning_rate": 2.7081116502582186e-05, + "loss": 0.3604, "step": 32180 }, { "epoch": 1.5, - "learning_rate": 1.7076086446955138e-05, - "loss": 0.2779, + "learning_rate": 2.7080648432746166e-05, + "loss": 0.2109, "step": 32185 }, { "epoch": 1.5, - "learning_rate": 1.7075617645679998e-05, - "loss": 0.0559, + "learning_rate": 2.7080180362910145e-05, + "loss": 0.0237, "step": 32190 }, { "epoch": 1.5, - "learning_rate": 1.7075148844404858e-05, - "loss": 0.0812, + "learning_rate": 2.7079712293074125e-05, + "loss": 0.0279, "step": 32195 }, { "epoch": 1.5, - "learning_rate": 1.7074680043129718e-05, - "loss": 0.0472, + "learning_rate": 2.707924422323811e-05, + "loss": 0.1434, "step": 32200 }, { "epoch": 1.5, - "learning_rate": 1.7074211241854578e-05, - "loss": 0.0631, + "learning_rate": 2.7078776153402088e-05, + "loss": 0.062, "step": 32205 }, { "epoch": 1.5, - "learning_rate": 1.7073742440579438e-05, - "loss": 0.1842, + "learning_rate": 2.7078308083566068e-05, + "loss": 0.092, "step": 32210 }, { "epoch": 1.5, - "learning_rate": 1.70732736393043e-05, - "loss": 0.1205, + "learning_rate": 2.707784001373005e-05, + "loss": 0.061, "step": 32215 }, { "epoch": 1.5, - "learning_rate": 1.707280483802916e-05, - "loss": 0.1232, + "learning_rate": 2.707737194389403e-05, + "loss": 0.1411, "step": 32220 }, { "epoch": 1.5, - "learning_rate": 1.707233603675402e-05, - "loss": 0.1249, + "learning_rate": 2.707690387405801e-05, + "loss": 0.1898, "step": 32225 }, { "epoch": 1.5, - "learning_rate": 1.7071867235478884e-05, - "loss": 0.3041, + "learning_rate": 2.707643580422199e-05, + "loss": 0.3524, "step": 32230 }, { "epoch": 1.5, - "learning_rate": 1.7071398434203744e-05, - "loss": 0.3015, + "learning_rate": 2.7075967734385974e-05, + "loss": 0.293, "step": 32235 }, { "epoch": 1.5, - "learning_rate": 1.7070929632928604e-05, + "learning_rate": 2.707549966454995e-05, "loss": 0.0641, "step": 32240 }, { "epoch": 1.5, - "learning_rate": 1.7070460831653464e-05, - "loss": 0.0933, + "learning_rate": 2.707503159471393e-05, + "loss": 0.0273, "step": 32245 }, { "epoch": 1.5, - "learning_rate": 1.7069992030378324e-05, - "loss": 0.0691, + "learning_rate": 2.707456352487791e-05, + "loss": 0.0713, "step": 32250 }, { "epoch": 1.51, - "learning_rate": 1.7069523229103184e-05, - "loss": 0.1205, + "learning_rate": 2.7074095455041893e-05, + "loss": 0.1019, "step": 32255 }, { "epoch": 1.51, - "learning_rate": 1.7069054427828044e-05, - "loss": 0.1456, + "learning_rate": 2.7073627385205873e-05, + "loss": 0.0929, "step": 32260 }, { "epoch": 1.51, - "learning_rate": 1.7068585626552904e-05, - "loss": 0.0755, + "learning_rate": 2.7073159315369853e-05, + "loss": 0.1088, "step": 32265 }, { "epoch": 1.51, - "learning_rate": 1.7068116825277764e-05, - "loss": 0.1583, + "learning_rate": 2.7072691245533833e-05, + "loss": 0.0874, "step": 32270 }, { "epoch": 1.51, - "learning_rate": 1.7067648024002627e-05, - "loss": 0.2089, + "learning_rate": 2.7072223175697816e-05, + "loss": 0.271, "step": 32275 }, { "epoch": 1.51, - "learning_rate": 1.7067179222727487e-05, - "loss": 0.3421, + "learning_rate": 2.7071755105861796e-05, + "loss": 0.3929, "step": 32280 }, { "epoch": 1.51, - "learning_rate": 1.7066710421452347e-05, - "loss": 0.1861, + "learning_rate": 2.7071287036025776e-05, + "loss": 0.351, "step": 32285 }, { "epoch": 1.51, - "learning_rate": 1.7066241620177207e-05, - "loss": 0.0438, + "learning_rate": 2.707081896618976e-05, + "loss": 0.0732, "step": 32290 }, { "epoch": 1.51, - "learning_rate": 1.706577281890207e-05, - "loss": 0.1262, + "learning_rate": 2.707035089635374e-05, + "loss": 0.0978, "step": 32295 }, { "epoch": 1.51, - "learning_rate": 1.706530401762693e-05, - "loss": 0.0893, + "learning_rate": 2.706988282651772e-05, + "loss": 0.0841, "step": 32300 }, { "epoch": 1.51, - "learning_rate": 1.706483521635179e-05, - "loss": 0.0865, + "learning_rate": 2.7069414756681695e-05, + "loss": 0.104, "step": 32305 }, { "epoch": 1.51, - "learning_rate": 1.706436641507665e-05, - "loss": 0.1191, + "learning_rate": 2.7068946686845678e-05, + "loss": 0.1508, "step": 32310 }, { "epoch": 1.51, - "learning_rate": 1.706389761380151e-05, - "loss": 0.1458, + "learning_rate": 2.7068478617009658e-05, + "loss": 0.1151, "step": 32315 }, { "epoch": 1.51, - "learning_rate": 1.7063428812526373e-05, - "loss": 0.1978, + "learning_rate": 2.7068010547173638e-05, + "loss": 0.2606, "step": 32320 }, { "epoch": 1.51, - "learning_rate": 1.7062960011251233e-05, - "loss": 0.197, + "learning_rate": 2.7067542477337617e-05, + "loss": 0.2159, "step": 32325 }, { "epoch": 1.51, - "learning_rate": 1.7062491209976093e-05, - "loss": 0.3146, + "learning_rate": 2.70670744075016e-05, + "loss": 0.2934, "step": 32330 }, { "epoch": 1.51, - "learning_rate": 1.7062022408700953e-05, - "loss": 0.2121, + "learning_rate": 2.706660633766558e-05, + "loss": 0.3062, "step": 32335 }, { "epoch": 1.51, - "learning_rate": 1.7061553607425813e-05, - "loss": 0.07, + "learning_rate": 2.706613826782956e-05, + "loss": 0.0399, "step": 32340 }, { "epoch": 1.51, - "learning_rate": 1.7061084806150673e-05, - "loss": 0.1082, + "learning_rate": 2.7065670197993543e-05, + "loss": 0.0873, "step": 32345 }, { "epoch": 1.51, - "learning_rate": 1.7060616004875533e-05, - "loss": 0.1035, + "learning_rate": 2.7065202128157523e-05, + "loss": 0.0339, "step": 32350 }, { "epoch": 1.51, - "learning_rate": 1.7060147203600396e-05, - "loss": 0.1162, + "learning_rate": 2.7064734058321503e-05, + "loss": 0.077, "step": 32355 }, { "epoch": 1.51, - "learning_rate": 1.7059678402325256e-05, - "loss": 0.1757, + "learning_rate": 2.7064265988485483e-05, + "loss": 0.261, "step": 32360 }, { "epoch": 1.51, - "learning_rate": 1.7059209601050116e-05, - "loss": 0.1391, + "learning_rate": 2.7063797918649463e-05, + "loss": 0.0711, "step": 32365 }, { "epoch": 1.51, - "learning_rate": 1.7058740799774976e-05, - "loss": 0.1575, + "learning_rate": 2.7063329848813443e-05, + "loss": 0.1113, "step": 32370 }, { "epoch": 1.51, - "learning_rate": 1.705827199849984e-05, - "loss": 0.1514, + "learning_rate": 2.7062861778977422e-05, + "loss": 0.2722, "step": 32375 }, { "epoch": 1.51, - "learning_rate": 1.70578031972247e-05, - "loss": 0.2948, + "learning_rate": 2.7062393709141402e-05, + "loss": 0.3051, "step": 32380 }, { "epoch": 1.51, - "learning_rate": 1.705733439594956e-05, - "loss": 0.2625, + "learning_rate": 2.7061925639305385e-05, + "loss": 0.2774, "step": 32385 }, { "epoch": 1.51, - "learning_rate": 1.705686559467442e-05, - "loss": 0.0237, + "learning_rate": 2.7061457569469365e-05, + "loss": 0.0502, "step": 32390 }, { "epoch": 1.51, - "learning_rate": 1.705639679339928e-05, - "loss": 0.0852, + "learning_rate": 2.7060989499633345e-05, + "loss": 0.0761, "step": 32395 }, { "epoch": 1.51, - "learning_rate": 1.705592799212414e-05, - "loss": 0.1271, + "learning_rate": 2.7060521429797328e-05, + "loss": 0.0651, "step": 32400 }, { "epoch": 1.51, - "learning_rate": 1.7055459190849e-05, - "loss": 0.0789, + "learning_rate": 2.7060053359961308e-05, + "loss": 0.1633, "step": 32405 }, { "epoch": 1.51, - "learning_rate": 1.705499038957386e-05, - "loss": 0.1637, + "learning_rate": 2.7059585290125288e-05, + "loss": 0.0858, "step": 32410 }, { "epoch": 1.51, - "learning_rate": 1.7054521588298722e-05, - "loss": 0.1025, + "learning_rate": 2.7059117220289268e-05, + "loss": 0.073, "step": 32415 }, { "epoch": 1.51, - "learning_rate": 1.7054052787023582e-05, - "loss": 0.1824, + "learning_rate": 2.705864915045325e-05, + "loss": 0.1283, "step": 32420 }, { "epoch": 1.51, - "learning_rate": 1.7053583985748442e-05, - "loss": 0.2877, + "learning_rate": 2.705818108061723e-05, + "loss": 0.2236, "step": 32425 }, { "epoch": 1.51, - "learning_rate": 1.7053115184473302e-05, - "loss": 0.3047, + "learning_rate": 2.7057713010781207e-05, + "loss": 0.1609, "step": 32430 }, { "epoch": 1.51, - "learning_rate": 1.7052646383198165e-05, - "loss": 0.2598, + "learning_rate": 2.7057244940945187e-05, + "loss": 0.3077, "step": 32435 }, { "epoch": 1.51, - "learning_rate": 1.7052177581923025e-05, - "loss": 0.0308, + "learning_rate": 2.705677687110917e-05, + "loss": 0.0376, "step": 32440 }, { "epoch": 1.51, - "learning_rate": 1.7051708780647885e-05, - "loss": 0.0843, + "learning_rate": 2.705630880127315e-05, + "loss": 0.0517, "step": 32445 }, { "epoch": 1.51, - "learning_rate": 1.7051239979372745e-05, - "loss": 0.0413, + "learning_rate": 2.705584073143713e-05, + "loss": 0.073, "step": 32450 }, { "epoch": 1.51, - "learning_rate": 1.7050771178097605e-05, - "loss": 0.1075, + "learning_rate": 2.7055372661601113e-05, + "loss": 0.1018, "step": 32455 }, { "epoch": 1.51, - "learning_rate": 1.705030237682247e-05, - "loss": 0.0626, + "learning_rate": 2.7054904591765093e-05, + "loss": 0.1511, "step": 32460 }, { "epoch": 1.51, - "learning_rate": 1.7049833575547328e-05, - "loss": 0.134, + "learning_rate": 2.7054436521929073e-05, + "loss": 0.1133, "step": 32465 }, { "epoch": 1.52, - "learning_rate": 1.7049364774272188e-05, - "loss": 0.2199, + "learning_rate": 2.7053968452093052e-05, + "loss": 0.1803, "step": 32470 }, { "epoch": 1.52, - "learning_rate": 1.7048895972997048e-05, - "loss": 0.1289, + "learning_rate": 2.7053500382257036e-05, + "loss": 0.1564, "step": 32475 }, { "epoch": 1.52, - "learning_rate": 1.7048427171721908e-05, - "loss": 0.3547, + "learning_rate": 2.7053032312421016e-05, + "loss": 0.2363, "step": 32480 }, { "epoch": 1.52, - "learning_rate": 1.7047958370446768e-05, - "loss": 0.21, + "learning_rate": 2.7052564242584995e-05, + "loss": 0.2663, "step": 32485 }, { "epoch": 1.52, - "learning_rate": 1.7047489569171628e-05, - "loss": 0.0306, + "learning_rate": 2.7052096172748975e-05, + "loss": 0.0317, "step": 32490 }, { "epoch": 1.52, - "learning_rate": 1.7047020767896488e-05, - "loss": 0.0337, + "learning_rate": 2.7051628102912955e-05, + "loss": 0.0785, "step": 32495 }, { "epoch": 1.52, - "learning_rate": 1.704655196662135e-05, - "loss": 0.0887, + "learning_rate": 2.7051160033076935e-05, + "loss": 0.117, "step": 32500 }, { "epoch": 1.52, - "learning_rate": 1.704608316534621e-05, - "loss": 0.1361, + "learning_rate": 2.7050691963240915e-05, + "loss": 0.1065, "step": 32505 }, { "epoch": 1.52, - "learning_rate": 1.704561436407107e-05, - "loss": 0.1427, + "learning_rate": 2.7050223893404894e-05, + "loss": 0.1299, "step": 32510 }, { "epoch": 1.52, - "learning_rate": 1.7045145562795934e-05, - "loss": 0.1748, + "learning_rate": 2.7049755823568878e-05, + "loss": 0.097, "step": 32515 }, { "epoch": 1.52, - "learning_rate": 1.7044676761520794e-05, - "loss": 0.2038, + "learning_rate": 2.7049287753732857e-05, + "loss": 0.2074, "step": 32520 }, { "epoch": 1.52, - "learning_rate": 1.7044207960245654e-05, - "loss": 0.1809, + "learning_rate": 2.7048819683896837e-05, + "loss": 0.2138, "step": 32525 }, { "epoch": 1.52, - "learning_rate": 1.7043739158970514e-05, - "loss": 0.2253, + "learning_rate": 2.704835161406082e-05, + "loss": 0.2639, "step": 32530 }, { "epoch": 1.52, - "learning_rate": 1.7043270357695374e-05, - "loss": 0.3482, + "learning_rate": 2.70478835442248e-05, + "loss": 0.3218, "step": 32535 }, { "epoch": 1.52, - "learning_rate": 1.7042801556420234e-05, - "loss": 0.1008, + "learning_rate": 2.704741547438878e-05, + "loss": 0.0558, "step": 32540 }, { "epoch": 1.52, - "learning_rate": 1.7042332755145094e-05, - "loss": 0.0441, + "learning_rate": 2.704694740455276e-05, + "loss": 0.0935, "step": 32545 }, { "epoch": 1.52, - "learning_rate": 1.7041863953869954e-05, - "loss": 0.1013, + "learning_rate": 2.7046479334716743e-05, + "loss": 0.0543, "step": 32550 }, { "epoch": 1.52, - "learning_rate": 1.7041395152594817e-05, - "loss": 0.0886, + "learning_rate": 2.704601126488072e-05, + "loss": 0.0953, "step": 32555 }, { "epoch": 1.52, - "learning_rate": 1.7040926351319677e-05, - "loss": 0.074, + "learning_rate": 2.70455431950447e-05, + "loss": 0.0852, "step": 32560 }, { "epoch": 1.52, - "learning_rate": 1.7040457550044537e-05, - "loss": 0.1257, + "learning_rate": 2.704507512520868e-05, + "loss": 0.179, "step": 32565 }, { "epoch": 1.52, - "learning_rate": 1.7039988748769397e-05, - "loss": 0.1518, + "learning_rate": 2.7044607055372662e-05, + "loss": 0.2043, "step": 32570 }, { "epoch": 1.52, - "learning_rate": 1.7039519947494257e-05, - "loss": 0.2869, + "learning_rate": 2.7044138985536642e-05, + "loss": 0.2253, "step": 32575 }, { "epoch": 1.52, - "learning_rate": 1.703905114621912e-05, - "loss": 0.295, + "learning_rate": 2.7043670915700622e-05, + "loss": 0.3692, "step": 32580 }, { "epoch": 1.52, - "learning_rate": 1.703858234494398e-05, - "loss": 0.2024, + "learning_rate": 2.7043202845864605e-05, + "loss": 0.1366, "step": 32585 }, { "epoch": 1.52, - "learning_rate": 1.703811354366884e-05, - "loss": 0.1001, + "learning_rate": 2.7042734776028585e-05, + "loss": 0.0413, "step": 32590 }, { "epoch": 1.52, - "learning_rate": 1.70376447423937e-05, - "loss": 0.0773, + "learning_rate": 2.7042266706192565e-05, + "loss": 0.0573, "step": 32595 }, { "epoch": 1.52, - "learning_rate": 1.7037175941118563e-05, - "loss": 0.0731, + "learning_rate": 2.7041798636356545e-05, + "loss": 0.0395, "step": 32600 }, { "epoch": 1.52, - "learning_rate": 1.7036707139843423e-05, - "loss": 0.0798, + "learning_rate": 2.7041330566520528e-05, + "loss": 0.1138, "step": 32605 }, { "epoch": 1.52, - "learning_rate": 1.7036238338568283e-05, - "loss": 0.1625, + "learning_rate": 2.7040862496684508e-05, + "loss": 0.0869, "step": 32610 }, { "epoch": 1.52, - "learning_rate": 1.7035769537293143e-05, - "loss": 0.2427, + "learning_rate": 2.7040394426848488e-05, + "loss": 0.1003, "step": 32615 }, { "epoch": 1.52, - "learning_rate": 1.7035300736018003e-05, - "loss": 0.1587, + "learning_rate": 2.7039926357012464e-05, + "loss": 0.0849, "step": 32620 }, { "epoch": 1.52, - "learning_rate": 1.7034831934742863e-05, - "loss": 0.1713, + "learning_rate": 2.7039458287176447e-05, + "loss": 0.2475, "step": 32625 }, { "epoch": 1.52, - "learning_rate": 1.7034363133467723e-05, - "loss": 0.3759, + "learning_rate": 2.7038990217340427e-05, + "loss": 0.329, "step": 32630 }, { "epoch": 1.52, - "learning_rate": 1.7033894332192583e-05, - "loss": 0.3358, + "learning_rate": 2.7038522147504407e-05, + "loss": 0.2789, "step": 32635 }, { "epoch": 1.52, - "learning_rate": 1.7033425530917443e-05, - "loss": 0.0724, + "learning_rate": 2.703805407766839e-05, + "loss": 0.0185, "step": 32640 }, { "epoch": 1.52, - "learning_rate": 1.7032956729642306e-05, - "loss": 0.0727, + "learning_rate": 2.703758600783237e-05, + "loss": 0.1266, "step": 32645 }, { "epoch": 1.52, - "learning_rate": 1.7032487928367166e-05, - "loss": 0.0715, + "learning_rate": 2.703711793799635e-05, + "loss": 0.0976, "step": 32650 }, { "epoch": 1.52, - "learning_rate": 1.7032019127092026e-05, - "loss": 0.0836, + "learning_rate": 2.703664986816033e-05, + "loss": 0.1195, "step": 32655 }, { "epoch": 1.52, - "learning_rate": 1.703155032581689e-05, - "loss": 0.1122, + "learning_rate": 2.7036181798324313e-05, + "loss": 0.0895, "step": 32660 }, { "epoch": 1.52, - "learning_rate": 1.703108152454175e-05, - "loss": 0.2039, + "learning_rate": 2.7035713728488292e-05, + "loss": 0.0975, "step": 32665 }, { "epoch": 1.52, - "learning_rate": 1.703061272326661e-05, - "loss": 0.2377, + "learning_rate": 2.7035245658652272e-05, + "loss": 0.1516, "step": 32670 }, { "epoch": 1.52, - "learning_rate": 1.703014392199147e-05, - "loss": 0.198, + "learning_rate": 2.7034777588816252e-05, + "loss": 0.2423, "step": 32675 }, { "epoch": 1.52, - "learning_rate": 1.702967512071633e-05, - "loss": 0.2264, + "learning_rate": 2.7034309518980232e-05, + "loss": 0.4485, "step": 32680 }, { "epoch": 1.53, - "learning_rate": 1.702920631944119e-05, - "loss": 0.2453, + "learning_rate": 2.7033841449144212e-05, + "loss": 0.2426, "step": 32685 }, { "epoch": 1.53, - "learning_rate": 1.702873751816605e-05, - "loss": 0.0258, + "learning_rate": 2.703337337930819e-05, + "loss": 0.0359, "step": 32690 }, { "epoch": 1.53, - "learning_rate": 1.7028268716890912e-05, - "loss": 0.0916, + "learning_rate": 2.703290530947217e-05, + "loss": 0.0842, "step": 32695 }, { "epoch": 1.53, - "learning_rate": 1.7027799915615772e-05, - "loss": 0.075, + "learning_rate": 2.7032437239636155e-05, + "loss": 0.185, "step": 32700 }, { "epoch": 1.53, - "learning_rate": 1.7027331114340632e-05, - "loss": 0.0646, + "learning_rate": 2.7031969169800134e-05, + "loss": 0.1004, "step": 32705 }, { "epoch": 1.53, - "learning_rate": 1.7026862313065492e-05, - "loss": 0.0724, + "learning_rate": 2.7031501099964114e-05, + "loss": 0.1709, "step": 32710 }, { "epoch": 1.53, - "learning_rate": 1.7026393511790352e-05, - "loss": 0.1197, + "learning_rate": 2.7031033030128097e-05, + "loss": 0.1358, "step": 32715 }, { "epoch": 1.53, - "learning_rate": 1.7025924710515212e-05, - "loss": 0.1027, + "learning_rate": 2.7030564960292077e-05, + "loss": 0.2217, "step": 32720 }, { "epoch": 1.53, - "learning_rate": 1.7025455909240075e-05, - "loss": 0.1982, + "learning_rate": 2.7030096890456057e-05, + "loss": 0.173, "step": 32725 }, { "epoch": 1.53, - "learning_rate": 1.7024987107964935e-05, - "loss": 0.5231, + "learning_rate": 2.7029628820620037e-05, + "loss": 0.2953, "step": 32730 }, { "epoch": 1.53, - "learning_rate": 1.7024518306689795e-05, - "loss": 0.2235, + "learning_rate": 2.702916075078402e-05, + "loss": 0.2254, "step": 32735 }, { "epoch": 1.53, - "learning_rate": 1.702404950541466e-05, - "loss": 0.0613, + "learning_rate": 2.7028692680948e-05, + "loss": 0.0873, "step": 32740 }, { "epoch": 1.53, - "learning_rate": 1.702358070413952e-05, - "loss": 0.0882, + "learning_rate": 2.7028224611111976e-05, + "loss": 0.0436, "step": 32745 }, { "epoch": 1.53, - "learning_rate": 1.702311190286438e-05, - "loss": 0.1131, + "learning_rate": 2.7027756541275956e-05, + "loss": 0.0812, "step": 32750 }, { "epoch": 1.53, - "learning_rate": 1.7022643101589238e-05, - "loss": 0.0765, + "learning_rate": 2.702728847143994e-05, + "loss": 0.1044, "step": 32755 }, { "epoch": 1.53, - "learning_rate": 1.7022174300314098e-05, - "loss": 0.1578, + "learning_rate": 2.702682040160392e-05, + "loss": 0.0775, "step": 32760 }, { "epoch": 1.53, - "learning_rate": 1.7021705499038958e-05, - "loss": 0.1737, + "learning_rate": 2.70263523317679e-05, + "loss": 0.1807, "step": 32765 }, { "epoch": 1.53, - "learning_rate": 1.7021236697763818e-05, - "loss": 0.1597, + "learning_rate": 2.7025884261931882e-05, + "loss": 0.159, "step": 32770 }, { "epoch": 1.53, - "learning_rate": 1.7020767896488678e-05, - "loss": 0.1821, + "learning_rate": 2.7025416192095862e-05, + "loss": 0.2465, "step": 32775 }, { "epoch": 1.53, - "learning_rate": 1.7020299095213538e-05, - "loss": 0.3385, + "learning_rate": 2.7024948122259842e-05, + "loss": 0.4615, "step": 32780 }, { "epoch": 1.53, - "learning_rate": 1.70198302939384e-05, - "loss": 0.2524, + "learning_rate": 2.702448005242382e-05, + "loss": 0.2719, "step": 32785 }, { "epoch": 1.53, - "learning_rate": 1.701936149266326e-05, + "learning_rate": 2.7024011982587805e-05, "loss": 0.0534, "step": 32790 }, { "epoch": 1.53, - "learning_rate": 1.701889269138812e-05, - "loss": 0.0347, + "learning_rate": 2.7023543912751785e-05, + "loss": 0.0485, "step": 32795 }, { "epoch": 1.53, - "learning_rate": 1.7018423890112984e-05, - "loss": 0.0499, + "learning_rate": 2.7023075842915765e-05, + "loss": 0.0531, "step": 32800 }, { "epoch": 1.53, - "learning_rate": 1.7017955088837844e-05, - "loss": 0.1067, + "learning_rate": 2.7022607773079744e-05, + "loss": 0.1008, "step": 32805 }, { "epoch": 1.53, - "learning_rate": 1.7017486287562704e-05, - "loss": 0.1506, + "learning_rate": 2.7022139703243724e-05, + "loss": 0.1294, "step": 32810 }, { "epoch": 1.53, - "learning_rate": 1.7017017486287564e-05, - "loss": 0.1958, + "learning_rate": 2.7021671633407704e-05, + "loss": 0.1395, "step": 32815 }, { "epoch": 1.53, - "learning_rate": 1.7016548685012424e-05, - "loss": 0.1141, + "learning_rate": 2.7021203563571684e-05, + "loss": 0.2705, "step": 32820 }, { "epoch": 1.53, - "learning_rate": 1.7016079883737284e-05, - "loss": 0.3248, + "learning_rate": 2.7020735493735667e-05, + "loss": 0.1477, "step": 32825 }, { "epoch": 1.53, - "learning_rate": 1.7015611082462147e-05, - "loss": 0.3599, + "learning_rate": 2.7020267423899647e-05, + "loss": 0.3652, "step": 32830 }, { "epoch": 1.53, - "learning_rate": 1.7015142281187007e-05, - "loss": 0.2578, + "learning_rate": 2.7019799354063627e-05, + "loss": 0.2274, "step": 32835 }, { "epoch": 1.53, - "learning_rate": 1.7014673479911867e-05, - "loss": 0.0727, + "learning_rate": 2.7019331284227606e-05, + "loss": 0.0386, "step": 32840 }, { "epoch": 1.53, - "learning_rate": 1.7014204678636727e-05, - "loss": 0.057, + "learning_rate": 2.701886321439159e-05, + "loss": 0.0185, "step": 32845 }, { "epoch": 1.53, - "learning_rate": 1.7013735877361587e-05, - "loss": 0.0768, + "learning_rate": 2.701839514455557e-05, + "loss": 0.0817, "step": 32850 }, { "epoch": 1.53, - "learning_rate": 1.7013267076086447e-05, - "loss": 0.1104, + "learning_rate": 2.701792707471955e-05, + "loss": 0.0923, "step": 32855 }, { "epoch": 1.53, - "learning_rate": 1.7012798274811307e-05, - "loss": 0.18, + "learning_rate": 2.701745900488353e-05, + "loss": 0.1459, "step": 32860 }, { "epoch": 1.53, - "learning_rate": 1.701232947353617e-05, - "loss": 0.1028, + "learning_rate": 2.7016990935047512e-05, + "loss": 0.1658, "step": 32865 }, { "epoch": 1.53, - "learning_rate": 1.701186067226103e-05, - "loss": 0.298, + "learning_rate": 2.701652286521149e-05, + "loss": 0.15, "step": 32870 }, { "epoch": 1.53, - "learning_rate": 1.701139187098589e-05, - "loss": 0.1878, + "learning_rate": 2.701605479537547e-05, + "loss": 0.1887, "step": 32875 }, { "epoch": 1.53, - "learning_rate": 1.7010923069710754e-05, - "loss": 0.3387, + "learning_rate": 2.701558672553945e-05, + "loss": 0.3236, "step": 32880 }, { "epoch": 1.53, - "learning_rate": 1.7010454268435614e-05, - "loss": 0.2769, + "learning_rate": 2.701511865570343e-05, + "loss": 0.331, "step": 32885 }, { "epoch": 1.53, - "learning_rate": 1.7009985467160473e-05, - "loss": 0.0933, + "learning_rate": 2.701465058586741e-05, + "loss": 0.0743, "step": 32890 }, { "epoch": 1.53, - "learning_rate": 1.7009516665885333e-05, - "loss": 0.0418, + "learning_rate": 2.701418251603139e-05, + "loss": 0.0395, "step": 32895 }, { "epoch": 1.54, - "learning_rate": 1.7009047864610193e-05, - "loss": 0.0895, + "learning_rate": 2.7013714446195374e-05, + "loss": 0.0998, "step": 32900 }, { "epoch": 1.54, - "learning_rate": 1.7008579063335053e-05, - "loss": 0.1456, + "learning_rate": 2.7013246376359354e-05, + "loss": 0.0713, "step": 32905 }, { "epoch": 1.54, - "learning_rate": 1.7008110262059913e-05, - "loss": 0.0858, + "learning_rate": 2.7012778306523334e-05, + "loss": 0.1306, "step": 32910 }, { "epoch": 1.54, - "learning_rate": 1.7007641460784773e-05, - "loss": 0.0988, + "learning_rate": 2.7012310236687314e-05, + "loss": 0.1828, "step": 32915 }, { "epoch": 1.54, - "learning_rate": 1.7007172659509633e-05, - "loss": 0.1303, + "learning_rate": 2.7011842166851297e-05, + "loss": 0.1994, "step": 32920 }, { "epoch": 1.54, - "learning_rate": 1.7006703858234496e-05, - "loss": 0.193, + "learning_rate": 2.7011374097015277e-05, + "loss": 0.2601, "step": 32925 }, { "epoch": 1.54, - "learning_rate": 1.7006235056959356e-05, - "loss": 0.3254, + "learning_rate": 2.7010906027179257e-05, + "loss": 0.4279, "step": 32930 }, { "epoch": 1.54, - "learning_rate": 1.7005766255684216e-05, - "loss": 0.1753, + "learning_rate": 2.7010437957343233e-05, + "loss": 0.3181, "step": 32935 }, { "epoch": 1.54, - "learning_rate": 1.7005297454409076e-05, - "loss": 0.0474, + "learning_rate": 2.7009969887507216e-05, + "loss": 0.0657, "step": 32940 }, { "epoch": 1.54, - "learning_rate": 1.700482865313394e-05, - "loss": 0.0911, + "learning_rate": 2.7009501817671196e-05, + "loss": 0.0556, "step": 32945 }, { "epoch": 1.54, - "learning_rate": 1.70043598518588e-05, - "loss": 0.1147, + "learning_rate": 2.7009033747835176e-05, + "loss": 0.1159, "step": 32950 }, { "epoch": 1.54, - "learning_rate": 1.700389105058366e-05, - "loss": 0.1001, + "learning_rate": 2.700856567799916e-05, + "loss": 0.1832, "step": 32955 }, { "epoch": 1.54, - "learning_rate": 1.700342224930852e-05, - "loss": 0.1293, + "learning_rate": 2.700809760816314e-05, + "loss": 0.1578, "step": 32960 }, { "epoch": 1.54, - "learning_rate": 1.700295344803338e-05, - "loss": 0.1598, + "learning_rate": 2.700762953832712e-05, + "loss": 0.0559, "step": 32965 }, { "epoch": 1.54, - "learning_rate": 1.7002484646758243e-05, - "loss": 0.1911, + "learning_rate": 2.70071614684911e-05, + "loss": 0.2554, "step": 32970 }, { "epoch": 1.54, - "learning_rate": 1.7002015845483102e-05, - "loss": 0.2595, + "learning_rate": 2.7006693398655082e-05, + "loss": 0.2261, "step": 32975 }, { "epoch": 1.54, - "learning_rate": 1.7001547044207962e-05, - "loss": 0.2554, + "learning_rate": 2.700622532881906e-05, + "loss": 0.4121, "step": 32980 }, { "epoch": 1.54, - "learning_rate": 1.7001078242932822e-05, - "loss": 0.2857, + "learning_rate": 2.700575725898304e-05, + "loss": 0.2396, "step": 32985 }, { "epoch": 1.54, - "learning_rate": 1.7000609441657682e-05, - "loss": 0.0408, + "learning_rate": 2.700528918914702e-05, + "loss": 0.1215, "step": 32990 }, { "epoch": 1.54, - "learning_rate": 1.7000140640382542e-05, - "loss": 0.0558, + "learning_rate": 2.7004821119311004e-05, + "loss": 0.0893, "step": 32995 }, { "epoch": 1.54, - "learning_rate": 1.6999671839107402e-05, - "loss": 0.0425, + "learning_rate": 2.700435304947498e-05, + "loss": 0.0738, "step": 33000 }, { "epoch": 1.54, - "learning_rate": 1.6999203037832262e-05, - "loss": 0.1221, + "learning_rate": 2.700388497963896e-05, + "loss": 0.068, "step": 33005 }, { "epoch": 1.54, - "learning_rate": 1.6998734236557125e-05, - "loss": 0.0814, + "learning_rate": 2.7003416909802944e-05, + "loss": 0.1042, "step": 33010 }, { "epoch": 1.54, - "learning_rate": 1.6998265435281985e-05, - "loss": 0.1134, + "learning_rate": 2.7002948839966924e-05, + "loss": 0.2321, "step": 33015 }, { "epoch": 1.54, - "learning_rate": 1.6997796634006845e-05, - "loss": 0.1492, + "learning_rate": 2.7002480770130904e-05, + "loss": 0.1587, "step": 33020 }, { "epoch": 1.54, - "learning_rate": 1.699732783273171e-05, - "loss": 0.2222, + "learning_rate": 2.7002012700294883e-05, + "loss": 0.2503, "step": 33025 }, { "epoch": 1.54, - "learning_rate": 1.699685903145657e-05, - "loss": 0.2727, + "learning_rate": 2.7001544630458867e-05, + "loss": 0.3625, "step": 33030 }, { "epoch": 1.54, - "learning_rate": 1.699639023018143e-05, - "loss": 0.2581, + "learning_rate": 2.7001076560622846e-05, + "loss": 0.3232, "step": 33035 }, { "epoch": 1.54, - "learning_rate": 1.699592142890629e-05, - "loss": 0.0504, + "learning_rate": 2.7000608490786826e-05, + "loss": 0.0347, "step": 33040 }, { "epoch": 1.54, - "learning_rate": 1.699545262763115e-05, - "loss": 0.0664, + "learning_rate": 2.7000140420950806e-05, + "loss": 0.0626, "step": 33045 }, { "epoch": 1.54, - "learning_rate": 1.6994983826356008e-05, - "loss": 0.0636, + "learning_rate": 2.699967235111479e-05, + "loss": 0.0422, "step": 33050 }, { "epoch": 1.54, - "learning_rate": 1.6994515025080868e-05, - "loss": 0.1054, + "learning_rate": 2.699920428127877e-05, + "loss": 0.1606, "step": 33055 }, { "epoch": 1.54, - "learning_rate": 1.6994046223805728e-05, - "loss": 0.0915, + "learning_rate": 2.6998736211442746e-05, + "loss": 0.0907, "step": 33060 }, { "epoch": 1.54, - "learning_rate": 1.699357742253059e-05, - "loss": 0.1315, + "learning_rate": 2.699826814160673e-05, + "loss": 0.0803, "step": 33065 }, { "epoch": 1.54, - "learning_rate": 1.699310862125545e-05, - "loss": 0.1195, + "learning_rate": 2.699780007177071e-05, + "loss": 0.2084, "step": 33070 }, { "epoch": 1.54, - "learning_rate": 1.699263981998031e-05, - "loss": 0.2035, + "learning_rate": 2.699733200193469e-05, + "loss": 0.2523, "step": 33075 }, { "epoch": 1.54, - "learning_rate": 1.699217101870517e-05, - "loss": 0.3399, + "learning_rate": 2.6996863932098668e-05, + "loss": 0.2984, "step": 33080 }, { "epoch": 1.54, - "learning_rate": 1.699170221743003e-05, - "loss": 0.4258, + "learning_rate": 2.699639586226265e-05, + "loss": 0.2349, "step": 33085 }, { "epoch": 1.54, - "learning_rate": 1.6991233416154895e-05, - "loss": 0.0815, + "learning_rate": 2.699592779242663e-05, + "loss": 0.0274, "step": 33090 }, { "epoch": 1.54, - "learning_rate": 1.6990764614879754e-05, - "loss": 0.0727, + "learning_rate": 2.699545972259061e-05, + "loss": 0.0587, "step": 33095 }, { "epoch": 1.54, - "learning_rate": 1.6990295813604614e-05, - "loss": 0.0544, + "learning_rate": 2.699499165275459e-05, + "loss": 0.0998, "step": 33100 }, { "epoch": 1.54, - "learning_rate": 1.6989827012329474e-05, - "loss": 0.0978, + "learning_rate": 2.6994523582918574e-05, + "loss": 0.0799, "step": 33105 }, { "epoch": 1.54, - "learning_rate": 1.6989358211054338e-05, - "loss": 0.0592, + "learning_rate": 2.6994055513082554e-05, + "loss": 0.1107, "step": 33110 }, { "epoch": 1.55, - "learning_rate": 1.6988889409779198e-05, - "loss": 0.075, + "learning_rate": 2.6993587443246534e-05, + "loss": 0.1558, "step": 33115 }, { "epoch": 1.55, - "learning_rate": 1.6988420608504058e-05, - "loss": 0.2028, + "learning_rate": 2.6993119373410513e-05, + "loss": 0.1899, "step": 33120 }, { "epoch": 1.55, - "learning_rate": 1.6987951807228917e-05, - "loss": 0.1716, + "learning_rate": 2.6992651303574493e-05, + "loss": 0.104, "step": 33125 }, { "epoch": 1.55, - "learning_rate": 1.6987483005953777e-05, - "loss": 0.4326, + "learning_rate": 2.6992183233738473e-05, + "loss": 0.3399, "step": 33130 }, { "epoch": 1.55, - "learning_rate": 1.6987014204678637e-05, - "loss": 0.1933, + "learning_rate": 2.6991715163902453e-05, + "loss": 0.2708, "step": 33135 }, { "epoch": 1.55, - "learning_rate": 1.6986545403403497e-05, - "loss": 0.0459, + "learning_rate": 2.6991247094066436e-05, + "loss": 0.0323, "step": 33140 }, { "epoch": 1.55, - "learning_rate": 1.6986076602128357e-05, - "loss": 0.0417, + "learning_rate": 2.6990779024230416e-05, + "loss": 0.0878, "step": 33145 }, { "epoch": 1.55, - "learning_rate": 1.698560780085322e-05, - "loss": 0.0917, + "learning_rate": 2.6990310954394396e-05, + "loss": 0.0769, "step": 33150 }, { "epoch": 1.55, - "learning_rate": 1.698513899957808e-05, - "loss": 0.1252, + "learning_rate": 2.6989842884558376e-05, + "loss": 0.1322, "step": 33155 }, { "epoch": 1.55, - "learning_rate": 1.698467019830294e-05, - "loss": 0.132, + "learning_rate": 2.698937481472236e-05, + "loss": 0.1286, "step": 33160 }, { "epoch": 1.55, - "learning_rate": 1.69842013970278e-05, - "loss": 0.1409, + "learning_rate": 2.698890674488634e-05, + "loss": 0.1078, "step": 33165 }, { "epoch": 1.55, - "learning_rate": 1.6983732595752664e-05, - "loss": 0.1662, + "learning_rate": 2.698843867505032e-05, + "loss": 0.1352, "step": 33170 }, { "epoch": 1.55, - "learning_rate": 1.6983263794477524e-05, - "loss": 0.1353, + "learning_rate": 2.6987970605214298e-05, + "loss": 0.2099, "step": 33175 }, { "epoch": 1.55, - "learning_rate": 1.6982794993202383e-05, - "loss": 0.3043, + "learning_rate": 2.698750253537828e-05, + "loss": 0.3099, "step": 33180 }, { "epoch": 1.55, - "learning_rate": 1.6982326191927243e-05, - "loss": 0.3157, + "learning_rate": 2.698703446554226e-05, + "loss": 0.2091, "step": 33185 }, { "epoch": 1.55, - "learning_rate": 1.6981857390652103e-05, - "loss": 0.041, + "learning_rate": 2.6986566395706238e-05, + "loss": 0.0674, "step": 33190 }, { "epoch": 1.55, - "learning_rate": 1.6981388589376963e-05, - "loss": 0.0717, + "learning_rate": 2.698609832587022e-05, + "loss": 0.0518, "step": 33195 }, { "epoch": 1.55, - "learning_rate": 1.6980919788101823e-05, - "loss": 0.1099, + "learning_rate": 2.69856302560342e-05, + "loss": 0.0735, "step": 33200 }, { "epoch": 1.55, - "learning_rate": 1.6980450986826687e-05, - "loss": 0.0831, + "learning_rate": 2.698516218619818e-05, + "loss": 0.0593, "step": 33205 }, { "epoch": 1.55, - "learning_rate": 1.6979982185551546e-05, - "loss": 0.084, + "learning_rate": 2.698469411636216e-05, + "loss": 0.1139, "step": 33210 }, { "epoch": 1.55, - "learning_rate": 1.6979513384276406e-05, - "loss": 0.1968, + "learning_rate": 2.6984226046526144e-05, + "loss": 0.1457, "step": 33215 }, { "epoch": 1.55, - "learning_rate": 1.6979044583001266e-05, - "loss": 0.1033, + "learning_rate": 2.6983757976690123e-05, + "loss": 0.2006, "step": 33220 }, { "epoch": 1.55, - "learning_rate": 1.6978575781726126e-05, - "loss": 0.1612, + "learning_rate": 2.6983289906854103e-05, + "loss": 0.2604, "step": 33225 }, { "epoch": 1.55, - "learning_rate": 1.697810698045099e-05, - "loss": 0.4094, + "learning_rate": 2.6982821837018083e-05, + "loss": 0.2674, "step": 33230 }, { "epoch": 1.55, - "learning_rate": 1.697763817917585e-05, - "loss": 0.4097, + "learning_rate": 2.6982353767182066e-05, + "loss": 0.2838, "step": 33235 }, { "epoch": 1.55, - "learning_rate": 1.697716937790071e-05, - "loss": 0.0522, + "learning_rate": 2.6981885697346046e-05, + "loss": 0.0742, "step": 33240 }, { "epoch": 1.55, - "learning_rate": 1.697670057662557e-05, - "loss": 0.0752, + "learning_rate": 2.6981417627510026e-05, + "loss": 0.0737, "step": 33245 }, { "epoch": 1.55, - "learning_rate": 1.6976231775350433e-05, - "loss": 0.0983, + "learning_rate": 2.6980949557674006e-05, + "loss": 0.0998, "step": 33250 }, { "epoch": 1.55, - "learning_rate": 1.6975762974075293e-05, - "loss": 0.0858, + "learning_rate": 2.6980481487837986e-05, + "loss": 0.0874, "step": 33255 }, { "epoch": 1.55, - "learning_rate": 1.6975294172800153e-05, - "loss": 0.0756, + "learning_rate": 2.6980013418001965e-05, + "loss": 0.0858, "step": 33260 }, { "epoch": 1.55, - "learning_rate": 1.6974825371525013e-05, - "loss": 0.1873, + "learning_rate": 2.6979545348165945e-05, + "loss": 0.1706, "step": 33265 }, { "epoch": 1.55, - "learning_rate": 1.6974356570249872e-05, - "loss": 0.2313, + "learning_rate": 2.697907727832993e-05, + "loss": 0.2193, "step": 33270 }, { "epoch": 1.55, - "learning_rate": 1.6973887768974732e-05, - "loss": 0.196, + "learning_rate": 2.6978609208493908e-05, + "loss": 0.1598, "step": 33275 }, { "epoch": 1.55, - "learning_rate": 1.6973418967699592e-05, - "loss": 0.3991, + "learning_rate": 2.6978141138657888e-05, + "loss": 0.3171, "step": 33280 }, { "epoch": 1.55, - "learning_rate": 1.6972950166424452e-05, - "loss": 0.3086, + "learning_rate": 2.6977673068821868e-05, + "loss": 0.4434, "step": 33285 }, { "epoch": 1.55, - "learning_rate": 1.6972481365149312e-05, - "loss": 0.0442, + "learning_rate": 2.697720499898585e-05, + "loss": 0.0919, "step": 33290 }, { "epoch": 1.55, - "learning_rate": 1.6972012563874176e-05, - "loss": 0.0435, + "learning_rate": 2.697673692914983e-05, + "loss": 0.0747, "step": 33295 }, { "epoch": 1.55, - "learning_rate": 1.6971543762599035e-05, - "loss": 0.0705, + "learning_rate": 2.697626885931381e-05, + "loss": 0.0845, "step": 33300 }, { "epoch": 1.55, - "learning_rate": 1.6971074961323895e-05, - "loss": 0.1096, + "learning_rate": 2.697580078947779e-05, + "loss": 0.0738, "step": 33305 }, { "epoch": 1.55, - "learning_rate": 1.697060616004876e-05, - "loss": 0.1128, + "learning_rate": 2.6975332719641774e-05, + "loss": 0.0936, "step": 33310 }, { "epoch": 1.55, - "learning_rate": 1.697013735877362e-05, - "loss": 0.1482, + "learning_rate": 2.697486464980575e-05, + "loss": 0.1804, "step": 33315 }, { "epoch": 1.55, - "learning_rate": 1.696966855749848e-05, - "loss": 0.2205, + "learning_rate": 2.697439657996973e-05, + "loss": 0.1244, "step": 33320 }, { "epoch": 1.55, - "learning_rate": 1.696919975622334e-05, - "loss": 0.2817, + "learning_rate": 2.6973928510133713e-05, + "loss": 0.2265, "step": 33325 }, { "epoch": 1.56, - "learning_rate": 1.69687309549482e-05, - "loss": 0.3504, + "learning_rate": 2.6973460440297693e-05, + "loss": 0.2955, "step": 33330 }, { "epoch": 1.56, - "learning_rate": 1.696826215367306e-05, - "loss": 0.3524, + "learning_rate": 2.6972992370461673e-05, + "loss": 0.2679, "step": 33335 }, { "epoch": 1.56, - "learning_rate": 1.6967793352397918e-05, - "loss": 0.0259, + "learning_rate": 2.6972524300625653e-05, + "loss": 0.0258, "step": 33340 }, { "epoch": 1.56, - "learning_rate": 1.696732455112278e-05, - "loss": 0.1418, + "learning_rate": 2.6972056230789636e-05, + "loss": 0.0695, "step": 33345 }, { "epoch": 1.56, - "learning_rate": 1.696685574984764e-05, - "loss": 0.0996, + "learning_rate": 2.6971588160953616e-05, + "loss": 0.0812, "step": 33350 }, { "epoch": 1.56, - "learning_rate": 1.69663869485725e-05, - "loss": 0.0931, + "learning_rate": 2.6971120091117595e-05, + "loss": 0.0363, "step": 33355 }, { "epoch": 1.56, - "learning_rate": 1.696591814729736e-05, - "loss": 0.1358, + "learning_rate": 2.6970652021281575e-05, + "loss": 0.1397, "step": 33360 }, { "epoch": 1.56, - "learning_rate": 1.696544934602222e-05, - "loss": 0.162, + "learning_rate": 2.697018395144556e-05, + "loss": 0.1164, "step": 33365 }, { "epoch": 1.56, - "learning_rate": 1.696498054474708e-05, - "loss": 0.1283, + "learning_rate": 2.6969715881609538e-05, + "loss": 0.1975, "step": 33370 }, { "epoch": 1.56, - "learning_rate": 1.6964511743471945e-05, - "loss": 0.192, + "learning_rate": 2.6969247811773518e-05, + "loss": 0.2331, "step": 33375 }, { "epoch": 1.56, - "learning_rate": 1.6964042942196805e-05, - "loss": 0.386, + "learning_rate": 2.6968779741937498e-05, + "loss": 0.3827, "step": 33380 }, { "epoch": 1.56, - "learning_rate": 1.6963574140921664e-05, - "loss": 0.2255, + "learning_rate": 2.6968311672101478e-05, + "loss": 0.2749, "step": 33385 }, { "epoch": 1.56, - "learning_rate": 1.6963105339646528e-05, - "loss": 0.0262, + "learning_rate": 2.6967843602265458e-05, + "loss": 0.0281, "step": 33390 }, { "epoch": 1.56, - "learning_rate": 1.6962636538371388e-05, - "loss": 0.0905, + "learning_rate": 2.6967375532429437e-05, + "loss": 0.1315, "step": 33395 }, { "epoch": 1.56, - "learning_rate": 1.6962167737096248e-05, - "loss": 0.0624, + "learning_rate": 2.696690746259342e-05, + "loss": 0.0946, "step": 33400 }, { "epoch": 1.56, - "learning_rate": 1.6961698935821108e-05, - "loss": 0.1754, + "learning_rate": 2.69664393927574e-05, + "loss": 0.1075, "step": 33405 }, { "epoch": 1.56, - "learning_rate": 1.6961230134545968e-05, - "loss": 0.0953, + "learning_rate": 2.696597132292138e-05, + "loss": 0.0648, "step": 33410 }, { "epoch": 1.56, - "learning_rate": 1.6960761333270827e-05, - "loss": 0.1157, + "learning_rate": 2.696550325308536e-05, + "loss": 0.1504, "step": 33415 }, { "epoch": 1.56, - "learning_rate": 1.6960292531995687e-05, - "loss": 0.1889, + "learning_rate": 2.6965035183249343e-05, + "loss": 0.1815, "step": 33420 }, { "epoch": 1.56, - "learning_rate": 1.6959823730720547e-05, - "loss": 0.2906, + "learning_rate": 2.6964567113413323e-05, + "loss": 0.246, "step": 33425 }, { "epoch": 1.56, - "learning_rate": 1.6959354929445407e-05, - "loss": 0.3266, + "learning_rate": 2.6964099043577303e-05, + "loss": 0.347, "step": 33430 }, { "epoch": 1.56, - "learning_rate": 1.695888612817027e-05, - "loss": 0.278, + "learning_rate": 2.6963630973741286e-05, + "loss": 0.3222, "step": 33435 }, { "epoch": 1.56, - "learning_rate": 1.695841732689513e-05, - "loss": 0.0655, + "learning_rate": 2.6963162903905262e-05, + "loss": 0.0387, "step": 33440 }, { "epoch": 1.56, - "learning_rate": 1.695794852561999e-05, - "loss": 0.138, + "learning_rate": 2.6962694834069242e-05, + "loss": 0.0481, "step": 33445 }, { "epoch": 1.56, - "learning_rate": 1.695747972434485e-05, - "loss": 0.0724, + "learning_rate": 2.6962226764233222e-05, + "loss": 0.0647, "step": 33450 }, { "epoch": 1.56, - "learning_rate": 1.6957010923069714e-05, - "loss": 0.0803, + "learning_rate": 2.6961758694397205e-05, + "loss": 0.0807, "step": 33455 }, { "epoch": 1.56, - "learning_rate": 1.6956542121794574e-05, - "loss": 0.1474, + "learning_rate": 2.6961290624561185e-05, + "loss": 0.1542, "step": 33460 }, { "epoch": 1.56, - "learning_rate": 1.6956073320519434e-05, - "loss": 0.1498, + "learning_rate": 2.6960822554725165e-05, + "loss": 0.1546, "step": 33465 }, { "epoch": 1.56, - "learning_rate": 1.6955604519244294e-05, - "loss": 0.195, + "learning_rate": 2.6960354484889145e-05, + "loss": 0.1595, "step": 33470 }, { "epoch": 1.56, - "learning_rate": 1.6955135717969153e-05, - "loss": 0.2709, + "learning_rate": 2.6959886415053128e-05, + "loss": 0.149, "step": 33475 }, { "epoch": 1.56, - "learning_rate": 1.6954666916694017e-05, - "loss": 0.318, + "learning_rate": 2.6959418345217108e-05, + "loss": 0.2892, "step": 33480 }, { "epoch": 1.56, - "learning_rate": 1.6954198115418877e-05, - "loss": 0.3973, + "learning_rate": 2.6958950275381088e-05, + "loss": 0.1681, "step": 33485 }, { "epoch": 1.56, - "learning_rate": 1.6953729314143737e-05, - "loss": 0.0661, + "learning_rate": 2.6958482205545067e-05, + "loss": 0.0971, "step": 33490 }, { "epoch": 1.56, - "learning_rate": 1.6953260512868597e-05, - "loss": 0.0608, + "learning_rate": 2.695801413570905e-05, + "loss": 0.0487, "step": 33495 }, { "epoch": 1.56, - "learning_rate": 1.6952791711593457e-05, - "loss": 0.0867, + "learning_rate": 2.695754606587303e-05, + "loss": 0.0572, "step": 33500 }, { "epoch": 1.56, - "learning_rate": 1.6952322910318316e-05, - "loss": 0.1162, + "learning_rate": 2.6957077996037007e-05, + "loss": 0.1197, "step": 33505 }, { "epoch": 1.56, - "learning_rate": 1.6951854109043176e-05, - "loss": 0.1134, + "learning_rate": 2.695660992620099e-05, + "loss": 0.1154, "step": 33510 }, { "epoch": 1.56, - "learning_rate": 1.6951385307768036e-05, - "loss": 0.0616, + "learning_rate": 2.695614185636497e-05, + "loss": 0.1573, "step": 33515 }, { "epoch": 1.56, - "learning_rate": 1.69509165064929e-05, - "loss": 0.1131, + "learning_rate": 2.695567378652895e-05, + "loss": 0.1595, "step": 33520 }, { "epoch": 1.56, - "learning_rate": 1.695044770521776e-05, - "loss": 0.2365, + "learning_rate": 2.695520571669293e-05, + "loss": 0.1694, "step": 33525 }, { "epoch": 1.56, - "learning_rate": 1.694997890394262e-05, - "loss": 0.3303, + "learning_rate": 2.6954737646856913e-05, + "loss": 0.4786, "step": 33530 }, { "epoch": 1.56, - "learning_rate": 1.6949510102667483e-05, - "loss": 0.2189, + "learning_rate": 2.6954269577020893e-05, + "loss": 0.3407, "step": 33535 }, { "epoch": 1.57, - "learning_rate": 1.6949041301392343e-05, - "loss": 0.0139, + "learning_rate": 2.6953801507184872e-05, + "loss": 0.0328, "step": 33540 }, { "epoch": 1.57, - "learning_rate": 1.6948572500117203e-05, - "loss": 0.0497, + "learning_rate": 2.6953333437348852e-05, + "loss": 0.0409, "step": 33545 }, { "epoch": 1.57, - "learning_rate": 1.6948103698842063e-05, - "loss": 0.1044, + "learning_rate": 2.6952865367512835e-05, + "loss": 0.0669, "step": 33550 }, { "epoch": 1.57, - "learning_rate": 1.6947634897566923e-05, - "loss": 0.0744, + "learning_rate": 2.6952397297676815e-05, + "loss": 0.1159, "step": 33555 }, { "epoch": 1.57, - "learning_rate": 1.6947166096291782e-05, - "loss": 0.1061, + "learning_rate": 2.6951929227840795e-05, + "loss": 0.0854, "step": 33560 }, { "epoch": 1.57, - "learning_rate": 1.6946697295016642e-05, - "loss": 0.1483, + "learning_rate": 2.6951461158004775e-05, + "loss": 0.1544, "step": 33565 }, { "epoch": 1.57, - "learning_rate": 1.6946228493741502e-05, - "loss": 0.1121, + "learning_rate": 2.6950993088168755e-05, + "loss": 0.239, "step": 33570 }, { "epoch": 1.57, - "learning_rate": 1.6945759692466366e-05, - "loss": 0.2839, + "learning_rate": 2.6950525018332734e-05, + "loss": 0.2662, "step": 33575 }, { "epoch": 1.57, - "learning_rate": 1.6945290891191226e-05, - "loss": 0.3162, + "learning_rate": 2.6950056948496714e-05, + "loss": 0.4318, "step": 33580 }, { "epoch": 1.57, - "learning_rate": 1.6944822089916086e-05, - "loss": 0.3926, + "learning_rate": 2.6949588878660698e-05, + "loss": 0.2809, "step": 33585 }, { "epoch": 1.57, - "learning_rate": 1.6944353288640945e-05, - "loss": 0.0653, + "learning_rate": 2.6949120808824677e-05, + "loss": 0.0523, "step": 33590 }, { "epoch": 1.57, - "learning_rate": 1.6943884487365805e-05, - "loss": 0.0839, + "learning_rate": 2.6948652738988657e-05, + "loss": 0.0609, "step": 33595 }, { "epoch": 1.57, - "learning_rate": 1.694341568609067e-05, - "loss": 0.0872, + "learning_rate": 2.6948184669152637e-05, + "loss": 0.1092, "step": 33600 }, { "epoch": 1.57, - "learning_rate": 1.694294688481553e-05, - "loss": 0.0573, + "learning_rate": 2.694771659931662e-05, + "loss": 0.101, "step": 33605 }, { "epoch": 1.57, - "learning_rate": 1.694247808354039e-05, - "loss": 0.1543, + "learning_rate": 2.69472485294806e-05, + "loss": 0.1491, "step": 33610 }, { "epoch": 1.57, - "learning_rate": 1.694200928226525e-05, - "loss": 0.1087, + "learning_rate": 2.694678045964458e-05, + "loss": 0.1125, "step": 33615 }, { "epoch": 1.57, - "learning_rate": 1.6941540480990112e-05, - "loss": 0.2351, + "learning_rate": 2.6946312389808563e-05, + "loss": 0.1055, "step": 33620 }, { "epoch": 1.57, - "learning_rate": 1.6941071679714972e-05, - "loss": 0.2309, + "learning_rate": 2.6945844319972543e-05, + "loss": 0.2823, "step": 33625 }, { "epoch": 1.57, - "learning_rate": 1.6940602878439832e-05, - "loss": 0.431, + "learning_rate": 2.694537625013652e-05, + "loss": 0.2148, "step": 33630 }, { "epoch": 1.57, - "learning_rate": 1.694013407716469e-05, - "loss": 0.2245, + "learning_rate": 2.69449081803005e-05, + "loss": 0.1385, "step": 33635 }, { "epoch": 1.57, - "learning_rate": 1.693966527588955e-05, - "loss": 0.0425, + "learning_rate": 2.6944440110464482e-05, + "loss": 0.0313, "step": 33640 }, { "epoch": 1.57, - "learning_rate": 1.693919647461441e-05, - "loss": 0.0515, + "learning_rate": 2.6943972040628462e-05, + "loss": 0.1056, "step": 33645 }, { "epoch": 1.57, - "learning_rate": 1.693872767333927e-05, - "loss": 0.0823, + "learning_rate": 2.6943503970792442e-05, + "loss": 0.0715, "step": 33650 }, { "epoch": 1.57, - "learning_rate": 1.693825887206413e-05, - "loss": 0.1146, + "learning_rate": 2.6943035900956422e-05, + "loss": 0.0989, "step": 33655 }, { "epoch": 1.57, - "learning_rate": 1.6937790070788995e-05, - "loss": 0.0919, + "learning_rate": 2.6942567831120405e-05, + "loss": 0.1732, "step": 33660 }, { "epoch": 1.57, - "learning_rate": 1.6937321269513855e-05, - "loss": 0.1207, + "learning_rate": 2.6942099761284385e-05, + "loss": 0.1354, "step": 33665 }, { "epoch": 1.57, - "learning_rate": 1.6936852468238715e-05, - "loss": 0.1433, + "learning_rate": 2.6941631691448365e-05, + "loss": 0.1368, "step": 33670 }, { "epoch": 1.57, - "learning_rate": 1.6936383666963575e-05, - "loss": 0.1465, + "learning_rate": 2.6941163621612344e-05, + "loss": 0.236, "step": 33675 }, { "epoch": 1.57, - "learning_rate": 1.6935914865688438e-05, - "loss": 0.3602, + "learning_rate": 2.6940695551776328e-05, + "loss": 0.2753, "step": 33680 }, { "epoch": 1.57, - "learning_rate": 1.6935446064413298e-05, - "loss": 0.1597, + "learning_rate": 2.6940227481940307e-05, + "loss": 0.2297, "step": 33685 }, { "epoch": 1.57, - "learning_rate": 1.6934977263138158e-05, - "loss": 0.0452, + "learning_rate": 2.6939759412104287e-05, + "loss": 0.0475, "step": 33690 }, { "epoch": 1.57, - "learning_rate": 1.6934508461863018e-05, - "loss": 0.0767, + "learning_rate": 2.6939291342268267e-05, + "loss": 0.0418, "step": 33695 }, { "epoch": 1.57, - "learning_rate": 1.6934039660587878e-05, - "loss": 0.1225, + "learning_rate": 2.6938823272432247e-05, + "loss": 0.0751, "step": 33700 }, { "epoch": 1.57, - "learning_rate": 1.6933570859312738e-05, - "loss": 0.0508, + "learning_rate": 2.6938355202596227e-05, + "loss": 0.0975, "step": 33705 }, { "epoch": 1.57, - "learning_rate": 1.6933102058037597e-05, - "loss": 0.0468, + "learning_rate": 2.6937887132760207e-05, + "loss": 0.0945, "step": 33710 }, { "epoch": 1.57, - "learning_rate": 1.693263325676246e-05, - "loss": 0.1234, + "learning_rate": 2.693741906292419e-05, + "loss": 0.1498, "step": 33715 }, { "epoch": 1.57, - "learning_rate": 1.693216445548732e-05, - "loss": 0.1179, + "learning_rate": 2.693695099308817e-05, + "loss": 0.1429, "step": 33720 }, { "epoch": 1.57, - "learning_rate": 1.693169565421218e-05, - "loss": 0.2601, + "learning_rate": 2.693648292325215e-05, + "loss": 0.1884, "step": 33725 }, { "epoch": 1.57, - "learning_rate": 1.693122685293704e-05, - "loss": 0.3565, + "learning_rate": 2.693601485341613e-05, + "loss": 0.3491, "step": 33730 }, { "epoch": 1.57, - "learning_rate": 1.69307580516619e-05, - "loss": 0.2142, + "learning_rate": 2.6935546783580112e-05, + "loss": 0.2117, "step": 33735 }, { "epoch": 1.57, - "learning_rate": 1.6930289250386764e-05, - "loss": 0.0426, + "learning_rate": 2.6935078713744092e-05, + "loss": 0.0323, "step": 33740 }, { "epoch": 1.57, - "learning_rate": 1.6929820449111624e-05, - "loss": 0.0552, + "learning_rate": 2.6934610643908072e-05, + "loss": 0.0391, "step": 33745 }, { "epoch": 1.57, - "learning_rate": 1.6929351647836484e-05, - "loss": 0.1314, + "learning_rate": 2.6934142574072055e-05, + "loss": 0.1505, "step": 33750 }, { "epoch": 1.58, - "learning_rate": 1.6928882846561344e-05, - "loss": 0.1151, + "learning_rate": 2.693367450423603e-05, + "loss": 0.0932, "step": 33755 }, { "epoch": 1.58, - "learning_rate": 1.6928414045286207e-05, - "loss": 0.0642, + "learning_rate": 2.693320643440001e-05, + "loss": 0.1488, "step": 33760 }, { "epoch": 1.58, - "learning_rate": 1.6927945244011067e-05, - "loss": 0.1305, + "learning_rate": 2.693273836456399e-05, + "loss": 0.1667, "step": 33765 }, { "epoch": 1.58, - "learning_rate": 1.6927476442735927e-05, - "loss": 0.1309, + "learning_rate": 2.6932270294727974e-05, + "loss": 0.2161, "step": 33770 }, { "epoch": 1.58, - "learning_rate": 1.6927007641460787e-05, - "loss": 0.1447, + "learning_rate": 2.6931802224891954e-05, + "loss": 0.3211, "step": 33775 }, { "epoch": 1.58, - "learning_rate": 1.6926538840185647e-05, - "loss": 0.3888, + "learning_rate": 2.6931334155055934e-05, + "loss": 0.5273, "step": 33780 }, { "epoch": 1.58, - "learning_rate": 1.6926070038910507e-05, - "loss": 0.2636, + "learning_rate": 2.6930866085219914e-05, + "loss": 0.228, "step": 33785 }, { "epoch": 1.58, - "learning_rate": 1.6925601237635367e-05, - "loss": 0.1061, + "learning_rate": 2.6930398015383897e-05, + "loss": 0.039, "step": 33790 }, { "epoch": 1.58, - "learning_rate": 1.6925132436360226e-05, - "loss": 0.0731, + "learning_rate": 2.6929929945547877e-05, + "loss": 0.0235, "step": 33795 }, { "epoch": 1.58, - "learning_rate": 1.6924663635085086e-05, - "loss": 0.0947, + "learning_rate": 2.6929461875711857e-05, + "loss": 0.0688, "step": 33800 }, { "epoch": 1.58, - "learning_rate": 1.692419483380995e-05, - "loss": 0.1717, + "learning_rate": 2.692899380587584e-05, + "loss": 0.1279, "step": 33805 }, { "epoch": 1.58, - "learning_rate": 1.692372603253481e-05, - "loss": 0.1695, + "learning_rate": 2.692852573603982e-05, + "loss": 0.1052, "step": 33810 }, { "epoch": 1.58, - "learning_rate": 1.692325723125967e-05, - "loss": 0.1291, + "learning_rate": 2.69280576662038e-05, + "loss": 0.2008, "step": 33815 }, { "epoch": 1.58, - "learning_rate": 1.6922788429984533e-05, - "loss": 0.1663, + "learning_rate": 2.6927589596367776e-05, + "loss": 0.1396, "step": 33820 }, { "epoch": 1.58, - "learning_rate": 1.6922319628709393e-05, - "loss": 0.1583, + "learning_rate": 2.692712152653176e-05, + "loss": 0.2285, "step": 33825 }, { "epoch": 1.58, - "learning_rate": 1.6921850827434253e-05, - "loss": 0.2274, + "learning_rate": 2.692665345669574e-05, + "loss": 0.3607, "step": 33830 }, { "epoch": 1.58, - "learning_rate": 1.6921382026159113e-05, - "loss": 0.3158, + "learning_rate": 2.692618538685972e-05, + "loss": 0.2623, "step": 33835 }, { "epoch": 1.58, - "learning_rate": 1.6920913224883973e-05, - "loss": 0.0246, + "learning_rate": 2.69257173170237e-05, + "loss": 0.0463, "step": 33840 }, { "epoch": 1.58, - "learning_rate": 1.6920444423608833e-05, - "loss": 0.0657, + "learning_rate": 2.6925249247187682e-05, + "loss": 0.1067, "step": 33845 }, { "epoch": 1.58, - "learning_rate": 1.6919975622333693e-05, - "loss": 0.0545, + "learning_rate": 2.6924781177351662e-05, + "loss": 0.1125, "step": 33850 }, { "epoch": 1.58, - "learning_rate": 1.6919506821058556e-05, - "loss": 0.0805, + "learning_rate": 2.692431310751564e-05, + "loss": 0.1145, "step": 33855 }, { "epoch": 1.58, - "learning_rate": 1.6919038019783416e-05, - "loss": 0.1578, + "learning_rate": 2.6923845037679625e-05, + "loss": 0.0639, "step": 33860 }, { "epoch": 1.58, - "learning_rate": 1.6918569218508276e-05, - "loss": 0.1488, + "learning_rate": 2.6923376967843605e-05, + "loss": 0.0958, "step": 33865 }, { "epoch": 1.58, - "learning_rate": 1.6918100417233136e-05, - "loss": 0.1877, + "learning_rate": 2.6922908898007584e-05, + "loss": 0.1917, "step": 33870 }, { "epoch": 1.58, - "learning_rate": 1.6917631615957996e-05, - "loss": 0.2024, + "learning_rate": 2.6922440828171564e-05, + "loss": 0.2605, "step": 33875 }, { "epoch": 1.58, - "learning_rate": 1.6917162814682856e-05, - "loss": 0.3046, + "learning_rate": 2.6921972758335544e-05, + "loss": 0.3061, "step": 33880 }, { "epoch": 1.58, - "learning_rate": 1.691669401340772e-05, - "loss": 0.2374, + "learning_rate": 2.6921504688499524e-05, + "loss": 0.2658, "step": 33885 }, { "epoch": 1.58, - "learning_rate": 1.691622521213258e-05, - "loss": 0.0777, + "learning_rate": 2.6921036618663504e-05, + "loss": 0.1025, "step": 33890 }, { "epoch": 1.58, - "learning_rate": 1.691575641085744e-05, - "loss": 0.0611, + "learning_rate": 2.6920568548827483e-05, + "loss": 0.0978, "step": 33895 }, { "epoch": 1.58, - "learning_rate": 1.6915287609582302e-05, - "loss": 0.0871, + "learning_rate": 2.6920100478991467e-05, + "loss": 0.0522, "step": 33900 }, { "epoch": 1.58, - "learning_rate": 1.6914818808307162e-05, - "loss": 0.0591, + "learning_rate": 2.6919632409155447e-05, + "loss": 0.1059, "step": 33905 }, { "epoch": 1.58, - "learning_rate": 1.6914350007032022e-05, - "loss": 0.1289, + "learning_rate": 2.6919164339319426e-05, + "loss": 0.0923, "step": 33910 }, { "epoch": 1.58, - "learning_rate": 1.6913881205756882e-05, - "loss": 0.2123, + "learning_rate": 2.6918696269483406e-05, + "loss": 0.136, "step": 33915 }, { "epoch": 1.58, - "learning_rate": 1.6913412404481742e-05, - "loss": 0.1573, + "learning_rate": 2.691822819964739e-05, + "loss": 0.1848, "step": 33920 }, { "epoch": 1.58, - "learning_rate": 1.69129436032066e-05, - "loss": 0.2144, + "learning_rate": 2.691776012981137e-05, + "loss": 0.2659, "step": 33925 }, { "epoch": 1.58, - "learning_rate": 1.691247480193146e-05, - "loss": 0.3514, + "learning_rate": 2.691729205997535e-05, + "loss": 0.3446, "step": 33930 }, { "epoch": 1.58, - "learning_rate": 1.691200600065632e-05, - "loss": 0.2415, + "learning_rate": 2.6916823990139332e-05, + "loss": 0.2859, "step": 33935 }, { "epoch": 1.58, - "learning_rate": 1.691153719938118e-05, - "loss": 0.0814, + "learning_rate": 2.6916355920303312e-05, + "loss": 0.0622, "step": 33940 }, { "epoch": 1.58, - "learning_rate": 1.6911068398106045e-05, - "loss": 0.1162, + "learning_rate": 2.691588785046729e-05, + "loss": 0.0741, "step": 33945 }, { "epoch": 1.58, - "learning_rate": 1.6910599596830905e-05, - "loss": 0.0979, + "learning_rate": 2.6915419780631268e-05, + "loss": 0.0572, "step": 33950 }, { "epoch": 1.58, - "learning_rate": 1.6910130795555765e-05, - "loss": 0.1339, + "learning_rate": 2.691495171079525e-05, + "loss": 0.0683, "step": 33955 }, { "epoch": 1.58, - "learning_rate": 1.6909661994280625e-05, - "loss": 0.1206, + "learning_rate": 2.691448364095923e-05, + "loss": 0.1142, "step": 33960 }, { "epoch": 1.58, - "learning_rate": 1.6909193193005488e-05, - "loss": 0.1281, + "learning_rate": 2.691401557112321e-05, + "loss": 0.0827, "step": 33965 }, { "epoch": 1.59, - "learning_rate": 1.6908724391730348e-05, - "loss": 0.2954, + "learning_rate": 2.691354750128719e-05, + "loss": 0.2321, "step": 33970 }, { "epoch": 1.59, - "learning_rate": 1.6908255590455208e-05, - "loss": 0.3219, + "learning_rate": 2.6913079431451174e-05, + "loss": 0.2662, "step": 33975 }, { "epoch": 1.59, - "learning_rate": 1.6907786789180068e-05, - "loss": 0.4502, + "learning_rate": 2.6912611361615154e-05, + "loss": 0.2821, "step": 33980 }, { "epoch": 1.59, - "learning_rate": 1.6907317987904928e-05, - "loss": 0.2787, + "learning_rate": 2.6912143291779134e-05, + "loss": 0.2323, "step": 33985 }, { "epoch": 1.59, - "learning_rate": 1.6906849186629788e-05, - "loss": 0.0437, + "learning_rate": 2.6911675221943117e-05, + "loss": 0.0372, "step": 33990 }, { "epoch": 1.59, - "learning_rate": 1.690638038535465e-05, - "loss": 0.0631, + "learning_rate": 2.6911207152107097e-05, + "loss": 0.0707, "step": 33995 }, { "epoch": 1.59, - "learning_rate": 1.690591158407951e-05, - "loss": 0.1087, + "learning_rate": 2.6910739082271077e-05, + "loss": 0.0431, "step": 34000 }, { "epoch": 1.59, - "learning_rate": 1.690544278280437e-05, - "loss": 0.1206, + "learning_rate": 2.6910271012435056e-05, + "loss": 0.0836, "step": 34005 }, { "epoch": 1.59, - "learning_rate": 1.690497398152923e-05, - "loss": 0.1209, + "learning_rate": 2.6909802942599036e-05, + "loss": 0.1059, "step": 34010 }, { "epoch": 1.59, - "learning_rate": 1.690450518025409e-05, - "loss": 0.0778, + "learning_rate": 2.6909334872763016e-05, + "loss": 0.1898, "step": 34015 }, { "epoch": 1.59, - "learning_rate": 1.690403637897895e-05, - "loss": 0.2117, + "learning_rate": 2.6908866802926996e-05, + "loss": 0.2238, "step": 34020 }, { "epoch": 1.59, - "learning_rate": 1.690356757770381e-05, - "loss": 0.2959, + "learning_rate": 2.6908398733090976e-05, + "loss": 0.3044, "step": 34025 }, { "epoch": 1.59, - "learning_rate": 1.6903098776428674e-05, - "loss": 0.2687, + "learning_rate": 2.690793066325496e-05, + "loss": 0.2505, "step": 34030 }, { "epoch": 1.59, - "learning_rate": 1.6902629975153534e-05, - "loss": 0.2491, + "learning_rate": 2.690746259341894e-05, + "loss": 0.2176, "step": 34035 }, { "epoch": 1.59, - "learning_rate": 1.6902161173878394e-05, - "loss": 0.0504, + "learning_rate": 2.690699452358292e-05, + "loss": 0.055, "step": 34040 }, { "epoch": 1.59, - "learning_rate": 1.6901692372603257e-05, - "loss": 0.039, + "learning_rate": 2.6906526453746902e-05, + "loss": 0.0949, "step": 34045 }, { "epoch": 1.59, - "learning_rate": 1.6901223571328117e-05, - "loss": 0.0585, + "learning_rate": 2.690605838391088e-05, + "loss": 0.1021, "step": 34050 }, { "epoch": 1.59, - "learning_rate": 1.6900754770052977e-05, - "loss": 0.095, + "learning_rate": 2.690559031407486e-05, + "loss": 0.0625, "step": 34055 }, { "epoch": 1.59, - "learning_rate": 1.6900285968777837e-05, - "loss": 0.0796, + "learning_rate": 2.690512224423884e-05, + "loss": 0.132, "step": 34060 }, { "epoch": 1.59, - "learning_rate": 1.6899817167502697e-05, - "loss": 0.1026, + "learning_rate": 2.6904654174402824e-05, + "loss": 0.187, "step": 34065 }, { "epoch": 1.59, - "learning_rate": 1.6899348366227557e-05, - "loss": 0.2815, + "learning_rate": 2.69041861045668e-05, + "loss": 0.1742, "step": 34070 }, { "epoch": 1.59, - "learning_rate": 1.6898879564952417e-05, - "loss": 0.1817, + "learning_rate": 2.690371803473078e-05, + "loss": 0.2368, "step": 34075 }, { "epoch": 1.59, - "learning_rate": 1.6898410763677277e-05, - "loss": 0.3487, + "learning_rate": 2.690324996489476e-05, + "loss": 0.4076, "step": 34080 }, { "epoch": 1.59, - "learning_rate": 1.689794196240214e-05, - "loss": 0.4519, + "learning_rate": 2.6902781895058744e-05, + "loss": 0.2802, "step": 34085 }, { "epoch": 1.59, - "learning_rate": 1.6897473161127e-05, - "loss": 0.1144, + "learning_rate": 2.6902313825222723e-05, + "loss": 0.0225, "step": 34090 }, { "epoch": 1.59, - "learning_rate": 1.689700435985186e-05, - "loss": 0.0615, + "learning_rate": 2.6901845755386703e-05, + "loss": 0.0578, "step": 34095 }, { "epoch": 1.59, - "learning_rate": 1.689653555857672e-05, - "loss": 0.1148, + "learning_rate": 2.6901377685550683e-05, + "loss": 0.0631, "step": 34100 }, { "epoch": 1.59, - "learning_rate": 1.689606675730158e-05, - "loss": 0.14, + "learning_rate": 2.6900909615714666e-05, + "loss": 0.0355, "step": 34105 }, { "epoch": 1.59, - "learning_rate": 1.6895597956026443e-05, - "loss": 0.1061, + "learning_rate": 2.6900441545878646e-05, + "loss": 0.1352, "step": 34110 }, { "epoch": 1.59, - "learning_rate": 1.6895129154751303e-05, - "loss": 0.1066, + "learning_rate": 2.6899973476042626e-05, + "loss": 0.1168, "step": 34115 }, { "epoch": 1.59, - "learning_rate": 1.6894660353476163e-05, - "loss": 0.1549, + "learning_rate": 2.689950540620661e-05, + "loss": 0.2445, "step": 34120 }, { "epoch": 1.59, - "learning_rate": 1.6894191552201023e-05, - "loss": 0.1757, + "learning_rate": 2.689903733637059e-05, + "loss": 0.1885, "step": 34125 }, { "epoch": 1.59, - "learning_rate": 1.6893722750925886e-05, - "loss": 0.226, + "learning_rate": 2.689856926653457e-05, + "loss": 0.2987, "step": 34130 }, { "epoch": 1.59, - "learning_rate": 1.6893253949650746e-05, - "loss": 0.1872, + "learning_rate": 2.6898101196698545e-05, + "loss": 0.2409, "step": 34135 }, { "epoch": 1.59, - "learning_rate": 1.6892785148375606e-05, - "loss": 0.0721, + "learning_rate": 2.689763312686253e-05, + "loss": 0.0281, "step": 34140 }, { "epoch": 1.59, - "learning_rate": 1.6892316347100466e-05, - "loss": 0.0644, + "learning_rate": 2.6897165057026508e-05, + "loss": 0.0748, "step": 34145 }, { "epoch": 1.59, - "learning_rate": 1.6891847545825326e-05, - "loss": 0.082, + "learning_rate": 2.6896696987190488e-05, + "loss": 0.0213, "step": 34150 }, { "epoch": 1.59, - "learning_rate": 1.6891378744550186e-05, - "loss": 0.1829, + "learning_rate": 2.6896228917354468e-05, + "loss": 0.1206, "step": 34155 }, { "epoch": 1.59, - "learning_rate": 1.6890909943275046e-05, - "loss": 0.1974, + "learning_rate": 2.689576084751845e-05, + "loss": 0.1155, "step": 34160 }, { "epoch": 1.59, - "learning_rate": 1.6890441141999906e-05, - "loss": 0.1326, + "learning_rate": 2.689529277768243e-05, + "loss": 0.1549, "step": 34165 }, { "epoch": 1.59, - "learning_rate": 1.688997234072477e-05, - "loss": 0.197, + "learning_rate": 2.689482470784641e-05, + "loss": 0.1066, "step": 34170 }, { "epoch": 1.59, - "learning_rate": 1.688950353944963e-05, - "loss": 0.272, + "learning_rate": 2.6894356638010394e-05, + "loss": 0.2163, "step": 34175 }, { "epoch": 1.59, - "learning_rate": 1.688903473817449e-05, - "loss": 0.3754, + "learning_rate": 2.6893888568174374e-05, + "loss": 0.3527, "step": 34180 }, { "epoch": 1.6, - "learning_rate": 1.688856593689935e-05, - "loss": 0.21, + "learning_rate": 2.6893420498338354e-05, + "loss": 0.2397, "step": 34185 }, { "epoch": 1.6, - "learning_rate": 1.6888097135624212e-05, - "loss": 0.072, + "learning_rate": 2.6892952428502333e-05, + "loss": 0.0529, "step": 34190 }, { "epoch": 1.6, - "learning_rate": 1.6887628334349072e-05, - "loss": 0.0515, + "learning_rate": 2.6892484358666317e-05, + "loss": 0.0579, "step": 34195 }, { "epoch": 1.6, - "learning_rate": 1.6887159533073932e-05, - "loss": 0.0671, + "learning_rate": 2.6892016288830293e-05, + "loss": 0.0869, "step": 34200 }, { "epoch": 1.6, - "learning_rate": 1.6886690731798792e-05, - "loss": 0.3223, + "learning_rate": 2.6891548218994273e-05, + "loss": 0.0736, "step": 34205 }, { "epoch": 1.6, - "learning_rate": 1.6886221930523652e-05, - "loss": 0.065, + "learning_rate": 2.6891080149158253e-05, + "loss": 0.118, "step": 34210 }, { "epoch": 1.6, - "learning_rate": 1.6885753129248512e-05, - "loss": 0.1724, + "learning_rate": 2.6890612079322236e-05, + "loss": 0.1502, "step": 34215 }, { "epoch": 1.6, - "learning_rate": 1.688528432797337e-05, - "loss": 0.2077, + "learning_rate": 2.6890144009486216e-05, + "loss": 0.222, "step": 34220 }, { "epoch": 1.6, - "learning_rate": 1.6884815526698235e-05, - "loss": 0.2935, + "learning_rate": 2.6889675939650195e-05, + "loss": 0.1775, "step": 34225 }, { "epoch": 1.6, - "learning_rate": 1.6884346725423095e-05, - "loss": 0.4017, + "learning_rate": 2.688920786981418e-05, + "loss": 0.274, "step": 34230 }, { "epoch": 1.6, - "learning_rate": 1.6883877924147955e-05, - "loss": 0.1805, + "learning_rate": 2.688873979997816e-05, + "loss": 0.2308, "step": 34235 }, { "epoch": 1.6, - "learning_rate": 1.6883409122872815e-05, - "loss": 0.0684, + "learning_rate": 2.688827173014214e-05, + "loss": 0.0443, "step": 34240 }, { "epoch": 1.6, - "learning_rate": 1.6882940321597675e-05, - "loss": 0.1078, + "learning_rate": 2.6887803660306118e-05, + "loss": 0.0793, "step": 34245 }, { "epoch": 1.6, - "learning_rate": 1.6882471520322538e-05, - "loss": 0.0505, + "learning_rate": 2.68873355904701e-05, + "loss": 0.0679, "step": 34250 }, { "epoch": 1.6, - "learning_rate": 1.6882002719047398e-05, - "loss": 0.0993, + "learning_rate": 2.688686752063408e-05, + "loss": 0.1434, "step": 34255 }, { "epoch": 1.6, - "learning_rate": 1.6881533917772258e-05, - "loss": 0.1775, + "learning_rate": 2.6886399450798058e-05, + "loss": 0.0738, "step": 34260 }, { "epoch": 1.6, - "learning_rate": 1.6881065116497118e-05, - "loss": 0.0837, + "learning_rate": 2.6885931380962037e-05, + "loss": 0.1883, "step": 34265 }, { "epoch": 1.6, - "learning_rate": 1.688059631522198e-05, - "loss": 0.1667, + "learning_rate": 2.688546331112602e-05, + "loss": 0.2448, "step": 34270 }, { "epoch": 1.6, - "learning_rate": 1.688012751394684e-05, - "loss": 0.233, + "learning_rate": 2.688499524129e-05, + "loss": 0.3142, "step": 34275 }, { "epoch": 1.6, - "learning_rate": 1.68796587126717e-05, - "loss": 0.2761, + "learning_rate": 2.688452717145398e-05, + "loss": 0.2646, "step": 34280 }, { "epoch": 1.6, - "learning_rate": 1.687918991139656e-05, - "loss": 0.371, + "learning_rate": 2.688405910161796e-05, + "loss": 0.1814, "step": 34285 }, { "epoch": 1.6, - "learning_rate": 1.687872111012142e-05, - "loss": 0.0497, + "learning_rate": 2.6883591031781943e-05, + "loss": 0.0626, "step": 34290 }, { "epoch": 1.6, - "learning_rate": 1.687825230884628e-05, - "loss": 0.0667, + "learning_rate": 2.6883122961945923e-05, + "loss": 0.0643, "step": 34295 }, { "epoch": 1.6, - "learning_rate": 1.687778350757114e-05, - "loss": 0.096, + "learning_rate": 2.6882654892109903e-05, + "loss": 0.062, "step": 34300 }, { "epoch": 1.6, - "learning_rate": 1.6877314706296e-05, - "loss": 0.0963, + "learning_rate": 2.6882186822273886e-05, + "loss": 0.0597, "step": 34305 }, { "epoch": 1.6, - "learning_rate": 1.687684590502086e-05, - "loss": 0.2588, + "learning_rate": 2.6881718752437866e-05, + "loss": 0.0672, "step": 34310 }, { "epoch": 1.6, - "learning_rate": 1.6876377103745724e-05, - "loss": 0.0869, + "learning_rate": 2.6881250682601846e-05, + "loss": 0.1581, "step": 34315 }, { "epoch": 1.6, - "learning_rate": 1.6875908302470584e-05, - "loss": 0.1955, + "learning_rate": 2.6880782612765826e-05, + "loss": 0.1584, "step": 34320 }, { "epoch": 1.6, - "learning_rate": 1.6875439501195444e-05, - "loss": 0.2895, + "learning_rate": 2.6880314542929805e-05, + "loss": 0.2152, "step": 34325 }, { "epoch": 1.6, - "learning_rate": 1.6874970699920307e-05, - "loss": 0.3213, + "learning_rate": 2.6879846473093785e-05, + "loss": 0.2151, "step": 34330 }, { "epoch": 1.6, - "learning_rate": 1.6874501898645167e-05, - "loss": 0.3409, + "learning_rate": 2.6879378403257765e-05, + "loss": 0.2441, "step": 34335 }, { "epoch": 1.6, - "learning_rate": 1.6874033097370027e-05, - "loss": 0.0989, + "learning_rate": 2.6878910333421745e-05, + "loss": 0.0613, "step": 34340 }, { "epoch": 1.6, - "learning_rate": 1.6873564296094887e-05, - "loss": 0.0809, + "learning_rate": 2.6878442263585728e-05, + "loss": 0.1019, "step": 34345 }, { "epoch": 1.6, - "learning_rate": 1.6873095494819747e-05, - "loss": 0.0576, + "learning_rate": 2.6877974193749708e-05, + "loss": 0.0949, "step": 34350 }, { "epoch": 1.6, - "learning_rate": 1.6872626693544607e-05, - "loss": 0.1124, + "learning_rate": 2.6877506123913688e-05, + "loss": 0.1059, "step": 34355 }, { "epoch": 1.6, - "learning_rate": 1.6872157892269467e-05, - "loss": 0.1191, + "learning_rate": 2.687703805407767e-05, + "loss": 0.1085, "step": 34360 }, { "epoch": 1.6, - "learning_rate": 1.687168909099433e-05, - "loss": 0.1092, + "learning_rate": 2.687656998424165e-05, + "loss": 0.085, "step": 34365 }, { "epoch": 1.6, - "learning_rate": 1.687122028971919e-05, - "loss": 0.1064, + "learning_rate": 2.687610191440563e-05, + "loss": 0.162, "step": 34370 }, { "epoch": 1.6, - "learning_rate": 1.687075148844405e-05, - "loss": 0.2331, + "learning_rate": 2.687563384456961e-05, + "loss": 0.2523, "step": 34375 }, { "epoch": 1.6, - "learning_rate": 1.687028268716891e-05, - "loss": 0.3882, + "learning_rate": 2.6875165774733594e-05, + "loss": 0.3305, "step": 34380 }, { "epoch": 1.6, - "learning_rate": 1.686981388589377e-05, - "loss": 0.2774, + "learning_rate": 2.6874697704897573e-05, + "loss": 0.1923, "step": 34385 }, { "epoch": 1.6, - "learning_rate": 1.686934508461863e-05, - "loss": 0.0547, + "learning_rate": 2.687422963506155e-05, + "loss": 0.0241, "step": 34390 }, { "epoch": 1.6, - "learning_rate": 1.6868876283343493e-05, - "loss": 0.0845, + "learning_rate": 2.687376156522553e-05, + "loss": 0.0576, "step": 34395 }, { "epoch": 1.61, - "learning_rate": 1.6868407482068353e-05, - "loss": 0.1467, + "learning_rate": 2.6873293495389513e-05, + "loss": 0.0547, "step": 34400 }, { "epoch": 1.61, - "learning_rate": 1.6867938680793213e-05, - "loss": 0.1287, + "learning_rate": 2.6872825425553493e-05, + "loss": 0.0452, "step": 34405 }, { "epoch": 1.61, - "learning_rate": 1.6867469879518076e-05, - "loss": 0.1478, + "learning_rate": 2.6872357355717472e-05, + "loss": 0.0912, "step": 34410 }, { "epoch": 1.61, - "learning_rate": 1.6867001078242936e-05, - "loss": 0.2076, + "learning_rate": 2.6871889285881456e-05, + "loss": 0.0769, "step": 34415 }, { "epoch": 1.61, - "learning_rate": 1.6866532276967796e-05, - "loss": 0.1496, + "learning_rate": 2.6871421216045435e-05, + "loss": 0.1924, "step": 34420 }, { "epoch": 1.61, - "learning_rate": 1.6866063475692656e-05, - "loss": 0.2264, + "learning_rate": 2.6870953146209415e-05, + "loss": 0.193, "step": 34425 }, { "epoch": 1.61, - "learning_rate": 1.6865594674417516e-05, - "loss": 0.3986, + "learning_rate": 2.6870485076373395e-05, + "loss": 0.3018, "step": 34430 }, { "epoch": 1.61, - "learning_rate": 1.6865125873142376e-05, - "loss": 0.3061, + "learning_rate": 2.687001700653738e-05, + "loss": 0.237, "step": 34435 }, { "epoch": 1.61, - "learning_rate": 1.6864657071867236e-05, - "loss": 0.0217, + "learning_rate": 2.6869548936701358e-05, + "loss": 0.0542, "step": 34440 }, { "epoch": 1.61, - "learning_rate": 1.6864188270592096e-05, - "loss": 0.0725, + "learning_rate": 2.6869080866865338e-05, + "loss": 0.0745, "step": 34445 }, { "epoch": 1.61, - "learning_rate": 1.6863719469316956e-05, - "loss": 0.0708, + "learning_rate": 2.6868612797029314e-05, + "loss": 0.1468, "step": 34450 }, { "epoch": 1.61, - "learning_rate": 1.686325066804182e-05, - "loss": 0.1335, + "learning_rate": 2.6868144727193298e-05, + "loss": 0.0688, "step": 34455 }, { "epoch": 1.61, - "learning_rate": 1.686278186676668e-05, - "loss": 0.1592, + "learning_rate": 2.6867676657357277e-05, + "loss": 0.1102, "step": 34460 }, { "epoch": 1.61, - "learning_rate": 1.686231306549154e-05, - "loss": 0.2168, + "learning_rate": 2.6867208587521257e-05, + "loss": 0.1132, "step": 34465 }, { "epoch": 1.61, - "learning_rate": 1.68618442642164e-05, - "loss": 0.1201, + "learning_rate": 2.686674051768524e-05, + "loss": 0.124, "step": 34470 }, { "epoch": 1.61, - "learning_rate": 1.6861375462941262e-05, - "loss": 0.241, + "learning_rate": 2.686627244784922e-05, + "loss": 0.2497, "step": 34475 }, { "epoch": 1.61, - "learning_rate": 1.6860906661666122e-05, - "loss": 0.4339, + "learning_rate": 2.68658043780132e-05, + "loss": 0.453, "step": 34480 }, { "epoch": 1.61, - "learning_rate": 1.6860437860390982e-05, - "loss": 0.332, + "learning_rate": 2.686533630817718e-05, + "loss": 0.3008, "step": 34485 }, { "epoch": 1.61, - "learning_rate": 1.6859969059115842e-05, - "loss": 0.0336, + "learning_rate": 2.6864868238341163e-05, + "loss": 0.0518, "step": 34490 }, { "epoch": 1.61, - "learning_rate": 1.6859500257840702e-05, - "loss": 0.0586, + "learning_rate": 2.6864400168505143e-05, + "loss": 0.0615, "step": 34495 }, { "epoch": 1.61, - "learning_rate": 1.6859031456565562e-05, - "loss": 0.0608, + "learning_rate": 2.6863932098669123e-05, + "loss": 0.0578, "step": 34500 }, { "epoch": 1.61, - "learning_rate": 1.6858562655290425e-05, - "loss": 0.1441, + "learning_rate": 2.6863464028833103e-05, + "loss": 0.0557, "step": 34505 }, { "epoch": 1.61, - "learning_rate": 1.6858093854015285e-05, - "loss": 0.0687, + "learning_rate": 2.6862995958997086e-05, + "loss": 0.112, "step": 34510 }, { "epoch": 1.61, - "learning_rate": 1.6857625052740145e-05, - "loss": 0.1453, + "learning_rate": 2.6862527889161062e-05, + "loss": 0.2484, "step": 34515 }, { "epoch": 1.61, - "learning_rate": 1.6857156251465005e-05, - "loss": 0.1763, + "learning_rate": 2.6862059819325042e-05, + "loss": 0.0795, "step": 34520 }, { "epoch": 1.61, - "learning_rate": 1.6856687450189865e-05, - "loss": 0.2107, + "learning_rate": 2.6861591749489022e-05, + "loss": 0.1779, "step": 34525 }, { "epoch": 1.61, - "learning_rate": 1.6856218648914725e-05, - "loss": 0.3186, + "learning_rate": 2.6861123679653005e-05, + "loss": 0.2807, "step": 34530 }, { "epoch": 1.61, - "learning_rate": 1.6855749847639585e-05, - "loss": 0.3441, + "learning_rate": 2.6860655609816985e-05, + "loss": 0.3128, "step": 34535 }, { "epoch": 1.61, - "learning_rate": 1.6855281046364448e-05, - "loss": 0.0226, + "learning_rate": 2.6860187539980965e-05, + "loss": 0.0468, "step": 34540 }, { "epoch": 1.61, - "learning_rate": 1.6854812245089308e-05, - "loss": 0.075, + "learning_rate": 2.6859719470144948e-05, + "loss": 0.0491, "step": 34545 }, { "epoch": 1.61, - "learning_rate": 1.6854343443814168e-05, - "loss": 0.1509, + "learning_rate": 2.6859251400308928e-05, + "loss": 0.0459, "step": 34550 }, { "epoch": 1.61, - "learning_rate": 1.685387464253903e-05, - "loss": 0.0642, + "learning_rate": 2.6858783330472908e-05, + "loss": 0.0644, "step": 34555 }, { "epoch": 1.61, - "learning_rate": 1.685340584126389e-05, - "loss": 0.1539, + "learning_rate": 2.6858315260636887e-05, + "loss": 0.1558, "step": 34560 }, { "epoch": 1.61, - "learning_rate": 1.685293703998875e-05, - "loss": 0.1392, + "learning_rate": 2.685784719080087e-05, + "loss": 0.111, "step": 34565 }, { "epoch": 1.61, - "learning_rate": 1.685246823871361e-05, - "loss": 0.0896, + "learning_rate": 2.685737912096485e-05, + "loss": 0.1514, "step": 34570 }, { "epoch": 1.61, - "learning_rate": 1.685199943743847e-05, - "loss": 0.3167, + "learning_rate": 2.685691105112883e-05, + "loss": 0.1856, "step": 34575 }, { "epoch": 1.61, - "learning_rate": 1.685153063616333e-05, - "loss": 0.3073, + "learning_rate": 2.6856442981292807e-05, + "loss": 0.2387, "step": 34580 }, { "epoch": 1.61, - "learning_rate": 1.685106183488819e-05, - "loss": 0.2712, + "learning_rate": 2.685597491145679e-05, + "loss": 0.3527, "step": 34585 }, { "epoch": 1.61, - "learning_rate": 1.685059303361305e-05, - "loss": 0.0389, + "learning_rate": 2.685550684162077e-05, + "loss": 0.076, "step": 34590 }, { "epoch": 1.61, - "learning_rate": 1.6850124232337914e-05, - "loss": 0.0515, + "learning_rate": 2.685503877178475e-05, + "loss": 0.0482, "step": 34595 }, { "epoch": 1.61, - "learning_rate": 1.6849655431062774e-05, - "loss": 0.1278, + "learning_rate": 2.6854570701948733e-05, + "loss": 0.0494, "step": 34600 }, { "epoch": 1.61, - "learning_rate": 1.6849186629787634e-05, - "loss": 0.0901, + "learning_rate": 2.6854102632112712e-05, + "loss": 0.0806, "step": 34605 }, { "epoch": 1.61, - "learning_rate": 1.6848717828512494e-05, - "loss": 0.1417, + "learning_rate": 2.6853634562276692e-05, + "loss": 0.1098, "step": 34610 }, { "epoch": 1.62, - "learning_rate": 1.6848249027237357e-05, - "loss": 0.1445, + "learning_rate": 2.6853166492440672e-05, + "loss": 0.1137, "step": 34615 }, { "epoch": 1.62, - "learning_rate": 1.6847780225962217e-05, - "loss": 0.1738, + "learning_rate": 2.6852698422604655e-05, + "loss": 0.1498, "step": 34620 }, { "epoch": 1.62, - "learning_rate": 1.6847311424687077e-05, - "loss": 0.23, + "learning_rate": 2.6852230352768635e-05, + "loss": 0.2765, "step": 34625 }, { "epoch": 1.62, - "learning_rate": 1.6846842623411937e-05, - "loss": 0.3401, + "learning_rate": 2.6851762282932615e-05, + "loss": 0.2931, "step": 34630 }, { "epoch": 1.62, - "learning_rate": 1.6846373822136797e-05, - "loss": 0.2777, + "learning_rate": 2.6851294213096595e-05, + "loss": 0.3579, "step": 34635 }, { "epoch": 1.62, - "learning_rate": 1.6845905020861657e-05, - "loss": 0.0903, + "learning_rate": 2.6850826143260575e-05, + "loss": 0.0235, "step": 34640 }, { "epoch": 1.62, - "learning_rate": 1.684543621958652e-05, - "loss": 0.0529, + "learning_rate": 2.6850358073424554e-05, + "loss": 0.0594, "step": 34645 }, { "epoch": 1.62, - "learning_rate": 1.684496741831138e-05, - "loss": 0.1014, + "learning_rate": 2.6849890003588534e-05, + "loss": 0.1259, "step": 34650 }, { "epoch": 1.62, - "learning_rate": 1.684449861703624e-05, - "loss": 0.1217, + "learning_rate": 2.6849421933752517e-05, + "loss": 0.0955, "step": 34655 }, { "epoch": 1.62, - "learning_rate": 1.68440298157611e-05, - "loss": 0.1712, + "learning_rate": 2.6848953863916497e-05, + "loss": 0.0988, "step": 34660 }, { "epoch": 1.62, - "learning_rate": 1.684356101448596e-05, - "loss": 0.1573, + "learning_rate": 2.6848485794080477e-05, + "loss": 0.0903, "step": 34665 }, { "epoch": 1.62, - "learning_rate": 1.684309221321082e-05, - "loss": 0.2182, + "learning_rate": 2.6848017724244457e-05, + "loss": 0.1835, "step": 34670 }, { "epoch": 1.62, - "learning_rate": 1.684262341193568e-05, - "loss": 0.1673, + "learning_rate": 2.684754965440844e-05, + "loss": 0.1849, "step": 34675 }, { "epoch": 1.62, - "learning_rate": 1.6842154610660543e-05, - "loss": 0.1951, + "learning_rate": 2.684708158457242e-05, + "loss": 0.3924, "step": 34680 }, { "epoch": 1.62, - "learning_rate": 1.6841685809385403e-05, - "loss": 0.2755, + "learning_rate": 2.68466135147364e-05, + "loss": 0.2054, "step": 34685 }, { "epoch": 1.62, - "learning_rate": 1.6841217008110263e-05, - "loss": 0.0314, + "learning_rate": 2.684614544490038e-05, + "loss": 0.0244, "step": 34690 }, { "epoch": 1.62, - "learning_rate": 1.6840748206835126e-05, - "loss": 0.0377, + "learning_rate": 2.6845677375064363e-05, + "loss": 0.0388, "step": 34695 }, { "epoch": 1.62, - "learning_rate": 1.6840279405559986e-05, - "loss": 0.0633, + "learning_rate": 2.6845209305228343e-05, + "loss": 0.0666, "step": 34700 }, { "epoch": 1.62, - "learning_rate": 1.6839810604284846e-05, - "loss": 0.0738, + "learning_rate": 2.684474123539232e-05, + "loss": 0.0782, "step": 34705 }, { "epoch": 1.62, - "learning_rate": 1.6839341803009706e-05, - "loss": 0.147, + "learning_rate": 2.68442731655563e-05, + "loss": 0.1094, "step": 34710 }, { "epoch": 1.62, - "learning_rate": 1.6838873001734566e-05, - "loss": 0.0918, + "learning_rate": 2.6843805095720282e-05, + "loss": 0.1747, "step": 34715 }, { "epoch": 1.62, - "learning_rate": 1.6838404200459426e-05, - "loss": 0.113, + "learning_rate": 2.6843337025884262e-05, + "loss": 0.1708, "step": 34720 }, { "epoch": 1.62, - "learning_rate": 1.6837935399184286e-05, - "loss": 0.1378, + "learning_rate": 2.684286895604824e-05, + "loss": 0.2518, "step": 34725 }, { "epoch": 1.62, - "learning_rate": 1.6837466597909146e-05, - "loss": 0.2208, + "learning_rate": 2.6842400886212225e-05, + "loss": 0.1943, "step": 34730 }, { "epoch": 1.62, - "learning_rate": 1.683699779663401e-05, - "loss": 0.3092, + "learning_rate": 2.6841932816376205e-05, + "loss": 0.3573, "step": 34735 }, { "epoch": 1.62, - "learning_rate": 1.683652899535887e-05, - "loss": 0.0954, + "learning_rate": 2.6841464746540184e-05, + "loss": 0.0451, "step": 34740 }, { "epoch": 1.62, - "learning_rate": 1.683606019408373e-05, - "loss": 0.0398, + "learning_rate": 2.6840996676704164e-05, + "loss": 0.0434, "step": 34745 }, { "epoch": 1.62, - "learning_rate": 1.683559139280859e-05, - "loss": 0.0969, + "learning_rate": 2.6840528606868148e-05, + "loss": 0.0879, "step": 34750 }, { "epoch": 1.62, - "learning_rate": 1.683512259153345e-05, - "loss": 0.075, + "learning_rate": 2.6840060537032127e-05, + "loss": 0.0797, "step": 34755 }, { "epoch": 1.62, - "learning_rate": 1.6834653790258312e-05, - "loss": 0.1861, + "learning_rate": 2.6839592467196107e-05, + "loss": 0.1144, "step": 34760 }, { "epoch": 1.62, - "learning_rate": 1.6834184988983172e-05, - "loss": 0.1478, + "learning_rate": 2.6839124397360087e-05, + "loss": 0.1451, "step": 34765 }, { "epoch": 1.62, - "learning_rate": 1.6833716187708032e-05, - "loss": 0.1653, + "learning_rate": 2.6838656327524067e-05, + "loss": 0.2889, "step": 34770 }, { "epoch": 1.62, - "learning_rate": 1.6833247386432892e-05, - "loss": 0.2705, + "learning_rate": 2.6838188257688047e-05, + "loss": 0.1306, "step": 34775 }, { "epoch": 1.62, - "learning_rate": 1.6832778585157755e-05, - "loss": 0.2997, + "learning_rate": 2.6837720187852026e-05, + "loss": 0.3733, "step": 34780 }, { "epoch": 1.62, - "learning_rate": 1.6832309783882615e-05, - "loss": 0.2416, + "learning_rate": 2.683725211801601e-05, + "loss": 0.2779, "step": 34785 }, { "epoch": 1.62, - "learning_rate": 1.6831840982607475e-05, - "loss": 0.0703, + "learning_rate": 2.683678404817999e-05, + "loss": 0.0484, "step": 34790 }, { "epoch": 1.62, - "learning_rate": 1.6831372181332335e-05, - "loss": 0.0518, + "learning_rate": 2.683631597834397e-05, + "loss": 0.0482, "step": 34795 }, { "epoch": 1.62, - "learning_rate": 1.6830903380057195e-05, - "loss": 0.1567, + "learning_rate": 2.683584790850795e-05, + "loss": 0.0655, "step": 34800 }, { "epoch": 1.62, - "learning_rate": 1.6830434578782055e-05, - "loss": 0.0769, + "learning_rate": 2.6835379838671932e-05, + "loss": 0.0945, "step": 34805 }, { "epoch": 1.62, - "learning_rate": 1.6829965777506915e-05, - "loss": 0.0968, + "learning_rate": 2.6834911768835912e-05, + "loss": 0.1258, "step": 34810 }, { "epoch": 1.62, - "learning_rate": 1.6829496976231775e-05, - "loss": 0.0712, + "learning_rate": 2.6834443698999892e-05, + "loss": 0.1134, "step": 34815 }, { "epoch": 1.62, - "learning_rate": 1.6829028174956635e-05, - "loss": 0.2025, + "learning_rate": 2.6833975629163872e-05, + "loss": 0.1888, "step": 34820 }, { "epoch": 1.62, - "learning_rate": 1.6828559373681498e-05, - "loss": 0.2455, + "learning_rate": 2.6833507559327855e-05, + "loss": 0.2423, "step": 34825 }, { "epoch": 1.63, - "learning_rate": 1.6828090572406358e-05, - "loss": 0.1866, + "learning_rate": 2.683303948949183e-05, + "loss": 0.3884, "step": 34830 }, { "epoch": 1.63, - "learning_rate": 1.6827621771131218e-05, - "loss": 0.3097, + "learning_rate": 2.683257141965581e-05, + "loss": 0.3771, "step": 34835 }, { "epoch": 1.63, - "learning_rate": 1.682715296985608e-05, - "loss": 0.0356, + "learning_rate": 2.6832103349819794e-05, + "loss": 0.0532, "step": 34840 }, { "epoch": 1.63, - "learning_rate": 1.682668416858094e-05, - "loss": 0.0695, + "learning_rate": 2.6831635279983774e-05, + "loss": 0.0419, "step": 34845 }, { "epoch": 1.63, - "learning_rate": 1.68262153673058e-05, - "loss": 0.0611, + "learning_rate": 2.6831167210147754e-05, + "loss": 0.056, "step": 34850 }, { "epoch": 1.63, - "learning_rate": 1.682574656603066e-05, - "loss": 0.0836, + "learning_rate": 2.6830699140311734e-05, + "loss": 0.0864, "step": 34855 }, { "epoch": 1.63, - "learning_rate": 1.682527776475552e-05, - "loss": 0.1121, + "learning_rate": 2.6830231070475717e-05, + "loss": 0.0579, "step": 34860 }, { "epoch": 1.63, - "learning_rate": 1.682480896348038e-05, - "loss": 0.1199, + "learning_rate": 2.6829763000639697e-05, + "loss": 0.1614, "step": 34865 }, { "epoch": 1.63, - "learning_rate": 1.682434016220524e-05, - "loss": 0.1647, + "learning_rate": 2.6829294930803677e-05, + "loss": 0.1465, "step": 34870 }, { "epoch": 1.63, - "learning_rate": 1.6823871360930104e-05, - "loss": 0.1451, + "learning_rate": 2.6828826860967656e-05, + "loss": 0.2053, "step": 34875 }, { "epoch": 1.63, - "learning_rate": 1.6823402559654964e-05, - "loss": 0.2457, + "learning_rate": 2.682835879113164e-05, + "loss": 0.3873, "step": 34880 }, { "epoch": 1.63, - "learning_rate": 1.6822933758379824e-05, - "loss": 0.2741, + "learning_rate": 2.682789072129562e-05, + "loss": 0.2562, "step": 34885 }, { "epoch": 1.63, - "learning_rate": 1.6822464957104684e-05, - "loss": 0.0439, + "learning_rate": 2.68274226514596e-05, + "loss": 0.016, "step": 34890 }, { "epoch": 1.63, - "learning_rate": 1.6821996155829544e-05, - "loss": 0.0378, + "learning_rate": 2.6826954581623576e-05, + "loss": 0.0762, "step": 34895 }, { "epoch": 1.63, - "learning_rate": 1.6821527354554404e-05, - "loss": 0.134, + "learning_rate": 2.682648651178756e-05, + "loss": 0.1148, "step": 34900 }, { "epoch": 1.63, - "learning_rate": 1.6821058553279267e-05, - "loss": 0.0693, + "learning_rate": 2.682601844195154e-05, + "loss": 0.0941, "step": 34905 }, { "epoch": 1.63, - "learning_rate": 1.6820589752004127e-05, - "loss": 0.1013, + "learning_rate": 2.682555037211552e-05, + "loss": 0.1167, "step": 34910 }, { "epoch": 1.63, - "learning_rate": 1.6820120950728987e-05, - "loss": 0.1133, + "learning_rate": 2.6825082302279502e-05, + "loss": 0.2013, "step": 34915 }, { "epoch": 1.63, - "learning_rate": 1.681965214945385e-05, - "loss": 0.1309, + "learning_rate": 2.682461423244348e-05, + "loss": 0.1354, "step": 34920 }, { "epoch": 1.63, - "learning_rate": 1.681918334817871e-05, - "loss": 0.1584, + "learning_rate": 2.682414616260746e-05, + "loss": 0.1162, "step": 34925 }, { "epoch": 1.63, - "learning_rate": 1.681871454690357e-05, - "loss": 0.2859, + "learning_rate": 2.682367809277144e-05, + "loss": 0.2893, "step": 34930 }, { "epoch": 1.63, - "learning_rate": 1.681824574562843e-05, - "loss": 0.2421, + "learning_rate": 2.6823210022935424e-05, + "loss": 0.3477, "step": 34935 }, { "epoch": 1.63, - "learning_rate": 1.681777694435329e-05, - "loss": 0.0499, + "learning_rate": 2.6822741953099404e-05, + "loss": 0.0413, "step": 34940 }, { "epoch": 1.63, - "learning_rate": 1.681730814307815e-05, - "loss": 0.0865, + "learning_rate": 2.6822273883263384e-05, + "loss": 0.0796, "step": 34945 }, { "epoch": 1.63, - "learning_rate": 1.681683934180301e-05, - "loss": 0.0344, + "learning_rate": 2.6821805813427364e-05, + "loss": 0.0605, "step": 34950 }, { "epoch": 1.63, - "learning_rate": 1.681637054052787e-05, - "loss": 0.1128, + "learning_rate": 2.6821337743591344e-05, + "loss": 0.134, "step": 34955 }, { "epoch": 1.63, - "learning_rate": 1.681590173925273e-05, - "loss": 0.0706, + "learning_rate": 2.6820869673755324e-05, + "loss": 0.0784, "step": 34960 }, { "epoch": 1.63, - "learning_rate": 1.6815432937977593e-05, - "loss": 0.1731, + "learning_rate": 2.6820401603919303e-05, + "loss": 0.1225, "step": 34965 }, { "epoch": 1.63, - "learning_rate": 1.6814964136702453e-05, - "loss": 0.1794, + "learning_rate": 2.6819933534083287e-05, + "loss": 0.1522, "step": 34970 }, { "epoch": 1.63, - "learning_rate": 1.6814495335427313e-05, - "loss": 0.2741, + "learning_rate": 2.6819465464247266e-05, + "loss": 0.2148, "step": 34975 }, { "epoch": 1.63, - "learning_rate": 1.6814026534152173e-05, - "loss": 0.4299, + "learning_rate": 2.6818997394411246e-05, + "loss": 0.3193, "step": 34980 }, { "epoch": 1.63, - "learning_rate": 1.6813557732877036e-05, - "loss": 0.2528, + "learning_rate": 2.6818529324575226e-05, + "loss": 0.2492, "step": 34985 }, { "epoch": 1.63, - "learning_rate": 1.6813088931601896e-05, - "loss": 0.0214, + "learning_rate": 2.681806125473921e-05, + "loss": 0.0347, "step": 34990 }, { "epoch": 1.63, - "learning_rate": 1.6812620130326756e-05, - "loss": 0.0732, + "learning_rate": 2.681759318490319e-05, + "loss": 0.0383, "step": 34995 }, { "epoch": 1.63, - "learning_rate": 1.6812151329051616e-05, - "loss": 0.0879, + "learning_rate": 2.681712511506717e-05, + "loss": 0.12, "step": 35000 }, { "epoch": 1.63, - "learning_rate": 1.6811682527776476e-05, - "loss": 0.0779, + "learning_rate": 2.681665704523115e-05, + "loss": 0.0772, "step": 35005 }, { "epoch": 1.63, - "learning_rate": 1.6811213726501336e-05, - "loss": 0.1751, + "learning_rate": 2.6816188975395132e-05, + "loss": 0.1024, "step": 35010 }, { "epoch": 1.63, - "learning_rate": 1.68107449252262e-05, - "loss": 0.1612, + "learning_rate": 2.6815720905559112e-05, + "loss": 0.146, "step": 35015 }, { "epoch": 1.63, - "learning_rate": 1.681027612395106e-05, - "loss": 0.1184, + "learning_rate": 2.6815252835723088e-05, + "loss": 0.0903, "step": 35020 }, { "epoch": 1.63, - "learning_rate": 1.680980732267592e-05, - "loss": 0.2088, + "learning_rate": 2.681478476588707e-05, + "loss": 0.1709, "step": 35025 }, { "epoch": 1.63, - "learning_rate": 1.680933852140078e-05, - "loss": 0.2776, + "learning_rate": 2.681431669605105e-05, + "loss": 0.4136, "step": 35030 }, { "epoch": 1.63, - "learning_rate": 1.680886972012564e-05, - "loss": 0.3245, + "learning_rate": 2.681384862621503e-05, + "loss": 0.2824, "step": 35035 }, { "epoch": 1.64, - "learning_rate": 1.68084009188505e-05, - "loss": 0.0458, + "learning_rate": 2.681338055637901e-05, + "loss": 0.0363, "step": 35040 }, { "epoch": 1.64, - "learning_rate": 1.6807932117575362e-05, - "loss": 0.0426, + "learning_rate": 2.6812912486542994e-05, + "loss": 0.0652, "step": 35045 }, { "epoch": 1.64, - "learning_rate": 1.6807463316300222e-05, - "loss": 0.0691, + "learning_rate": 2.6812444416706974e-05, + "loss": 0.0782, "step": 35050 }, { "epoch": 1.64, - "learning_rate": 1.6806994515025082e-05, - "loss": 0.1131, + "learning_rate": 2.6811976346870954e-05, + "loss": 0.0762, "step": 35055 }, { "epoch": 1.64, - "learning_rate": 1.6806525713749942e-05, - "loss": 0.1071, + "learning_rate": 2.6811508277034933e-05, + "loss": 0.0833, "step": 35060 }, { "epoch": 1.64, - "learning_rate": 1.6806056912474805e-05, - "loss": 0.1247, + "learning_rate": 2.6811040207198917e-05, + "loss": 0.206, "step": 35065 }, { "epoch": 1.64, - "learning_rate": 1.6805588111199665e-05, - "loss": 0.207, + "learning_rate": 2.6810572137362896e-05, + "loss": 0.2424, "step": 35070 }, { "epoch": 1.64, - "learning_rate": 1.6805119309924525e-05, - "loss": 0.3076, + "learning_rate": 2.6810104067526876e-05, + "loss": 0.2071, "step": 35075 }, { "epoch": 1.64, - "learning_rate": 1.6804650508649385e-05, - "loss": 0.353, + "learning_rate": 2.6809635997690856e-05, + "loss": 0.3054, "step": 35080 }, { "epoch": 1.64, - "learning_rate": 1.6804181707374245e-05, - "loss": 0.2484, + "learning_rate": 2.6809167927854836e-05, + "loss": 0.2174, "step": 35085 }, { "epoch": 1.64, - "learning_rate": 1.6803712906099105e-05, - "loss": 0.0702, + "learning_rate": 2.6808699858018816e-05, + "loss": 0.067, "step": 35090 }, { "epoch": 1.64, - "learning_rate": 1.6803244104823965e-05, - "loss": 0.0584, + "learning_rate": 2.6808231788182796e-05, + "loss": 0.1032, "step": 35095 }, { "epoch": 1.64, - "learning_rate": 1.6802775303548825e-05, - "loss": 0.0575, + "learning_rate": 2.680776371834678e-05, + "loss": 0.1334, "step": 35100 }, { "epoch": 1.64, - "learning_rate": 1.680230650227369e-05, - "loss": 0.0894, + "learning_rate": 2.680729564851076e-05, + "loss": 0.0693, "step": 35105 }, { "epoch": 1.64, - "learning_rate": 1.6801837700998548e-05, - "loss": 0.1053, + "learning_rate": 2.680682757867474e-05, + "loss": 0.1273, "step": 35110 }, { "epoch": 1.64, - "learning_rate": 1.6801368899723408e-05, - "loss": 0.0946, + "learning_rate": 2.6806359508838718e-05, + "loss": 0.1383, "step": 35115 }, { "epoch": 1.64, - "learning_rate": 1.6800900098448268e-05, - "loss": 0.1376, + "learning_rate": 2.68058914390027e-05, + "loss": 0.1119, "step": 35120 }, { "epoch": 1.64, - "learning_rate": 1.680043129717313e-05, - "loss": 0.2744, + "learning_rate": 2.680542336916668e-05, + "loss": 0.205, "step": 35125 }, { "epoch": 1.64, - "learning_rate": 1.679996249589799e-05, - "loss": 0.4252, + "learning_rate": 2.680495529933066e-05, + "loss": 0.3333, "step": 35130 }, { "epoch": 1.64, - "learning_rate": 1.679949369462285e-05, - "loss": 0.235, + "learning_rate": 2.680448722949464e-05, + "loss": 0.2637, "step": 35135 }, { "epoch": 1.64, - "learning_rate": 1.679902489334771e-05, - "loss": 0.0659, + "learning_rate": 2.6804019159658624e-05, + "loss": 0.0679, "step": 35140 }, { "epoch": 1.64, - "learning_rate": 1.679855609207257e-05, - "loss": 0.0471, + "learning_rate": 2.68035510898226e-05, + "loss": 0.0672, "step": 35145 }, { "epoch": 1.64, - "learning_rate": 1.679808729079743e-05, - "loss": 0.0852, + "learning_rate": 2.680308301998658e-05, + "loss": 0.0977, "step": 35150 }, { "epoch": 1.64, - "learning_rate": 1.6797618489522294e-05, - "loss": 0.1383, + "learning_rate": 2.6802614950150564e-05, + "loss": 0.0903, "step": 35155 }, { "epoch": 1.64, - "learning_rate": 1.6797149688247154e-05, - "loss": 0.0855, + "learning_rate": 2.6802146880314543e-05, + "loss": 0.1231, "step": 35160 }, { "epoch": 1.64, - "learning_rate": 1.6796680886972014e-05, - "loss": 0.1548, + "learning_rate": 2.6801678810478523e-05, + "loss": 0.1169, "step": 35165 }, { "epoch": 1.64, - "learning_rate": 1.6796212085696874e-05, - "loss": 0.1816, + "learning_rate": 2.6801210740642503e-05, + "loss": 0.1604, "step": 35170 }, { "epoch": 1.64, - "learning_rate": 1.6795743284421734e-05, - "loss": 0.118, + "learning_rate": 2.6800742670806486e-05, + "loss": 0.2335, "step": 35175 }, { "epoch": 1.64, - "learning_rate": 1.6795274483146594e-05, - "loss": 0.414, + "learning_rate": 2.6800274600970466e-05, + "loss": 0.3201, "step": 35180 }, { "epoch": 1.64, - "learning_rate": 1.6794805681871454e-05, - "loss": 0.2278, + "learning_rate": 2.6799806531134446e-05, + "loss": 0.2803, "step": 35185 }, { "epoch": 1.64, - "learning_rate": 1.6794336880596317e-05, - "loss": 0.074, + "learning_rate": 2.6799338461298426e-05, + "loss": 0.0642, "step": 35190 }, { "epoch": 1.64, - "learning_rate": 1.6793868079321177e-05, - "loss": 0.051, + "learning_rate": 2.679887039146241e-05, + "loss": 0.0155, "step": 35195 }, { "epoch": 1.64, - "learning_rate": 1.6793399278046037e-05, - "loss": 0.0984, + "learning_rate": 2.679840232162639e-05, + "loss": 0.0646, "step": 35200 }, { "epoch": 1.64, - "learning_rate": 1.67929304767709e-05, - "loss": 0.0757, + "learning_rate": 2.679793425179037e-05, + "loss": 0.1013, "step": 35205 }, { "epoch": 1.64, - "learning_rate": 1.679246167549576e-05, - "loss": 0.1527, + "learning_rate": 2.679746618195435e-05, + "loss": 0.1002, "step": 35210 }, { "epoch": 1.64, - "learning_rate": 1.679199287422062e-05, - "loss": 0.1969, + "learning_rate": 2.6796998112118328e-05, + "loss": 0.1269, "step": 35215 }, { "epoch": 1.64, - "learning_rate": 1.679152407294548e-05, - "loss": 0.0733, + "learning_rate": 2.6796530042282308e-05, + "loss": 0.0967, "step": 35220 }, { "epoch": 1.64, - "learning_rate": 1.679105527167034e-05, - "loss": 0.2065, + "learning_rate": 2.6796061972446288e-05, + "loss": 0.2312, "step": 35225 }, { "epoch": 1.64, - "learning_rate": 1.67905864703952e-05, - "loss": 0.2298, + "learning_rate": 2.679559390261027e-05, + "loss": 0.2334, "step": 35230 }, { "epoch": 1.64, - "learning_rate": 1.679011766912006e-05, - "loss": 0.2383, + "learning_rate": 2.679512583277425e-05, + "loss": 0.319, "step": 35235 }, { "epoch": 1.64, - "learning_rate": 1.678964886784492e-05, - "loss": 0.0261, + "learning_rate": 2.679465776293823e-05, + "loss": 0.0588, "step": 35240 }, { "epoch": 1.64, - "learning_rate": 1.6789180066569783e-05, - "loss": 0.0558, + "learning_rate": 2.679418969310221e-05, + "loss": 0.0514, "step": 35245 }, { "epoch": 1.64, - "learning_rate": 1.6788711265294643e-05, - "loss": 0.1007, + "learning_rate": 2.6793721623266194e-05, + "loss": 0.1554, "step": 35250 }, { "epoch": 1.65, - "learning_rate": 1.6788242464019503e-05, - "loss": 0.1138, + "learning_rate": 2.6793253553430173e-05, + "loss": 0.1115, "step": 35255 }, { "epoch": 1.65, - "learning_rate": 1.6787773662744363e-05, - "loss": 0.1242, + "learning_rate": 2.6792785483594153e-05, + "loss": 0.1398, "step": 35260 }, { "epoch": 1.65, - "learning_rate": 1.6787304861469223e-05, - "loss": 0.2085, + "learning_rate": 2.6792317413758136e-05, + "loss": 0.1965, "step": 35265 }, { "epoch": 1.65, - "learning_rate": 1.6786836060194086e-05, - "loss": 0.171, + "learning_rate": 2.6791849343922113e-05, + "loss": 0.1584, "step": 35270 }, { "epoch": 1.65, - "learning_rate": 1.6786367258918946e-05, - "loss": 0.1888, + "learning_rate": 2.6791381274086093e-05, + "loss": 0.2064, "step": 35275 }, { "epoch": 1.65, - "learning_rate": 1.6785898457643806e-05, - "loss": 0.2637, + "learning_rate": 2.6790913204250073e-05, + "loss": 0.3626, "step": 35280 }, { "epoch": 1.65, - "learning_rate": 1.6785429656368666e-05, - "loss": 0.3491, + "learning_rate": 2.6790445134414056e-05, + "loss": 0.3062, "step": 35285 }, { "epoch": 1.65, - "learning_rate": 1.6784960855093526e-05, - "loss": 0.0144, + "learning_rate": 2.6789977064578036e-05, + "loss": 0.0683, "step": 35290 }, { "epoch": 1.65, - "learning_rate": 1.678449205381839e-05, - "loss": 0.0612, + "learning_rate": 2.6789508994742015e-05, + "loss": 0.0324, "step": 35295 }, { "epoch": 1.65, - "learning_rate": 1.678402325254325e-05, - "loss": 0.0288, + "learning_rate": 2.6789040924905995e-05, + "loss": 0.051, "step": 35300 }, { "epoch": 1.65, - "learning_rate": 1.678355445126811e-05, - "loss": 0.1251, + "learning_rate": 2.678857285506998e-05, + "loss": 0.093, "step": 35305 }, { "epoch": 1.65, - "learning_rate": 1.678308564999297e-05, - "loss": 0.1111, + "learning_rate": 2.6788104785233958e-05, + "loss": 0.1299, "step": 35310 }, { "epoch": 1.65, - "learning_rate": 1.678261684871783e-05, - "loss": 0.1563, + "learning_rate": 2.6787636715397938e-05, + "loss": 0.1499, "step": 35315 }, { "epoch": 1.65, - "learning_rate": 1.678214804744269e-05, - "loss": 0.1506, + "learning_rate": 2.6787168645561918e-05, + "loss": 0.2239, "step": 35320 }, { "epoch": 1.65, - "learning_rate": 1.678167924616755e-05, - "loss": 0.2064, + "learning_rate": 2.67867005757259e-05, + "loss": 0.215, "step": 35325 }, { "epoch": 1.65, - "learning_rate": 1.678121044489241e-05, - "loss": 0.4103, + "learning_rate": 2.678623250588988e-05, + "loss": 0.2326, "step": 35330 }, { "epoch": 1.65, - "learning_rate": 1.6780741643617272e-05, - "loss": 0.253, + "learning_rate": 2.6785764436053857e-05, + "loss": 0.1818, "step": 35335 }, { "epoch": 1.65, - "learning_rate": 1.6780272842342132e-05, - "loss": 0.0347, + "learning_rate": 2.678529636621784e-05, + "loss": 0.0709, "step": 35340 }, { "epoch": 1.65, - "learning_rate": 1.6779804041066992e-05, - "loss": 0.0921, + "learning_rate": 2.678482829638182e-05, + "loss": 0.0507, "step": 35345 }, { "epoch": 1.65, - "learning_rate": 1.6779335239791856e-05, - "loss": 0.0761, + "learning_rate": 2.67843602265458e-05, + "loss": 0.0713, "step": 35350 }, { "epoch": 1.65, - "learning_rate": 1.6778866438516716e-05, - "loss": 0.053, + "learning_rate": 2.678389215670978e-05, + "loss": 0.1011, "step": 35355 }, { "epoch": 1.65, - "learning_rate": 1.6778397637241575e-05, - "loss": 0.0907, + "learning_rate": 2.6783424086873763e-05, + "loss": 0.0856, "step": 35360 }, { "epoch": 1.65, - "learning_rate": 1.6777928835966435e-05, - "loss": 0.1111, + "learning_rate": 2.6782956017037743e-05, + "loss": 0.1326, "step": 35365 }, { "epoch": 1.65, - "learning_rate": 1.6777460034691295e-05, - "loss": 0.171, + "learning_rate": 2.6782487947201723e-05, + "loss": 0.139, "step": 35370 }, { "epoch": 1.65, - "learning_rate": 1.6776991233416155e-05, - "loss": 0.1687, + "learning_rate": 2.6782019877365703e-05, + "loss": 0.2801, "step": 35375 }, { "epoch": 1.65, - "learning_rate": 1.6776522432141015e-05, - "loss": 0.3957, + "learning_rate": 2.6781551807529686e-05, + "loss": 0.3569, "step": 35380 }, { "epoch": 1.65, - "learning_rate": 1.677605363086588e-05, - "loss": 0.2045, + "learning_rate": 2.6781083737693666e-05, + "loss": 0.2894, "step": 35385 }, { "epoch": 1.65, - "learning_rate": 1.677558482959074e-05, - "loss": 0.0608, + "learning_rate": 2.6780615667857645e-05, + "loss": 0.0866, "step": 35390 }, { "epoch": 1.65, - "learning_rate": 1.67751160283156e-05, - "loss": 0.0678, + "learning_rate": 2.678014759802163e-05, + "loss": 0.0207, "step": 35395 }, { "epoch": 1.65, - "learning_rate": 1.6774647227040458e-05, - "loss": 0.1181, + "learning_rate": 2.6779679528185605e-05, + "loss": 0.0807, "step": 35400 }, { "epoch": 1.65, - "learning_rate": 1.6774178425765318e-05, - "loss": 0.0646, + "learning_rate": 2.6779211458349585e-05, + "loss": 0.0606, "step": 35405 }, { "epoch": 1.65, - "learning_rate": 1.6773709624490178e-05, - "loss": 0.1283, + "learning_rate": 2.6778743388513565e-05, + "loss": 0.1061, "step": 35410 }, { "epoch": 1.65, - "learning_rate": 1.677324082321504e-05, - "loss": 0.1436, + "learning_rate": 2.6778275318677548e-05, + "loss": 0.1483, "step": 35415 }, { "epoch": 1.65, - "learning_rate": 1.67727720219399e-05, - "loss": 0.1407, + "learning_rate": 2.6777807248841528e-05, + "loss": 0.0769, "step": 35420 }, { "epoch": 1.65, - "learning_rate": 1.677230322066476e-05, - "loss": 0.1615, + "learning_rate": 2.6777339179005508e-05, + "loss": 0.1539, "step": 35425 }, { "epoch": 1.65, - "learning_rate": 1.6771834419389625e-05, - "loss": 0.3031, + "learning_rate": 2.6776871109169487e-05, + "loss": 0.3495, "step": 35430 }, { "epoch": 1.65, - "learning_rate": 1.6771365618114485e-05, - "loss": 0.2634, + "learning_rate": 2.677640303933347e-05, + "loss": 0.2758, "step": 35435 }, { "epoch": 1.65, - "learning_rate": 1.6770896816839345e-05, - "loss": 0.0649, + "learning_rate": 2.677593496949745e-05, + "loss": 0.0222, "step": 35440 }, { "epoch": 1.65, - "learning_rate": 1.6770428015564204e-05, - "loss": 0.0962, + "learning_rate": 2.677546689966143e-05, + "loss": 0.0391, "step": 35445 }, { "epoch": 1.65, - "learning_rate": 1.6769959214289064e-05, - "loss": 0.0512, + "learning_rate": 2.6774998829825413e-05, + "loss": 0.0527, "step": 35450 }, { "epoch": 1.65, - "learning_rate": 1.6769490413013924e-05, - "loss": 0.1274, + "learning_rate": 2.6774530759989393e-05, + "loss": 0.1213, "step": 35455 }, { "epoch": 1.65, - "learning_rate": 1.6769021611738784e-05, - "loss": 0.0368, + "learning_rate": 2.677406269015337e-05, + "loss": 0.1702, "step": 35460 }, { "epoch": 1.65, - "learning_rate": 1.6768552810463644e-05, - "loss": 0.1301, + "learning_rate": 2.677359462031735e-05, + "loss": 0.0992, "step": 35465 }, { "epoch": 1.66, - "learning_rate": 1.6768084009188504e-05, - "loss": 0.1465, + "learning_rate": 2.6773126550481333e-05, + "loss": 0.113, "step": 35470 }, { "epoch": 1.66, - "learning_rate": 1.6767615207913367e-05, - "loss": 0.2885, + "learning_rate": 2.6772658480645313e-05, + "loss": 0.2729, "step": 35475 }, { "epoch": 1.66, - "learning_rate": 1.6767146406638227e-05, - "loss": 0.4663, + "learning_rate": 2.6772190410809292e-05, + "loss": 0.3645, "step": 35480 }, { "epoch": 1.66, - "learning_rate": 1.6766677605363087e-05, - "loss": 0.4114, + "learning_rate": 2.6771722340973272e-05, + "loss": 0.1929, "step": 35485 }, { "epoch": 1.66, - "learning_rate": 1.6766208804087947e-05, - "loss": 0.0577, + "learning_rate": 2.6771254271137255e-05, + "loss": 0.0392, "step": 35490 }, { "epoch": 1.66, - "learning_rate": 1.676574000281281e-05, - "loss": 0.0584, + "learning_rate": 2.6770786201301235e-05, + "loss": 0.0357, "step": 35495 }, { "epoch": 1.66, - "learning_rate": 1.676527120153767e-05, - "loss": 0.0834, + "learning_rate": 2.6770318131465215e-05, + "loss": 0.1291, "step": 35500 }, { "epoch": 1.66, - "learning_rate": 1.676480240026253e-05, - "loss": 0.0556, + "learning_rate": 2.6769850061629195e-05, + "loss": 0.0994, "step": 35505 }, { "epoch": 1.66, - "learning_rate": 1.676433359898739e-05, - "loss": 0.2583, + "learning_rate": 2.6769381991793178e-05, + "loss": 0.1167, "step": 35510 }, { "epoch": 1.66, - "learning_rate": 1.676386479771225e-05, - "loss": 0.1383, + "learning_rate": 2.6768913921957158e-05, + "loss": 0.1407, "step": 35515 }, { "epoch": 1.66, - "learning_rate": 1.676339599643711e-05, - "loss": 0.2096, + "learning_rate": 2.6768445852121138e-05, + "loss": 0.1327, "step": 35520 }, { "epoch": 1.66, - "learning_rate": 1.6762927195161974e-05, - "loss": 0.2905, + "learning_rate": 2.6767977782285117e-05, + "loss": 0.1641, "step": 35525 }, { "epoch": 1.66, - "learning_rate": 1.6762458393886834e-05, - "loss": 0.4707, + "learning_rate": 2.6767509712449097e-05, + "loss": 0.3573, "step": 35530 }, { "epoch": 1.66, - "learning_rate": 1.6761989592611693e-05, - "loss": 0.3049, + "learning_rate": 2.6767041642613077e-05, + "loss": 0.2123, "step": 35535 }, { "epoch": 1.66, - "learning_rate": 1.6761520791336553e-05, - "loss": 0.0263, + "learning_rate": 2.6766573572777057e-05, + "loss": 0.0258, "step": 35540 }, { "epoch": 1.66, - "learning_rate": 1.6761051990061413e-05, - "loss": 0.1083, + "learning_rate": 2.676610550294104e-05, + "loss": 0.0469, "step": 35545 }, { "epoch": 1.66, - "learning_rate": 1.6760583188786273e-05, - "loss": 0.1045, + "learning_rate": 2.676563743310502e-05, + "loss": 0.103, "step": 35550 }, { "epoch": 1.66, - "learning_rate": 1.6760114387511137e-05, - "loss": 0.0981, + "learning_rate": 2.6765169363269e-05, + "loss": 0.0814, "step": 35555 }, { "epoch": 1.66, - "learning_rate": 1.6759645586235997e-05, - "loss": 0.1292, + "learning_rate": 2.676470129343298e-05, + "loss": 0.0724, "step": 35560 }, { "epoch": 1.66, - "learning_rate": 1.6759176784960856e-05, - "loss": 0.1232, + "learning_rate": 2.6764233223596963e-05, + "loss": 0.1174, "step": 35565 }, { "epoch": 1.66, - "learning_rate": 1.6758707983685716e-05, - "loss": 0.1619, + "learning_rate": 2.6763765153760943e-05, + "loss": 0.1216, "step": 35570 }, { "epoch": 1.66, - "learning_rate": 1.675823918241058e-05, - "loss": 0.1738, + "learning_rate": 2.6763297083924922e-05, + "loss": 0.2656, "step": 35575 }, { "epoch": 1.66, - "learning_rate": 1.675777038113544e-05, - "loss": 0.3627, + "learning_rate": 2.6762829014088906e-05, + "loss": 0.3507, "step": 35580 }, { "epoch": 1.66, - "learning_rate": 1.67573015798603e-05, - "loss": 0.2957, + "learning_rate": 2.6762360944252885e-05, + "loss": 0.2021, "step": 35585 }, { "epoch": 1.66, - "learning_rate": 1.675683277858516e-05, - "loss": 0.0471, + "learning_rate": 2.6761892874416862e-05, + "loss": 0.0546, "step": 35590 }, { "epoch": 1.66, - "learning_rate": 1.675636397731002e-05, - "loss": 0.0628, + "learning_rate": 2.6761424804580842e-05, + "loss": 0.0881, "step": 35595 }, { "epoch": 1.66, - "learning_rate": 1.675589517603488e-05, - "loss": 0.0737, + "learning_rate": 2.6760956734744825e-05, + "loss": 0.0544, "step": 35600 }, { "epoch": 1.66, - "learning_rate": 1.675542637475974e-05, - "loss": 0.115, + "learning_rate": 2.6760488664908805e-05, + "loss": 0.1474, "step": 35605 }, { "epoch": 1.66, - "learning_rate": 1.67549575734846e-05, - "loss": 0.12, + "learning_rate": 2.6760020595072785e-05, + "loss": 0.1081, "step": 35610 }, { "epoch": 1.66, - "learning_rate": 1.675448877220946e-05, - "loss": 0.1704, + "learning_rate": 2.6759552525236764e-05, + "loss": 0.1159, "step": 35615 }, { "epoch": 1.66, - "learning_rate": 1.6754019970934322e-05, - "loss": 0.1348, + "learning_rate": 2.6759084455400748e-05, + "loss": 0.0846, "step": 35620 }, { "epoch": 1.66, - "learning_rate": 1.6753551169659182e-05, - "loss": 0.1868, + "learning_rate": 2.6758616385564727e-05, + "loss": 0.2357, "step": 35625 }, { "epoch": 1.66, - "learning_rate": 1.6753082368384042e-05, - "loss": 0.4204, + "learning_rate": 2.6758148315728707e-05, + "loss": 0.3101, "step": 35630 }, { "epoch": 1.66, - "learning_rate": 1.6752613567108906e-05, - "loss": 0.3445, + "learning_rate": 2.675768024589269e-05, + "loss": 0.2465, "step": 35635 }, { "epoch": 1.66, - "learning_rate": 1.6752144765833766e-05, - "loss": 0.0448, + "learning_rate": 2.675721217605667e-05, + "loss": 0.0758, "step": 35640 }, { "epoch": 1.66, - "learning_rate": 1.6751675964558626e-05, - "loss": 0.0615, + "learning_rate": 2.675674410622065e-05, + "loss": 0.0579, "step": 35645 }, { "epoch": 1.66, - "learning_rate": 1.6751207163283485e-05, - "loss": 0.0439, + "learning_rate": 2.6756276036384626e-05, + "loss": 0.0876, "step": 35650 }, { "epoch": 1.66, - "learning_rate": 1.6750738362008345e-05, - "loss": 0.1228, + "learning_rate": 2.675580796654861e-05, + "loss": 0.082, "step": 35655 }, { "epoch": 1.66, - "learning_rate": 1.6750269560733205e-05, - "loss": 0.1486, + "learning_rate": 2.675533989671259e-05, + "loss": 0.1288, "step": 35660 }, { "epoch": 1.66, - "learning_rate": 1.674980075945807e-05, - "loss": 0.2386, + "learning_rate": 2.675487182687657e-05, + "loss": 0.1343, "step": 35665 }, { "epoch": 1.66, - "learning_rate": 1.674933195818293e-05, - "loss": 0.1217, + "learning_rate": 2.675440375704055e-05, + "loss": 0.1094, "step": 35670 }, { "epoch": 1.66, - "learning_rate": 1.674886315690779e-05, - "loss": 0.2752, + "learning_rate": 2.6753935687204532e-05, + "loss": 0.1585, "step": 35675 }, { "epoch": 1.66, - "learning_rate": 1.674839435563265e-05, - "loss": 0.4395, + "learning_rate": 2.6753467617368512e-05, + "loss": 0.3234, "step": 35680 }, { "epoch": 1.67, - "learning_rate": 1.674792555435751e-05, - "loss": 0.3155, + "learning_rate": 2.6752999547532492e-05, + "loss": 0.2626, "step": 35685 }, { "epoch": 1.67, - "learning_rate": 1.674745675308237e-05, - "loss": 0.0589, + "learning_rate": 2.6752531477696472e-05, + "loss": 0.0925, "step": 35690 }, { "epoch": 1.67, - "learning_rate": 1.6746987951807228e-05, - "loss": 0.0838, + "learning_rate": 2.6752063407860455e-05, + "loss": 0.0814, "step": 35695 }, { "epoch": 1.67, - "learning_rate": 1.674651915053209e-05, - "loss": 0.0867, + "learning_rate": 2.6751595338024435e-05, + "loss": 0.0954, "step": 35700 }, { "epoch": 1.67, - "learning_rate": 1.674605034925695e-05, - "loss": 0.1114, + "learning_rate": 2.6751127268188415e-05, + "loss": 0.0994, "step": 35705 }, { "epoch": 1.67, - "learning_rate": 1.674558154798181e-05, - "loss": 0.1468, + "learning_rate": 2.6750659198352398e-05, + "loss": 0.0767, "step": 35710 }, { "epoch": 1.67, - "learning_rate": 1.6745112746706675e-05, - "loss": 0.1577, + "learning_rate": 2.6750191128516374e-05, + "loss": 0.138, "step": 35715 }, { "epoch": 1.67, - "learning_rate": 1.6744643945431535e-05, - "loss": 0.1506, + "learning_rate": 2.6749723058680354e-05, + "loss": 0.1812, "step": 35720 }, { "epoch": 1.67, - "learning_rate": 1.6744175144156395e-05, - "loss": 0.2544, + "learning_rate": 2.6749254988844334e-05, + "loss": 0.2075, "step": 35725 }, { "epoch": 1.67, - "learning_rate": 1.6743706342881255e-05, - "loss": 0.2944, + "learning_rate": 2.6748786919008317e-05, + "loss": 0.2661, "step": 35730 }, { "epoch": 1.67, - "learning_rate": 1.6743237541606115e-05, - "loss": 0.2873, + "learning_rate": 2.6748318849172297e-05, + "loss": 0.2096, "step": 35735 }, { "epoch": 1.67, - "learning_rate": 1.6742768740330974e-05, - "loss": 0.0319, + "learning_rate": 2.6747850779336277e-05, + "loss": 0.046, "step": 35740 }, { "epoch": 1.67, - "learning_rate": 1.6742299939055834e-05, - "loss": 0.0863, + "learning_rate": 2.6747382709500257e-05, + "loss": 0.091, "step": 35745 }, { "epoch": 1.67, - "learning_rate": 1.6741831137780694e-05, - "loss": 0.1371, + "learning_rate": 2.674691463966424e-05, + "loss": 0.0816, "step": 35750 }, { "epoch": 1.67, - "learning_rate": 1.6741362336505558e-05, - "loss": 0.0674, + "learning_rate": 2.674644656982822e-05, + "loss": 0.0655, "step": 35755 }, { "epoch": 1.67, - "learning_rate": 1.6740893535230418e-05, - "loss": 0.1225, + "learning_rate": 2.67459784999922e-05, + "loss": 0.1439, "step": 35760 }, { "epoch": 1.67, - "learning_rate": 1.6740424733955278e-05, - "loss": 0.2157, + "learning_rate": 2.6745510430156183e-05, + "loss": 0.1144, "step": 35765 }, { "epoch": 1.67, - "learning_rate": 1.6739955932680137e-05, - "loss": 0.1537, + "learning_rate": 2.6745042360320162e-05, + "loss": 0.1321, "step": 35770 }, { "epoch": 1.67, - "learning_rate": 1.6739487131404997e-05, - "loss": 0.1671, + "learning_rate": 2.6744574290484142e-05, + "loss": 0.2693, "step": 35775 }, { "epoch": 1.67, - "learning_rate": 1.673901833012986e-05, - "loss": 0.2707, + "learning_rate": 2.674410622064812e-05, + "loss": 0.2617, "step": 35780 }, { "epoch": 1.67, - "learning_rate": 1.673854952885472e-05, - "loss": 0.3311, + "learning_rate": 2.6743638150812102e-05, + "loss": 0.2595, "step": 35785 }, { "epoch": 1.67, - "learning_rate": 1.673808072757958e-05, - "loss": 0.0228, + "learning_rate": 2.6743170080976082e-05, + "loss": 0.0289, "step": 35790 }, { "epoch": 1.67, - "learning_rate": 1.673761192630444e-05, - "loss": 0.1209, + "learning_rate": 2.674270201114006e-05, + "loss": 0.0667, "step": 35795 }, { "epoch": 1.67, - "learning_rate": 1.67371431250293e-05, - "loss": 0.0254, + "learning_rate": 2.674223394130404e-05, + "loss": 0.1146, "step": 35800 }, { "epoch": 1.67, - "learning_rate": 1.6736674323754164e-05, - "loss": 0.1099, + "learning_rate": 2.6741765871468025e-05, + "loss": 0.1248, "step": 35805 }, { "epoch": 1.67, - "learning_rate": 1.6736205522479024e-05, - "loss": 0.2051, + "learning_rate": 2.6741297801632004e-05, + "loss": 0.0679, "step": 35810 }, { "epoch": 1.67, - "learning_rate": 1.6735736721203884e-05, - "loss": 0.2122, + "learning_rate": 2.6740829731795984e-05, + "loss": 0.1903, "step": 35815 }, { "epoch": 1.67, - "learning_rate": 1.6735267919928744e-05, - "loss": 0.2434, + "learning_rate": 2.6740361661959967e-05, + "loss": 0.2129, "step": 35820 }, { "epoch": 1.67, - "learning_rate": 1.6734799118653603e-05, - "loss": 0.2191, + "learning_rate": 2.6739893592123947e-05, + "loss": 0.1297, "step": 35825 }, { "epoch": 1.67, - "learning_rate": 1.6734330317378463e-05, - "loss": 0.3036, + "learning_rate": 2.6739425522287927e-05, + "loss": 0.2295, "step": 35830 }, { "epoch": 1.67, - "learning_rate": 1.6733861516103323e-05, - "loss": 0.2985, + "learning_rate": 2.6738957452451907e-05, + "loss": 0.2495, "step": 35835 }, { "epoch": 1.67, - "learning_rate": 1.6733392714828183e-05, - "loss": 0.0408, + "learning_rate": 2.6738489382615887e-05, + "loss": 0.0683, "step": 35840 }, { "epoch": 1.67, - "learning_rate": 1.6732923913553047e-05, - "loss": 0.0536, + "learning_rate": 2.6738021312779866e-05, + "loss": 0.0677, "step": 35845 }, { "epoch": 1.67, - "learning_rate": 1.6732455112277907e-05, - "loss": 0.0663, + "learning_rate": 2.6737553242943846e-05, + "loss": 0.0445, "step": 35850 }, { "epoch": 1.67, - "learning_rate": 1.6731986311002766e-05, - "loss": 0.1087, + "learning_rate": 2.6737085173107826e-05, + "loss": 0.0884, "step": 35855 }, { "epoch": 1.67, - "learning_rate": 1.673151750972763e-05, - "loss": 0.1669, + "learning_rate": 2.673661710327181e-05, + "loss": 0.1165, "step": 35860 }, { "epoch": 1.67, - "learning_rate": 1.673104870845249e-05, - "loss": 0.1232, + "learning_rate": 2.673614903343579e-05, + "loss": 0.2289, "step": 35865 }, { "epoch": 1.67, - "learning_rate": 1.673057990717735e-05, - "loss": 0.154, + "learning_rate": 2.673568096359977e-05, + "loss": 0.148, "step": 35870 }, { "epoch": 1.67, - "learning_rate": 1.673011110590221e-05, - "loss": 0.2032, + "learning_rate": 2.6735212893763752e-05, + "loss": 0.3205, "step": 35875 }, { "epoch": 1.67, - "learning_rate": 1.672964230462707e-05, - "loss": 0.3006, + "learning_rate": 2.6734744823927732e-05, + "loss": 0.3924, "step": 35880 }, { "epoch": 1.67, - "learning_rate": 1.672917350335193e-05, - "loss": 0.2368, + "learning_rate": 2.6734276754091712e-05, + "loss": 0.2781, "step": 35885 }, { "epoch": 1.67, - "learning_rate": 1.672870470207679e-05, - "loss": 0.0608, + "learning_rate": 2.673380868425569e-05, + "loss": 0.0914, "step": 35890 }, { "epoch": 1.67, - "learning_rate": 1.6728235900801653e-05, - "loss": 0.0982, + "learning_rate": 2.6733340614419675e-05, + "loss": 0.1206, "step": 35895 }, { "epoch": 1.68, - "learning_rate": 1.6727767099526513e-05, - "loss": 0.082, + "learning_rate": 2.6732872544583655e-05, + "loss": 0.107, "step": 35900 }, { "epoch": 1.68, - "learning_rate": 1.6727298298251373e-05, - "loss": 0.1158, + "learning_rate": 2.673240447474763e-05, + "loss": 0.196, "step": 35905 }, { "epoch": 1.68, - "learning_rate": 1.6726829496976233e-05, - "loss": 0.0961, + "learning_rate": 2.673193640491161e-05, + "loss": 0.0616, "step": 35910 }, { "epoch": 1.68, - "learning_rate": 1.6726360695701092e-05, - "loss": 0.157, + "learning_rate": 2.6731468335075594e-05, + "loss": 0.1961, "step": 35915 }, { "epoch": 1.68, - "learning_rate": 1.6725891894425952e-05, - "loss": 0.181, + "learning_rate": 2.6731000265239574e-05, + "loss": 0.2083, "step": 35920 }, { "epoch": 1.68, - "learning_rate": 1.6725423093150816e-05, - "loss": 0.2806, + "learning_rate": 2.6730532195403554e-05, + "loss": 0.2404, "step": 35925 }, { "epoch": 1.68, - "learning_rate": 1.6724954291875676e-05, - "loss": 0.3032, + "learning_rate": 2.6730064125567534e-05, + "loss": 0.4073, "step": 35930 }, { "epoch": 1.68, - "learning_rate": 1.6724485490600536e-05, - "loss": 0.3249, + "learning_rate": 2.6729596055731517e-05, + "loss": 0.2989, "step": 35935 }, { "epoch": 1.68, - "learning_rate": 1.6724016689325396e-05, - "loss": 0.055, + "learning_rate": 2.6729127985895497e-05, + "loss": 0.0604, "step": 35940 }, { "epoch": 1.68, - "learning_rate": 1.672354788805026e-05, - "loss": 0.0191, + "learning_rate": 2.6728659916059476e-05, + "loss": 0.1098, "step": 35945 }, { "epoch": 1.68, - "learning_rate": 1.672307908677512e-05, - "loss": 0.0989, + "learning_rate": 2.672819184622346e-05, + "loss": 0.1052, "step": 35950 }, { "epoch": 1.68, - "learning_rate": 1.672261028549998e-05, - "loss": 0.0961, + "learning_rate": 2.672772377638744e-05, + "loss": 0.0487, "step": 35955 }, { "epoch": 1.68, - "learning_rate": 1.672214148422484e-05, - "loss": 0.1469, + "learning_rate": 2.672725570655142e-05, + "loss": 0.0529, "step": 35960 }, { "epoch": 1.68, - "learning_rate": 1.67216726829497e-05, - "loss": 0.1081, + "learning_rate": 2.67267876367154e-05, + "loss": 0.1622, "step": 35965 }, { "epoch": 1.68, - "learning_rate": 1.672120388167456e-05, - "loss": 0.1817, + "learning_rate": 2.672631956687938e-05, + "loss": 0.1384, "step": 35970 }, { "epoch": 1.68, - "learning_rate": 1.672073508039942e-05, - "loss": 0.2109, + "learning_rate": 2.672585149704336e-05, + "loss": 0.2306, "step": 35975 }, { "epoch": 1.68, - "learning_rate": 1.672026627912428e-05, - "loss": 0.4137, + "learning_rate": 2.672538342720734e-05, + "loss": 0.2583, "step": 35980 }, { "epoch": 1.68, - "learning_rate": 1.671979747784914e-05, - "loss": 0.2394, + "learning_rate": 2.672491535737132e-05, + "loss": 0.205, "step": 35985 }, { "epoch": 1.68, - "learning_rate": 1.6719328676574e-05, - "loss": 0.0622, + "learning_rate": 2.67244472875353e-05, + "loss": 0.0341, "step": 35990 }, { "epoch": 1.68, - "learning_rate": 1.671885987529886e-05, - "loss": 0.0821, + "learning_rate": 2.672397921769928e-05, + "loss": 0.0919, "step": 35995 }, { "epoch": 1.68, - "learning_rate": 1.6718391074023725e-05, - "loss": 0.0751, + "learning_rate": 2.672351114786326e-05, + "loss": 0.076, "step": 36000 }, { "epoch": 1.68, - "learning_rate": 1.6717922272748585e-05, - "loss": 0.1294, + "learning_rate": 2.6723043078027244e-05, + "loss": 0.0696, "step": 36005 }, { "epoch": 1.68, - "learning_rate": 1.6717453471473445e-05, - "loss": 0.1102, + "learning_rate": 2.6722575008191224e-05, + "loss": 0.0602, "step": 36010 }, { "epoch": 1.68, - "learning_rate": 1.6716984670198305e-05, - "loss": 0.0996, + "learning_rate": 2.6722106938355204e-05, + "loss": 0.1402, "step": 36015 }, { "epoch": 1.68, - "learning_rate": 1.6716515868923165e-05, - "loss": 0.2002, + "learning_rate": 2.6721638868519184e-05, + "loss": 0.1106, "step": 36020 }, { "epoch": 1.68, - "learning_rate": 1.6716047067648025e-05, - "loss": 0.2179, + "learning_rate": 2.6721170798683167e-05, + "loss": 0.3324, "step": 36025 }, { "epoch": 1.68, - "learning_rate": 1.6715578266372884e-05, - "loss": 0.3336, + "learning_rate": 2.6720702728847143e-05, + "loss": 0.2599, "step": 36030 }, { "epoch": 1.68, - "learning_rate": 1.6715109465097748e-05, - "loss": 0.264, + "learning_rate": 2.6720234659011123e-05, + "loss": 0.3429, "step": 36035 }, { "epoch": 1.68, - "learning_rate": 1.6714640663822608e-05, - "loss": 0.0501, + "learning_rate": 2.6719766589175103e-05, + "loss": 0.0509, "step": 36040 }, { "epoch": 1.68, - "learning_rate": 1.6714171862547468e-05, - "loss": 0.0216, + "learning_rate": 2.6719298519339086e-05, + "loss": 0.0872, "step": 36045 }, { "epoch": 1.68, - "learning_rate": 1.6713703061272328e-05, - "loss": 0.0658, + "learning_rate": 2.6718830449503066e-05, + "loss": 0.063, "step": 36050 }, { "epoch": 1.68, - "learning_rate": 1.6713234259997188e-05, - "loss": 0.1062, + "learning_rate": 2.6718362379667046e-05, + "loss": 0.0953, "step": 36055 }, { "epoch": 1.68, - "learning_rate": 1.6712765458722047e-05, - "loss": 0.1623, + "learning_rate": 2.671789430983103e-05, + "loss": 0.1015, "step": 36060 }, { "epoch": 1.68, - "learning_rate": 1.671229665744691e-05, - "loss": 0.1972, + "learning_rate": 2.671742623999501e-05, + "loss": 0.0843, "step": 36065 }, { "epoch": 1.68, - "learning_rate": 1.671182785617177e-05, - "loss": 0.1783, + "learning_rate": 2.671695817015899e-05, + "loss": 0.1827, "step": 36070 }, { "epoch": 1.68, - "learning_rate": 1.671135905489663e-05, - "loss": 0.2079, + "learning_rate": 2.671649010032297e-05, + "loss": 0.1748, "step": 36075 }, { "epoch": 1.68, - "learning_rate": 1.6710890253621494e-05, - "loss": 0.4228, + "learning_rate": 2.6716022030486952e-05, + "loss": 0.3294, "step": 36080 }, { "epoch": 1.68, - "learning_rate": 1.6710421452346354e-05, - "loss": 0.2925, + "learning_rate": 2.671555396065093e-05, + "loss": 0.4892, "step": 36085 }, { "epoch": 1.68, - "learning_rate": 1.6709952651071214e-05, - "loss": 0.0565, + "learning_rate": 2.671508589081491e-05, + "loss": 0.0427, "step": 36090 }, { "epoch": 1.68, - "learning_rate": 1.6709483849796074e-05, - "loss": 0.0666, + "learning_rate": 2.6714617820978888e-05, + "loss": 0.1603, "step": 36095 }, { "epoch": 1.68, - "learning_rate": 1.6709015048520934e-05, - "loss": 0.5187, + "learning_rate": 2.671414975114287e-05, + "loss": 0.0572, "step": 36100 }, { "epoch": 1.68, - "learning_rate": 1.6708546247245794e-05, - "loss": 0.1156, + "learning_rate": 2.671368168130685e-05, + "loss": 0.1353, "step": 36105 }, { "epoch": 1.68, - "learning_rate": 1.6708077445970654e-05, - "loss": 0.1737, + "learning_rate": 2.671321361147083e-05, + "loss": 0.0941, "step": 36110 }, { "epoch": 1.69, - "learning_rate": 1.6707608644695514e-05, - "loss": 0.2334, + "learning_rate": 2.671274554163481e-05, + "loss": 0.1012, "step": 36115 }, { "epoch": 1.69, - "learning_rate": 1.6707139843420373e-05, - "loss": 0.1251, + "learning_rate": 2.6712277471798794e-05, + "loss": 0.2129, "step": 36120 }, { "epoch": 1.69, - "learning_rate": 1.6706671042145233e-05, - "loss": 0.2372, + "learning_rate": 2.6711809401962774e-05, + "loss": 0.1594, "step": 36125 }, { "epoch": 1.69, - "learning_rate": 1.6706202240870097e-05, - "loss": 0.3235, + "learning_rate": 2.6711341332126753e-05, + "loss": 0.2319, "step": 36130 }, { "epoch": 1.69, - "learning_rate": 1.6705733439594957e-05, - "loss": 0.1987, + "learning_rate": 2.6710873262290737e-05, + "loss": 0.2331, "step": 36135 }, { "epoch": 1.69, - "learning_rate": 1.6705264638319817e-05, - "loss": 0.0099, + "learning_rate": 2.6710405192454716e-05, + "loss": 0.0648, "step": 36140 }, { "epoch": 1.69, - "learning_rate": 1.670479583704468e-05, - "loss": 0.0683, + "learning_rate": 2.6709937122618696e-05, + "loss": 0.06, "step": 36145 }, { "epoch": 1.69, - "learning_rate": 1.670432703576954e-05, - "loss": 0.0952, + "learning_rate": 2.6709469052782676e-05, + "loss": 0.1357, "step": 36150 }, { "epoch": 1.69, - "learning_rate": 1.67038582344944e-05, - "loss": 0.0887, + "learning_rate": 2.6709000982946656e-05, + "loss": 0.1054, "step": 36155 }, { "epoch": 1.69, - "learning_rate": 1.670338943321926e-05, - "loss": 0.0684, + "learning_rate": 2.6708532913110636e-05, + "loss": 0.0946, "step": 36160 }, { "epoch": 1.69, - "learning_rate": 1.670292063194412e-05, - "loss": 0.1626, + "learning_rate": 2.6708064843274615e-05, + "loss": 0.1108, "step": 36165 }, { "epoch": 1.69, - "learning_rate": 1.670245183066898e-05, - "loss": 0.2195, + "learning_rate": 2.6707596773438595e-05, + "loss": 0.1554, "step": 36170 }, { "epoch": 1.69, - "learning_rate": 1.6701983029393843e-05, - "loss": 0.2328, + "learning_rate": 2.670712870360258e-05, + "loss": 0.2176, "step": 36175 }, { "epoch": 1.69, - "learning_rate": 1.6701514228118703e-05, - "loss": 0.3627, + "learning_rate": 2.670666063376656e-05, + "loss": 0.3576, "step": 36180 }, { "epoch": 1.69, - "learning_rate": 1.6701045426843563e-05, - "loss": 0.2381, + "learning_rate": 2.6706192563930538e-05, + "loss": 0.224, "step": 36185 }, { "epoch": 1.69, - "learning_rate": 1.6700576625568423e-05, - "loss": 0.0505, + "learning_rate": 2.670572449409452e-05, + "loss": 0.0446, "step": 36190 }, { "epoch": 1.69, - "learning_rate": 1.6700107824293283e-05, - "loss": 0.0658, + "learning_rate": 2.67052564242585e-05, + "loss": 0.0795, "step": 36195 }, { "epoch": 1.69, - "learning_rate": 1.6699639023018143e-05, - "loss": 0.0611, + "learning_rate": 2.670478835442248e-05, + "loss": 0.138, "step": 36200 }, { "epoch": 1.69, - "learning_rate": 1.6699170221743002e-05, - "loss": 0.0871, + "learning_rate": 2.670432028458646e-05, + "loss": 0.0851, "step": 36205 }, { "epoch": 1.69, - "learning_rate": 1.6698701420467866e-05, - "loss": 0.1666, + "learning_rate": 2.6703852214750444e-05, + "loss": 0.1084, "step": 36210 }, { "epoch": 1.69, - "learning_rate": 1.6698232619192726e-05, - "loss": 0.0819, + "learning_rate": 2.6703384144914424e-05, + "loss": 0.0979, "step": 36215 }, { "epoch": 1.69, - "learning_rate": 1.6697763817917586e-05, - "loss": 0.1677, + "learning_rate": 2.67029160750784e-05, + "loss": 0.2009, "step": 36220 }, { "epoch": 1.69, - "learning_rate": 1.669729501664245e-05, - "loss": 0.2659, + "learning_rate": 2.670244800524238e-05, + "loss": 0.1659, "step": 36225 }, { "epoch": 1.69, - "learning_rate": 1.669682621536731e-05, - "loss": 0.2803, + "learning_rate": 2.6701979935406363e-05, + "loss": 0.3288, "step": 36230 }, { "epoch": 1.69, - "learning_rate": 1.669635741409217e-05, - "loss": 0.2431, + "learning_rate": 2.6701511865570343e-05, + "loss": 0.2036, "step": 36235 }, { "epoch": 1.69, - "learning_rate": 1.669588861281703e-05, - "loss": 0.04, + "learning_rate": 2.6701043795734323e-05, + "loss": 0.0384, "step": 36240 }, { "epoch": 1.69, - "learning_rate": 1.669541981154189e-05, - "loss": 0.0686, + "learning_rate": 2.6700575725898306e-05, + "loss": 0.0857, "step": 36245 }, { "epoch": 1.69, - "learning_rate": 1.669495101026675e-05, - "loss": 0.0622, + "learning_rate": 2.6700107656062286e-05, + "loss": 0.1182, "step": 36250 }, { "epoch": 1.69, - "learning_rate": 1.669448220899161e-05, - "loss": 0.0969, + "learning_rate": 2.6699639586226266e-05, + "loss": 0.0459, "step": 36255 }, { "epoch": 1.69, - "learning_rate": 1.669401340771647e-05, - "loss": 0.0752, + "learning_rate": 2.6699171516390246e-05, + "loss": 0.0812, "step": 36260 }, { "epoch": 1.69, - "learning_rate": 1.669354460644133e-05, - "loss": 0.0922, + "learning_rate": 2.669870344655423e-05, + "loss": 0.1471, "step": 36265 }, { "epoch": 1.69, - "learning_rate": 1.6693075805166192e-05, - "loss": 0.1751, + "learning_rate": 2.669823537671821e-05, + "loss": 0.1491, "step": 36270 }, { "epoch": 1.69, - "learning_rate": 1.6692607003891052e-05, - "loss": 0.2008, + "learning_rate": 2.669776730688219e-05, + "loss": 0.1411, "step": 36275 }, { "epoch": 1.69, - "learning_rate": 1.669213820261591e-05, - "loss": 0.38, + "learning_rate": 2.6697299237046168e-05, + "loss": 0.2339, "step": 36280 }, { "epoch": 1.69, - "learning_rate": 1.669166940134077e-05, - "loss": 0.3769, + "learning_rate": 2.6696831167210148e-05, + "loss": 0.2651, "step": 36285 }, { "epoch": 1.69, - "learning_rate": 1.6691200600065635e-05, - "loss": 0.0415, + "learning_rate": 2.6696363097374128e-05, + "loss": 0.0353, "step": 36290 }, { "epoch": 1.69, - "learning_rate": 1.6690731798790495e-05, - "loss": 0.057, + "learning_rate": 2.6695895027538108e-05, + "loss": 0.0558, "step": 36295 }, { "epoch": 1.69, - "learning_rate": 1.6690262997515355e-05, - "loss": 0.073, + "learning_rate": 2.6695426957702087e-05, + "loss": 0.063, "step": 36300 }, { "epoch": 1.69, - "learning_rate": 1.6689794196240215e-05, - "loss": 0.0942, + "learning_rate": 2.669495888786607e-05, + "loss": 0.1057, "step": 36305 }, { "epoch": 1.69, - "learning_rate": 1.6689325394965075e-05, - "loss": 0.1002, + "learning_rate": 2.669449081803005e-05, + "loss": 0.0718, "step": 36310 }, { "epoch": 1.69, - "learning_rate": 1.6688856593689938e-05, - "loss": 0.1361, + "learning_rate": 2.669402274819403e-05, + "loss": 0.1395, "step": 36315 }, { "epoch": 1.69, - "learning_rate": 1.6688387792414798e-05, - "loss": 0.142, + "learning_rate": 2.6693554678358014e-05, + "loss": 0.2643, "step": 36320 }, { "epoch": 1.69, - "learning_rate": 1.6687918991139658e-05, - "loss": 0.319, + "learning_rate": 2.6693086608521993e-05, + "loss": 0.2175, "step": 36325 }, { "epoch": 1.7, - "learning_rate": 1.6687450189864518e-05, - "loss": 0.3598, + "learning_rate": 2.6692618538685973e-05, + "loss": 0.3338, "step": 36330 }, { "epoch": 1.7, - "learning_rate": 1.6686981388589378e-05, - "loss": 0.2306, + "learning_rate": 2.6692150468849953e-05, + "loss": 0.1787, "step": 36335 }, { "epoch": 1.7, - "learning_rate": 1.6686512587314238e-05, - "loss": 0.0275, + "learning_rate": 2.6691682399013936e-05, + "loss": 0.0506, "step": 36340 }, { "epoch": 1.7, - "learning_rate": 1.6686043786039098e-05, - "loss": 0.0459, + "learning_rate": 2.6691214329177913e-05, + "loss": 0.0777, "step": 36345 }, { "epoch": 1.7, - "learning_rate": 1.668557498476396e-05, - "loss": 0.0861, + "learning_rate": 2.6690746259341892e-05, + "loss": 0.0732, "step": 36350 }, { "epoch": 1.7, - "learning_rate": 1.668510618348882e-05, - "loss": 0.113, + "learning_rate": 2.6690278189505872e-05, + "loss": 0.0563, "step": 36355 }, { "epoch": 1.7, - "learning_rate": 1.668463738221368e-05, - "loss": 0.127, + "learning_rate": 2.6689810119669855e-05, + "loss": 0.0894, "step": 36360 }, { "epoch": 1.7, - "learning_rate": 1.668416858093854e-05, - "loss": 0.2109, + "learning_rate": 2.6689342049833835e-05, + "loss": 0.1885, "step": 36365 }, { "epoch": 1.7, - "learning_rate": 1.6683699779663404e-05, - "loss": 0.1536, + "learning_rate": 2.6688873979997815e-05, + "loss": 0.1141, "step": 36370 }, { "epoch": 1.7, - "learning_rate": 1.6683230978388264e-05, - "loss": 0.2749, + "learning_rate": 2.66884059101618e-05, + "loss": 0.1828, "step": 36375 }, { "epoch": 1.7, - "learning_rate": 1.6682762177113124e-05, - "loss": 0.4515, + "learning_rate": 2.6687937840325778e-05, + "loss": 0.3184, "step": 36380 }, { "epoch": 1.7, - "learning_rate": 1.6682293375837984e-05, - "loss": 0.2774, + "learning_rate": 2.6687469770489758e-05, + "loss": 0.4257, "step": 36385 }, { "epoch": 1.7, - "learning_rate": 1.6681824574562844e-05, - "loss": 0.0556, + "learning_rate": 2.6687001700653738e-05, + "loss": 0.0557, "step": 36390 }, { "epoch": 1.7, - "learning_rate": 1.6681355773287704e-05, - "loss": 0.0561, + "learning_rate": 2.668653363081772e-05, + "loss": 0.1363, "step": 36395 }, { "epoch": 1.7, - "learning_rate": 1.6680886972012564e-05, - "loss": 0.0449, + "learning_rate": 2.66860655609817e-05, + "loss": 0.0601, "step": 36400 }, { "epoch": 1.7, - "learning_rate": 1.6680418170737427e-05, - "loss": 0.0686, + "learning_rate": 2.668559749114568e-05, + "loss": 0.0535, "step": 36405 }, { "epoch": 1.7, - "learning_rate": 1.6679949369462287e-05, - "loss": 0.1487, + "learning_rate": 2.6685129421309657e-05, + "loss": 0.1276, "step": 36410 }, { "epoch": 1.7, - "learning_rate": 1.6679480568187147e-05, - "loss": 0.1531, + "learning_rate": 2.668466135147364e-05, + "loss": 0.0647, "step": 36415 }, { "epoch": 1.7, - "learning_rate": 1.6679011766912007e-05, - "loss": 0.1708, + "learning_rate": 2.668419328163762e-05, + "loss": 0.1669, "step": 36420 }, { "epoch": 1.7, - "learning_rate": 1.6678542965636867e-05, - "loss": 0.3106, + "learning_rate": 2.66837252118016e-05, + "loss": 0.2521, "step": 36425 }, { "epoch": 1.7, - "learning_rate": 1.667807416436173e-05, - "loss": 0.2997, + "learning_rate": 2.6683257141965583e-05, + "loss": 0.2862, "step": 36430 }, { "epoch": 1.7, - "learning_rate": 1.667760536308659e-05, - "loss": 0.3525, + "learning_rate": 2.6682789072129563e-05, + "loss": 0.2764, "step": 36435 }, { "epoch": 1.7, - "learning_rate": 1.667713656181145e-05, - "loss": 0.0381, + "learning_rate": 2.6682321002293543e-05, + "loss": 0.0828, "step": 36440 }, { "epoch": 1.7, - "learning_rate": 1.667666776053631e-05, - "loss": 0.058, + "learning_rate": 2.6681852932457523e-05, + "loss": 0.0915, "step": 36445 }, { "epoch": 1.7, - "learning_rate": 1.667619895926117e-05, - "loss": 0.0493, + "learning_rate": 2.6681384862621506e-05, + "loss": 0.0906, "step": 36450 }, { "epoch": 1.7, - "learning_rate": 1.6675730157986033e-05, - "loss": 0.0938, + "learning_rate": 2.6680916792785486e-05, + "loss": 0.0871, "step": 36455 }, { "epoch": 1.7, - "learning_rate": 1.6675261356710893e-05, - "loss": 0.0731, + "learning_rate": 2.6680448722949465e-05, + "loss": 0.1012, "step": 36460 }, { "epoch": 1.7, - "learning_rate": 1.6674792555435753e-05, - "loss": 0.1313, + "learning_rate": 2.6679980653113445e-05, + "loss": 0.1285, "step": 36465 }, { "epoch": 1.7, - "learning_rate": 1.6674323754160613e-05, - "loss": 0.1673, + "learning_rate": 2.667951258327743e-05, + "loss": 0.1791, "step": 36470 }, { "epoch": 1.7, - "learning_rate": 1.6673854952885473e-05, - "loss": 0.2234, + "learning_rate": 2.6679044513441405e-05, + "loss": 0.178, "step": 36475 }, { "epoch": 1.7, - "learning_rate": 1.6673386151610333e-05, - "loss": 0.3119, + "learning_rate": 2.6678576443605385e-05, + "loss": 0.4208, "step": 36480 }, { "epoch": 1.7, - "learning_rate": 1.6672917350335193e-05, - "loss": 0.3143, + "learning_rate": 2.6678108373769364e-05, + "loss": 0.2376, "step": 36485 }, { "epoch": 1.7, - "learning_rate": 1.6672448549060053e-05, - "loss": 0.061, + "learning_rate": 2.6677640303933348e-05, + "loss": 0.0551, "step": 36490 }, { "epoch": 1.7, - "learning_rate": 1.6671979747784916e-05, - "loss": 0.0817, + "learning_rate": 2.6677172234097327e-05, + "loss": 0.04, "step": 36495 }, { "epoch": 1.7, - "learning_rate": 1.6671510946509776e-05, - "loss": 0.0985, + "learning_rate": 2.6676704164261307e-05, + "loss": 0.0675, "step": 36500 }, { "epoch": 1.7, - "learning_rate": 1.6671042145234636e-05, - "loss": 0.055, + "learning_rate": 2.667623609442529e-05, + "loss": 0.0488, "step": 36505 }, { "epoch": 1.7, - "learning_rate": 1.66705733439595e-05, - "loss": 0.0985, + "learning_rate": 2.667576802458927e-05, + "loss": 0.094, "step": 36510 }, { "epoch": 1.7, - "learning_rate": 1.667010454268436e-05, - "loss": 0.1559, + "learning_rate": 2.667529995475325e-05, + "loss": 0.1092, "step": 36515 }, { "epoch": 1.7, - "learning_rate": 1.666963574140922e-05, - "loss": 0.1947, + "learning_rate": 2.667483188491723e-05, + "loss": 0.1357, "step": 36520 }, { "epoch": 1.7, - "learning_rate": 1.666916694013408e-05, - "loss": 0.2521, + "learning_rate": 2.6674363815081213e-05, + "loss": 0.0793, "step": 36525 }, { "epoch": 1.7, - "learning_rate": 1.666869813885894e-05, - "loss": 0.3802, + "learning_rate": 2.6673895745245193e-05, + "loss": 0.3235, "step": 36530 }, { "epoch": 1.7, - "learning_rate": 1.66682293375838e-05, - "loss": 0.4049, + "learning_rate": 2.667342767540917e-05, + "loss": 0.2925, "step": 36535 }, { "epoch": 1.71, - "learning_rate": 1.666776053630866e-05, - "loss": 0.018, + "learning_rate": 2.667295960557315e-05, + "loss": 0.1054, "step": 36540 }, { "epoch": 1.71, - "learning_rate": 1.6667291735033522e-05, - "loss": 0.0357, + "learning_rate": 2.6672491535737132e-05, + "loss": 0.0482, "step": 36545 }, { "epoch": 1.71, - "learning_rate": 1.6666822933758382e-05, - "loss": 0.112, + "learning_rate": 2.6672023465901112e-05, + "loss": 0.0982, "step": 36550 }, { "epoch": 1.71, - "learning_rate": 1.6666354132483242e-05, - "loss": 0.0682, + "learning_rate": 2.6671555396065092e-05, + "loss": 0.1188, "step": 36555 }, { "epoch": 1.71, - "learning_rate": 1.6665885331208102e-05, - "loss": 0.1314, + "learning_rate": 2.6671087326229075e-05, + "loss": 0.0915, "step": 36560 }, { "epoch": 1.71, - "learning_rate": 1.6665416529932962e-05, - "loss": 0.1949, + "learning_rate": 2.6670619256393055e-05, + "loss": 0.1817, "step": 36565 }, { "epoch": 1.71, - "learning_rate": 1.666494772865782e-05, - "loss": 0.1298, + "learning_rate": 2.6670151186557035e-05, + "loss": 0.2731, "step": 36570 }, { "epoch": 1.71, - "learning_rate": 1.6664478927382685e-05, - "loss": 0.1495, + "learning_rate": 2.6669683116721015e-05, + "loss": 0.1788, "step": 36575 }, { "epoch": 1.71, - "learning_rate": 1.6664010126107545e-05, - "loss": 0.2125, + "learning_rate": 2.6669215046884998e-05, + "loss": 0.2102, "step": 36580 }, { "epoch": 1.71, - "learning_rate": 1.6663541324832405e-05, - "loss": 0.193, + "learning_rate": 2.6668746977048978e-05, + "loss": 0.2778, "step": 36585 }, { "epoch": 1.71, - "learning_rate": 1.6663072523557265e-05, - "loss": 0.0462, + "learning_rate": 2.6668278907212958e-05, + "loss": 0.0297, "step": 36590 }, { "epoch": 1.71, - "learning_rate": 1.6662603722282128e-05, - "loss": 0.087, + "learning_rate": 2.6667810837376937e-05, + "loss": 0.0796, "step": 36595 }, { "epoch": 1.71, - "learning_rate": 1.6662134921006988e-05, - "loss": 0.0735, + "learning_rate": 2.6667342767540917e-05, + "loss": 0.0984, "step": 36600 }, { "epoch": 1.71, - "learning_rate": 1.6661666119731848e-05, - "loss": 0.1022, + "learning_rate": 2.6666874697704897e-05, + "loss": 0.0883, "step": 36605 }, { "epoch": 1.71, - "learning_rate": 1.6661197318456708e-05, - "loss": 0.1351, + "learning_rate": 2.6666406627868877e-05, + "loss": 0.1426, "step": 36610 }, { "epoch": 1.71, - "learning_rate": 1.6660728517181568e-05, - "loss": 0.1015, + "learning_rate": 2.666593855803286e-05, + "loss": 0.1302, "step": 36615 }, { "epoch": 1.71, - "learning_rate": 1.6660259715906428e-05, - "loss": 0.1699, + "learning_rate": 2.666547048819684e-05, + "loss": 0.1131, "step": 36620 }, { "epoch": 1.71, - "learning_rate": 1.6659790914631288e-05, - "loss": 0.1358, + "learning_rate": 2.666500241836082e-05, + "loss": 0.1583, "step": 36625 }, { "epoch": 1.71, - "learning_rate": 1.6659322113356148e-05, - "loss": 0.3579, + "learning_rate": 2.66645343485248e-05, + "loss": 0.2502, "step": 36630 }, { "epoch": 1.71, - "learning_rate": 1.6658853312081008e-05, - "loss": 0.3272, + "learning_rate": 2.6664066278688783e-05, + "loss": 0.1735, "step": 36635 }, { "epoch": 1.71, - "learning_rate": 1.665838451080587e-05, - "loss": 0.0505, + "learning_rate": 2.6663598208852763e-05, + "loss": 0.0247, "step": 36640 }, { "epoch": 1.71, - "learning_rate": 1.665791570953073e-05, - "loss": 0.1199, + "learning_rate": 2.6663130139016742e-05, + "loss": 0.0666, "step": 36645 }, { "epoch": 1.71, - "learning_rate": 1.665744690825559e-05, - "loss": 0.0736, + "learning_rate": 2.6662662069180722e-05, + "loss": 0.0861, "step": 36650 }, { "epoch": 1.71, - "learning_rate": 1.6656978106980454e-05, - "loss": 0.0974, + "learning_rate": 2.6662193999344705e-05, + "loss": 0.1031, "step": 36655 }, { "epoch": 1.71, - "learning_rate": 1.6656509305705314e-05, - "loss": 0.0455, + "learning_rate": 2.6661725929508682e-05, + "loss": 0.2127, "step": 36660 }, { "epoch": 1.71, - "learning_rate": 1.6656040504430174e-05, - "loss": 0.1603, + "learning_rate": 2.666125785967266e-05, + "loss": 0.1631, "step": 36665 }, { "epoch": 1.71, - "learning_rate": 1.6655571703155034e-05, - "loss": 0.1358, + "learning_rate": 2.6660789789836645e-05, + "loss": 0.1586, "step": 36670 }, { "epoch": 1.71, - "learning_rate": 1.6655102901879894e-05, - "loss": 0.1231, + "learning_rate": 2.6660321720000625e-05, + "loss": 0.1788, "step": 36675 }, { "epoch": 1.71, - "learning_rate": 1.6654634100604754e-05, - "loss": 0.439, + "learning_rate": 2.6659853650164604e-05, + "loss": 0.3141, "step": 36680 }, { "epoch": 1.71, - "learning_rate": 1.6654165299329617e-05, - "loss": 0.2816, + "learning_rate": 2.6659385580328584e-05, + "loss": 0.174, "step": 36685 }, { "epoch": 1.71, - "learning_rate": 1.6653696498054477e-05, - "loss": 0.0421, + "learning_rate": 2.6658917510492567e-05, + "loss": 0.054, "step": 36690 }, { "epoch": 1.71, - "learning_rate": 1.6653227696779337e-05, - "loss": 0.0871, + "learning_rate": 2.6658449440656547e-05, + "loss": 0.0253, "step": 36695 }, { "epoch": 1.71, - "learning_rate": 1.6652758895504197e-05, - "loss": 0.0918, + "learning_rate": 2.6657981370820527e-05, + "loss": 0.1346, "step": 36700 }, { "epoch": 1.71, - "learning_rate": 1.6652290094229057e-05, - "loss": 0.0698, + "learning_rate": 2.6657513300984507e-05, + "loss": 0.099, "step": 36705 }, { "epoch": 1.71, - "learning_rate": 1.6651821292953917e-05, - "loss": 0.1282, + "learning_rate": 2.665704523114849e-05, + "loss": 0.2015, "step": 36710 }, { "epoch": 1.71, - "learning_rate": 1.6651352491678777e-05, - "loss": 0.172, + "learning_rate": 2.665657716131247e-05, + "loss": 0.166, "step": 36715 }, { "epoch": 1.71, - "learning_rate": 1.665088369040364e-05, - "loss": 0.1262, + "learning_rate": 2.665610909147645e-05, + "loss": 0.1954, "step": 36720 }, { "epoch": 1.71, - "learning_rate": 1.66504148891285e-05, - "loss": 0.2513, + "learning_rate": 2.6655641021640426e-05, + "loss": 0.2481, "step": 36725 }, { "epoch": 1.71, - "learning_rate": 1.664994608785336e-05, - "loss": 0.4673, + "learning_rate": 2.665517295180441e-05, + "loss": 0.3079, "step": 36730 }, { "epoch": 1.71, - "learning_rate": 1.6649477286578223e-05, - "loss": 0.2621, + "learning_rate": 2.665470488196839e-05, + "loss": 0.196, "step": 36735 }, { "epoch": 1.71, - "learning_rate": 1.6649008485303083e-05, - "loss": 0.055, + "learning_rate": 2.665423681213237e-05, + "loss": 0.0433, "step": 36740 }, { "epoch": 1.71, - "learning_rate": 1.6648539684027943e-05, - "loss": 0.0606, + "learning_rate": 2.6653768742296352e-05, + "loss": 0.0548, "step": 36745 }, { "epoch": 1.71, - "learning_rate": 1.6648070882752803e-05, - "loss": 0.0547, + "learning_rate": 2.6653300672460332e-05, + "loss": 0.0999, "step": 36750 }, { "epoch": 1.72, - "learning_rate": 1.6647602081477663e-05, - "loss": 0.0559, + "learning_rate": 2.6652832602624312e-05, + "loss": 0.1155, "step": 36755 }, { "epoch": 1.72, - "learning_rate": 1.6647133280202523e-05, - "loss": 0.0763, + "learning_rate": 2.6652364532788292e-05, + "loss": 0.1976, "step": 36760 }, { "epoch": 1.72, - "learning_rate": 1.6646664478927383e-05, - "loss": 0.0889, + "learning_rate": 2.6651896462952275e-05, + "loss": 0.1469, "step": 36765 }, { "epoch": 1.72, - "learning_rate": 1.6646195677652243e-05, - "loss": 0.1193, + "learning_rate": 2.6651428393116255e-05, + "loss": 0.1902, "step": 36770 }, { "epoch": 1.72, - "learning_rate": 1.6645726876377103e-05, - "loss": 0.2715, + "learning_rate": 2.6650960323280235e-05, + "loss": 0.2093, "step": 36775 }, { "epoch": 1.72, - "learning_rate": 1.6645258075101966e-05, - "loss": 0.2703, + "learning_rate": 2.6650492253444214e-05, + "loss": 0.232, "step": 36780 }, { "epoch": 1.72, - "learning_rate": 1.6644789273826826e-05, - "loss": 0.3361, + "learning_rate": 2.6650024183608198e-05, + "loss": 0.1965, "step": 36785 }, { "epoch": 1.72, - "learning_rate": 1.6644320472551686e-05, - "loss": 0.0797, + "learning_rate": 2.6649556113772174e-05, + "loss": 0.0518, "step": 36790 }, { "epoch": 1.72, - "learning_rate": 1.6643851671276546e-05, - "loss": 0.0685, + "learning_rate": 2.6649088043936154e-05, + "loss": 0.0649, "step": 36795 }, { "epoch": 1.72, - "learning_rate": 1.664338287000141e-05, - "loss": 0.0875, + "learning_rate": 2.6648619974100137e-05, + "loss": 0.0456, "step": 36800 }, { "epoch": 1.72, - "learning_rate": 1.664291406872627e-05, - "loss": 0.1153, + "learning_rate": 2.6648151904264117e-05, + "loss": 0.1178, "step": 36805 }, { "epoch": 1.72, - "learning_rate": 1.664244526745113e-05, - "loss": 0.0888, + "learning_rate": 2.6647683834428097e-05, + "loss": 0.0895, "step": 36810 }, { "epoch": 1.72, - "learning_rate": 1.664197646617599e-05, - "loss": 0.1436, + "learning_rate": 2.6647215764592076e-05, + "loss": 0.1355, "step": 36815 }, { "epoch": 1.72, - "learning_rate": 1.664150766490085e-05, - "loss": 0.1164, + "learning_rate": 2.664674769475606e-05, + "loss": 0.1733, "step": 36820 }, { "epoch": 1.72, - "learning_rate": 1.6641038863625712e-05, - "loss": 0.2245, + "learning_rate": 2.664627962492004e-05, + "loss": 0.3833, "step": 36825 }, { "epoch": 1.72, - "learning_rate": 1.6640570062350572e-05, - "loss": 0.2687, + "learning_rate": 2.664581155508402e-05, + "loss": 0.1653, "step": 36830 }, { "epoch": 1.72, - "learning_rate": 1.6640101261075432e-05, - "loss": 0.2908, + "learning_rate": 2.6645343485248e-05, + "loss": 0.2345, "step": 36835 }, { "epoch": 1.72, - "learning_rate": 1.6639632459800292e-05, - "loss": 0.0288, + "learning_rate": 2.6644875415411982e-05, + "loss": 0.0921, "step": 36840 }, { "epoch": 1.72, - "learning_rate": 1.6639163658525152e-05, - "loss": 0.0551, + "learning_rate": 2.6644407345575962e-05, + "loss": 0.0572, "step": 36845 }, { "epoch": 1.72, - "learning_rate": 1.6638694857250012e-05, - "loss": 0.0861, + "learning_rate": 2.664393927573994e-05, + "loss": 0.1026, "step": 36850 }, { "epoch": 1.72, - "learning_rate": 1.6638226055974872e-05, - "loss": 0.0831, + "learning_rate": 2.6643471205903922e-05, + "loss": 0.077, "step": 36855 }, { "epoch": 1.72, - "learning_rate": 1.6637757254699735e-05, - "loss": 0.1346, + "learning_rate": 2.66430031360679e-05, + "loss": 0.1494, "step": 36860 }, { "epoch": 1.72, - "learning_rate": 1.6637288453424595e-05, - "loss": 0.1029, + "learning_rate": 2.664253506623188e-05, + "loss": 0.2267, "step": 36865 }, { "epoch": 1.72, - "learning_rate": 1.6636819652149455e-05, - "loss": 0.18, + "learning_rate": 2.664206699639586e-05, + "loss": 0.1093, "step": 36870 }, { "epoch": 1.72, - "learning_rate": 1.6636350850874315e-05, - "loss": 0.1931, + "learning_rate": 2.6641598926559844e-05, + "loss": 0.134, "step": 36875 }, { "epoch": 1.72, - "learning_rate": 1.6635882049599178e-05, - "loss": 0.3946, + "learning_rate": 2.6641130856723824e-05, + "loss": 0.3072, "step": 36880 }, { "epoch": 1.72, - "learning_rate": 1.6635413248324038e-05, - "loss": 0.3143, + "learning_rate": 2.6640662786887804e-05, + "loss": 0.3128, "step": 36885 }, { "epoch": 1.72, - "learning_rate": 1.6634944447048898e-05, - "loss": 0.0555, + "learning_rate": 2.6640194717051784e-05, + "loss": 0.0741, "step": 36890 }, { "epoch": 1.72, - "learning_rate": 1.6634475645773758e-05, - "loss": 0.0364, + "learning_rate": 2.6639726647215767e-05, + "loss": 0.0445, "step": 36895 }, { "epoch": 1.72, - "learning_rate": 1.6634006844498618e-05, - "loss": 0.0298, + "learning_rate": 2.6639258577379747e-05, + "loss": 0.1302, "step": 36900 }, { "epoch": 1.72, - "learning_rate": 1.6633538043223478e-05, - "loss": 0.0705, + "learning_rate": 2.6638790507543727e-05, + "loss": 0.058, "step": 36905 }, { "epoch": 1.72, - "learning_rate": 1.6633069241948338e-05, - "loss": 0.1701, + "learning_rate": 2.6638322437707707e-05, + "loss": 0.0709, "step": 36910 }, { "epoch": 1.72, - "learning_rate": 1.6632600440673198e-05, - "loss": 0.1294, + "learning_rate": 2.6637854367871686e-05, + "loss": 0.1321, "step": 36915 }, { "epoch": 1.72, - "learning_rate": 1.663213163939806e-05, - "loss": 0.2156, + "learning_rate": 2.6637386298035666e-05, + "loss": 0.1702, "step": 36920 }, { "epoch": 1.72, - "learning_rate": 1.663166283812292e-05, - "loss": 0.2613, + "learning_rate": 2.6636918228199646e-05, + "loss": 0.3435, "step": 36925 }, { "epoch": 1.72, - "learning_rate": 1.663119403684778e-05, - "loss": 0.3791, + "learning_rate": 2.663645015836363e-05, + "loss": 0.2582, "step": 36930 }, { "epoch": 1.72, - "learning_rate": 1.663072523557264e-05, - "loss": 0.286, + "learning_rate": 2.663598208852761e-05, + "loss": 0.3827, "step": 36935 }, { "epoch": 1.72, - "learning_rate": 1.6630256434297504e-05, - "loss": 0.0744, + "learning_rate": 2.663551401869159e-05, + "loss": 0.0782, "step": 36940 }, { "epoch": 1.72, - "learning_rate": 1.6629787633022364e-05, - "loss": 0.0724, + "learning_rate": 2.663504594885557e-05, + "loss": 0.0933, "step": 36945 }, { "epoch": 1.72, - "learning_rate": 1.6629318831747224e-05, - "loss": 0.076, + "learning_rate": 2.6634577879019552e-05, + "loss": 0.0798, "step": 36950 }, { "epoch": 1.72, - "learning_rate": 1.6628850030472084e-05, - "loss": 0.0432, + "learning_rate": 2.6634109809183532e-05, + "loss": 0.1146, "step": 36955 }, { "epoch": 1.72, - "learning_rate": 1.6628381229196944e-05, - "loss": 0.1873, + "learning_rate": 2.663364173934751e-05, + "loss": 0.104, "step": 36960 }, { "epoch": 1.72, - "learning_rate": 1.6627912427921807e-05, - "loss": 0.1363, + "learning_rate": 2.663317366951149e-05, + "loss": 0.1681, "step": 36965 }, { "epoch": 1.73, - "learning_rate": 1.6627443626646667e-05, - "loss": 0.2091, + "learning_rate": 2.6632705599675475e-05, + "loss": 0.1804, "step": 36970 }, { "epoch": 1.73, - "learning_rate": 1.6626974825371527e-05, - "loss": 0.2916, + "learning_rate": 2.6632237529839454e-05, + "loss": 0.233, "step": 36975 }, { "epoch": 1.73, - "learning_rate": 1.6626506024096387e-05, - "loss": 0.3069, + "learning_rate": 2.663176946000343e-05, + "loss": 0.2976, "step": 36980 }, { "epoch": 1.73, - "learning_rate": 1.6626037222821247e-05, - "loss": 0.3146, + "learning_rate": 2.6631301390167414e-05, + "loss": 0.1425, "step": 36985 }, { "epoch": 1.73, - "learning_rate": 1.6625568421546107e-05, - "loss": 0.0509, + "learning_rate": 2.6630833320331394e-05, + "loss": 0.0479, "step": 36990 }, { "epoch": 1.73, - "learning_rate": 1.6625099620270967e-05, - "loss": 0.1014, + "learning_rate": 2.6630365250495374e-05, + "loss": 0.0603, "step": 36995 }, { "epoch": 1.73, - "learning_rate": 1.6624630818995827e-05, - "loss": 0.1048, + "learning_rate": 2.6629897180659353e-05, + "loss": 0.0688, "step": 37000 }, { "epoch": 1.73, - "learning_rate": 1.662416201772069e-05, - "loss": 0.0465, + "learning_rate": 2.6629429110823337e-05, + "loss": 0.0407, "step": 37005 }, { "epoch": 1.73, - "learning_rate": 1.662369321644555e-05, - "loss": 0.0972, + "learning_rate": 2.6628961040987316e-05, + "loss": 0.077, "step": 37010 }, { "epoch": 1.73, - "learning_rate": 1.662322441517041e-05, - "loss": 0.1586, + "learning_rate": 2.6628492971151296e-05, + "loss": 0.1341, "step": 37015 }, { "epoch": 1.73, - "learning_rate": 1.6622755613895273e-05, - "loss": 0.1031, + "learning_rate": 2.6628024901315276e-05, + "loss": 0.1999, "step": 37020 }, { "epoch": 1.73, - "learning_rate": 1.6622286812620133e-05, - "loss": 0.226, + "learning_rate": 2.662755683147926e-05, + "loss": 0.1489, "step": 37025 }, { "epoch": 1.73, - "learning_rate": 1.6621818011344993e-05, - "loss": 0.2767, + "learning_rate": 2.662708876164324e-05, + "loss": 0.3498, "step": 37030 }, { "epoch": 1.73, - "learning_rate": 1.6621349210069853e-05, - "loss": 0.1909, + "learning_rate": 2.662662069180722e-05, + "loss": 0.2524, "step": 37035 }, { "epoch": 1.73, - "learning_rate": 1.6620880408794713e-05, - "loss": 0.0913, + "learning_rate": 2.66261526219712e-05, + "loss": 0.0547, "step": 37040 }, { "epoch": 1.73, - "learning_rate": 1.6620411607519573e-05, - "loss": 0.1058, + "learning_rate": 2.662568455213518e-05, + "loss": 0.0783, "step": 37045 }, { "epoch": 1.73, - "learning_rate": 1.6619942806244433e-05, - "loss": 0.0695, + "learning_rate": 2.662521648229916e-05, + "loss": 0.0745, "step": 37050 }, { "epoch": 1.73, - "learning_rate": 1.6619474004969296e-05, - "loss": 0.0649, + "learning_rate": 2.6624748412463138e-05, + "loss": 0.1096, "step": 37055 }, { "epoch": 1.73, - "learning_rate": 1.6619005203694156e-05, - "loss": 0.1028, + "learning_rate": 2.662428034262712e-05, + "loss": 0.0486, "step": 37060 }, { "epoch": 1.73, - "learning_rate": 1.6618536402419016e-05, - "loss": 0.125, + "learning_rate": 2.66238122727911e-05, + "loss": 0.1119, "step": 37065 }, { "epoch": 1.73, - "learning_rate": 1.6618067601143876e-05, - "loss": 0.1258, + "learning_rate": 2.662334420295508e-05, + "loss": 0.2016, "step": 37070 }, { "epoch": 1.73, - "learning_rate": 1.6617598799868736e-05, - "loss": 0.2151, + "learning_rate": 2.662287613311906e-05, + "loss": 0.2012, "step": 37075 }, { "epoch": 1.73, - "learning_rate": 1.6617129998593596e-05, - "loss": 0.2019, + "learning_rate": 2.6622408063283044e-05, + "loss": 0.4105, "step": 37080 }, { "epoch": 1.73, - "learning_rate": 1.661666119731846e-05, - "loss": 0.2783, + "learning_rate": 2.6621939993447024e-05, + "loss": 0.2848, "step": 37085 }, { "epoch": 1.73, - "learning_rate": 1.661619239604332e-05, - "loss": 0.038, + "learning_rate": 2.6621471923611004e-05, + "loss": 0.0369, "step": 37090 }, { "epoch": 1.73, - "learning_rate": 1.661572359476818e-05, - "loss": 0.0266, + "learning_rate": 2.6621003853774984e-05, + "loss": 0.0729, "step": 37095 }, { "epoch": 1.73, - "learning_rate": 1.661525479349304e-05, - "loss": 0.0531, + "learning_rate": 2.6620535783938967e-05, + "loss": 0.0477, "step": 37100 }, { "epoch": 1.73, - "learning_rate": 1.6614785992217902e-05, - "loss": 0.0735, + "learning_rate": 2.6620067714102943e-05, + "loss": 0.0941, "step": 37105 }, { "epoch": 1.73, - "learning_rate": 1.6614317190942762e-05, - "loss": 0.1017, + "learning_rate": 2.6619599644266923e-05, + "loss": 0.1189, "step": 37110 }, { "epoch": 1.73, - "learning_rate": 1.6613848389667622e-05, - "loss": 0.0881, + "learning_rate": 2.6619131574430906e-05, + "loss": 0.1189, "step": 37115 }, { "epoch": 1.73, - "learning_rate": 1.6613379588392482e-05, - "loss": 0.1619, + "learning_rate": 2.6618663504594886e-05, + "loss": 0.1714, "step": 37120 }, { "epoch": 1.73, - "learning_rate": 1.6612910787117342e-05, - "loss": 0.128, + "learning_rate": 2.6618195434758866e-05, + "loss": 0.2479, "step": 37125 }, { "epoch": 1.73, - "learning_rate": 1.6612441985842202e-05, - "loss": 0.269, + "learning_rate": 2.6617727364922846e-05, + "loss": 0.319, "step": 37130 }, { "epoch": 1.73, - "learning_rate": 1.6611973184567062e-05, - "loss": 0.2594, + "learning_rate": 2.661725929508683e-05, + "loss": 0.2928, "step": 37135 }, { "epoch": 1.73, - "learning_rate": 1.6611504383291922e-05, - "loss": 0.0342, + "learning_rate": 2.661679122525081e-05, + "loss": 0.0276, "step": 37140 }, { "epoch": 1.73, - "learning_rate": 1.6611035582016782e-05, - "loss": 0.0537, + "learning_rate": 2.661632315541479e-05, + "loss": 0.0419, "step": 37145 }, { "epoch": 1.73, - "learning_rate": 1.6610566780741645e-05, - "loss": 0.1418, + "learning_rate": 2.6615855085578768e-05, + "loss": 0.0781, "step": 37150 }, { "epoch": 1.73, - "learning_rate": 1.6610097979466505e-05, - "loss": 0.0692, + "learning_rate": 2.661538701574275e-05, + "loss": 0.0888, "step": 37155 }, { "epoch": 1.73, - "learning_rate": 1.6609629178191365e-05, - "loss": 0.2006, + "learning_rate": 2.661491894590673e-05, + "loss": 0.0736, "step": 37160 }, { "epoch": 1.73, - "learning_rate": 1.660916037691623e-05, - "loss": 0.1502, + "learning_rate": 2.661445087607071e-05, + "loss": 0.1355, "step": 37165 }, { "epoch": 1.73, - "learning_rate": 1.6608691575641088e-05, - "loss": 0.2243, + "learning_rate": 2.661398280623469e-05, + "loss": 0.1683, "step": 37170 }, { "epoch": 1.73, - "learning_rate": 1.6608222774365948e-05, - "loss": 0.1569, + "learning_rate": 2.661351473639867e-05, + "loss": 0.227, "step": 37175 }, { "epoch": 1.73, - "learning_rate": 1.6607753973090808e-05, - "loss": 0.1983, + "learning_rate": 2.661304666656265e-05, + "loss": 0.388, "step": 37180 }, { "epoch": 1.74, - "learning_rate": 1.6607285171815668e-05, - "loss": 0.2452, + "learning_rate": 2.661257859672663e-05, + "loss": 0.2326, "step": 37185 }, { "epoch": 1.74, - "learning_rate": 1.6606816370540528e-05, - "loss": 0.0124, + "learning_rate": 2.6612110526890614e-05, + "loss": 0.0476, "step": 37190 }, { "epoch": 1.74, - "learning_rate": 1.660634756926539e-05, - "loss": 0.0762, + "learning_rate": 2.6611642457054593e-05, + "loss": 0.053, "step": 37195 }, { "epoch": 1.74, - "learning_rate": 1.660587876799025e-05, - "loss": 0.0947, + "learning_rate": 2.6611174387218573e-05, + "loss": 0.0602, "step": 37200 }, { "epoch": 1.74, - "learning_rate": 1.660540996671511e-05, - "loss": 0.07, + "learning_rate": 2.6610706317382553e-05, + "loss": 0.0839, "step": 37205 }, { "epoch": 1.74, - "learning_rate": 1.660494116543997e-05, - "loss": 0.0849, + "learning_rate": 2.6610238247546536e-05, + "loss": 0.1323, "step": 37210 }, { "epoch": 1.74, - "learning_rate": 1.660447236416483e-05, - "loss": 0.0985, + "learning_rate": 2.6609770177710516e-05, + "loss": 0.2045, "step": 37215 }, { "epoch": 1.74, - "learning_rate": 1.660400356288969e-05, - "loss": 0.0841, + "learning_rate": 2.6609302107874496e-05, + "loss": 0.1737, "step": 37220 }, { "epoch": 1.74, - "learning_rate": 1.660353476161455e-05, - "loss": 0.1331, + "learning_rate": 2.660883403803848e-05, + "loss": 0.2023, "step": 37225 }, { "epoch": 1.74, - "learning_rate": 1.6603065960339414e-05, - "loss": 0.2944, + "learning_rate": 2.6608365968202456e-05, + "loss": 0.1926, "step": 37230 }, { "epoch": 1.74, - "learning_rate": 1.6602597159064274e-05, - "loss": 0.2725, + "learning_rate": 2.6607897898366435e-05, + "loss": 0.2431, "step": 37235 }, { "epoch": 1.74, - "learning_rate": 1.6602128357789134e-05, - "loss": 0.0535, + "learning_rate": 2.6607429828530415e-05, + "loss": 0.0595, "step": 37240 }, { "epoch": 1.74, - "learning_rate": 1.6601659556513997e-05, - "loss": 0.0732, + "learning_rate": 2.66069617586944e-05, + "loss": 0.1053, "step": 37245 }, { "epoch": 1.74, - "learning_rate": 1.6601190755238857e-05, - "loss": 0.1134, + "learning_rate": 2.6606493688858378e-05, + "loss": 0.2247, "step": 37250 }, { "epoch": 1.74, - "learning_rate": 1.6600721953963717e-05, - "loss": 0.1084, + "learning_rate": 2.6606025619022358e-05, + "loss": 0.1125, "step": 37255 }, { "epoch": 1.74, - "learning_rate": 1.6600253152688577e-05, - "loss": 0.2014, + "learning_rate": 2.6605557549186338e-05, + "loss": 0.0894, "step": 37260 }, { "epoch": 1.74, - "learning_rate": 1.6599784351413437e-05, - "loss": 0.1195, + "learning_rate": 2.660508947935032e-05, + "loss": 0.1214, "step": 37265 }, { "epoch": 1.74, - "learning_rate": 1.6599315550138297e-05, - "loss": 0.1519, + "learning_rate": 2.66046214095143e-05, + "loss": 0.1771, "step": 37270 }, { "epoch": 1.74, - "learning_rate": 1.6598846748863157e-05, - "loss": 0.3407, + "learning_rate": 2.660415333967828e-05, + "loss": 0.1925, "step": 37275 }, { "epoch": 1.74, - "learning_rate": 1.6598377947588017e-05, - "loss": 0.2364, + "learning_rate": 2.6603685269842264e-05, + "loss": 0.3131, "step": 37280 }, { "epoch": 1.74, - "learning_rate": 1.6597909146312877e-05, - "loss": 0.251, + "learning_rate": 2.6603217200006244e-05, + "loss": 0.2135, "step": 37285 }, { "epoch": 1.74, - "learning_rate": 1.659744034503774e-05, - "loss": 0.0888, + "learning_rate": 2.6602749130170224e-05, + "loss": 0.0122, "step": 37290 }, { "epoch": 1.74, - "learning_rate": 1.65969715437626e-05, - "loss": 0.0529, + "learning_rate": 2.66022810603342e-05, + "loss": 0.0769, "step": 37295 }, { "epoch": 1.74, - "learning_rate": 1.659650274248746e-05, - "loss": 0.0577, + "learning_rate": 2.6601812990498183e-05, + "loss": 0.1122, "step": 37300 }, { "epoch": 1.74, - "learning_rate": 1.659603394121232e-05, - "loss": 0.0705, + "learning_rate": 2.6601344920662163e-05, + "loss": 0.0907, "step": 37305 }, { "epoch": 1.74, - "learning_rate": 1.6595565139937183e-05, - "loss": 0.0848, + "learning_rate": 2.6600876850826143e-05, + "loss": 0.1191, "step": 37310 }, { "epoch": 1.74, - "learning_rate": 1.6595096338662043e-05, - "loss": 0.0932, + "learning_rate": 2.6600408780990123e-05, + "loss": 0.104, "step": 37315 }, { "epoch": 1.74, - "learning_rate": 1.6594627537386903e-05, - "loss": 0.2257, + "learning_rate": 2.6599940711154106e-05, + "loss": 0.0915, "step": 37320 }, { "epoch": 1.74, - "learning_rate": 1.6594158736111763e-05, - "loss": 0.1595, + "learning_rate": 2.6599472641318086e-05, + "loss": 0.2602, "step": 37325 }, { "epoch": 1.74, - "learning_rate": 1.6593689934836623e-05, - "loss": 0.282, + "learning_rate": 2.6599004571482065e-05, + "loss": 0.2612, "step": 37330 }, { "epoch": 1.74, - "learning_rate": 1.6593221133561486e-05, - "loss": 0.2518, + "learning_rate": 2.6598536501646045e-05, + "loss": 0.3259, "step": 37335 }, { "epoch": 1.74, - "learning_rate": 1.6592752332286346e-05, - "loss": 0.061, + "learning_rate": 2.659806843181003e-05, + "loss": 0.0526, "step": 37340 }, { "epoch": 1.74, - "learning_rate": 1.6592283531011206e-05, - "loss": 0.0853, + "learning_rate": 2.6597600361974008e-05, + "loss": 0.0308, "step": 37345 }, { "epoch": 1.74, - "learning_rate": 1.6591814729736066e-05, - "loss": 0.0483, + "learning_rate": 2.6597132292137988e-05, + "loss": 0.064, "step": 37350 }, { "epoch": 1.74, - "learning_rate": 1.6591345928460926e-05, - "loss": 0.0589, + "learning_rate": 2.6596664222301968e-05, + "loss": 0.1317, "step": 37355 }, { "epoch": 1.74, - "learning_rate": 1.6590877127185786e-05, - "loss": 0.1197, + "learning_rate": 2.6596196152465948e-05, + "loss": 0.1705, "step": 37360 }, { "epoch": 1.74, - "learning_rate": 1.6590408325910646e-05, - "loss": 0.1924, + "learning_rate": 2.6595728082629928e-05, + "loss": 0.1577, "step": 37365 }, { "epoch": 1.74, - "learning_rate": 1.658993952463551e-05, - "loss": 0.1366, + "learning_rate": 2.6595260012793907e-05, + "loss": 0.2225, "step": 37370 }, { "epoch": 1.74, - "learning_rate": 1.658947072336037e-05, - "loss": 0.2581, + "learning_rate": 2.659479194295789e-05, + "loss": 0.2118, "step": 37375 }, { "epoch": 1.74, - "learning_rate": 1.658900192208523e-05, - "loss": 0.3968, + "learning_rate": 2.659432387312187e-05, + "loss": 0.2221, "step": 37380 }, { "epoch": 1.74, - "learning_rate": 1.658853312081009e-05, - "loss": 0.2865, + "learning_rate": 2.659385580328585e-05, + "loss": 0.2258, "step": 37385 }, { "epoch": 1.74, - "learning_rate": 1.6588064319534952e-05, - "loss": 0.0164, + "learning_rate": 2.659338773344983e-05, + "loss": 0.0274, "step": 37390 }, { "epoch": 1.74, - "learning_rate": 1.6587595518259812e-05, - "loss": 0.0344, + "learning_rate": 2.6592919663613813e-05, + "loss": 0.0474, "step": 37395 }, { "epoch": 1.75, - "learning_rate": 1.6587126716984672e-05, - "loss": 0.0603, + "learning_rate": 2.6592451593777793e-05, + "loss": 0.0394, "step": 37400 }, { "epoch": 1.75, - "learning_rate": 1.6586657915709532e-05, - "loss": 0.1074, + "learning_rate": 2.6591983523941773e-05, + "loss": 0.0899, "step": 37405 }, { "epoch": 1.75, - "learning_rate": 1.6586189114434392e-05, - "loss": 0.1031, + "learning_rate": 2.6591515454105756e-05, + "loss": 0.1538, "step": 37410 }, { "epoch": 1.75, - "learning_rate": 1.6585720313159252e-05, - "loss": 0.1761, + "learning_rate": 2.6591047384269736e-05, + "loss": 0.1078, "step": 37415 }, { "epoch": 1.75, - "learning_rate": 1.6585251511884112e-05, - "loss": 0.0686, + "learning_rate": 2.6590579314433712e-05, + "loss": 0.1219, "step": 37420 }, { "epoch": 1.75, - "learning_rate": 1.6584782710608972e-05, - "loss": 0.1569, + "learning_rate": 2.6590111244597692e-05, + "loss": 0.2225, "step": 37425 }, { "epoch": 1.75, - "learning_rate": 1.6584313909333835e-05, - "loss": 0.3677, + "learning_rate": 2.6589643174761675e-05, + "loss": 0.4069, "step": 37430 }, { "epoch": 1.75, - "learning_rate": 1.6583845108058695e-05, - "loss": 0.3944, + "learning_rate": 2.6589175104925655e-05, + "loss": 0.2965, "step": 37435 }, { "epoch": 1.75, - "learning_rate": 1.6583376306783555e-05, - "loss": 0.0395, + "learning_rate": 2.6588707035089635e-05, + "loss": 0.1096, "step": 37440 }, { "epoch": 1.75, - "learning_rate": 1.6582907505508415e-05, - "loss": 0.0894, + "learning_rate": 2.6588238965253615e-05, + "loss": 0.0941, "step": 37445 }, { "epoch": 1.75, - "learning_rate": 1.658243870423328e-05, - "loss": 0.0371, + "learning_rate": 2.6587770895417598e-05, + "loss": 0.0937, "step": 37450 }, { "epoch": 1.75, - "learning_rate": 1.658196990295814e-05, - "loss": 0.1232, + "learning_rate": 2.6587302825581578e-05, + "loss": 0.0944, "step": 37455 }, { "epoch": 1.75, - "learning_rate": 1.6581501101682998e-05, - "loss": 0.1115, + "learning_rate": 2.6586834755745558e-05, + "loss": 0.1426, "step": 37460 }, { "epoch": 1.75, - "learning_rate": 1.6581032300407858e-05, - "loss": 0.1376, + "learning_rate": 2.658636668590954e-05, + "loss": 0.1898, "step": 37465 }, { "epoch": 1.75, - "learning_rate": 1.6580563499132718e-05, - "loss": 0.138, + "learning_rate": 2.658589861607352e-05, + "loss": 0.178, "step": 37470 }, { "epoch": 1.75, - "learning_rate": 1.658009469785758e-05, - "loss": 0.2424, + "learning_rate": 2.65854305462375e-05, + "loss": 0.2354, "step": 37475 }, { "epoch": 1.75, - "learning_rate": 1.657962589658244e-05, - "loss": 0.3563, + "learning_rate": 2.658496247640148e-05, + "loss": 0.2504, "step": 37480 }, { "epoch": 1.75, - "learning_rate": 1.65791570953073e-05, - "loss": 0.2199, + "learning_rate": 2.658449440656546e-05, + "loss": 0.279, "step": 37485 }, { "epoch": 1.75, - "learning_rate": 1.657868829403216e-05, - "loss": 0.0285, + "learning_rate": 2.658402633672944e-05, + "loss": 0.0669, "step": 37490 }, { "epoch": 1.75, - "learning_rate": 1.657821949275702e-05, - "loss": 0.0596, + "learning_rate": 2.658355826689342e-05, + "loss": 0.0254, "step": 37495 }, { "epoch": 1.75, - "learning_rate": 1.657775069148188e-05, - "loss": 0.0993, + "learning_rate": 2.65830901970574e-05, + "loss": 0.054, "step": 37500 }, { "epoch": 1.75, - "learning_rate": 1.657728189020674e-05, - "loss": 0.118, + "learning_rate": 2.6582622127221383e-05, + "loss": 0.135, "step": 37505 }, { "epoch": 1.75, - "learning_rate": 1.65768130889316e-05, - "loss": 0.1173, + "learning_rate": 2.6582154057385363e-05, + "loss": 0.1475, "step": 37510 }, { "epoch": 1.75, - "learning_rate": 1.6576344287656464e-05, - "loss": 0.1519, + "learning_rate": 2.6581685987549342e-05, + "loss": 0.1666, "step": 37515 }, { "epoch": 1.75, - "learning_rate": 1.6575875486381324e-05, - "loss": 0.1847, + "learning_rate": 2.6581217917713322e-05, + "loss": 0.1303, "step": 37520 }, { "epoch": 1.75, - "learning_rate": 1.6575406685106184e-05, - "loss": 0.1778, + "learning_rate": 2.6580749847877305e-05, + "loss": 0.2202, "step": 37525 }, { "epoch": 1.75, - "learning_rate": 1.6574937883831048e-05, - "loss": 0.399, + "learning_rate": 2.6580281778041285e-05, + "loss": 0.2511, "step": 37530 }, { "epoch": 1.75, - "learning_rate": 1.6574469082555907e-05, - "loss": 0.2536, + "learning_rate": 2.6579813708205265e-05, + "loss": 0.1981, "step": 37535 }, { "epoch": 1.75, - "learning_rate": 1.6574000281280767e-05, - "loss": 0.0367, + "learning_rate": 2.6579345638369248e-05, + "loss": 0.0422, "step": 37540 }, { "epoch": 1.75, - "learning_rate": 1.6573531480005627e-05, - "loss": 0.0508, + "learning_rate": 2.6578877568533225e-05, + "loss": 0.0778, "step": 37545 }, { "epoch": 1.75, - "learning_rate": 1.6573062678730487e-05, - "loss": 0.0708, + "learning_rate": 2.6578409498697205e-05, + "loss": 0.0912, "step": 37550 }, { "epoch": 1.75, - "learning_rate": 1.6572593877455347e-05, - "loss": 0.1328, + "learning_rate": 2.6577941428861184e-05, + "loss": 0.081, "step": 37555 }, { "epoch": 1.75, - "learning_rate": 1.6572125076180207e-05, - "loss": 0.1137, + "learning_rate": 2.6577473359025168e-05, + "loss": 0.1607, "step": 37560 }, { "epoch": 1.75, - "learning_rate": 1.6571656274905067e-05, - "loss": 0.1144, + "learning_rate": 2.6577005289189147e-05, + "loss": 0.1126, "step": 37565 }, { "epoch": 1.75, - "learning_rate": 1.657118747362993e-05, - "loss": 0.103, + "learning_rate": 2.6576537219353127e-05, + "loss": 0.1883, "step": 37570 }, { "epoch": 1.75, - "learning_rate": 1.657071867235479e-05, - "loss": 0.2853, + "learning_rate": 2.6576069149517107e-05, + "loss": 0.1592, "step": 37575 }, { "epoch": 1.75, - "learning_rate": 1.657024987107965e-05, - "loss": 0.3472, + "learning_rate": 2.657560107968109e-05, + "loss": 0.2201, "step": 37580 }, { "epoch": 1.75, - "learning_rate": 1.656978106980451e-05, - "loss": 0.2283, + "learning_rate": 2.657513300984507e-05, + "loss": 0.2566, "step": 37585 }, { "epoch": 1.75, - "learning_rate": 1.656931226852937e-05, - "loss": 0.0364, + "learning_rate": 2.657466494000905e-05, + "loss": 0.0542, "step": 37590 }, { "epoch": 1.75, - "learning_rate": 1.6568843467254233e-05, - "loss": 0.0524, + "learning_rate": 2.6574196870173033e-05, + "loss": 0.0666, "step": 37595 }, { "epoch": 1.75, - "learning_rate": 1.6568374665979093e-05, - "loss": 0.1779, + "learning_rate": 2.6573728800337013e-05, + "loss": 0.0645, "step": 37600 }, { "epoch": 1.75, - "learning_rate": 1.6567905864703953e-05, - "loss": 0.1489, + "learning_rate": 2.6573260730500993e-05, + "loss": 0.0866, "step": 37605 }, { "epoch": 1.75, - "learning_rate": 1.6567437063428813e-05, - "loss": 0.1019, + "learning_rate": 2.657279266066497e-05, + "loss": 0.1118, "step": 37610 }, { "epoch": 1.76, - "learning_rate": 1.6566968262153677e-05, - "loss": 0.2143, + "learning_rate": 2.6572324590828952e-05, + "loss": 0.1532, "step": 37615 }, { "epoch": 1.76, - "learning_rate": 1.6566499460878536e-05, - "loss": 0.21, + "learning_rate": 2.6571856520992932e-05, + "loss": 0.1636, "step": 37620 }, { "epoch": 1.76, - "learning_rate": 1.6566030659603396e-05, - "loss": 0.1246, + "learning_rate": 2.6571388451156912e-05, + "loss": 0.263, "step": 37625 }, { "epoch": 1.76, - "learning_rate": 1.6565561858328256e-05, - "loss": 0.2592, + "learning_rate": 2.6570920381320892e-05, + "loss": 0.2552, "step": 37630 }, { "epoch": 1.76, - "learning_rate": 1.6565093057053116e-05, - "loss": 0.194, + "learning_rate": 2.6570452311484875e-05, + "loss": 0.2765, "step": 37635 }, { "epoch": 1.76, - "learning_rate": 1.6564624255777976e-05, - "loss": 0.0456, + "learning_rate": 2.6569984241648855e-05, + "loss": 0.0426, "step": 37640 }, { "epoch": 1.76, - "learning_rate": 1.6564155454502836e-05, - "loss": 0.1152, + "learning_rate": 2.6569516171812835e-05, + "loss": 0.0402, "step": 37645 }, { "epoch": 1.76, - "learning_rate": 1.6563686653227696e-05, - "loss": 0.0478, + "learning_rate": 2.6569048101976818e-05, + "loss": 0.0363, "step": 37650 }, { "epoch": 1.76, - "learning_rate": 1.6563217851952556e-05, - "loss": 0.0457, + "learning_rate": 2.6568580032140798e-05, + "loss": 0.105, "step": 37655 }, { "epoch": 1.76, - "learning_rate": 1.656274905067742e-05, - "loss": 0.1135, + "learning_rate": 2.6568111962304777e-05, + "loss": 0.1412, "step": 37660 }, { "epoch": 1.76, - "learning_rate": 1.656228024940228e-05, - "loss": 0.1155, + "learning_rate": 2.6567643892468757e-05, + "loss": 0.1898, "step": 37665 }, { "epoch": 1.76, - "learning_rate": 1.656181144812714e-05, - "loss": 0.1544, + "learning_rate": 2.656717582263274e-05, + "loss": 0.1251, "step": 37670 }, { "epoch": 1.76, - "learning_rate": 1.6561342646852003e-05, - "loss": 0.2425, + "learning_rate": 2.6566707752796717e-05, + "loss": 0.1053, "step": 37675 }, { "epoch": 1.76, - "learning_rate": 1.6560873845576862e-05, - "loss": 0.3254, + "learning_rate": 2.6566239682960697e-05, + "loss": 0.3299, "step": 37680 }, { "epoch": 1.76, - "learning_rate": 1.6560405044301722e-05, - "loss": 0.3002, + "learning_rate": 2.6565771613124677e-05, + "loss": 0.3651, "step": 37685 }, { "epoch": 1.76, - "learning_rate": 1.6559936243026582e-05, - "loss": 0.0239, + "learning_rate": 2.656530354328866e-05, + "loss": 0.0508, "step": 37690 }, { "epoch": 1.76, - "learning_rate": 1.6559467441751442e-05, - "loss": 0.0624, + "learning_rate": 2.656483547345264e-05, + "loss": 0.0858, "step": 37695 }, { "epoch": 1.76, - "learning_rate": 1.6558998640476302e-05, - "loss": 0.059, + "learning_rate": 2.656436740361662e-05, + "loss": 0.0825, "step": 37700 }, { "epoch": 1.76, - "learning_rate": 1.6558529839201166e-05, - "loss": 0.0739, + "learning_rate": 2.65638993337806e-05, + "loss": 0.1217, "step": 37705 }, { "epoch": 1.76, - "learning_rate": 1.6558061037926025e-05, - "loss": 0.1165, + "learning_rate": 2.6563431263944582e-05, + "loss": 0.1133, "step": 37710 }, { "epoch": 1.76, - "learning_rate": 1.6557592236650885e-05, - "loss": 0.2043, + "learning_rate": 2.6562963194108562e-05, + "loss": 0.1573, "step": 37715 }, { "epoch": 1.76, - "learning_rate": 1.6557123435375745e-05, - "loss": 0.1547, + "learning_rate": 2.6562495124272542e-05, + "loss": 0.1862, "step": 37720 }, { "epoch": 1.76, - "learning_rate": 1.6556654634100605e-05, - "loss": 0.2518, + "learning_rate": 2.6562027054436525e-05, + "loss": 0.212, "step": 37725 }, { "epoch": 1.76, - "learning_rate": 1.6556185832825465e-05, - "loss": 0.2447, + "learning_rate": 2.6561558984600505e-05, + "loss": 0.4626, "step": 37730 }, { "epoch": 1.76, - "learning_rate": 1.655571703155033e-05, - "loss": 0.2753, + "learning_rate": 2.656109091476448e-05, + "loss": 0.2542, "step": 37735 }, { "epoch": 1.76, - "learning_rate": 1.655524823027519e-05, - "loss": 0.039, + "learning_rate": 2.656062284492846e-05, + "loss": 0.0593, "step": 37740 }, { "epoch": 1.76, - "learning_rate": 1.655477942900005e-05, - "loss": 0.034, + "learning_rate": 2.6560154775092445e-05, + "loss": 0.0434, "step": 37745 }, { "epoch": 1.76, - "learning_rate": 1.655431062772491e-05, - "loss": 0.1092, + "learning_rate": 2.6559686705256424e-05, + "loss": 0.1627, "step": 37750 }, { "epoch": 1.76, - "learning_rate": 1.655384182644977e-05, - "loss": 0.1046, + "learning_rate": 2.6559218635420404e-05, + "loss": 0.0383, "step": 37755 }, { "epoch": 1.76, - "learning_rate": 1.655337302517463e-05, - "loss": 0.1291, + "learning_rate": 2.6558750565584384e-05, + "loss": 0.1075, "step": 37760 }, { "epoch": 1.76, - "learning_rate": 1.655290422389949e-05, - "loss": 0.0632, + "learning_rate": 2.6558282495748367e-05, + "loss": 0.092, "step": 37765 }, { "epoch": 1.76, - "learning_rate": 1.655243542262435e-05, - "loss": 0.1347, + "learning_rate": 2.6557814425912347e-05, + "loss": 0.1523, "step": 37770 }, { "epoch": 1.76, - "learning_rate": 1.655196662134921e-05, - "loss": 0.2168, + "learning_rate": 2.6557346356076327e-05, + "loss": 0.189, "step": 37775 }, { "epoch": 1.76, - "learning_rate": 1.655149782007407e-05, - "loss": 0.2543, + "learning_rate": 2.655687828624031e-05, + "loss": 0.2829, "step": 37780 }, { "epoch": 1.76, - "learning_rate": 1.655102901879893e-05, - "loss": 0.2751, + "learning_rate": 2.655641021640429e-05, + "loss": 0.2666, "step": 37785 }, { "epoch": 1.76, - "learning_rate": 1.655056021752379e-05, - "loss": 0.0621, + "learning_rate": 2.655594214656827e-05, + "loss": 0.0286, "step": 37790 }, { "epoch": 1.76, - "learning_rate": 1.655009141624865e-05, - "loss": 0.0669, + "learning_rate": 2.655547407673225e-05, + "loss": 0.0591, "step": 37795 }, { "epoch": 1.76, - "learning_rate": 1.6549622614973514e-05, - "loss": 0.1206, + "learning_rate": 2.655500600689623e-05, + "loss": 0.0541, "step": 37800 }, { "epoch": 1.76, - "learning_rate": 1.6549153813698374e-05, - "loss": 0.0601, + "learning_rate": 2.655453793706021e-05, + "loss": 0.1371, "step": 37805 }, { "epoch": 1.76, - "learning_rate": 1.6548685012423234e-05, - "loss": 0.1387, + "learning_rate": 2.655406986722419e-05, + "loss": 0.1488, "step": 37810 }, { "epoch": 1.76, - "learning_rate": 1.6548216211148098e-05, - "loss": 0.1075, + "learning_rate": 2.655360179738817e-05, + "loss": 0.1617, "step": 37815 }, { "epoch": 1.76, - "learning_rate": 1.6547747409872958e-05, - "loss": 0.1224, + "learning_rate": 2.6553133727552152e-05, + "loss": 0.1725, "step": 37820 }, { "epoch": 1.76, - "learning_rate": 1.6547278608597817e-05, - "loss": 0.2139, + "learning_rate": 2.6552665657716132e-05, + "loss": 0.2266, "step": 37825 }, { "epoch": 1.77, - "learning_rate": 1.6546809807322677e-05, - "loss": 0.322, + "learning_rate": 2.655219758788011e-05, + "loss": 0.3259, "step": 37830 }, { "epoch": 1.77, - "learning_rate": 1.6546341006047537e-05, - "loss": 0.2943, + "learning_rate": 2.6551729518044095e-05, + "loss": 0.2705, "step": 37835 }, { "epoch": 1.77, - "learning_rate": 1.6545872204772397e-05, - "loss": 0.0263, + "learning_rate": 2.6551261448208075e-05, + "loss": 0.0615, "step": 37840 }, { "epoch": 1.77, - "learning_rate": 1.654540340349726e-05, - "loss": 0.1074, + "learning_rate": 2.6550793378372054e-05, + "loss": 0.0913, "step": 37845 }, { "epoch": 1.77, - "learning_rate": 1.654493460222212e-05, - "loss": 0.0846, + "learning_rate": 2.6550325308536034e-05, + "loss": 0.0386, "step": 37850 }, { "epoch": 1.77, - "learning_rate": 1.654446580094698e-05, - "loss": 0.1118, + "learning_rate": 2.6549857238700017e-05, + "loss": 0.0523, "step": 37855 }, { "epoch": 1.77, - "learning_rate": 1.654399699967184e-05, - "loss": 0.123, + "learning_rate": 2.6549389168863994e-05, + "loss": 0.1444, "step": 37860 }, { "epoch": 1.77, - "learning_rate": 1.65435281983967e-05, - "loss": 0.101, + "learning_rate": 2.6548921099027974e-05, + "loss": 0.1165, "step": 37865 }, { "epoch": 1.77, - "learning_rate": 1.654305939712156e-05, - "loss": 0.1544, + "learning_rate": 2.6548453029191954e-05, + "loss": 0.1582, "step": 37870 }, { "epoch": 1.77, - "learning_rate": 1.654259059584642e-05, - "loss": 0.1887, + "learning_rate": 2.6547984959355937e-05, + "loss": 0.224, "step": 37875 }, { "epoch": 1.77, - "learning_rate": 1.6542121794571284e-05, - "loss": 0.2745, + "learning_rate": 2.6547516889519917e-05, + "loss": 0.2586, "step": 37880 }, { "epoch": 1.77, - "learning_rate": 1.6541652993296143e-05, - "loss": 0.2631, + "learning_rate": 2.6547048819683896e-05, + "loss": 0.2661, "step": 37885 }, { "epoch": 1.77, - "learning_rate": 1.6541184192021003e-05, - "loss": 0.0647, + "learning_rate": 2.654658074984788e-05, + "loss": 0.0212, "step": 37890 }, { "epoch": 1.77, - "learning_rate": 1.6540715390745867e-05, - "loss": 0.0606, + "learning_rate": 2.654611268001186e-05, + "loss": 0.0424, "step": 37895 }, { "epoch": 1.77, - "learning_rate": 1.6540246589470727e-05, - "loss": 0.0729, + "learning_rate": 2.654564461017584e-05, + "loss": 0.0965, "step": 37900 }, { "epoch": 1.77, - "learning_rate": 1.6539777788195587e-05, - "loss": 0.0672, + "learning_rate": 2.654517654033982e-05, + "loss": 0.1297, "step": 37905 }, { "epoch": 1.77, - "learning_rate": 1.6539308986920447e-05, - "loss": 0.0966, + "learning_rate": 2.6544708470503802e-05, + "loss": 0.1428, "step": 37910 }, { "epoch": 1.77, - "learning_rate": 1.6538840185645306e-05, - "loss": 0.1284, + "learning_rate": 2.6544240400667782e-05, + "loss": 0.1064, "step": 37915 }, { "epoch": 1.77, - "learning_rate": 1.6538371384370166e-05, - "loss": 0.1478, + "learning_rate": 2.6543772330831762e-05, + "loss": 0.2083, "step": 37920 }, { "epoch": 1.77, - "learning_rate": 1.6537902583095026e-05, - "loss": 0.1679, + "learning_rate": 2.6543304260995738e-05, + "loss": 0.2806, "step": 37925 }, { "epoch": 1.77, - "learning_rate": 1.6537433781819886e-05, - "loss": 0.2213, + "learning_rate": 2.654283619115972e-05, + "loss": 0.2333, "step": 37930 }, { "epoch": 1.77, - "learning_rate": 1.6536964980544746e-05, - "loss": 0.3319, + "learning_rate": 2.65423681213237e-05, + "loss": 0.208, "step": 37935 }, { "epoch": 1.77, - "learning_rate": 1.653649617926961e-05, - "loss": 0.0198, + "learning_rate": 2.654190005148768e-05, + "loss": 0.0392, "step": 37940 }, { "epoch": 1.77, - "learning_rate": 1.653602737799447e-05, - "loss": 0.0484, + "learning_rate": 2.654143198165166e-05, + "loss": 0.0543, "step": 37945 }, { "epoch": 1.77, - "learning_rate": 1.653555857671933e-05, - "loss": 0.0247, + "learning_rate": 2.6540963911815644e-05, + "loss": 0.0375, "step": 37950 }, { "epoch": 1.77, - "learning_rate": 1.653508977544419e-05, - "loss": 0.1661, + "learning_rate": 2.6540495841979624e-05, + "loss": 0.0513, "step": 37955 }, { "epoch": 1.77, - "learning_rate": 1.6534620974169053e-05, - "loss": 0.1487, + "learning_rate": 2.6540027772143604e-05, + "loss": 0.0978, "step": 37960 }, { "epoch": 1.77, - "learning_rate": 1.6534152172893913e-05, - "loss": 0.1593, + "learning_rate": 2.6539559702307587e-05, + "loss": 0.1591, "step": 37965 }, { "epoch": 1.77, - "learning_rate": 1.6533683371618773e-05, - "loss": 0.2306, + "learning_rate": 2.6539091632471567e-05, + "loss": 0.186, "step": 37970 }, { "epoch": 1.77, - "learning_rate": 1.6533214570343632e-05, - "loss": 0.1424, + "learning_rate": 2.6538623562635547e-05, + "loss": 0.2742, "step": 37975 }, { "epoch": 1.77, - "learning_rate": 1.6532745769068492e-05, - "loss": 0.1848, + "learning_rate": 2.6538155492799526e-05, + "loss": 0.2661, "step": 37980 }, { "epoch": 1.77, - "learning_rate": 1.6532276967793356e-05, - "loss": 0.1803, + "learning_rate": 2.653768742296351e-05, + "loss": 0.3483, "step": 37985 }, { "epoch": 1.77, - "learning_rate": 1.6531808166518216e-05, - "loss": 0.0608, + "learning_rate": 2.6537219353127486e-05, + "loss": 0.0242, "step": 37990 }, { "epoch": 1.77, - "learning_rate": 1.6531339365243076e-05, - "loss": 0.0602, + "learning_rate": 2.6536751283291466e-05, + "loss": 0.0226, "step": 37995 }, { "epoch": 1.77, - "learning_rate": 1.6530870563967936e-05, - "loss": 0.0948, + "learning_rate": 2.6536283213455446e-05, + "loss": 0.0236, "step": 38000 }, { "epoch": 1.77, - "learning_rate": 1.6530401762692795e-05, - "loss": 0.0966, + "learning_rate": 2.653581514361943e-05, + "loss": 0.0715, "step": 38005 }, { "epoch": 1.77, - "learning_rate": 1.6529932961417655e-05, - "loss": 0.1048, + "learning_rate": 2.653534707378341e-05, + "loss": 0.1452, "step": 38010 }, { "epoch": 1.77, - "learning_rate": 1.6529464160142515e-05, - "loss": 0.2214, + "learning_rate": 2.653487900394739e-05, + "loss": 0.0935, "step": 38015 }, { "epoch": 1.77, - "learning_rate": 1.6528995358867375e-05, - "loss": 0.1509, + "learning_rate": 2.6534410934111372e-05, + "loss": 0.1286, "step": 38020 }, { "epoch": 1.77, - "learning_rate": 1.652852655759224e-05, - "loss": 0.1914, + "learning_rate": 2.653394286427535e-05, + "loss": 0.2023, "step": 38025 }, { "epoch": 1.77, - "learning_rate": 1.65280577563171e-05, - "loss": 0.1909, + "learning_rate": 2.653347479443933e-05, + "loss": 0.2038, "step": 38030 }, { "epoch": 1.77, - "learning_rate": 1.652758895504196e-05, - "loss": 0.2226, + "learning_rate": 2.653300672460331e-05, + "loss": 0.3278, "step": 38035 }, { "epoch": 1.77, - "learning_rate": 1.6527120153766822e-05, - "loss": 0.085, + "learning_rate": 2.6532538654767294e-05, + "loss": 0.0397, "step": 38040 }, { "epoch": 1.78, - "learning_rate": 1.652665135249168e-05, - "loss": 0.0693, + "learning_rate": 2.6532070584931274e-05, + "loss": 0.1115, "step": 38045 }, { "epoch": 1.78, - "learning_rate": 1.652618255121654e-05, - "loss": 0.0931, + "learning_rate": 2.653160251509525e-05, + "loss": 0.0542, "step": 38050 }, { "epoch": 1.78, - "learning_rate": 1.65257137499414e-05, - "loss": 0.0726, + "learning_rate": 2.653113444525923e-05, + "loss": 0.0861, "step": 38055 }, { "epoch": 1.78, - "learning_rate": 1.652524494866626e-05, - "loss": 0.0906, + "learning_rate": 2.6530666375423214e-05, + "loss": 0.1127, "step": 38060 }, { "epoch": 1.78, - "learning_rate": 1.652477614739112e-05, - "loss": 0.1186, + "learning_rate": 2.6530198305587194e-05, + "loss": 0.1366, "step": 38065 }, { "epoch": 1.78, - "learning_rate": 1.652430734611598e-05, - "loss": 0.2951, + "learning_rate": 2.6529730235751173e-05, + "loss": 0.1672, "step": 38070 }, { "epoch": 1.78, - "learning_rate": 1.652383854484084e-05, - "loss": 0.2605, + "learning_rate": 2.6529262165915157e-05, + "loss": 0.2479, "step": 38075 }, { "epoch": 1.78, - "learning_rate": 1.6523369743565705e-05, - "loss": 0.3871, + "learning_rate": 2.6528794096079136e-05, + "loss": 0.3074, "step": 38080 }, { "epoch": 1.78, - "learning_rate": 1.6522900942290565e-05, - "loss": 0.1769, + "learning_rate": 2.6528326026243116e-05, + "loss": 0.2058, "step": 38085 }, { "epoch": 1.78, - "learning_rate": 1.6522432141015424e-05, - "loss": 0.0579, + "learning_rate": 2.6527857956407096e-05, + "loss": 0.0266, "step": 38090 }, { "epoch": 1.78, - "learning_rate": 1.6521963339740284e-05, - "loss": 0.0193, + "learning_rate": 2.652738988657108e-05, + "loss": 0.0687, "step": 38095 }, { "epoch": 1.78, - "learning_rate": 1.6521494538465144e-05, - "loss": 0.03, + "learning_rate": 2.652692181673506e-05, + "loss": 0.0764, "step": 38100 }, { "epoch": 1.78, - "learning_rate": 1.6521025737190008e-05, - "loss": 0.0837, + "learning_rate": 2.652645374689904e-05, + "loss": 0.0698, "step": 38105 }, { "epoch": 1.78, - "learning_rate": 1.6520556935914868e-05, - "loss": 0.1056, + "learning_rate": 2.652598567706302e-05, + "loss": 0.1162, "step": 38110 }, { "epoch": 1.78, - "learning_rate": 1.6520088134639728e-05, - "loss": 0.1369, + "learning_rate": 2.6525517607227e-05, + "loss": 0.1294, "step": 38115 }, { "epoch": 1.78, - "learning_rate": 1.6519619333364587e-05, - "loss": 0.0668, + "learning_rate": 2.6525049537390978e-05, + "loss": 0.1742, "step": 38120 }, { "epoch": 1.78, - "learning_rate": 1.651915053208945e-05, - "loss": 0.1117, + "learning_rate": 2.6524581467554958e-05, + "loss": 0.125, "step": 38125 }, { "epoch": 1.78, - "learning_rate": 1.651868173081431e-05, - "loss": 0.3179, + "learning_rate": 2.6524113397718938e-05, + "loss": 0.3642, "step": 38130 }, { "epoch": 1.78, - "learning_rate": 1.651821292953917e-05, - "loss": 0.3893, + "learning_rate": 2.652364532788292e-05, + "loss": 0.2685, "step": 38135 }, { "epoch": 1.78, - "learning_rate": 1.651774412826403e-05, - "loss": 0.0323, + "learning_rate": 2.65231772580469e-05, + "loss": 0.0532, "step": 38140 }, { "epoch": 1.78, - "learning_rate": 1.651727532698889e-05, - "loss": 0.0336, + "learning_rate": 2.652270918821088e-05, + "loss": 0.0493, "step": 38145 }, { "epoch": 1.78, - "learning_rate": 1.651680652571375e-05, - "loss": 0.0483, + "learning_rate": 2.6522241118374864e-05, + "loss": 0.0884, "step": 38150 }, { "epoch": 1.78, - "learning_rate": 1.651633772443861e-05, - "loss": 0.0455, + "learning_rate": 2.6521773048538844e-05, + "loss": 0.0669, "step": 38155 }, { "epoch": 1.78, - "learning_rate": 1.651586892316347e-05, - "loss": 0.0886, + "learning_rate": 2.6521304978702824e-05, + "loss": 0.12, "step": 38160 }, { "epoch": 1.78, - "learning_rate": 1.6515400121888334e-05, - "loss": 0.0876, + "learning_rate": 2.6520836908866803e-05, + "loss": 0.0843, "step": 38165 }, { "epoch": 1.78, - "learning_rate": 1.6514931320613194e-05, - "loss": 0.2145, + "learning_rate": 2.6520368839030787e-05, + "loss": 0.2058, "step": 38170 }, { "epoch": 1.78, - "learning_rate": 1.6514462519338054e-05, - "loss": 0.1848, + "learning_rate": 2.6519900769194766e-05, + "loss": 0.1966, "step": 38175 }, { "epoch": 1.78, - "learning_rate": 1.6513993718062913e-05, - "loss": 0.2946, + "learning_rate": 2.6519432699358743e-05, + "loss": 0.3597, "step": 38180 }, { "epoch": 1.78, - "learning_rate": 1.6513524916787777e-05, - "loss": 0.3475, + "learning_rate": 2.6518964629522723e-05, + "loss": 0.3126, "step": 38185 }, { "epoch": 1.78, - "learning_rate": 1.6513056115512637e-05, - "loss": 0.0299, + "learning_rate": 2.6518496559686706e-05, + "loss": 0.0241, "step": 38190 }, { "epoch": 1.78, - "learning_rate": 1.6512587314237497e-05, - "loss": 0.1046, + "learning_rate": 2.6518028489850686e-05, + "loss": 0.0725, "step": 38195 }, { "epoch": 1.78, - "learning_rate": 1.6512118512962357e-05, - "loss": 0.1024, + "learning_rate": 2.6517560420014666e-05, + "loss": 0.044, "step": 38200 }, { "epoch": 1.78, - "learning_rate": 1.6511649711687216e-05, - "loss": 0.0565, + "learning_rate": 2.651709235017865e-05, + "loss": 0.1043, "step": 38205 }, { "epoch": 1.78, - "learning_rate": 1.6511180910412076e-05, - "loss": 0.101, + "learning_rate": 2.651662428034263e-05, + "loss": 0.0978, "step": 38210 }, { "epoch": 1.78, - "learning_rate": 1.6510712109136936e-05, - "loss": 0.0764, + "learning_rate": 2.651615621050661e-05, + "loss": 0.2056, "step": 38215 }, { "epoch": 1.78, - "learning_rate": 1.65102433078618e-05, - "loss": 0.131, + "learning_rate": 2.6515688140670588e-05, + "loss": 0.1408, "step": 38220 }, { "epoch": 1.78, - "learning_rate": 1.650977450658666e-05, - "loss": 0.2516, + "learning_rate": 2.651522007083457e-05, + "loss": 0.1687, "step": 38225 }, { "epoch": 1.78, - "learning_rate": 1.650930570531152e-05, - "loss": 0.3604, + "learning_rate": 2.651475200099855e-05, + "loss": 0.2897, "step": 38230 }, { "epoch": 1.78, - "learning_rate": 1.650883690403638e-05, - "loss": 0.1973, + "learning_rate": 2.651428393116253e-05, + "loss": 0.1992, "step": 38235 }, { "epoch": 1.78, - "learning_rate": 1.650836810276124e-05, - "loss": 0.0291, + "learning_rate": 2.6513815861326507e-05, + "loss": 0.0673, "step": 38240 }, { "epoch": 1.78, - "learning_rate": 1.6507899301486103e-05, - "loss": 0.0749, + "learning_rate": 2.651334779149049e-05, + "loss": 0.058, "step": 38245 }, { "epoch": 1.78, - "learning_rate": 1.6507430500210963e-05, - "loss": 0.0902, + "learning_rate": 2.651287972165447e-05, + "loss": 0.1164, "step": 38250 }, { "epoch": 1.79, - "learning_rate": 1.6506961698935823e-05, - "loss": 0.0639, + "learning_rate": 2.651241165181845e-05, + "loss": 0.1625, "step": 38255 }, { "epoch": 1.79, - "learning_rate": 1.6506492897660683e-05, - "loss": 0.0581, + "learning_rate": 2.6511943581982434e-05, + "loss": 0.1882, "step": 38260 }, { "epoch": 1.79, - "learning_rate": 1.6506024096385546e-05, - "loss": 0.1075, + "learning_rate": 2.6511475512146413e-05, + "loss": 0.1015, "step": 38265 }, { "epoch": 1.79, - "learning_rate": 1.6505555295110406e-05, - "loss": 0.1366, + "learning_rate": 2.6511007442310393e-05, + "loss": 0.2987, "step": 38270 }, { "epoch": 1.79, - "learning_rate": 1.6505086493835266e-05, - "loss": 0.1522, + "learning_rate": 2.6510539372474373e-05, + "loss": 0.1598, "step": 38275 }, { "epoch": 1.79, - "learning_rate": 1.6504617692560126e-05, - "loss": 0.2609, + "learning_rate": 2.6510071302638356e-05, + "loss": 0.2135, "step": 38280 }, { "epoch": 1.79, - "learning_rate": 1.6504148891284986e-05, - "loss": 0.2683, + "learning_rate": 2.6509603232802336e-05, + "loss": 0.2599, "step": 38285 }, { "epoch": 1.79, - "learning_rate": 1.6503680090009846e-05, - "loss": 0.0229, + "learning_rate": 2.6509135162966316e-05, + "loss": 0.0309, "step": 38290 }, { "epoch": 1.79, - "learning_rate": 1.6503211288734705e-05, - "loss": 0.0375, + "learning_rate": 2.6508667093130296e-05, + "loss": 0.0275, "step": 38295 }, { "epoch": 1.79, - "learning_rate": 1.6502742487459565e-05, - "loss": 0.056, + "learning_rate": 2.650819902329428e-05, + "loss": 0.0937, "step": 38300 }, { "epoch": 1.79, - "learning_rate": 1.6502273686184425e-05, - "loss": 0.1326, + "learning_rate": 2.6507730953458255e-05, + "loss": 0.0716, "step": 38305 }, { "epoch": 1.79, - "learning_rate": 1.650180488490929e-05, - "loss": 0.0702, + "learning_rate": 2.6507262883622235e-05, + "loss": 0.158, "step": 38310 }, { "epoch": 1.79, - "learning_rate": 1.650133608363415e-05, - "loss": 0.1117, + "learning_rate": 2.6506794813786215e-05, + "loss": 0.1843, "step": 38315 }, { "epoch": 1.79, - "learning_rate": 1.650086728235901e-05, - "loss": 0.1461, + "learning_rate": 2.6506326743950198e-05, + "loss": 0.2241, "step": 38320 }, { "epoch": 1.79, - "learning_rate": 1.6500398481083872e-05, - "loss": 0.2323, + "learning_rate": 2.6505858674114178e-05, + "loss": 0.1921, "step": 38325 }, { "epoch": 1.79, - "learning_rate": 1.6499929679808732e-05, - "loss": 0.3953, + "learning_rate": 2.6505390604278158e-05, + "loss": 0.3494, "step": 38330 }, { "epoch": 1.79, - "learning_rate": 1.6499460878533592e-05, - "loss": 0.2693, + "learning_rate": 2.650492253444214e-05, + "loss": 0.2617, "step": 38335 }, { "epoch": 1.79, - "learning_rate": 1.649899207725845e-05, - "loss": 0.0308, + "learning_rate": 2.650445446460612e-05, + "loss": 0.0337, "step": 38340 }, { "epoch": 1.79, - "learning_rate": 1.649852327598331e-05, - "loss": 0.0674, + "learning_rate": 2.65039863947701e-05, + "loss": 0.0641, "step": 38345 }, { "epoch": 1.79, - "learning_rate": 1.649805447470817e-05, - "loss": 0.0875, + "learning_rate": 2.650351832493408e-05, + "loss": 0.104, "step": 38350 }, { "epoch": 1.79, - "learning_rate": 1.6497585673433035e-05, - "loss": 0.0632, + "learning_rate": 2.6503050255098064e-05, + "loss": 0.0843, "step": 38355 }, { "epoch": 1.79, - "learning_rate": 1.6497116872157895e-05, - "loss": 0.1288, + "learning_rate": 2.6502582185262043e-05, + "loss": 0.0992, "step": 38360 }, { "epoch": 1.79, - "learning_rate": 1.6496648070882755e-05, - "loss": 0.1426, + "learning_rate": 2.6502114115426023e-05, + "loss": 0.1044, "step": 38365 }, { "epoch": 1.79, - "learning_rate": 1.6496179269607615e-05, - "loss": 0.1112, + "learning_rate": 2.650164604559e-05, + "loss": 0.1495, "step": 38370 }, { "epoch": 1.79, - "learning_rate": 1.6495710468332475e-05, - "loss": 0.2597, + "learning_rate": 2.6501177975753983e-05, + "loss": 0.1757, "step": 38375 }, { "epoch": 1.79, - "learning_rate": 1.6495241667057335e-05, - "loss": 0.4737, + "learning_rate": 2.6500709905917963e-05, + "loss": 0.3565, "step": 38380 }, { "epoch": 1.79, - "learning_rate": 1.6494772865782194e-05, - "loss": 0.2821, + "learning_rate": 2.6500241836081943e-05, + "loss": 0.28, "step": 38385 }, { "epoch": 1.79, - "learning_rate": 1.6494304064507058e-05, - "loss": 0.0208, + "learning_rate": 2.6499773766245926e-05, + "loss": 0.0916, "step": 38390 }, { "epoch": 1.79, - "learning_rate": 1.6493835263231918e-05, - "loss": 0.0651, + "learning_rate": 2.6499305696409906e-05, + "loss": 0.054, "step": 38395 }, { "epoch": 1.79, - "learning_rate": 1.6493366461956778e-05, - "loss": 0.1064, + "learning_rate": 2.6498837626573885e-05, + "loss": 0.0463, "step": 38400 }, { "epoch": 1.79, - "learning_rate": 1.649289766068164e-05, - "loss": 0.0954, + "learning_rate": 2.6498369556737865e-05, + "loss": 0.0573, "step": 38405 }, { "epoch": 1.79, - "learning_rate": 1.64924288594065e-05, - "loss": 0.1097, + "learning_rate": 2.649790148690185e-05, + "loss": 0.0691, "step": 38410 }, { "epoch": 1.79, - "learning_rate": 1.649196005813136e-05, - "loss": 0.0886, + "learning_rate": 2.6497433417065828e-05, + "loss": 0.0652, "step": 38415 }, { "epoch": 1.79, - "learning_rate": 1.649149125685622e-05, - "loss": 0.1399, + "learning_rate": 2.6496965347229808e-05, + "loss": 0.1039, "step": 38420 }, { "epoch": 1.79, - "learning_rate": 1.649102245558108e-05, - "loss": 0.1572, + "learning_rate": 2.6496497277393788e-05, + "loss": 0.2298, "step": 38425 }, { "epoch": 1.79, - "learning_rate": 1.649055365430594e-05, - "loss": 0.2984, + "learning_rate": 2.6496029207557768e-05, + "loss": 0.2337, "step": 38430 }, { "epoch": 1.79, - "learning_rate": 1.64900848530308e-05, - "loss": 0.2507, + "learning_rate": 2.6495561137721747e-05, + "loss": 0.3419, "step": 38435 }, { "epoch": 1.79, - "learning_rate": 1.648961605175566e-05, - "loss": 0.076, + "learning_rate": 2.6495093067885727e-05, + "loss": 0.0615, "step": 38440 }, { "epoch": 1.79, - "learning_rate": 1.648914725048052e-05, - "loss": 0.0529, + "learning_rate": 2.649462499804971e-05, + "loss": 0.048, "step": 38445 }, { "epoch": 1.79, - "learning_rate": 1.6488678449205384e-05, - "loss": 0.071, + "learning_rate": 2.649415692821369e-05, + "loss": 0.0772, "step": 38450 }, { "epoch": 1.79, - "learning_rate": 1.6488209647930244e-05, - "loss": 0.0789, + "learning_rate": 2.649368885837767e-05, + "loss": 0.0492, "step": 38455 }, { "epoch": 1.79, - "learning_rate": 1.6487740846655104e-05, - "loss": 0.2206, + "learning_rate": 2.649322078854165e-05, + "loss": 0.1067, "step": 38460 }, { "epoch": 1.79, - "learning_rate": 1.6487272045379964e-05, - "loss": 0.1089, + "learning_rate": 2.6492752718705633e-05, + "loss": 0.1087, "step": 38465 }, { "epoch": 1.8, - "learning_rate": 1.6486803244104827e-05, - "loss": 0.1432, + "learning_rate": 2.6492284648869613e-05, + "loss": 0.0963, "step": 38470 }, { "epoch": 1.8, - "learning_rate": 1.6486334442829687e-05, - "loss": 0.1734, + "learning_rate": 2.6491816579033593e-05, + "loss": 0.1953, "step": 38475 }, { "epoch": 1.8, - "learning_rate": 1.6485865641554547e-05, - "loss": 0.3328, + "learning_rate": 2.6491348509197573e-05, + "loss": 0.2136, "step": 38480 }, { "epoch": 1.8, - "learning_rate": 1.6485396840279407e-05, - "loss": 0.2703, + "learning_rate": 2.6490880439361556e-05, + "loss": 0.2515, "step": 38485 }, { "epoch": 1.8, - "learning_rate": 1.6484928039004267e-05, - "loss": 0.0144, + "learning_rate": 2.6490412369525536e-05, + "loss": 0.0383, "step": 38490 }, { "epoch": 1.8, - "learning_rate": 1.648445923772913e-05, - "loss": 0.0543, + "learning_rate": 2.6489944299689512e-05, + "loss": 0.0402, "step": 38495 }, { "epoch": 1.8, - "learning_rate": 1.648399043645399e-05, - "loss": 0.053, + "learning_rate": 2.6489476229853492e-05, + "loss": 0.125, "step": 38500 }, { "epoch": 1.8, - "learning_rate": 1.648352163517885e-05, - "loss": 0.1421, + "learning_rate": 2.6489008160017475e-05, + "loss": 0.0805, "step": 38505 }, { "epoch": 1.8, - "learning_rate": 1.648305283390371e-05, - "loss": 0.1658, + "learning_rate": 2.6488540090181455e-05, + "loss": 0.0915, "step": 38510 }, { "epoch": 1.8, - "learning_rate": 1.648258403262857e-05, - "loss": 0.1931, + "learning_rate": 2.6488072020345435e-05, + "loss": 0.1007, "step": 38515 }, { "epoch": 1.8, - "learning_rate": 1.648211523135343e-05, - "loss": 0.185, + "learning_rate": 2.6487603950509418e-05, + "loss": 0.1853, "step": 38520 }, { "epoch": 1.8, - "learning_rate": 1.648164643007829e-05, - "loss": 0.1703, + "learning_rate": 2.6487135880673398e-05, + "loss": 0.214, "step": 38525 }, { "epoch": 1.8, - "learning_rate": 1.648117762880315e-05, - "loss": 0.3438, + "learning_rate": 2.6486667810837378e-05, + "loss": 0.2567, "step": 38530 }, { "epoch": 1.8, - "learning_rate": 1.6480708827528013e-05, - "loss": 0.2311, + "learning_rate": 2.6486199741001357e-05, + "loss": 0.2431, "step": 38535 }, { "epoch": 1.8, - "learning_rate": 1.6480240026252873e-05, - "loss": 0.0695, + "learning_rate": 2.648573167116534e-05, + "loss": 0.1163, "step": 38540 }, { "epoch": 1.8, - "learning_rate": 1.6479771224977733e-05, - "loss": 0.0261, + "learning_rate": 2.648526360132932e-05, + "loss": 0.0315, "step": 38545 }, { "epoch": 1.8, - "learning_rate": 1.6479302423702596e-05, - "loss": 0.0674, + "learning_rate": 2.64847955314933e-05, + "loss": 0.0545, "step": 38550 }, { "epoch": 1.8, - "learning_rate": 1.6478833622427456e-05, - "loss": 0.0389, + "learning_rate": 2.648432746165728e-05, + "loss": 0.1156, "step": 38555 }, { "epoch": 1.8, - "learning_rate": 1.6478364821152316e-05, - "loss": 0.0607, + "learning_rate": 2.648385939182126e-05, + "loss": 0.0526, "step": 38560 }, { "epoch": 1.8, - "learning_rate": 1.6477896019877176e-05, - "loss": 0.1092, + "learning_rate": 2.648339132198524e-05, + "loss": 0.0689, "step": 38565 }, { "epoch": 1.8, - "learning_rate": 1.6477427218602036e-05, - "loss": 0.1172, + "learning_rate": 2.648292325214922e-05, + "loss": 0.1696, "step": 38570 }, { "epoch": 1.8, - "learning_rate": 1.6476958417326896e-05, - "loss": 0.2803, + "learning_rate": 2.6482455182313203e-05, + "loss": 0.2157, "step": 38575 }, { "epoch": 1.8, - "learning_rate": 1.6476489616051756e-05, - "loss": 0.3335, + "learning_rate": 2.6481987112477183e-05, + "loss": 0.4338, "step": 38580 }, { "epoch": 1.8, - "learning_rate": 1.6476020814776616e-05, - "loss": 0.2211, + "learning_rate": 2.6481519042641162e-05, + "loss": 0.2748, "step": 38585 }, { "epoch": 1.8, - "learning_rate": 1.647555201350148e-05, - "loss": 0.017, + "learning_rate": 2.6481050972805142e-05, + "loss": 0.0525, "step": 38590 }, { "epoch": 1.8, - "learning_rate": 1.647508321222634e-05, - "loss": 0.0326, + "learning_rate": 2.6480582902969125e-05, + "loss": 0.0672, "step": 38595 }, { "epoch": 1.8, - "learning_rate": 1.64746144109512e-05, - "loss": 0.1442, + "learning_rate": 2.6480114833133105e-05, + "loss": 0.0741, "step": 38600 }, { "epoch": 1.8, - "learning_rate": 1.647414560967606e-05, - "loss": 0.0696, + "learning_rate": 2.6479646763297085e-05, + "loss": 0.1044, "step": 38605 }, { "epoch": 1.8, - "learning_rate": 1.647367680840092e-05, - "loss": 0.0805, + "learning_rate": 2.6479178693461065e-05, + "loss": 0.1173, "step": 38610 }, { "epoch": 1.8, - "learning_rate": 1.6473208007125782e-05, - "loss": 0.1016, + "learning_rate": 2.6478710623625048e-05, + "loss": 0.0755, "step": 38615 }, { "epoch": 1.8, - "learning_rate": 1.6472739205850642e-05, - "loss": 0.1932, + "learning_rate": 2.6478242553789024e-05, + "loss": 0.1642, "step": 38620 }, { "epoch": 1.8, - "learning_rate": 1.6472270404575502e-05, - "loss": 0.2845, + "learning_rate": 2.6477774483953004e-05, + "loss": 0.1546, "step": 38625 }, { "epoch": 1.8, - "learning_rate": 1.647180160330036e-05, - "loss": 0.2894, + "learning_rate": 2.6477306414116987e-05, + "loss": 0.3669, "step": 38630 }, { "epoch": 1.8, - "learning_rate": 1.6471332802025225e-05, - "loss": 0.2238, + "learning_rate": 2.6476838344280967e-05, + "loss": 0.4163, "step": 38635 }, { "epoch": 1.8, - "learning_rate": 1.6470864000750085e-05, - "loss": 0.0211, + "learning_rate": 2.6476370274444947e-05, + "loss": 0.0366, "step": 38640 }, { "epoch": 1.8, - "learning_rate": 1.6470395199474945e-05, - "loss": 0.1023, + "learning_rate": 2.6475902204608927e-05, + "loss": 0.0352, "step": 38645 }, { "epoch": 1.8, - "learning_rate": 1.6469926398199805e-05, - "loss": 0.0599, + "learning_rate": 2.647543413477291e-05, + "loss": 0.0581, "step": 38650 }, { "epoch": 1.8, - "learning_rate": 1.6469457596924665e-05, - "loss": 0.0586, + "learning_rate": 2.647496606493689e-05, + "loss": 0.0955, "step": 38655 }, { "epoch": 1.8, - "learning_rate": 1.6468988795649525e-05, - "loss": 0.0788, + "learning_rate": 2.647449799510087e-05, + "loss": 0.1382, "step": 38660 }, { "epoch": 1.8, - "learning_rate": 1.6468519994374385e-05, - "loss": 0.0547, + "learning_rate": 2.647402992526485e-05, + "loss": 0.1084, "step": 38665 }, { "epoch": 1.8, - "learning_rate": 1.6468051193099245e-05, - "loss": 0.17, + "learning_rate": 2.6473561855428833e-05, + "loss": 0.143, "step": 38670 }, { "epoch": 1.8, - "learning_rate": 1.6467582391824108e-05, - "loss": 0.1925, + "learning_rate": 2.6473093785592813e-05, + "loss": 0.0941, "step": 38675 }, { "epoch": 1.8, - "learning_rate": 1.6467113590548968e-05, - "loss": 0.3395, + "learning_rate": 2.6472625715756792e-05, + "loss": 0.3088, "step": 38680 }, { "epoch": 1.81, - "learning_rate": 1.6466644789273828e-05, - "loss": 0.2278, + "learning_rate": 2.6472157645920772e-05, + "loss": 0.2556, "step": 38685 }, { "epoch": 1.81, - "learning_rate": 1.6466175987998688e-05, - "loss": 0.1107, + "learning_rate": 2.6471689576084752e-05, + "loss": 0.0559, "step": 38690 }, { "epoch": 1.81, - "learning_rate": 1.646570718672355e-05, - "loss": 0.098, + "learning_rate": 2.6471221506248732e-05, + "loss": 0.0947, "step": 38695 }, { "epoch": 1.81, - "learning_rate": 1.646523838544841e-05, - "loss": 0.0812, + "learning_rate": 2.647075343641271e-05, + "loss": 0.1005, "step": 38700 }, { "epoch": 1.81, - "learning_rate": 1.646476958417327e-05, - "loss": 0.1406, + "learning_rate": 2.6470285366576695e-05, + "loss": 0.1112, "step": 38705 }, { "epoch": 1.81, - "learning_rate": 1.646430078289813e-05, - "loss": 0.0664, + "learning_rate": 2.6469817296740675e-05, + "loss": 0.2133, "step": 38710 }, { "epoch": 1.81, - "learning_rate": 1.646383198162299e-05, - "loss": 0.1197, + "learning_rate": 2.6469349226904655e-05, + "loss": 0.123, "step": 38715 }, { "epoch": 1.81, - "learning_rate": 1.646336318034785e-05, - "loss": 0.2626, + "learning_rate": 2.6468881157068634e-05, + "loss": 0.1556, "step": 38720 }, { "epoch": 1.81, - "learning_rate": 1.646289437907271e-05, - "loss": 0.131, + "learning_rate": 2.6468413087232618e-05, + "loss": 0.2376, "step": 38725 }, { "epoch": 1.81, - "learning_rate": 1.6462425577797574e-05, - "loss": 0.3359, + "learning_rate": 2.6467945017396597e-05, + "loss": 0.3147, "step": 38730 }, { "epoch": 1.81, - "learning_rate": 1.6461956776522434e-05, - "loss": 0.3169, + "learning_rate": 2.6467476947560577e-05, + "loss": 0.2535, "step": 38735 }, { "epoch": 1.81, - "learning_rate": 1.6461487975247294e-05, - "loss": 0.04, + "learning_rate": 2.6467008877724557e-05, + "loss": 0.0479, "step": 38740 }, { "epoch": 1.81, - "learning_rate": 1.6461019173972154e-05, - "loss": 0.05, + "learning_rate": 2.6466540807888537e-05, + "loss": 0.0924, "step": 38745 }, { "epoch": 1.81, - "learning_rate": 1.6460550372697014e-05, - "loss": 0.1038, + "learning_rate": 2.6466072738052517e-05, + "loss": 0.0957, "step": 38750 }, { "epoch": 1.81, - "learning_rate": 1.6460081571421877e-05, - "loss": 0.077, + "learning_rate": 2.6465604668216496e-05, + "loss": 0.1482, "step": 38755 }, { "epoch": 1.81, - "learning_rate": 1.6459612770146737e-05, - "loss": 0.0512, + "learning_rate": 2.646513659838048e-05, + "loss": 0.1378, "step": 38760 }, { "epoch": 1.81, - "learning_rate": 1.6459143968871597e-05, - "loss": 0.1305, + "learning_rate": 2.646466852854446e-05, + "loss": 0.1682, "step": 38765 }, { "epoch": 1.81, - "learning_rate": 1.6458675167596457e-05, - "loss": 0.1466, + "learning_rate": 2.646420045870844e-05, + "loss": 0.2081, "step": 38770 }, { "epoch": 1.81, - "learning_rate": 1.645820636632132e-05, - "loss": 0.2115, + "learning_rate": 2.646373238887242e-05, + "loss": 0.2407, "step": 38775 }, { "epoch": 1.81, - "learning_rate": 1.645773756504618e-05, - "loss": 0.2877, + "learning_rate": 2.6463264319036402e-05, + "loss": 0.2936, "step": 38780 }, { "epoch": 1.81, - "learning_rate": 1.645726876377104e-05, - "loss": 0.2355, + "learning_rate": 2.6462796249200382e-05, + "loss": 0.2447, "step": 38785 }, { "epoch": 1.81, - "learning_rate": 1.64567999624959e-05, - "loss": 0.0291, + "learning_rate": 2.6462328179364362e-05, + "loss": 0.0566, "step": 38790 }, { "epoch": 1.81, - "learning_rate": 1.645633116122076e-05, - "loss": 0.0392, + "learning_rate": 2.6461860109528342e-05, + "loss": 0.0584, "step": 38795 }, { "epoch": 1.81, - "learning_rate": 1.645586235994562e-05, - "loss": 0.0575, + "learning_rate": 2.6461392039692325e-05, + "loss": 0.1093, "step": 38800 }, { "epoch": 1.81, - "learning_rate": 1.645539355867048e-05, - "loss": 0.1003, + "learning_rate": 2.6460923969856305e-05, + "loss": 0.0997, "step": 38805 }, { "epoch": 1.81, - "learning_rate": 1.645492475739534e-05, - "loss": 0.1671, + "learning_rate": 2.646045590002028e-05, + "loss": 0.0793, "step": 38810 }, { "epoch": 1.81, - "learning_rate": 1.64544559561202e-05, - "loss": 0.1108, + "learning_rate": 2.6459987830184264e-05, + "loss": 0.1457, "step": 38815 }, { "epoch": 1.81, - "learning_rate": 1.6453987154845063e-05, - "loss": 0.1881, + "learning_rate": 2.6459519760348244e-05, + "loss": 0.1768, "step": 38820 }, { "epoch": 1.81, - "learning_rate": 1.6453518353569923e-05, - "loss": 0.2588, + "learning_rate": 2.6459051690512224e-05, + "loss": 0.2911, "step": 38825 }, { "epoch": 1.81, - "learning_rate": 1.6453049552294783e-05, - "loss": 0.2032, + "learning_rate": 2.6458583620676204e-05, + "loss": 0.2443, "step": 38830 }, { "epoch": 1.81, - "learning_rate": 1.6452580751019646e-05, - "loss": 0.2829, + "learning_rate": 2.6458115550840187e-05, + "loss": 0.3313, "step": 38835 }, { "epoch": 1.81, - "learning_rate": 1.6452111949744506e-05, - "loss": 0.0422, + "learning_rate": 2.6457647481004167e-05, + "loss": 0.0332, "step": 38840 }, { "epoch": 1.81, - "learning_rate": 1.6451643148469366e-05, - "loss": 0.0484, + "learning_rate": 2.6457179411168147e-05, + "loss": 0.1396, "step": 38845 }, { "epoch": 1.81, - "learning_rate": 1.6451174347194226e-05, - "loss": 0.048, + "learning_rate": 2.6456711341332127e-05, + "loss": 0.0749, "step": 38850 }, { "epoch": 1.81, - "learning_rate": 1.6450705545919086e-05, - "loss": 0.1151, + "learning_rate": 2.645624327149611e-05, + "loss": 0.1388, "step": 38855 }, { "epoch": 1.81, - "learning_rate": 1.6450236744643946e-05, - "loss": 0.126, + "learning_rate": 2.645577520166009e-05, + "loss": 0.0646, "step": 38860 }, { "epoch": 1.81, - "learning_rate": 1.6449767943368806e-05, - "loss": 0.1759, + "learning_rate": 2.645530713182407e-05, + "loss": 0.1281, "step": 38865 }, { "epoch": 1.81, - "learning_rate": 1.644929914209367e-05, - "loss": 0.1153, + "learning_rate": 2.6454839061988053e-05, + "loss": 0.1946, "step": 38870 }, { "epoch": 1.81, - "learning_rate": 1.644883034081853e-05, - "loss": 0.2098, + "learning_rate": 2.645437099215203e-05, + "loss": 0.2856, "step": 38875 }, { "epoch": 1.81, - "learning_rate": 1.644836153954339e-05, - "loss": 0.3271, + "learning_rate": 2.645390292231601e-05, + "loss": 0.2936, "step": 38880 }, { "epoch": 1.81, - "learning_rate": 1.644789273826825e-05, - "loss": 0.3566, + "learning_rate": 2.645343485247999e-05, + "loss": 0.2996, "step": 38885 }, { "epoch": 1.81, - "learning_rate": 1.644742393699311e-05, - "loss": 0.0361, + "learning_rate": 2.6452966782643972e-05, + "loss": 0.0434, "step": 38890 }, { "epoch": 1.81, - "learning_rate": 1.644695513571797e-05, - "loss": 0.0997, + "learning_rate": 2.645249871280795e-05, + "loss": 0.0587, "step": 38895 }, { "epoch": 1.82, - "learning_rate": 1.6446486334442832e-05, - "loss": 0.0565, + "learning_rate": 2.645203064297193e-05, + "loss": 0.0594, "step": 38900 }, { "epoch": 1.82, - "learning_rate": 1.6446017533167692e-05, - "loss": 0.1225, + "learning_rate": 2.645156257313591e-05, + "loss": 0.0412, "step": 38905 }, { "epoch": 1.82, - "learning_rate": 1.6445548731892552e-05, - "loss": 0.0687, + "learning_rate": 2.6451094503299895e-05, + "loss": 0.08, "step": 38910 }, { "epoch": 1.82, - "learning_rate": 1.6445079930617415e-05, - "loss": 0.151, + "learning_rate": 2.6450626433463874e-05, + "loss": 0.1877, "step": 38915 }, { "epoch": 1.82, - "learning_rate": 1.6444611129342275e-05, - "loss": 0.1709, + "learning_rate": 2.6450158363627854e-05, + "loss": 0.1803, "step": 38920 }, { "epoch": 1.82, - "learning_rate": 1.6444142328067135e-05, - "loss": 0.1887, + "learning_rate": 2.6449690293791834e-05, + "loss": 0.1956, "step": 38925 }, { "epoch": 1.82, - "learning_rate": 1.6443673526791995e-05, - "loss": 0.2965, + "learning_rate": 2.6449222223955817e-05, + "loss": 0.1952, "step": 38930 }, { "epoch": 1.82, - "learning_rate": 1.6443204725516855e-05, - "loss": 0.2268, + "learning_rate": 2.6448754154119794e-05, + "loss": 0.2757, "step": 38935 }, { "epoch": 1.82, - "learning_rate": 1.6442735924241715e-05, - "loss": 0.036, + "learning_rate": 2.6448286084283773e-05, + "loss": 0.0558, "step": 38940 }, { "epoch": 1.82, - "learning_rate": 1.6442267122966575e-05, - "loss": 0.0206, + "learning_rate": 2.6447818014447757e-05, + "loss": 0.0174, "step": 38945 }, { "epoch": 1.82, - "learning_rate": 1.6441798321691435e-05, - "loss": 0.0413, + "learning_rate": 2.6447349944611736e-05, + "loss": 0.0503, "step": 38950 }, { "epoch": 1.82, - "learning_rate": 1.6441329520416295e-05, - "loss": 0.1107, + "learning_rate": 2.6446881874775716e-05, + "loss": 0.1026, "step": 38955 }, { "epoch": 1.82, - "learning_rate": 1.6440860719141158e-05, - "loss": 0.2031, + "learning_rate": 2.6446413804939696e-05, + "loss": 0.0998, "step": 38960 }, { "epoch": 1.82, - "learning_rate": 1.6440391917866018e-05, - "loss": 0.1474, + "learning_rate": 2.644594573510368e-05, + "loss": 0.1039, "step": 38965 }, { "epoch": 1.82, - "learning_rate": 1.6439923116590878e-05, - "loss": 0.1769, + "learning_rate": 2.644547766526766e-05, + "loss": 0.1503, "step": 38970 }, { "epoch": 1.82, - "learning_rate": 1.6439454315315738e-05, - "loss": 0.1849, + "learning_rate": 2.644500959543164e-05, + "loss": 0.2892, "step": 38975 }, { "epoch": 1.82, - "learning_rate": 1.64389855140406e-05, - "loss": 0.3524, + "learning_rate": 2.644454152559562e-05, + "loss": 0.3429, "step": 38980 }, { "epoch": 1.82, - "learning_rate": 1.643851671276546e-05, - "loss": 0.3994, + "learning_rate": 2.6444073455759602e-05, + "loss": 0.2999, "step": 38985 }, { "epoch": 1.82, - "learning_rate": 1.643804791149032e-05, - "loss": 0.0317, + "learning_rate": 2.6443605385923582e-05, + "loss": 0.075, "step": 38990 }, { "epoch": 1.82, - "learning_rate": 1.643757911021518e-05, - "loss": 0.0741, + "learning_rate": 2.644313731608756e-05, + "loss": 0.0762, "step": 38995 }, { "epoch": 1.82, - "learning_rate": 1.643711030894004e-05, - "loss": 0.0658, + "learning_rate": 2.644266924625154e-05, + "loss": 0.0375, "step": 39000 }, { "epoch": 1.82, - "learning_rate": 1.6436641507664904e-05, - "loss": 0.1135, + "learning_rate": 2.644220117641552e-05, + "loss": 0.0747, "step": 39005 }, { "epoch": 1.82, - "learning_rate": 1.6436172706389764e-05, - "loss": 0.0792, + "learning_rate": 2.64417331065795e-05, + "loss": 0.1126, "step": 39010 }, { "epoch": 1.82, - "learning_rate": 1.6435703905114624e-05, - "loss": 0.0805, + "learning_rate": 2.644126503674348e-05, + "loss": 0.1672, "step": 39015 }, { "epoch": 1.82, - "learning_rate": 1.6435235103839484e-05, - "loss": 0.1, + "learning_rate": 2.6440796966907464e-05, + "loss": 0.093, "step": 39020 }, { "epoch": 1.82, - "learning_rate": 1.6434766302564344e-05, - "loss": 0.1059, + "learning_rate": 2.6440328897071444e-05, + "loss": 0.2025, "step": 39025 }, { "epoch": 1.82, - "learning_rate": 1.6434297501289204e-05, - "loss": 0.2548, + "learning_rate": 2.6439860827235424e-05, + "loss": 0.3027, "step": 39030 }, { "epoch": 1.82, - "learning_rate": 1.6433828700014064e-05, - "loss": 0.2284, + "learning_rate": 2.6439392757399404e-05, + "loss": 0.1986, "step": 39035 }, { "epoch": 1.82, - "learning_rate": 1.6433359898738924e-05, - "loss": 0.0653, + "learning_rate": 2.6438924687563387e-05, + "loss": 0.0359, "step": 39040 }, { "epoch": 1.82, - "learning_rate": 1.6432891097463787e-05, - "loss": 0.0668, + "learning_rate": 2.6438456617727367e-05, + "loss": 0.0378, "step": 39045 }, { "epoch": 1.82, - "learning_rate": 1.6432422296188647e-05, - "loss": 0.0843, + "learning_rate": 2.6437988547891346e-05, + "loss": 0.1124, "step": 39050 }, { "epoch": 1.82, - "learning_rate": 1.6431953494913507e-05, - "loss": 0.0682, + "learning_rate": 2.643752047805533e-05, + "loss": 0.1104, "step": 39055 }, { "epoch": 1.82, - "learning_rate": 1.643148469363837e-05, - "loss": 0.1166, + "learning_rate": 2.643705240821931e-05, + "loss": 0.1282, "step": 39060 }, { "epoch": 1.82, - "learning_rate": 1.643101589236323e-05, - "loss": 0.1199, + "learning_rate": 2.6436584338383286e-05, + "loss": 0.166, "step": 39065 }, { "epoch": 1.82, - "learning_rate": 1.643054709108809e-05, - "loss": 0.1583, + "learning_rate": 2.6436116268547266e-05, + "loss": 0.2484, "step": 39070 }, { "epoch": 1.82, - "learning_rate": 1.643007828981295e-05, - "loss": 0.1735, + "learning_rate": 2.643564819871125e-05, + "loss": 0.1008, "step": 39075 }, { "epoch": 1.82, - "learning_rate": 1.642960948853781e-05, - "loss": 0.2407, + "learning_rate": 2.643518012887523e-05, + "loss": 0.3377, "step": 39080 }, { "epoch": 1.82, - "learning_rate": 1.642914068726267e-05, - "loss": 0.2047, + "learning_rate": 2.643471205903921e-05, + "loss": 0.2496, "step": 39085 }, { "epoch": 1.82, - "learning_rate": 1.642867188598753e-05, - "loss": 0.0946, + "learning_rate": 2.6434243989203188e-05, + "loss": 0.0415, "step": 39090 }, { "epoch": 1.82, - "learning_rate": 1.642820308471239e-05, - "loss": 0.1094, + "learning_rate": 2.643377591936717e-05, + "loss": 0.0437, "step": 39095 }, { "epoch": 1.82, - "learning_rate": 1.6427734283437253e-05, - "loss": 0.092, + "learning_rate": 2.643330784953115e-05, + "loss": 0.0635, "step": 39100 }, { "epoch": 1.82, - "learning_rate": 1.6427265482162113e-05, - "loss": 0.069, + "learning_rate": 2.643283977969513e-05, + "loss": 0.0702, "step": 39105 }, { "epoch": 1.82, - "learning_rate": 1.6426796680886973e-05, - "loss": 0.0871, + "learning_rate": 2.643237170985911e-05, + "loss": 0.152, "step": 39110 }, { "epoch": 1.83, - "learning_rate": 1.6426327879611833e-05, - "loss": 0.1514, + "learning_rate": 2.6431903640023094e-05, + "loss": 0.1106, "step": 39115 }, { "epoch": 1.83, - "learning_rate": 1.6425859078336693e-05, - "loss": 0.2458, + "learning_rate": 2.6431435570187074e-05, + "loss": 0.1144, "step": 39120 }, { "epoch": 1.83, - "learning_rate": 1.6425390277061556e-05, - "loss": 0.2014, + "learning_rate": 2.643096750035105e-05, + "loss": 0.1665, "step": 39125 }, { "epoch": 1.83, - "learning_rate": 1.6424921475786416e-05, - "loss": 0.2175, + "learning_rate": 2.6430499430515034e-05, + "loss": 0.4198, "step": 39130 }, { "epoch": 1.83, - "learning_rate": 1.6424452674511276e-05, - "loss": 0.2471, + "learning_rate": 2.6430031360679013e-05, + "loss": 0.1532, "step": 39135 }, { "epoch": 1.83, - "learning_rate": 1.6423983873236136e-05, - "loss": 0.022, + "learning_rate": 2.6429563290842993e-05, + "loss": 0.0502, "step": 39140 }, { "epoch": 1.83, - "learning_rate": 1.6423515071961e-05, - "loss": 0.0456, + "learning_rate": 2.6429095221006973e-05, + "loss": 0.042, "step": 39145 }, { "epoch": 1.83, - "learning_rate": 1.642304627068586e-05, - "loss": 0.1, + "learning_rate": 2.6428627151170956e-05, + "loss": 0.0995, "step": 39150 }, { "epoch": 1.83, - "learning_rate": 1.642257746941072e-05, - "loss": 0.0888, + "learning_rate": 2.6428159081334936e-05, + "loss": 0.0965, "step": 39155 }, { "epoch": 1.83, - "learning_rate": 1.642210866813558e-05, - "loss": 0.0947, + "learning_rate": 2.6427691011498916e-05, + "loss": 0.1431, "step": 39160 }, { "epoch": 1.83, - "learning_rate": 1.642163986686044e-05, - "loss": 0.141, + "learning_rate": 2.6427222941662896e-05, + "loss": 0.2606, "step": 39165 }, { "epoch": 1.83, - "learning_rate": 1.64211710655853e-05, - "loss": 0.1317, + "learning_rate": 2.642675487182688e-05, + "loss": 0.2053, "step": 39170 }, { "epoch": 1.83, - "learning_rate": 1.642070226431016e-05, - "loss": 0.1457, + "learning_rate": 2.642628680199086e-05, + "loss": 0.2393, "step": 39175 }, { "epoch": 1.83, - "learning_rate": 1.642023346303502e-05, - "loss": 0.2767, + "learning_rate": 2.642581873215484e-05, + "loss": 0.3359, "step": 39180 }, { "epoch": 1.83, - "learning_rate": 1.6419764661759882e-05, - "loss": 0.2418, + "learning_rate": 2.6425350662318822e-05, + "loss": 0.2666, "step": 39185 }, { "epoch": 1.83, - "learning_rate": 1.6419295860484742e-05, - "loss": 0.0442, + "learning_rate": 2.6424882592482798e-05, + "loss": 0.0378, "step": 39190 }, { "epoch": 1.83, - "learning_rate": 1.6418827059209602e-05, - "loss": 0.1675, + "learning_rate": 2.6424414522646778e-05, + "loss": 0.125, "step": 39195 }, { "epoch": 1.83, - "learning_rate": 1.6418358257934465e-05, - "loss": 0.0598, + "learning_rate": 2.6423946452810758e-05, + "loss": 0.0993, "step": 39200 }, { "epoch": 1.83, - "learning_rate": 1.6417889456659325e-05, - "loss": 0.0661, + "learning_rate": 2.642347838297474e-05, + "loss": 0.0649, "step": 39205 }, { "epoch": 1.83, - "learning_rate": 1.6417420655384185e-05, - "loss": 0.1537, + "learning_rate": 2.642301031313872e-05, + "loss": 0.1498, "step": 39210 }, { "epoch": 1.83, - "learning_rate": 1.6416951854109045e-05, - "loss": 0.0756, + "learning_rate": 2.64225422433027e-05, + "loss": 0.1484, "step": 39215 }, { "epoch": 1.83, - "learning_rate": 1.6416483052833905e-05, - "loss": 0.1681, + "learning_rate": 2.642207417346668e-05, + "loss": 0.1065, "step": 39220 }, { "epoch": 1.83, - "learning_rate": 1.6416014251558765e-05, - "loss": 0.2648, + "learning_rate": 2.6421606103630664e-05, + "loss": 0.1668, "step": 39225 }, { "epoch": 1.83, - "learning_rate": 1.6415545450283625e-05, - "loss": 0.3191, + "learning_rate": 2.6421138033794644e-05, + "loss": 0.4266, "step": 39230 }, { "epoch": 1.83, - "learning_rate": 1.6415076649008485e-05, - "loss": 0.199, + "learning_rate": 2.6420669963958623e-05, + "loss": 0.218, "step": 39235 }, { "epoch": 1.83, - "learning_rate": 1.6414607847733348e-05, - "loss": 0.0329, + "learning_rate": 2.6420201894122607e-05, + "loss": 0.0613, "step": 39240 }, { "epoch": 1.83, - "learning_rate": 1.6414139046458208e-05, - "loss": 0.1063, + "learning_rate": 2.6419733824286586e-05, + "loss": 0.0745, "step": 39245 }, { "epoch": 1.83, - "learning_rate": 1.6413670245183068e-05, - "loss": 0.0787, + "learning_rate": 2.6419265754450563e-05, + "loss": 0.0922, "step": 39250 }, { "epoch": 1.83, - "learning_rate": 1.6413201443907928e-05, - "loss": 0.0685, + "learning_rate": 2.6418797684614543e-05, + "loss": 0.0721, "step": 39255 }, { "epoch": 1.83, - "learning_rate": 1.6412732642632788e-05, - "loss": 0.1426, + "learning_rate": 2.6418329614778526e-05, + "loss": 0.1169, "step": 39260 }, { "epoch": 1.83, - "learning_rate": 1.641226384135765e-05, - "loss": 0.1309, + "learning_rate": 2.6417861544942506e-05, + "loss": 0.0791, "step": 39265 }, { "epoch": 1.83, - "learning_rate": 1.641179504008251e-05, - "loss": 0.1751, + "learning_rate": 2.6417393475106485e-05, + "loss": 0.1684, "step": 39270 }, { "epoch": 1.83, - "learning_rate": 1.641132623880737e-05, - "loss": 0.2695, + "learning_rate": 2.6416925405270465e-05, + "loss": 0.216, "step": 39275 }, { "epoch": 1.83, - "learning_rate": 1.641085743753223e-05, - "loss": 0.1881, + "learning_rate": 2.641645733543445e-05, + "loss": 0.1538, "step": 39280 }, { "epoch": 1.83, - "learning_rate": 1.6410388636257094e-05, - "loss": 0.2784, + "learning_rate": 2.6415989265598428e-05, + "loss": 0.2589, "step": 39285 }, { "epoch": 1.83, - "learning_rate": 1.6409919834981954e-05, - "loss": 0.044, + "learning_rate": 2.6415521195762408e-05, + "loss": 0.0595, "step": 39290 }, { "epoch": 1.83, - "learning_rate": 1.6409451033706814e-05, - "loss": 0.078, + "learning_rate": 2.641505312592639e-05, + "loss": 0.0859, "step": 39295 }, { "epoch": 1.83, - "learning_rate": 1.6408982232431674e-05, - "loss": 0.0706, + "learning_rate": 2.641458505609037e-05, + "loss": 0.0406, "step": 39300 }, { "epoch": 1.83, - "learning_rate": 1.6408513431156534e-05, - "loss": 0.0571, + "learning_rate": 2.641411698625435e-05, + "loss": 0.0789, "step": 39305 }, { "epoch": 1.83, - "learning_rate": 1.6408044629881394e-05, - "loss": 0.1244, + "learning_rate": 2.641364891641833e-05, + "loss": 0.0925, "step": 39310 }, { "epoch": 1.83, - "learning_rate": 1.6407575828606254e-05, - "loss": 0.092, + "learning_rate": 2.641318084658231e-05, + "loss": 0.0874, "step": 39315 }, { "epoch": 1.83, - "learning_rate": 1.6407107027331114e-05, - "loss": 0.1246, + "learning_rate": 2.641271277674629e-05, + "loss": 0.1556, "step": 39320 }, { "epoch": 1.83, - "learning_rate": 1.6406638226055974e-05, - "loss": 0.15, + "learning_rate": 2.641224470691027e-05, + "loss": 0.2507, "step": 39325 }, { "epoch": 1.84, - "learning_rate": 1.6406169424780837e-05, - "loss": 0.2858, + "learning_rate": 2.641177663707425e-05, + "loss": 0.37, "step": 39330 }, { "epoch": 1.84, - "learning_rate": 1.6405700623505697e-05, - "loss": 0.2405, + "learning_rate": 2.6411308567238233e-05, + "loss": 0.1776, "step": 39335 }, { "epoch": 1.84, - "learning_rate": 1.6405231822230557e-05, - "loss": 0.016, + "learning_rate": 2.6410840497402213e-05, + "loss": 0.028, "step": 39340 }, { "epoch": 1.84, - "learning_rate": 1.640476302095542e-05, - "loss": 0.0616, + "learning_rate": 2.6410372427566193e-05, + "loss": 0.069, "step": 39345 }, { "epoch": 1.84, - "learning_rate": 1.640429421968028e-05, - "loss": 0.0454, + "learning_rate": 2.6409904357730173e-05, + "loss": 0.032, "step": 39350 }, { "epoch": 1.84, - "learning_rate": 1.640382541840514e-05, - "loss": 0.0602, + "learning_rate": 2.6409436287894156e-05, + "loss": 0.0548, "step": 39355 }, { "epoch": 1.84, - "learning_rate": 1.640335661713e-05, - "loss": 0.1301, + "learning_rate": 2.6408968218058136e-05, + "loss": 0.1884, "step": 39360 }, { "epoch": 1.84, - "learning_rate": 1.640288781585486e-05, - "loss": 0.1133, + "learning_rate": 2.6408500148222116e-05, + "loss": 0.1181, "step": 39365 }, { "epoch": 1.84, - "learning_rate": 1.640241901457972e-05, - "loss": 0.1934, + "learning_rate": 2.64080320783861e-05, + "loss": 0.182, "step": 39370 }, { "epoch": 1.84, - "learning_rate": 1.640195021330458e-05, - "loss": 0.1995, + "learning_rate": 2.640756400855008e-05, + "loss": 0.1324, "step": 39375 }, { "epoch": 1.84, - "learning_rate": 1.6401481412029443e-05, - "loss": 0.2625, + "learning_rate": 2.6407095938714055e-05, + "loss": 0.2175, "step": 39380 }, { "epoch": 1.84, - "learning_rate": 1.6401012610754303e-05, - "loss": 0.2129, + "learning_rate": 2.6406627868878035e-05, + "loss": 0.2448, "step": 39385 }, { "epoch": 1.84, - "learning_rate": 1.6400543809479163e-05, - "loss": 0.0542, + "learning_rate": 2.6406159799042018e-05, + "loss": 0.0444, "step": 39390 }, { "epoch": 1.84, - "learning_rate": 1.6400075008204023e-05, - "loss": 0.0899, + "learning_rate": 2.6405691729205998e-05, + "loss": 0.0595, "step": 39395 }, { "epoch": 1.84, - "learning_rate": 1.6399606206928883e-05, - "loss": 0.1162, + "learning_rate": 2.6405223659369978e-05, + "loss": 0.0644, "step": 39400 }, { "epoch": 1.84, - "learning_rate": 1.6399137405653743e-05, - "loss": 0.057, + "learning_rate": 2.6404755589533957e-05, + "loss": 0.0736, "step": 39405 }, { "epoch": 1.84, - "learning_rate": 1.6398668604378606e-05, - "loss": 0.1113, + "learning_rate": 2.640428751969794e-05, + "loss": 0.0735, "step": 39410 }, { "epoch": 1.84, - "learning_rate": 1.6398199803103466e-05, - "loss": 0.1153, + "learning_rate": 2.640381944986192e-05, + "loss": 0.0852, "step": 39415 }, { "epoch": 1.84, - "learning_rate": 1.6397731001828326e-05, - "loss": 0.1444, + "learning_rate": 2.64033513800259e-05, + "loss": 0.1139, "step": 39420 }, { "epoch": 1.84, - "learning_rate": 1.639726220055319e-05, - "loss": 0.232, + "learning_rate": 2.6402883310189884e-05, + "loss": 0.1785, "step": 39425 }, { "epoch": 1.84, - "learning_rate": 1.639679339927805e-05, - "loss": 0.3296, + "learning_rate": 2.6402415240353863e-05, + "loss": 0.268, "step": 39430 }, { "epoch": 1.84, - "learning_rate": 1.639632459800291e-05, - "loss": 0.3146, + "learning_rate": 2.6401947170517843e-05, + "loss": 0.1806, "step": 39435 }, { "epoch": 1.84, - "learning_rate": 1.639585579672777e-05, - "loss": 0.0637, + "learning_rate": 2.640147910068182e-05, + "loss": 0.03, "step": 39440 }, { "epoch": 1.84, - "learning_rate": 1.639538699545263e-05, - "loss": 0.074, + "learning_rate": 2.6401011030845803e-05, + "loss": 0.0586, "step": 39445 }, { "epoch": 1.84, - "learning_rate": 1.639491819417749e-05, - "loss": 0.0524, + "learning_rate": 2.6400542961009783e-05, + "loss": 0.0785, "step": 39450 }, { "epoch": 1.84, - "learning_rate": 1.639444939290235e-05, - "loss": 0.0591, + "learning_rate": 2.6400074891173762e-05, + "loss": 0.0861, "step": 39455 }, { "epoch": 1.84, - "learning_rate": 1.639398059162721e-05, - "loss": 0.1292, + "learning_rate": 2.6399606821337742e-05, + "loss": 0.1614, "step": 39460 }, { "epoch": 1.84, - "learning_rate": 1.639351179035207e-05, - "loss": 0.1521, + "learning_rate": 2.6399138751501725e-05, + "loss": 0.1647, "step": 39465 }, { "epoch": 1.84, - "learning_rate": 1.6393042989076932e-05, - "loss": 0.1973, + "learning_rate": 2.6398670681665705e-05, + "loss": 0.1461, "step": 39470 }, { "epoch": 1.84, - "learning_rate": 1.6392574187801792e-05, - "loss": 0.1846, + "learning_rate": 2.6398202611829685e-05, + "loss": 0.1881, "step": 39475 }, { "epoch": 1.84, - "learning_rate": 1.6392105386526652e-05, - "loss": 0.5535, + "learning_rate": 2.6397734541993668e-05, + "loss": 0.3431, "step": 39480 }, { "epoch": 1.84, - "learning_rate": 1.6391636585251512e-05, - "loss": 0.3364, + "learning_rate": 2.6397266472157648e-05, + "loss": 0.2543, "step": 39485 }, { "epoch": 1.84, - "learning_rate": 1.6391167783976375e-05, - "loss": 0.0231, + "learning_rate": 2.6396798402321628e-05, + "loss": 0.05, "step": 39490 }, { "epoch": 1.84, - "learning_rate": 1.6390698982701235e-05, - "loss": 0.0411, + "learning_rate": 2.6396330332485608e-05, + "loss": 0.0729, "step": 39495 }, { "epoch": 1.84, - "learning_rate": 1.6390230181426095e-05, - "loss": 0.0506, + "learning_rate": 2.639586226264959e-05, + "loss": 0.0622, "step": 39500 }, { "epoch": 1.84, - "learning_rate": 1.6389761380150955e-05, - "loss": 0.3548, + "learning_rate": 2.6395394192813567e-05, + "loss": 0.1133, "step": 39505 }, { "epoch": 1.84, - "learning_rate": 1.6389292578875815e-05, - "loss": 0.0719, + "learning_rate": 2.6394926122977547e-05, + "loss": 0.1117, "step": 39510 }, { "epoch": 1.84, - "learning_rate": 1.6388823777600675e-05, - "loss": 0.1005, + "learning_rate": 2.6394458053141527e-05, + "loss": 0.0936, "step": 39515 }, { "epoch": 1.84, - "learning_rate": 1.6388354976325538e-05, - "loss": 0.1035, + "learning_rate": 2.639398998330551e-05, + "loss": 0.1008, "step": 39520 }, { "epoch": 1.84, - "learning_rate": 1.6387886175050398e-05, - "loss": 0.2762, + "learning_rate": 2.639352191346949e-05, + "loss": 0.212, "step": 39525 }, { "epoch": 1.84, - "learning_rate": 1.6387417373775258e-05, - "loss": 0.2817, + "learning_rate": 2.639305384363347e-05, + "loss": 0.4356, "step": 39530 }, { "epoch": 1.84, - "learning_rate": 1.6386948572500118e-05, - "loss": 0.2149, + "learning_rate": 2.639258577379745e-05, + "loss": 0.2185, "step": 39535 }, { "epoch": 1.84, - "learning_rate": 1.6386479771224978e-05, - "loss": 0.0713, + "learning_rate": 2.6392117703961433e-05, + "loss": 0.0723, "step": 39540 }, { "epoch": 1.85, - "learning_rate": 1.6386010969949838e-05, - "loss": 0.0805, + "learning_rate": 2.6391649634125413e-05, + "loss": 0.0484, "step": 39545 }, { "epoch": 1.85, - "learning_rate": 1.63855421686747e-05, - "loss": 0.0525, + "learning_rate": 2.6391181564289392e-05, + "loss": 0.0843, "step": 39550 }, { "epoch": 1.85, - "learning_rate": 1.638507336739956e-05, - "loss": 0.0531, + "learning_rate": 2.6390713494453376e-05, + "loss": 0.0646, "step": 39555 }, { "epoch": 1.85, - "learning_rate": 1.638460456612442e-05, - "loss": 0.1241, + "learning_rate": 2.6390245424617356e-05, + "loss": 0.0454, "step": 39560 }, { "epoch": 1.85, - "learning_rate": 1.638413576484928e-05, - "loss": 0.2059, + "learning_rate": 2.6389777354781335e-05, + "loss": 0.1635, "step": 39565 }, { "epoch": 1.85, - "learning_rate": 1.6383666963574144e-05, - "loss": 0.2234, + "learning_rate": 2.6389309284945312e-05, + "loss": 0.1328, "step": 39570 }, { "epoch": 1.85, - "learning_rate": 1.6383198162299004e-05, - "loss": 0.1771, + "learning_rate": 2.6388841215109295e-05, + "loss": 0.1878, "step": 39575 }, { "epoch": 1.85, - "learning_rate": 1.6382729361023864e-05, - "loss": 0.3261, + "learning_rate": 2.6388373145273275e-05, + "loss": 0.223, "step": 39580 }, { "epoch": 1.85, - "learning_rate": 1.6382260559748724e-05, - "loss": 0.2079, + "learning_rate": 2.6387905075437255e-05, + "loss": 0.1876, "step": 39585 }, { "epoch": 1.85, - "learning_rate": 1.6381791758473584e-05, - "loss": 0.0562, + "learning_rate": 2.6387437005601234e-05, + "loss": 0.0429, "step": 39590 }, { "epoch": 1.85, - "learning_rate": 1.6381322957198444e-05, - "loss": 0.062, + "learning_rate": 2.6386968935765218e-05, + "loss": 0.0444, "step": 39595 }, { "epoch": 1.85, - "learning_rate": 1.6380854155923304e-05, - "loss": 0.11, + "learning_rate": 2.6386500865929197e-05, + "loss": 0.0619, "step": 39600 }, { "epoch": 1.85, - "learning_rate": 1.6380385354648164e-05, - "loss": 0.0713, + "learning_rate": 2.6386032796093177e-05, + "loss": 0.1302, "step": 39605 }, { "epoch": 1.85, - "learning_rate": 1.6379916553373027e-05, - "loss": 0.1697, + "learning_rate": 2.638556472625716e-05, + "loss": 0.1599, "step": 39610 }, { "epoch": 1.85, - "learning_rate": 1.6379447752097887e-05, - "loss": 0.0959, + "learning_rate": 2.638509665642114e-05, + "loss": 0.0831, "step": 39615 }, { "epoch": 1.85, - "learning_rate": 1.6378978950822747e-05, - "loss": 0.1684, + "learning_rate": 2.638462858658512e-05, + "loss": 0.1128, "step": 39620 }, { "epoch": 1.85, - "learning_rate": 1.6378510149547607e-05, - "loss": 0.1653, + "learning_rate": 2.63841605167491e-05, + "loss": 0.1849, "step": 39625 }, { "epoch": 1.85, - "learning_rate": 1.637804134827247e-05, - "loss": 0.3247, + "learning_rate": 2.638369244691308e-05, + "loss": 0.4699, "step": 39630 }, { "epoch": 1.85, - "learning_rate": 1.637757254699733e-05, - "loss": 0.2619, + "learning_rate": 2.638322437707706e-05, + "loss": 0.2351, "step": 39635 }, { "epoch": 1.85, - "learning_rate": 1.637710374572219e-05, - "loss": 0.0579, + "learning_rate": 2.638275630724104e-05, + "loss": 0.0361, "step": 39640 }, { "epoch": 1.85, - "learning_rate": 1.637663494444705e-05, - "loss": 0.0433, + "learning_rate": 2.638228823740502e-05, + "loss": 0.0308, "step": 39645 }, { "epoch": 1.85, - "learning_rate": 1.637616614317191e-05, - "loss": 0.046, + "learning_rate": 2.6381820167569002e-05, + "loss": 0.0579, "step": 39650 }, { "epoch": 1.85, - "learning_rate": 1.6375697341896773e-05, - "loss": 0.1144, + "learning_rate": 2.6381352097732982e-05, + "loss": 0.0801, "step": 39655 }, { "epoch": 1.85, - "learning_rate": 1.6375228540621633e-05, - "loss": 0.0582, + "learning_rate": 2.6380884027896962e-05, + "loss": 0.1749, "step": 39660 }, { "epoch": 1.85, - "learning_rate": 1.6374759739346493e-05, - "loss": 0.0816, + "learning_rate": 2.6380415958060945e-05, + "loss": 0.1459, "step": 39665 }, { "epoch": 1.85, - "learning_rate": 1.6374290938071353e-05, - "loss": 0.306, + "learning_rate": 2.6379947888224925e-05, + "loss": 0.1231, "step": 39670 }, { "epoch": 1.85, - "learning_rate": 1.6373822136796213e-05, - "loss": 0.1379, + "learning_rate": 2.6379479818388905e-05, + "loss": 0.1966, "step": 39675 }, { "epoch": 1.85, - "learning_rate": 1.6373353335521073e-05, - "loss": 0.2536, + "learning_rate": 2.6379011748552885e-05, + "loss": 0.2489, "step": 39680 }, { "epoch": 1.85, - "learning_rate": 1.6372884534245933e-05, - "loss": 0.3197, + "learning_rate": 2.6378543678716868e-05, + "loss": 0.3014, "step": 39685 }, { "epoch": 1.85, - "learning_rate": 1.6372415732970793e-05, - "loss": 0.0456, + "learning_rate": 2.6378075608880848e-05, + "loss": 0.0527, "step": 39690 }, { "epoch": 1.85, - "learning_rate": 1.6371946931695656e-05, - "loss": 0.0858, + "learning_rate": 2.6377607539044824e-05, + "loss": 0.0413, "step": 39695 }, { "epoch": 1.85, - "learning_rate": 1.6371478130420516e-05, - "loss": 0.0263, + "learning_rate": 2.6377139469208804e-05, + "loss": 0.078, "step": 39700 }, { "epoch": 1.85, - "learning_rate": 1.6371009329145376e-05, - "loss": 0.0828, + "learning_rate": 2.6376671399372787e-05, + "loss": 0.0978, "step": 39705 }, { "epoch": 1.85, - "learning_rate": 1.637054052787024e-05, - "loss": 0.0959, + "learning_rate": 2.6376203329536767e-05, + "loss": 0.1308, "step": 39710 }, { "epoch": 1.85, - "learning_rate": 1.63700717265951e-05, - "loss": 0.1384, + "learning_rate": 2.6375735259700747e-05, + "loss": 0.1082, "step": 39715 }, { "epoch": 1.85, - "learning_rate": 1.636960292531996e-05, - "loss": 0.1486, + "learning_rate": 2.6375267189864727e-05, + "loss": 0.1716, "step": 39720 }, { "epoch": 1.85, - "learning_rate": 1.636913412404482e-05, - "loss": 0.207, + "learning_rate": 2.637479912002871e-05, + "loss": 0.2019, "step": 39725 }, { "epoch": 1.85, - "learning_rate": 1.636866532276968e-05, - "loss": 0.3915, + "learning_rate": 2.637433105019269e-05, + "loss": 0.3588, "step": 39730 }, { "epoch": 1.85, - "learning_rate": 1.636819652149454e-05, - "loss": 0.2934, + "learning_rate": 2.637386298035667e-05, + "loss": 0.2441, "step": 39735 }, { "epoch": 1.85, - "learning_rate": 1.63677277202194e-05, - "loss": 0.0228, + "learning_rate": 2.6373394910520653e-05, + "loss": 0.0206, "step": 39740 }, { "epoch": 1.85, - "learning_rate": 1.636725891894426e-05, - "loss": 0.0695, + "learning_rate": 2.6372926840684632e-05, + "loss": 0.1103, "step": 39745 }, { "epoch": 1.85, - "learning_rate": 1.6366790117669122e-05, - "loss": 0.0624, + "learning_rate": 2.6372458770848612e-05, + "loss": 0.0423, "step": 39750 }, { "epoch": 1.86, - "learning_rate": 1.6366321316393982e-05, - "loss": 0.0658, + "learning_rate": 2.6371990701012592e-05, + "loss": 0.0699, "step": 39755 }, { "epoch": 1.86, - "learning_rate": 1.6365852515118842e-05, - "loss": 0.0543, + "learning_rate": 2.6371522631176572e-05, + "loss": 0.1116, "step": 39760 }, { "epoch": 1.86, - "learning_rate": 1.6365383713843702e-05, - "loss": 0.1347, + "learning_rate": 2.6371054561340552e-05, + "loss": 0.1634, "step": 39765 }, { "epoch": 1.86, - "learning_rate": 1.6364914912568562e-05, - "loss": 0.4042, + "learning_rate": 2.637058649150453e-05, + "loss": 0.122, "step": 39770 }, { "epoch": 1.86, - "learning_rate": 1.6364446111293425e-05, - "loss": 0.2546, + "learning_rate": 2.637011842166851e-05, + "loss": 0.1931, "step": 39775 }, { "epoch": 1.86, - "learning_rate": 1.6363977310018285e-05, - "loss": 0.3395, + "learning_rate": 2.6369650351832495e-05, + "loss": 0.3544, "step": 39780 }, { "epoch": 1.86, - "learning_rate": 1.6363508508743145e-05, - "loss": 0.253, + "learning_rate": 2.6369182281996474e-05, + "loss": 0.2152, "step": 39785 }, { "epoch": 1.86, - "learning_rate": 1.6363039707468005e-05, - "loss": 0.0427, + "learning_rate": 2.6368714212160454e-05, + "loss": 0.0587, "step": 39790 }, { "epoch": 1.86, - "learning_rate": 1.636257090619287e-05, - "loss": 0.0368, + "learning_rate": 2.6368246142324437e-05, + "loss": 0.0413, "step": 39795 }, { "epoch": 1.86, - "learning_rate": 1.636210210491773e-05, - "loss": 0.0467, + "learning_rate": 2.6367778072488417e-05, + "loss": 0.0856, "step": 39800 }, { "epoch": 1.86, - "learning_rate": 1.636163330364259e-05, - "loss": 0.1055, + "learning_rate": 2.6367310002652397e-05, + "loss": 0.0619, "step": 39805 }, { "epoch": 1.86, - "learning_rate": 1.636116450236745e-05, - "loss": 0.0447, + "learning_rate": 2.6366841932816377e-05, + "loss": 0.0829, "step": 39810 }, { "epoch": 1.86, - "learning_rate": 1.6360695701092308e-05, - "loss": 0.108, + "learning_rate": 2.636637386298036e-05, + "loss": 0.1344, "step": 39815 }, { "epoch": 1.86, - "learning_rate": 1.6360226899817168e-05, - "loss": 0.1594, + "learning_rate": 2.6365905793144337e-05, + "loss": 0.1833, "step": 39820 }, { "epoch": 1.86, - "learning_rate": 1.6359758098542028e-05, - "loss": 0.294, + "learning_rate": 2.6365437723308316e-05, + "loss": 0.267, "step": 39825 }, { "epoch": 1.86, - "learning_rate": 1.6359289297266888e-05, - "loss": 0.3903, + "learning_rate": 2.6364969653472296e-05, + "loss": 0.2902, "step": 39830 }, { "epoch": 1.86, - "learning_rate": 1.6358820495991748e-05, - "loss": 0.2344, + "learning_rate": 2.636450158363628e-05, + "loss": 0.2734, "step": 39835 }, { "epoch": 1.86, - "learning_rate": 1.635835169471661e-05, - "loss": 0.0489, + "learning_rate": 2.636403351380026e-05, + "loss": 0.0288, "step": 39840 }, { "epoch": 1.86, - "learning_rate": 1.635788289344147e-05, - "loss": 0.0766, + "learning_rate": 2.636356544396424e-05, + "loss": 0.0335, "step": 39845 }, { "epoch": 1.86, - "learning_rate": 1.635741409216633e-05, - "loss": 0.0605, + "learning_rate": 2.6363097374128222e-05, + "loss": 0.0425, "step": 39850 }, { "epoch": 1.86, - "learning_rate": 1.6356945290891195e-05, - "loss": 0.1088, + "learning_rate": 2.6362629304292202e-05, + "loss": 0.0949, "step": 39855 }, { "epoch": 1.86, - "learning_rate": 1.6356476489616054e-05, - "loss": 0.0864, + "learning_rate": 2.6362161234456182e-05, + "loss": 0.1479, "step": 39860 }, { "epoch": 1.86, - "learning_rate": 1.6356007688340914e-05, - "loss": 0.0917, + "learning_rate": 2.636169316462016e-05, + "loss": 0.1293, "step": 39865 }, { "epoch": 1.86, - "learning_rate": 1.6355538887065774e-05, - "loss": 0.2046, + "learning_rate": 2.6361225094784145e-05, + "loss": 0.1361, "step": 39870 }, { "epoch": 1.86, - "learning_rate": 1.6355070085790634e-05, - "loss": 0.2991, + "learning_rate": 2.6360757024948125e-05, + "loss": 0.1473, "step": 39875 }, { "epoch": 1.86, - "learning_rate": 1.6354601284515494e-05, - "loss": 0.2481, + "learning_rate": 2.6360288955112105e-05, + "loss": 0.3929, "step": 39880 }, { "epoch": 1.86, - "learning_rate": 1.6354132483240354e-05, - "loss": 0.2429, + "learning_rate": 2.635982088527608e-05, + "loss": 0.1703, "step": 39885 }, { "epoch": 1.86, - "learning_rate": 1.6353663681965217e-05, - "loss": 0.0508, + "learning_rate": 2.6359352815440064e-05, + "loss": 0.0499, "step": 39890 }, { "epoch": 1.86, - "learning_rate": 1.6353194880690077e-05, - "loss": 0.0608, + "learning_rate": 2.6358884745604044e-05, + "loss": 0.0729, "step": 39895 }, { "epoch": 1.86, - "learning_rate": 1.6352726079414937e-05, - "loss": 0.0733, + "learning_rate": 2.6358416675768024e-05, + "loss": 0.0829, "step": 39900 }, { "epoch": 1.86, - "learning_rate": 1.6352257278139797e-05, - "loss": 0.0919, + "learning_rate": 2.6357948605932004e-05, + "loss": 0.0523, "step": 39905 }, { "epoch": 1.86, - "learning_rate": 1.6351788476864657e-05, - "loss": 0.1093, + "learning_rate": 2.6357480536095987e-05, + "loss": 0.132, "step": 39910 }, { "epoch": 1.86, - "learning_rate": 1.6351319675589517e-05, - "loss": 0.2387, + "learning_rate": 2.6357012466259967e-05, + "loss": 0.0819, "step": 39915 }, { "epoch": 1.86, - "learning_rate": 1.635085087431438e-05, - "loss": 0.1955, + "learning_rate": 2.6356544396423946e-05, + "loss": 0.2226, "step": 39920 }, { "epoch": 1.86, - "learning_rate": 1.635038207303924e-05, - "loss": 0.1817, + "learning_rate": 2.635607632658793e-05, + "loss": 0.138, "step": 39925 }, { "epoch": 1.86, - "learning_rate": 1.63499132717641e-05, - "loss": 0.3849, + "learning_rate": 2.635560825675191e-05, + "loss": 0.2037, "step": 39930 }, { "epoch": 1.86, - "learning_rate": 1.6349444470488964e-05, - "loss": 0.2834, + "learning_rate": 2.635514018691589e-05, + "loss": 0.1822, "step": 39935 }, { "epoch": 1.86, - "learning_rate": 1.6348975669213824e-05, - "loss": 0.0689, + "learning_rate": 2.635467211707987e-05, + "loss": 0.0095, "step": 39940 }, { "epoch": 1.86, - "learning_rate": 1.6348506867938683e-05, - "loss": 0.0568, + "learning_rate": 2.635420404724385e-05, + "loss": 0.0325, "step": 39945 }, { "epoch": 1.86, - "learning_rate": 1.6348038066663543e-05, - "loss": 0.0516, + "learning_rate": 2.635373597740783e-05, + "loss": 0.0915, "step": 39950 }, { "epoch": 1.86, - "learning_rate": 1.6347569265388403e-05, - "loss": 0.0815, + "learning_rate": 2.635326790757181e-05, + "loss": 0.0729, "step": 39955 }, { "epoch": 1.86, - "learning_rate": 1.6347100464113263e-05, - "loss": 0.1282, + "learning_rate": 2.635279983773579e-05, + "loss": 0.1122, "step": 39960 }, { "epoch": 1.86, - "learning_rate": 1.6346631662838123e-05, - "loss": 0.1942, + "learning_rate": 2.635233176789977e-05, + "loss": 0.1503, "step": 39965 }, { "epoch": 1.87, - "learning_rate": 1.6346162861562983e-05, - "loss": 0.21, + "learning_rate": 2.635186369806375e-05, + "loss": 0.2107, "step": 39970 }, { "epoch": 1.87, - "learning_rate": 1.6345694060287843e-05, - "loss": 0.2174, + "learning_rate": 2.635139562822773e-05, + "loss": 0.1089, "step": 39975 }, { "epoch": 1.87, - "learning_rate": 1.6345225259012706e-05, - "loss": 0.2777, + "learning_rate": 2.6350927558391714e-05, + "loss": 0.3286, "step": 39980 }, { "epoch": 1.87, - "learning_rate": 1.6344756457737566e-05, - "loss": 0.2424, + "learning_rate": 2.6350459488555694e-05, + "loss": 0.2829, "step": 39985 }, { "epoch": 1.87, - "learning_rate": 1.6344287656462426e-05, - "loss": 0.0368, + "learning_rate": 2.6349991418719674e-05, + "loss": 0.0567, "step": 39990 }, { "epoch": 1.87, - "learning_rate": 1.6343818855187286e-05, - "loss": 0.026, + "learning_rate": 2.6349523348883654e-05, + "loss": 0.0397, "step": 39995 }, { "epoch": 1.87, - "learning_rate": 1.634335005391215e-05, - "loss": 0.0642, + "learning_rate": 2.6349055279047637e-05, + "loss": 0.0723, "step": 40000 }, { "epoch": 1.87, - "learning_rate": 1.634288125263701e-05, - "loss": 0.0958, + "learning_rate": 2.6348587209211617e-05, + "loss": 0.1345, "step": 40005 }, { "epoch": 1.87, - "learning_rate": 1.634241245136187e-05, - "loss": 0.1143, + "learning_rate": 2.6348119139375593e-05, + "loss": 0.0691, "step": 40010 }, { "epoch": 1.87, - "learning_rate": 1.634194365008673e-05, - "loss": 0.144, + "learning_rate": 2.6347651069539573e-05, + "loss": 0.1061, "step": 40015 }, { "epoch": 1.87, - "learning_rate": 1.634147484881159e-05, - "loss": 0.1679, + "learning_rate": 2.6347182999703556e-05, + "loss": 0.081, "step": 40020 }, { "epoch": 1.87, - "learning_rate": 1.634100604753645e-05, - "loss": 0.2176, + "learning_rate": 2.6346714929867536e-05, + "loss": 0.2578, "step": 40025 }, { "epoch": 1.87, - "learning_rate": 1.6340537246261313e-05, - "loss": 0.3805, + "learning_rate": 2.6346246860031516e-05, + "loss": 0.222, "step": 40030 }, { "epoch": 1.87, - "learning_rate": 1.6340068444986172e-05, - "loss": 0.2025, + "learning_rate": 2.63457787901955e-05, + "loss": 0.2264, "step": 40035 }, { "epoch": 1.87, - "learning_rate": 1.6339599643711032e-05, - "loss": 0.0308, + "learning_rate": 2.634531072035948e-05, + "loss": 0.0269, "step": 40040 }, { "epoch": 1.87, - "learning_rate": 1.6339130842435892e-05, - "loss": 0.0844, + "learning_rate": 2.634484265052346e-05, + "loss": 0.0937, "step": 40045 }, { "epoch": 1.87, - "learning_rate": 1.6338662041160752e-05, - "loss": 0.0482, + "learning_rate": 2.634437458068744e-05, + "loss": 0.0581, "step": 40050 }, { "epoch": 1.87, - "learning_rate": 1.6338193239885612e-05, - "loss": 0.0883, + "learning_rate": 2.6343906510851422e-05, + "loss": 0.0891, "step": 40055 }, { "epoch": 1.87, - "learning_rate": 1.6337724438610475e-05, - "loss": 0.1405, + "learning_rate": 2.63434384410154e-05, + "loss": 0.0834, "step": 40060 }, { "epoch": 1.87, - "learning_rate": 1.6337255637335335e-05, - "loss": 0.0711, + "learning_rate": 2.634297037117938e-05, + "loss": 0.0933, "step": 40065 }, { "epoch": 1.87, - "learning_rate": 1.6336786836060195e-05, - "loss": 0.228, + "learning_rate": 2.634250230134336e-05, + "loss": 0.0986, "step": 40070 }, { "epoch": 1.87, - "learning_rate": 1.6336318034785055e-05, - "loss": 0.1147, + "learning_rate": 2.634203423150734e-05, + "loss": 0.2179, "step": 40075 }, { "epoch": 1.87, - "learning_rate": 1.633584923350992e-05, - "loss": 0.4093, + "learning_rate": 2.634156616167132e-05, + "loss": 0.4146, "step": 40080 }, { "epoch": 1.87, - "learning_rate": 1.633538043223478e-05, - "loss": 0.1311, + "learning_rate": 2.63410980918353e-05, + "loss": 0.2141, "step": 40085 }, { "epoch": 1.87, - "learning_rate": 1.633491163095964e-05, - "loss": 0.0689, + "learning_rate": 2.6340630021999284e-05, + "loss": 0.0474, "step": 40090 }, { "epoch": 1.87, - "learning_rate": 1.63344428296845e-05, - "loss": 0.0738, + "learning_rate": 2.6340161952163264e-05, + "loss": 0.0566, "step": 40095 }, { "epoch": 1.87, - "learning_rate": 1.633397402840936e-05, - "loss": 0.0874, + "learning_rate": 2.6339693882327244e-05, + "loss": 0.0735, "step": 40100 }, { "epoch": 1.87, - "learning_rate": 1.6333505227134218e-05, - "loss": 0.1441, + "learning_rate": 2.6339225812491223e-05, + "loss": 0.126, "step": 40105 }, { "epoch": 1.87, - "learning_rate": 1.6333036425859078e-05, - "loss": 0.1107, + "learning_rate": 2.6338757742655207e-05, + "loss": 0.1018, "step": 40110 }, { "epoch": 1.87, - "learning_rate": 1.6332567624583938e-05, - "loss": 0.161, + "learning_rate": 2.6338289672819186e-05, + "loss": 0.1289, "step": 40115 }, { "epoch": 1.87, - "learning_rate": 1.63320988233088e-05, - "loss": 0.1873, + "learning_rate": 2.6337821602983166e-05, + "loss": 0.1872, "step": 40120 }, { "epoch": 1.87, - "learning_rate": 1.633163002203366e-05, - "loss": 0.1899, + "learning_rate": 2.6337353533147146e-05, + "loss": 0.2074, "step": 40125 }, { "epoch": 1.87, - "learning_rate": 1.633116122075852e-05, - "loss": 0.161, + "learning_rate": 2.633688546331113e-05, + "loss": 0.3795, "step": 40130 }, { "epoch": 1.87, - "learning_rate": 1.633069241948338e-05, - "loss": 0.2658, + "learning_rate": 2.6336417393475106e-05, + "loss": 0.3749, "step": 40135 }, { "epoch": 1.87, - "learning_rate": 1.6330223618208245e-05, - "loss": 0.06, + "learning_rate": 2.6335949323639086e-05, + "loss": 0.0988, "step": 40140 }, { "epoch": 1.87, - "learning_rate": 1.6329754816933105e-05, - "loss": 0.0345, + "learning_rate": 2.6335481253803065e-05, + "loss": 0.0861, "step": 40145 }, { "epoch": 1.87, - "learning_rate": 1.6329286015657964e-05, - "loss": 0.084, + "learning_rate": 2.633501318396705e-05, + "loss": 0.0576, "step": 40150 }, { "epoch": 1.87, - "learning_rate": 1.6328817214382824e-05, - "loss": 0.1249, + "learning_rate": 2.633454511413103e-05, + "loss": 0.0771, "step": 40155 }, { "epoch": 1.87, - "learning_rate": 1.6328348413107684e-05, - "loss": 0.12, + "learning_rate": 2.6334077044295008e-05, + "loss": 0.1084, "step": 40160 }, { "epoch": 1.87, - "learning_rate": 1.6327879611832544e-05, - "loss": 0.0907, + "learning_rate": 2.633360897445899e-05, + "loss": 0.1072, "step": 40165 }, { "epoch": 1.87, - "learning_rate": 1.6327410810557408e-05, - "loss": 0.1695, + "learning_rate": 2.633314090462297e-05, + "loss": 0.157, "step": 40170 }, { "epoch": 1.87, - "learning_rate": 1.6326942009282268e-05, - "loss": 0.2406, + "learning_rate": 2.633267283478695e-05, + "loss": 0.2229, "step": 40175 }, { "epoch": 1.87, - "learning_rate": 1.6326473208007127e-05, - "loss": 0.3145, + "learning_rate": 2.633220476495093e-05, + "loss": 0.2369, "step": 40180 }, { "epoch": 1.88, - "learning_rate": 1.6326004406731987e-05, - "loss": 0.2744, + "learning_rate": 2.6331736695114914e-05, + "loss": 0.321, "step": 40185 }, { "epoch": 1.88, - "learning_rate": 1.6325535605456847e-05, - "loss": 0.0679, + "learning_rate": 2.6331268625278894e-05, + "loss": 0.0187, "step": 40190 }, { "epoch": 1.88, - "learning_rate": 1.6325066804181707e-05, - "loss": 0.0223, + "learning_rate": 2.6330800555442874e-05, + "loss": 0.0409, "step": 40195 }, { "epoch": 1.88, - "learning_rate": 1.6324598002906567e-05, - "loss": 0.0352, + "learning_rate": 2.633033248560685e-05, + "loss": 0.0531, "step": 40200 }, { "epoch": 1.88, - "learning_rate": 1.632412920163143e-05, - "loss": 0.0538, + "learning_rate": 2.6329864415770833e-05, + "loss": 0.1098, "step": 40205 }, { "epoch": 1.88, - "learning_rate": 1.632366040035629e-05, - "loss": 0.0675, + "learning_rate": 2.6329396345934813e-05, + "loss": 0.1696, "step": 40210 }, { "epoch": 1.88, - "learning_rate": 1.632319159908115e-05, - "loss": 0.1217, + "learning_rate": 2.6328928276098793e-05, + "loss": 0.1012, "step": 40215 }, { "epoch": 1.88, - "learning_rate": 1.6322722797806014e-05, - "loss": 0.109, + "learning_rate": 2.6328460206262776e-05, + "loss": 0.0892, "step": 40220 }, { "epoch": 1.88, - "learning_rate": 1.6322253996530874e-05, - "loss": 0.1773, + "learning_rate": 2.6327992136426756e-05, + "loss": 0.1553, "step": 40225 }, { "epoch": 1.88, - "learning_rate": 1.6321785195255734e-05, - "loss": 0.6679, + "learning_rate": 2.6327524066590736e-05, + "loss": 0.2304, "step": 40230 }, { "epoch": 1.88, - "learning_rate": 1.6321316393980594e-05, - "loss": 0.2886, + "learning_rate": 2.6327055996754716e-05, + "loss": 0.1584, "step": 40235 }, { "epoch": 1.88, - "learning_rate": 1.6320847592705453e-05, - "loss": 0.0511, + "learning_rate": 2.63265879269187e-05, + "loss": 0.0443, "step": 40240 }, { "epoch": 1.88, - "learning_rate": 1.6320378791430313e-05, - "loss": 0.0669, + "learning_rate": 2.632611985708268e-05, + "loss": 0.0389, "step": 40245 }, { "epoch": 1.88, - "learning_rate": 1.6319909990155173e-05, - "loss": 0.0664, + "learning_rate": 2.632565178724666e-05, + "loss": 0.0536, "step": 40250 }, { "epoch": 1.88, - "learning_rate": 1.6319441188880033e-05, - "loss": 0.0805, + "learning_rate": 2.6325183717410638e-05, + "loss": 0.1238, "step": 40255 }, { "epoch": 1.88, - "learning_rate": 1.6318972387604897e-05, - "loss": 0.1264, + "learning_rate": 2.632471564757462e-05, + "loss": 0.1084, "step": 40260 }, { "epoch": 1.88, - "learning_rate": 1.6318503586329756e-05, - "loss": 0.1922, + "learning_rate": 2.6324247577738598e-05, + "loss": 0.1341, "step": 40265 }, { "epoch": 1.88, - "learning_rate": 1.6318034785054616e-05, - "loss": 0.1564, + "learning_rate": 2.6323779507902578e-05, + "loss": 0.1369, "step": 40270 }, { "epoch": 1.88, - "learning_rate": 1.6317565983779476e-05, - "loss": 0.1241, + "learning_rate": 2.632331143806656e-05, + "loss": 0.1298, "step": 40275 }, { "epoch": 1.88, - "learning_rate": 1.6317097182504336e-05, - "loss": 0.2404, + "learning_rate": 2.632284336823054e-05, + "loss": 0.2002, "step": 40280 }, { "epoch": 1.88, - "learning_rate": 1.63166283812292e-05, - "loss": 0.2399, + "learning_rate": 2.632237529839452e-05, + "loss": 0.209, "step": 40285 }, { "epoch": 1.88, - "learning_rate": 1.631615957995406e-05, - "loss": 0.0875, + "learning_rate": 2.63219072285585e-05, + "loss": 0.036, "step": 40290 }, { "epoch": 1.88, - "learning_rate": 1.631569077867892e-05, - "loss": 0.0858, + "learning_rate": 2.6321439158722484e-05, + "loss": 0.0855, "step": 40295 }, { "epoch": 1.88, - "learning_rate": 1.631522197740378e-05, - "loss": 0.0512, + "learning_rate": 2.6320971088886463e-05, + "loss": 0.0532, "step": 40300 }, { "epoch": 1.88, - "learning_rate": 1.6314753176128643e-05, - "loss": 0.0822, + "learning_rate": 2.6320503019050443e-05, + "loss": 0.1029, "step": 40305 }, { "epoch": 1.88, - "learning_rate": 1.6314284374853503e-05, - "loss": 0.2328, + "learning_rate": 2.6320034949214423e-05, + "loss": 0.0991, "step": 40310 }, { "epoch": 1.88, - "learning_rate": 1.6313815573578363e-05, - "loss": 0.1609, + "learning_rate": 2.6319566879378406e-05, + "loss": 0.1283, "step": 40315 }, { "epoch": 1.88, - "learning_rate": 1.6313346772303223e-05, - "loss": 0.1715, + "learning_rate": 2.6319098809542386e-05, + "loss": 0.1572, "step": 40320 }, { "epoch": 1.88, - "learning_rate": 1.6312877971028082e-05, - "loss": 0.1636, + "learning_rate": 2.6318630739706362e-05, + "loss": 0.1945, "step": 40325 }, { "epoch": 1.88, - "learning_rate": 1.6312409169752942e-05, - "loss": 0.2306, + "learning_rate": 2.6318162669870342e-05, + "loss": 0.3183, "step": 40330 }, { "epoch": 1.88, - "learning_rate": 1.6311940368477802e-05, - "loss": 0.2073, + "learning_rate": 2.6317694600034326e-05, + "loss": 0.4122, "step": 40335 }, { "epoch": 1.88, - "learning_rate": 1.6311471567202662e-05, - "loss": 0.0513, + "learning_rate": 2.6317226530198305e-05, + "loss": 0.0527, "step": 40340 }, { "epoch": 1.88, - "learning_rate": 1.6311002765927522e-05, - "loss": 0.0635, + "learning_rate": 2.6316758460362285e-05, + "loss": 0.0604, "step": 40345 }, { "epoch": 1.88, - "learning_rate": 1.6310533964652386e-05, - "loss": 0.0524, + "learning_rate": 2.631629039052627e-05, + "loss": 0.0646, "step": 40350 }, { "epoch": 1.88, - "learning_rate": 1.6310065163377245e-05, - "loss": 0.0711, + "learning_rate": 2.6315822320690248e-05, + "loss": 0.1079, "step": 40355 }, { "epoch": 1.88, - "learning_rate": 1.6309596362102105e-05, - "loss": 0.0976, + "learning_rate": 2.6315354250854228e-05, + "loss": 0.1001, "step": 40360 }, { "epoch": 1.88, - "learning_rate": 1.630912756082697e-05, - "loss": 0.1112, + "learning_rate": 2.6314886181018208e-05, + "loss": 0.1328, "step": 40365 }, { "epoch": 1.88, - "learning_rate": 1.630865875955183e-05, - "loss": 0.1517, + "learning_rate": 2.631441811118219e-05, + "loss": 0.155, "step": 40370 }, { "epoch": 1.88, - "learning_rate": 1.630818995827669e-05, - "loss": 0.181, + "learning_rate": 2.631395004134617e-05, + "loss": 0.1359, "step": 40375 }, { "epoch": 1.88, - "learning_rate": 1.630772115700155e-05, - "loss": 0.2969, + "learning_rate": 2.631348197151015e-05, + "loss": 0.2913, "step": 40380 }, { "epoch": 1.88, - "learning_rate": 1.630725235572641e-05, - "loss": 0.2803, + "learning_rate": 2.631301390167413e-05, + "loss": 0.2886, "step": 40385 }, { "epoch": 1.88, - "learning_rate": 1.630678355445127e-05, - "loss": 0.0327, + "learning_rate": 2.631254583183811e-05, + "loss": 0.062, "step": 40390 }, { "epoch": 1.88, - "learning_rate": 1.630631475317613e-05, - "loss": 0.0737, + "learning_rate": 2.631207776200209e-05, + "loss": 0.0582, "step": 40395 }, { "epoch": 1.89, - "learning_rate": 1.630584595190099e-05, - "loss": 0.0605, + "learning_rate": 2.631160969216607e-05, + "loss": 0.0464, "step": 40400 }, { "epoch": 1.89, - "learning_rate": 1.630537715062585e-05, - "loss": 0.1205, + "learning_rate": 2.6311141622330053e-05, + "loss": 0.1078, "step": 40405 }, { "epoch": 1.89, - "learning_rate": 1.630490834935071e-05, - "loss": 0.0968, + "learning_rate": 2.6310673552494033e-05, + "loss": 0.0807, "step": 40410 }, { "epoch": 1.89, - "learning_rate": 1.630443954807557e-05, - "loss": 0.0997, + "learning_rate": 2.6310205482658013e-05, + "loss": 0.1149, "step": 40415 }, { "epoch": 1.89, - "learning_rate": 1.630397074680043e-05, - "loss": 0.0928, + "learning_rate": 2.6309737412821993e-05, + "loss": 0.1297, "step": 40420 }, { "epoch": 1.89, - "learning_rate": 1.630350194552529e-05, - "loss": 0.1773, + "learning_rate": 2.6309269342985976e-05, + "loss": 0.1764, "step": 40425 }, { "epoch": 1.89, - "learning_rate": 1.6303033144250155e-05, - "loss": 0.2593, + "learning_rate": 2.6308801273149956e-05, + "loss": 0.1905, "step": 40430 }, { "epoch": 1.89, - "learning_rate": 1.6302564342975015e-05, - "loss": 0.2245, + "learning_rate": 2.6308333203313935e-05, + "loss": 0.2088, "step": 40435 }, { "epoch": 1.89, - "learning_rate": 1.6302095541699874e-05, - "loss": 0.0403, + "learning_rate": 2.6307865133477915e-05, + "loss": 0.0662, "step": 40440 }, { "epoch": 1.89, - "learning_rate": 1.6301626740424738e-05, - "loss": 0.0584, + "learning_rate": 2.63073970636419e-05, + "loss": 0.0339, "step": 40445 }, { "epoch": 1.89, - "learning_rate": 1.6301157939149598e-05, - "loss": 0.0841, + "learning_rate": 2.6306928993805878e-05, + "loss": 0.0661, "step": 40450 }, { "epoch": 1.89, - "learning_rate": 1.6300689137874458e-05, - "loss": 0.1322, + "learning_rate": 2.6306460923969855e-05, + "loss": 0.1632, "step": 40455 }, { "epoch": 1.89, - "learning_rate": 1.6300220336599318e-05, - "loss": 0.1035, + "learning_rate": 2.6305992854133838e-05, + "loss": 0.1307, "step": 40460 }, { "epoch": 1.89, - "learning_rate": 1.6299751535324178e-05, - "loss": 0.0839, + "learning_rate": 2.6305524784297818e-05, + "loss": 0.1238, "step": 40465 }, { "epoch": 1.89, - "learning_rate": 1.6299282734049037e-05, - "loss": 0.1201, + "learning_rate": 2.6305056714461798e-05, + "loss": 0.13, "step": 40470 }, { "epoch": 1.89, - "learning_rate": 1.6298813932773897e-05, - "loss": 0.2239, + "learning_rate": 2.6304588644625777e-05, + "loss": 0.2832, "step": 40475 }, { "epoch": 1.89, - "learning_rate": 1.6298345131498757e-05, - "loss": 0.3387, + "learning_rate": 2.630412057478976e-05, + "loss": 0.3345, "step": 40480 }, { "epoch": 1.89, - "learning_rate": 1.6297876330223617e-05, - "loss": 0.2674, + "learning_rate": 2.630365250495374e-05, + "loss": 0.2461, "step": 40485 }, { "epoch": 1.89, - "learning_rate": 1.629740752894848e-05, - "loss": 0.0619, + "learning_rate": 2.630318443511772e-05, + "loss": 0.0824, "step": 40490 }, { "epoch": 1.89, - "learning_rate": 1.629693872767334e-05, - "loss": 0.0873, + "learning_rate": 2.63027163652817e-05, + "loss": 0.0598, "step": 40495 }, { "epoch": 1.89, - "learning_rate": 1.62964699263982e-05, - "loss": 0.0728, + "learning_rate": 2.6302248295445683e-05, + "loss": 0.0739, "step": 40500 }, { "epoch": 1.89, - "learning_rate": 1.629600112512306e-05, - "loss": 0.0871, + "learning_rate": 2.6301780225609663e-05, + "loss": 0.0547, "step": 40505 }, { "epoch": 1.89, - "learning_rate": 1.6295532323847924e-05, - "loss": 0.0998, + "learning_rate": 2.6301312155773643e-05, + "loss": 0.1084, "step": 40510 }, { "epoch": 1.89, - "learning_rate": 1.6295063522572784e-05, - "loss": 0.1339, + "learning_rate": 2.630084408593762e-05, + "loss": 0.0908, "step": 40515 }, { "epoch": 1.89, - "learning_rate": 1.6294594721297644e-05, - "loss": 0.1606, + "learning_rate": 2.6300376016101602e-05, + "loss": 0.1632, "step": 40520 }, { "epoch": 1.89, - "learning_rate": 1.6294125920022504e-05, - "loss": 0.2007, + "learning_rate": 2.6299907946265582e-05, + "loss": 0.1422, "step": 40525 }, { "epoch": 1.89, - "learning_rate": 1.6293657118747363e-05, - "loss": 0.2369, + "learning_rate": 2.6299439876429562e-05, + "loss": 0.5254, "step": 40530 }, { "epoch": 1.89, - "learning_rate": 1.6293188317472223e-05, - "loss": 0.3223, + "learning_rate": 2.6298971806593545e-05, + "loss": 0.2415, "step": 40535 }, { "epoch": 1.89, - "learning_rate": 1.6292719516197087e-05, - "loss": 0.0264, + "learning_rate": 2.6298503736757525e-05, + "loss": 0.023, "step": 40540 }, { "epoch": 1.89, - "learning_rate": 1.6292250714921947e-05, - "loss": 0.0578, + "learning_rate": 2.6298035666921505e-05, + "loss": 0.0293, "step": 40545 }, { "epoch": 1.89, - "learning_rate": 1.6291781913646807e-05, - "loss": 0.0684, + "learning_rate": 2.6297567597085485e-05, + "loss": 0.0752, "step": 40550 }, { "epoch": 1.89, - "learning_rate": 1.6291313112371667e-05, - "loss": 0.0905, + "learning_rate": 2.6297099527249468e-05, + "loss": 0.0975, "step": 40555 }, { "epoch": 1.89, - "learning_rate": 1.6290844311096526e-05, - "loss": 0.1341, + "learning_rate": 2.6296631457413448e-05, + "loss": 0.1287, "step": 40560 }, { "epoch": 1.89, - "learning_rate": 1.6290375509821386e-05, - "loss": 0.0828, + "learning_rate": 2.6296163387577428e-05, + "loss": 0.1435, "step": 40565 }, { "epoch": 1.89, - "learning_rate": 1.628990670854625e-05, - "loss": 0.1134, + "learning_rate": 2.6295695317741407e-05, + "loss": 0.1247, "step": 40570 }, { "epoch": 1.89, - "learning_rate": 1.628943790727111e-05, - "loss": 0.1483, + "learning_rate": 2.629522724790539e-05, + "loss": 0.1938, "step": 40575 }, { "epoch": 1.89, - "learning_rate": 1.628896910599597e-05, - "loss": 0.2516, + "learning_rate": 2.6294759178069367e-05, + "loss": 0.2045, "step": 40580 }, { "epoch": 1.89, - "learning_rate": 1.6288500304720833e-05, - "loss": 0.2559, + "learning_rate": 2.6294291108233347e-05, + "loss": 0.3442, "step": 40585 }, { "epoch": 1.89, - "learning_rate": 1.6288031503445693e-05, - "loss": 0.0355, + "learning_rate": 2.629382303839733e-05, + "loss": 0.0306, "step": 40590 }, { "epoch": 1.89, - "learning_rate": 1.6287562702170553e-05, - "loss": 0.0847, + "learning_rate": 2.629335496856131e-05, + "loss": 0.0456, "step": 40595 }, { "epoch": 1.89, - "learning_rate": 1.6287093900895413e-05, - "loss": 0.0847, + "learning_rate": 2.629288689872529e-05, + "loss": 0.0721, "step": 40600 }, { "epoch": 1.89, - "learning_rate": 1.6286625099620273e-05, - "loss": 0.0802, + "learning_rate": 2.629241882888927e-05, + "loss": 0.1394, "step": 40605 }, { "epoch": 1.89, - "learning_rate": 1.6286156298345133e-05, - "loss": 0.0599, + "learning_rate": 2.6291950759053253e-05, + "loss": 0.1354, "step": 40610 }, { "epoch": 1.9, - "learning_rate": 1.6285687497069993e-05, - "loss": 0.0885, + "learning_rate": 2.6291482689217233e-05, + "loss": 0.1383, "step": 40615 }, { "epoch": 1.9, - "learning_rate": 1.6285218695794852e-05, - "loss": 0.161, + "learning_rate": 2.6291014619381212e-05, + "loss": 0.0947, "step": 40620 }, { "epoch": 1.9, - "learning_rate": 1.6284749894519712e-05, - "loss": 0.2128, + "learning_rate": 2.6290546549545192e-05, + "loss": 0.2224, "step": 40625 }, { "epoch": 1.9, - "learning_rate": 1.6284281093244576e-05, - "loss": 0.2853, + "learning_rate": 2.6290078479709175e-05, + "loss": 0.2473, "step": 40630 }, { "epoch": 1.9, - "learning_rate": 1.6283812291969436e-05, - "loss": 0.2575, + "learning_rate": 2.6289610409873155e-05, + "loss": 0.2824, "step": 40635 }, { "epoch": 1.9, - "learning_rate": 1.6283343490694296e-05, - "loss": 0.0866, + "learning_rate": 2.628914234003713e-05, + "loss": 0.0375, "step": 40640 }, { "epoch": 1.9, - "learning_rate": 1.6282874689419155e-05, - "loss": 0.0454, + "learning_rate": 2.6288674270201115e-05, + "loss": 0.0308, "step": 40645 }, { "epoch": 1.9, - "learning_rate": 1.628240588814402e-05, - "loss": 0.1026, + "learning_rate": 2.6288206200365095e-05, + "loss": 0.0811, "step": 40650 }, { "epoch": 1.9, - "learning_rate": 1.628193708686888e-05, - "loss": 0.0982, + "learning_rate": 2.6287738130529075e-05, + "loss": 0.0656, "step": 40655 }, { "epoch": 1.9, - "learning_rate": 1.628146828559374e-05, - "loss": 0.071, + "learning_rate": 2.6287270060693054e-05, + "loss": 0.126, "step": 40660 }, { "epoch": 1.9, - "learning_rate": 1.62809994843186e-05, - "loss": 0.1744, + "learning_rate": 2.6286801990857038e-05, + "loss": 0.104, "step": 40665 }, { "epoch": 1.9, - "learning_rate": 1.628053068304346e-05, - "loss": 0.19, + "learning_rate": 2.6286333921021017e-05, + "loss": 0.1067, "step": 40670 }, { "epoch": 1.9, - "learning_rate": 1.628006188176832e-05, - "loss": 0.2066, + "learning_rate": 2.6285865851184997e-05, + "loss": 0.1963, "step": 40675 }, { "epoch": 1.9, - "learning_rate": 1.6279593080493182e-05, - "loss": 0.3175, + "learning_rate": 2.6285397781348977e-05, + "loss": 0.2602, "step": 40680 }, { "epoch": 1.9, - "learning_rate": 1.6279124279218042e-05, - "loss": 0.3947, + "learning_rate": 2.628492971151296e-05, + "loss": 0.2171, "step": 40685 }, { "epoch": 1.9, - "learning_rate": 1.62786554779429e-05, - "loss": 0.0417, + "learning_rate": 2.628446164167694e-05, + "loss": 0.056, "step": 40690 }, { "epoch": 1.9, - "learning_rate": 1.627818667666776e-05, - "loss": 0.05, + "learning_rate": 2.628399357184092e-05, + "loss": 0.034, "step": 40695 }, { "epoch": 1.9, - "learning_rate": 1.627771787539262e-05, - "loss": 0.0853, + "learning_rate": 2.6283525502004903e-05, + "loss": 0.0761, "step": 40700 }, { "epoch": 1.9, - "learning_rate": 1.627724907411748e-05, - "loss": 0.1473, + "learning_rate": 2.628305743216888e-05, + "loss": 0.1378, "step": 40705 }, { "epoch": 1.9, - "learning_rate": 1.627678027284234e-05, - "loss": 0.0938, + "learning_rate": 2.628258936233286e-05, + "loss": 0.1445, "step": 40710 }, { "epoch": 1.9, - "learning_rate": 1.6276311471567205e-05, - "loss": 0.1339, + "learning_rate": 2.628212129249684e-05, + "loss": 0.1106, "step": 40715 }, { "epoch": 1.9, - "learning_rate": 1.6275842670292065e-05, - "loss": 0.2072, + "learning_rate": 2.6281653222660822e-05, + "loss": 0.1074, "step": 40720 }, { "epoch": 1.9, - "learning_rate": 1.6275373869016925e-05, - "loss": 0.1397, + "learning_rate": 2.6281185152824802e-05, + "loss": 0.1821, "step": 40725 }, { "epoch": 1.9, - "learning_rate": 1.6274905067741788e-05, - "loss": 0.2763, + "learning_rate": 2.6280717082988782e-05, + "loss": 0.4791, "step": 40730 }, { "epoch": 1.9, - "learning_rate": 1.6274436266466648e-05, - "loss": 0.2513, + "learning_rate": 2.6280249013152762e-05, + "loss": 0.2698, "step": 40735 }, { "epoch": 1.9, - "learning_rate": 1.6273967465191508e-05, - "loss": 0.0345, + "learning_rate": 2.6279780943316745e-05, + "loss": 0.0433, "step": 40740 }, { "epoch": 1.9, - "learning_rate": 1.6273498663916368e-05, - "loss": 0.021, + "learning_rate": 2.6279312873480725e-05, + "loss": 0.0372, "step": 40745 }, { "epoch": 1.9, - "learning_rate": 1.6273029862641228e-05, - "loss": 0.0595, + "learning_rate": 2.6278844803644705e-05, + "loss": 0.0911, "step": 40750 }, { "epoch": 1.9, - "learning_rate": 1.6272561061366088e-05, - "loss": 0.0782, + "learning_rate": 2.6278376733808684e-05, + "loss": 0.0713, "step": 40755 }, { "epoch": 1.9, - "learning_rate": 1.6272092260090948e-05, - "loss": 0.1077, + "learning_rate": 2.6277908663972668e-05, + "loss": 0.1668, "step": 40760 }, { "epoch": 1.9, - "learning_rate": 1.6271623458815807e-05, - "loss": 0.1096, + "learning_rate": 2.6277440594136647e-05, + "loss": 0.1463, "step": 40765 }, { "epoch": 1.9, - "learning_rate": 1.627115465754067e-05, - "loss": 0.168, + "learning_rate": 2.6276972524300624e-05, + "loss": 0.1883, "step": 40770 }, { "epoch": 1.9, - "learning_rate": 1.627068585626553e-05, - "loss": 0.2267, + "learning_rate": 2.6276504454464607e-05, + "loss": 0.2241, "step": 40775 }, { "epoch": 1.9, - "learning_rate": 1.627021705499039e-05, - "loss": 0.445, + "learning_rate": 2.6276036384628587e-05, + "loss": 0.3225, "step": 40780 }, { "epoch": 1.9, - "learning_rate": 1.626974825371525e-05, - "loss": 0.3413, + "learning_rate": 2.6275568314792567e-05, + "loss": 0.2225, "step": 40785 }, { "epoch": 1.9, - "learning_rate": 1.626927945244011e-05, - "loss": 0.0355, + "learning_rate": 2.6275100244956547e-05, + "loss": 0.055, "step": 40790 }, { "epoch": 1.9, - "learning_rate": 1.6268810651164974e-05, - "loss": 0.0478, + "learning_rate": 2.627463217512053e-05, + "loss": 0.0597, "step": 40795 }, { "epoch": 1.9, - "learning_rate": 1.6268341849889834e-05, - "loss": 0.0609, + "learning_rate": 2.627416410528451e-05, + "loss": 0.0441, "step": 40800 }, { "epoch": 1.9, - "learning_rate": 1.6267873048614694e-05, - "loss": 0.1098, + "learning_rate": 2.627369603544849e-05, + "loss": 0.1143, "step": 40805 }, { "epoch": 1.9, - "learning_rate": 1.6267404247339554e-05, - "loss": 0.1211, + "learning_rate": 2.627322796561247e-05, + "loss": 0.0817, "step": 40810 }, { "epoch": 1.9, - "learning_rate": 1.6266935446064414e-05, - "loss": 0.112, + "learning_rate": 2.6272759895776452e-05, + "loss": 0.1266, "step": 40815 }, { "epoch": 1.9, - "learning_rate": 1.6266466644789277e-05, - "loss": 0.2154, + "learning_rate": 2.6272291825940432e-05, + "loss": 0.1432, "step": 40820 }, { "epoch": 1.9, - "learning_rate": 1.6265997843514137e-05, - "loss": 0.1539, + "learning_rate": 2.6271823756104412e-05, + "loss": 0.2385, "step": 40825 }, { "epoch": 1.91, - "learning_rate": 1.6265529042238997e-05, - "loss": 0.2905, + "learning_rate": 2.6271355686268392e-05, + "loss": 0.3393, "step": 40830 }, { "epoch": 1.91, - "learning_rate": 1.6265060240963857e-05, - "loss": 0.1497, + "learning_rate": 2.627088761643237e-05, + "loss": 0.1935, "step": 40835 }, { "epoch": 1.91, - "learning_rate": 1.6264591439688717e-05, - "loss": 0.0331, + "learning_rate": 2.627041954659635e-05, + "loss": 0.0436, "step": 40840 }, { "epoch": 1.91, - "learning_rate": 1.6264122638413577e-05, - "loss": 0.0605, + "learning_rate": 2.626995147676033e-05, + "loss": 0.0911, "step": 40845 }, { "epoch": 1.91, - "learning_rate": 1.6263653837138436e-05, - "loss": 0.0846, + "learning_rate": 2.6269483406924314e-05, + "loss": 0.0434, "step": 40850 }, { "epoch": 1.91, - "learning_rate": 1.6263185035863296e-05, - "loss": 0.0709, + "learning_rate": 2.6269015337088294e-05, + "loss": 0.0843, "step": 40855 }, { "epoch": 1.91, - "learning_rate": 1.626271623458816e-05, - "loss": 0.0923, + "learning_rate": 2.6268547267252274e-05, + "loss": 0.0878, "step": 40860 }, { "epoch": 1.91, - "learning_rate": 1.626224743331302e-05, - "loss": 0.1565, + "learning_rate": 2.6268079197416254e-05, + "loss": 0.1078, "step": 40865 }, { "epoch": 1.91, - "learning_rate": 1.626177863203788e-05, - "loss": 0.1856, + "learning_rate": 2.6267611127580237e-05, + "loss": 0.1954, "step": 40870 }, { "epoch": 1.91, - "learning_rate": 1.6261309830762743e-05, - "loss": 0.2154, + "learning_rate": 2.6267143057744217e-05, + "loss": 0.1516, "step": 40875 }, { "epoch": 1.91, - "learning_rate": 1.6260841029487603e-05, - "loss": 0.4401, + "learning_rate": 2.6266674987908197e-05, + "loss": 0.205, "step": 40880 }, { "epoch": 1.91, - "learning_rate": 1.6260372228212463e-05, - "loss": 0.2305, + "learning_rate": 2.626620691807218e-05, + "loss": 0.4242, "step": 40885 }, { "epoch": 1.91, - "learning_rate": 1.6259903426937323e-05, - "loss": 0.0609, + "learning_rate": 2.626573884823616e-05, + "loss": 0.0665, "step": 40890 }, { "epoch": 1.91, - "learning_rate": 1.6259434625662183e-05, - "loss": 0.0389, + "learning_rate": 2.6265270778400136e-05, + "loss": 0.0406, "step": 40895 }, { "epoch": 1.91, - "learning_rate": 1.6258965824387043e-05, - "loss": 0.1174, + "learning_rate": 2.6264802708564116e-05, + "loss": 0.1, "step": 40900 }, { "epoch": 1.91, - "learning_rate": 1.6258497023111903e-05, - "loss": 0.0794, + "learning_rate": 2.62643346387281e-05, + "loss": 0.0459, "step": 40905 }, { "epoch": 1.91, - "learning_rate": 1.6258028221836766e-05, - "loss": 0.0679, + "learning_rate": 2.626386656889208e-05, + "loss": 0.0963, "step": 40910 }, { "epoch": 1.91, - "learning_rate": 1.6257559420561626e-05, - "loss": 0.2702, + "learning_rate": 2.626339849905606e-05, + "loss": 0.1903, "step": 40915 }, { "epoch": 1.91, - "learning_rate": 1.6257090619286486e-05, - "loss": 0.1589, + "learning_rate": 2.626293042922004e-05, + "loss": 0.1762, "step": 40920 }, { "epoch": 1.91, - "learning_rate": 1.6256621818011346e-05, - "loss": 0.2221, + "learning_rate": 2.6262462359384022e-05, + "loss": 0.1476, "step": 40925 }, { "epoch": 1.91, - "learning_rate": 1.6256153016736206e-05, - "loss": 0.3977, + "learning_rate": 2.6261994289548002e-05, + "loss": 0.2332, "step": 40930 }, { "epoch": 1.91, - "learning_rate": 1.625568421546107e-05, - "loss": 0.2443, + "learning_rate": 2.626152621971198e-05, + "loss": 0.2199, "step": 40935 }, { "epoch": 1.91, - "learning_rate": 1.625521541418593e-05, - "loss": 0.0319, + "learning_rate": 2.626105814987596e-05, + "loss": 0.0099, "step": 40940 }, { "epoch": 1.91, - "learning_rate": 1.625474661291079e-05, - "loss": 0.0884, + "learning_rate": 2.6260590080039945e-05, + "loss": 0.0458, "step": 40945 }, { "epoch": 1.91, - "learning_rate": 1.625427781163565e-05, - "loss": 0.0723, + "learning_rate": 2.6260122010203924e-05, + "loss": 0.083, "step": 40950 }, { "epoch": 1.91, - "learning_rate": 1.6253809010360512e-05, - "loss": 0.0731, + "learning_rate": 2.6259653940367904e-05, + "loss": 0.0522, "step": 40955 }, { "epoch": 1.91, - "learning_rate": 1.6253340209085372e-05, - "loss": 0.1685, + "learning_rate": 2.6259185870531884e-05, + "loss": 0.0786, "step": 40960 }, { "epoch": 1.91, - "learning_rate": 1.6252871407810232e-05, - "loss": 0.1155, + "learning_rate": 2.6258717800695864e-05, + "loss": 0.0891, "step": 40965 }, { "epoch": 1.91, - "learning_rate": 1.6252402606535092e-05, - "loss": 0.1788, + "learning_rate": 2.6258249730859844e-05, + "loss": 0.1781, "step": 40970 }, { "epoch": 1.91, - "learning_rate": 1.6251933805259952e-05, - "loss": 0.2254, + "learning_rate": 2.6257781661023823e-05, + "loss": 0.2083, "step": 40975 }, { "epoch": 1.91, - "learning_rate": 1.6251465003984812e-05, - "loss": 0.2898, + "learning_rate": 2.6257313591187807e-05, + "loss": 0.3915, "step": 40980 }, { "epoch": 1.91, - "learning_rate": 1.625099620270967e-05, - "loss": 0.2073, + "learning_rate": 2.6256845521351787e-05, + "loss": 0.331, "step": 40985 }, { "epoch": 1.91, - "learning_rate": 1.625052740143453e-05, - "loss": 0.0784, + "learning_rate": 2.6256377451515766e-05, + "loss": 0.0316, "step": 40990 }, { "epoch": 1.91, - "learning_rate": 1.625005860015939e-05, - "loss": 0.0628, + "learning_rate": 2.6255909381679746e-05, + "loss": 0.0784, "step": 40995 }, { "epoch": 1.91, - "learning_rate": 1.6249589798884255e-05, - "loss": 0.1004, + "learning_rate": 2.625544131184373e-05, + "loss": 0.0979, "step": 41000 }, { "epoch": 1.91, - "learning_rate": 1.6249120997609115e-05, - "loss": 0.1191, + "learning_rate": 2.625497324200771e-05, + "loss": 0.0366, "step": 41005 }, { "epoch": 1.91, - "learning_rate": 1.6248652196333975e-05, - "loss": 0.2033, + "learning_rate": 2.625450517217169e-05, + "loss": 0.1194, "step": 41010 }, { "epoch": 1.91, - "learning_rate": 1.6248183395058838e-05, - "loss": 0.1165, + "learning_rate": 2.6254037102335672e-05, + "loss": 0.1278, "step": 41015 }, { "epoch": 1.91, - "learning_rate": 1.6247714593783698e-05, - "loss": 0.1041, + "learning_rate": 2.625356903249965e-05, + "loss": 0.1545, "step": 41020 }, { "epoch": 1.91, - "learning_rate": 1.6247245792508558e-05, - "loss": 0.3128, + "learning_rate": 2.625310096266363e-05, + "loss": 0.2239, "step": 41025 }, { "epoch": 1.91, - "learning_rate": 1.6246776991233418e-05, - "loss": 0.2066, + "learning_rate": 2.6252632892827608e-05, + "loss": 0.2885, "step": 41030 }, { "epoch": 1.91, - "learning_rate": 1.6246308189958278e-05, - "loss": 0.1945, + "learning_rate": 2.625216482299159e-05, + "loss": 0.2535, "step": 41035 }, { "epoch": 1.91, - "learning_rate": 1.6245839388683138e-05, - "loss": 0.0559, + "learning_rate": 2.625169675315557e-05, + "loss": 0.0614, "step": 41040 }, { "epoch": 1.92, - "learning_rate": 1.6245370587407998e-05, - "loss": 0.0498, + "learning_rate": 2.625122868331955e-05, + "loss": 0.077, "step": 41045 }, { "epoch": 1.92, - "learning_rate": 1.624490178613286e-05, - "loss": 0.0774, + "learning_rate": 2.625076061348353e-05, + "loss": 0.0631, "step": 41050 }, { "epoch": 1.92, - "learning_rate": 1.624443298485772e-05, - "loss": 0.134, + "learning_rate": 2.6250292543647514e-05, + "loss": 0.0667, "step": 41055 }, { "epoch": 1.92, - "learning_rate": 1.624396418358258e-05, - "loss": 0.0899, + "learning_rate": 2.6249824473811494e-05, + "loss": 0.0974, "step": 41060 }, { "epoch": 1.92, - "learning_rate": 1.624349538230744e-05, - "loss": 0.0978, + "learning_rate": 2.6249356403975474e-05, + "loss": 0.1243, "step": 41065 }, { "epoch": 1.92, - "learning_rate": 1.62430265810323e-05, - "loss": 0.1787, + "learning_rate": 2.6248888334139457e-05, + "loss": 0.0933, "step": 41070 }, { "epoch": 1.92, - "learning_rate": 1.624255777975716e-05, - "loss": 0.2584, + "learning_rate": 2.6248420264303437e-05, + "loss": 0.2899, "step": 41075 }, { "epoch": 1.92, - "learning_rate": 1.6242088978482024e-05, - "loss": 0.4001, + "learning_rate": 2.6247952194467417e-05, + "loss": 0.2763, "step": 41080 }, { "epoch": 1.92, - "learning_rate": 1.6241620177206884e-05, - "loss": 0.2551, + "learning_rate": 2.6247484124631393e-05, + "loss": 0.2076, "step": 41085 }, { "epoch": 1.92, - "learning_rate": 1.6241151375931744e-05, - "loss": 0.0141, + "learning_rate": 2.6247016054795376e-05, + "loss": 0.0124, "step": 41090 }, { "epoch": 1.92, - "learning_rate": 1.6240682574656607e-05, - "loss": 0.0182, + "learning_rate": 2.6246547984959356e-05, + "loss": 0.0327, "step": 41095 }, { "epoch": 1.92, - "learning_rate": 1.6240213773381467e-05, - "loss": 0.0712, + "learning_rate": 2.6246079915123336e-05, + "loss": 0.1161, "step": 41100 }, { "epoch": 1.92, - "learning_rate": 1.6239744972106327e-05, - "loss": 0.0826, + "learning_rate": 2.6245611845287316e-05, + "loss": 0.07, "step": 41105 }, { "epoch": 1.92, - "learning_rate": 1.6239276170831187e-05, - "loss": 0.0982, + "learning_rate": 2.62451437754513e-05, + "loss": 0.0587, "step": 41110 }, { "epoch": 1.92, - "learning_rate": 1.6238807369556047e-05, - "loss": 0.0902, + "learning_rate": 2.624467570561528e-05, + "loss": 0.1299, "step": 41115 }, { "epoch": 1.92, - "learning_rate": 1.6238338568280907e-05, - "loss": 0.1409, + "learning_rate": 2.624420763577926e-05, + "loss": 0.1363, "step": 41120 }, { "epoch": 1.92, - "learning_rate": 1.6237869767005767e-05, - "loss": 0.0901, + "learning_rate": 2.624373956594324e-05, + "loss": 0.1922, "step": 41125 }, { "epoch": 1.92, - "learning_rate": 1.6237400965730627e-05, - "loss": 0.2791, + "learning_rate": 2.624327149610722e-05, + "loss": 0.271, "step": 41130 }, { "epoch": 1.92, - "learning_rate": 1.6236932164455487e-05, - "loss": 0.3241, + "learning_rate": 2.62428034262712e-05, + "loss": 0.2567, "step": 41135 }, { "epoch": 1.92, - "learning_rate": 1.6236463363180347e-05, - "loss": 0.0624, + "learning_rate": 2.624233535643518e-05, + "loss": 0.0299, "step": 41140 }, { "epoch": 1.92, - "learning_rate": 1.623599456190521e-05, - "loss": 0.0258, + "learning_rate": 2.624186728659916e-05, + "loss": 0.0921, "step": 41145 }, { "epoch": 1.92, - "learning_rate": 1.623552576063007e-05, - "loss": 0.0758, + "learning_rate": 2.624139921676314e-05, + "loss": 0.0731, "step": 41150 }, { "epoch": 1.92, - "learning_rate": 1.623505695935493e-05, - "loss": 0.0531, + "learning_rate": 2.624093114692712e-05, + "loss": 0.0317, "step": 41155 }, { "epoch": 1.92, - "learning_rate": 1.6234588158079793e-05, - "loss": 0.0854, + "learning_rate": 2.62404630770911e-05, + "loss": 0.1856, "step": 41160 }, { "epoch": 1.92, - "learning_rate": 1.6234119356804653e-05, - "loss": 0.1209, + "learning_rate": 2.6239995007255084e-05, + "loss": 0.1505, "step": 41165 }, { "epoch": 1.92, - "learning_rate": 1.6233650555529513e-05, - "loss": 0.1532, + "learning_rate": 2.6239526937419063e-05, + "loss": 0.0735, "step": 41170 }, { "epoch": 1.92, - "learning_rate": 1.6233181754254373e-05, - "loss": 0.1413, + "learning_rate": 2.6239058867583043e-05, + "loss": 0.1157, "step": 41175 }, { "epoch": 1.92, - "learning_rate": 1.6232712952979233e-05, - "loss": 0.2444, + "learning_rate": 2.6238590797747023e-05, + "loss": 0.2787, "step": 41180 }, { "epoch": 1.92, - "learning_rate": 1.6232244151704093e-05, - "loss": 0.2288, + "learning_rate": 2.6238122727911006e-05, + "loss": 0.156, "step": 41185 }, { "epoch": 1.92, - "learning_rate": 1.6231775350428956e-05, - "loss": 0.0482, + "learning_rate": 2.6237654658074986e-05, + "loss": 0.0521, "step": 41190 }, { "epoch": 1.92, - "learning_rate": 1.6231306549153816e-05, - "loss": 0.0709, + "learning_rate": 2.6237186588238966e-05, + "loss": 0.038, "step": 41195 }, { "epoch": 1.92, - "learning_rate": 1.6230837747878676e-05, - "loss": 0.1429, + "learning_rate": 2.623671851840295e-05, + "loss": 0.1511, "step": 41200 }, { "epoch": 1.92, - "learning_rate": 1.6230368946603536e-05, - "loss": 0.0677, + "learning_rate": 2.623625044856693e-05, + "loss": 0.0708, "step": 41205 }, { "epoch": 1.92, - "learning_rate": 1.6229900145328396e-05, - "loss": 0.1367, + "learning_rate": 2.6235782378730905e-05, + "loss": 0.0415, "step": 41210 }, { "epoch": 1.92, - "learning_rate": 1.6229431344053256e-05, - "loss": 0.091, + "learning_rate": 2.6235314308894885e-05, + "loss": 0.1097, "step": 41215 }, { "epoch": 1.92, - "learning_rate": 1.6228962542778116e-05, - "loss": 0.1481, + "learning_rate": 2.623484623905887e-05, + "loss": 0.1184, "step": 41220 }, { "epoch": 1.92, - "learning_rate": 1.622849374150298e-05, - "loss": 0.1727, + "learning_rate": 2.6234378169222848e-05, + "loss": 0.2045, "step": 41225 }, { "epoch": 1.92, - "learning_rate": 1.622802494022784e-05, - "loss": 0.2958, + "learning_rate": 2.6233910099386828e-05, + "loss": 0.7751, "step": 41230 }, { "epoch": 1.92, - "learning_rate": 1.62275561389527e-05, - "loss": 0.4008, + "learning_rate": 2.6233442029550808e-05, + "loss": 0.3461, "step": 41235 }, { "epoch": 1.92, - "learning_rate": 1.6227087337677562e-05, - "loss": 0.0504, + "learning_rate": 2.623297395971479e-05, + "loss": 0.0319, "step": 41240 }, { "epoch": 1.92, - "learning_rate": 1.6226618536402422e-05, - "loss": 0.0635, + "learning_rate": 2.623250588987877e-05, + "loss": 0.0489, "step": 41245 }, { "epoch": 1.92, - "learning_rate": 1.6226149735127282e-05, - "loss": 0.0534, + "learning_rate": 2.623203782004275e-05, + "loss": 0.0812, "step": 41250 }, { "epoch": 1.93, - "learning_rate": 1.6225680933852142e-05, - "loss": 0.0808, + "learning_rate": 2.6231569750206734e-05, + "loss": 0.1004, "step": 41255 }, { "epoch": 1.93, - "learning_rate": 1.6225212132577002e-05, - "loss": 0.1256, + "learning_rate": 2.6231101680370714e-05, + "loss": 0.111, "step": 41260 }, { "epoch": 1.93, - "learning_rate": 1.6224743331301862e-05, - "loss": 0.2142, + "learning_rate": 2.6230633610534694e-05, + "loss": 0.096, "step": 41265 }, { "epoch": 1.93, - "learning_rate": 1.6224274530026722e-05, - "loss": 0.1487, + "learning_rate": 2.6230165540698673e-05, + "loss": 0.1388, "step": 41270 }, { "epoch": 1.93, - "learning_rate": 1.622380572875158e-05, - "loss": 0.2907, + "learning_rate": 2.6229697470862653e-05, + "loss": 0.1167, "step": 41275 }, { "epoch": 1.93, - "learning_rate": 1.622333692747644e-05, - "loss": 0.4025, + "learning_rate": 2.6229229401026633e-05, + "loss": 0.3065, "step": 41280 }, { "epoch": 1.93, - "learning_rate": 1.6222868126201305e-05, - "loss": 0.2121, + "learning_rate": 2.6228761331190613e-05, + "loss": 0.1655, "step": 41285 }, { "epoch": 1.93, - "learning_rate": 1.6222399324926165e-05, - "loss": 0.0294, + "learning_rate": 2.6228293261354593e-05, + "loss": 0.0553, "step": 41290 }, { "epoch": 1.93, - "learning_rate": 1.6221930523651025e-05, - "loss": 0.061, + "learning_rate": 2.6227825191518576e-05, + "loss": 0.0778, "step": 41295 }, { "epoch": 1.93, - "learning_rate": 1.6221461722375885e-05, - "loss": 0.0598, + "learning_rate": 2.6227357121682556e-05, + "loss": 0.0503, "step": 41300 }, { "epoch": 1.93, - "learning_rate": 1.6220992921100748e-05, - "loss": 0.1083, + "learning_rate": 2.6226889051846536e-05, + "loss": 0.0601, "step": 41305 }, { "epoch": 1.93, - "learning_rate": 1.6220524119825608e-05, - "loss": 0.1211, + "learning_rate": 2.6226420982010515e-05, + "loss": 0.123, "step": 41310 }, { "epoch": 1.93, - "learning_rate": 1.6220055318550468e-05, - "loss": 0.1688, + "learning_rate": 2.62259529121745e-05, + "loss": 0.2035, "step": 41315 }, { "epoch": 1.93, - "learning_rate": 1.6219586517275328e-05, - "loss": 0.1301, + "learning_rate": 2.622548484233848e-05, + "loss": 0.1594, "step": 41320 }, { "epoch": 1.93, - "learning_rate": 1.6219117716000188e-05, - "loss": 0.1663, + "learning_rate": 2.6225016772502458e-05, + "loss": 0.2528, "step": 41325 }, { "epoch": 1.93, - "learning_rate": 1.621864891472505e-05, - "loss": 0.1977, + "learning_rate": 2.622454870266644e-05, + "loss": 0.5483, "step": 41330 }, { "epoch": 1.93, - "learning_rate": 1.621818011344991e-05, - "loss": 0.2396, + "learning_rate": 2.6224080632830418e-05, + "loss": 0.2355, "step": 41335 }, { "epoch": 1.93, - "learning_rate": 1.621771131217477e-05, - "loss": 0.0523, + "learning_rate": 2.6223612562994398e-05, + "loss": 0.0509, "step": 41340 }, { "epoch": 1.93, - "learning_rate": 1.621724251089963e-05, - "loss": 0.0638, + "learning_rate": 2.6223144493158377e-05, + "loss": 0.0884, "step": 41345 }, { "epoch": 1.93, - "learning_rate": 1.621677370962449e-05, - "loss": 0.0662, + "learning_rate": 2.622267642332236e-05, + "loss": 0.0321, "step": 41350 }, { "epoch": 1.93, - "learning_rate": 1.621630490834935e-05, - "loss": 0.1036, + "learning_rate": 2.622220835348634e-05, + "loss": 0.1214, "step": 41355 }, { "epoch": 1.93, - "learning_rate": 1.621583610707421e-05, - "loss": 0.0878, + "learning_rate": 2.622174028365032e-05, + "loss": 0.0687, "step": 41360 }, { "epoch": 1.93, - "learning_rate": 1.6215367305799074e-05, - "loss": 0.1043, + "learning_rate": 2.62212722138143e-05, + "loss": 0.0954, "step": 41365 }, { "epoch": 1.93, - "learning_rate": 1.6214898504523934e-05, - "loss": 0.1287, + "learning_rate": 2.6220804143978283e-05, + "loss": 0.152, "step": 41370 }, { "epoch": 1.93, - "learning_rate": 1.6214429703248794e-05, - "loss": 0.2675, + "learning_rate": 2.6220336074142263e-05, + "loss": 0.1892, "step": 41375 }, { "epoch": 1.93, - "learning_rate": 1.6213960901973654e-05, - "loss": 0.2608, + "learning_rate": 2.6219868004306243e-05, + "loss": 0.3289, "step": 41380 }, { "epoch": 1.93, - "learning_rate": 1.6213492100698517e-05, - "loss": 0.2334, + "learning_rate": 2.6219399934470226e-05, + "loss": 0.2048, "step": 41385 }, { "epoch": 1.93, - "learning_rate": 1.6213023299423377e-05, - "loss": 0.0367, + "learning_rate": 2.6218931864634206e-05, + "loss": 0.0957, "step": 41390 }, { "epoch": 1.93, - "learning_rate": 1.6212554498148237e-05, - "loss": 0.0891, + "learning_rate": 2.6218463794798186e-05, + "loss": 0.0559, "step": 41395 }, { "epoch": 1.93, - "learning_rate": 1.6212085696873097e-05, - "loss": 0.0596, + "learning_rate": 2.6217995724962162e-05, + "loss": 0.068, "step": 41400 }, { "epoch": 1.93, - "learning_rate": 1.6211616895597957e-05, - "loss": 0.1102, + "learning_rate": 2.6217527655126145e-05, + "loss": 0.1379, "step": 41405 }, { "epoch": 1.93, - "learning_rate": 1.6211148094322817e-05, - "loss": 0.0846, + "learning_rate": 2.6217059585290125e-05, + "loss": 0.1487, "step": 41410 }, { "epoch": 1.93, - "learning_rate": 1.6210679293047677e-05, - "loss": 0.2469, + "learning_rate": 2.6216591515454105e-05, + "loss": 0.0974, "step": 41415 }, { "epoch": 1.93, - "learning_rate": 1.621021049177254e-05, - "loss": 0.1392, + "learning_rate": 2.6216123445618085e-05, + "loss": 0.1483, "step": 41420 }, { "epoch": 1.93, - "learning_rate": 1.62097416904974e-05, - "loss": 0.223, + "learning_rate": 2.6215655375782068e-05, + "loss": 0.1493, "step": 41425 }, { "epoch": 1.93, - "learning_rate": 1.620927288922226e-05, - "loss": 0.3501, + "learning_rate": 2.6215187305946048e-05, + "loss": 0.2837, "step": 41430 }, { "epoch": 1.93, - "learning_rate": 1.620880408794712e-05, - "loss": 0.1642, + "learning_rate": 2.6214719236110028e-05, + "loss": 0.2083, "step": 41435 }, { "epoch": 1.93, - "learning_rate": 1.620833528667198e-05, - "loss": 0.0356, + "learning_rate": 2.621425116627401e-05, + "loss": 0.077, "step": 41440 }, { "epoch": 1.93, - "learning_rate": 1.6207866485396843e-05, - "loss": 0.1539, + "learning_rate": 2.621378309643799e-05, + "loss": 0.0595, "step": 41445 }, { "epoch": 1.93, - "learning_rate": 1.6207397684121703e-05, - "loss": 0.1197, + "learning_rate": 2.621331502660197e-05, + "loss": 0.0702, "step": 41450 }, { "epoch": 1.93, - "learning_rate": 1.6206928882846563e-05, - "loss": 0.0775, + "learning_rate": 2.621284695676595e-05, + "loss": 0.0853, "step": 41455 }, { "epoch": 1.93, - "learning_rate": 1.6206460081571423e-05, - "loss": 0.0948, + "learning_rate": 2.6212378886929934e-05, + "loss": 0.0832, "step": 41460 }, { "epoch": 1.93, - "learning_rate": 1.6205991280296283e-05, - "loss": 0.1299, + "learning_rate": 2.621191081709391e-05, + "loss": 0.0786, "step": 41465 }, { "epoch": 1.94, - "learning_rate": 1.6205522479021146e-05, - "loss": 0.1459, + "learning_rate": 2.621144274725789e-05, + "loss": 0.1633, "step": 41470 }, { "epoch": 1.94, - "learning_rate": 1.6205053677746006e-05, - "loss": 0.187, + "learning_rate": 2.621097467742187e-05, + "loss": 0.2989, "step": 41475 }, { "epoch": 1.94, - "learning_rate": 1.6204584876470866e-05, - "loss": 0.3319, + "learning_rate": 2.6210506607585853e-05, + "loss": 0.2603, "step": 41480 }, { "epoch": 1.94, - "learning_rate": 1.6204116075195726e-05, - "loss": 0.1926, + "learning_rate": 2.6210038537749833e-05, + "loss": 0.2763, "step": 41485 }, { "epoch": 1.94, - "learning_rate": 1.6203647273920586e-05, - "loss": 0.0347, + "learning_rate": 2.6209570467913812e-05, + "loss": 0.0806, "step": 41490 }, { "epoch": 1.94, - "learning_rate": 1.6203178472645446e-05, - "loss": 0.0729, + "learning_rate": 2.6209102398077796e-05, + "loss": 0.1053, "step": 41495 }, { "epoch": 1.94, - "learning_rate": 1.6202709671370306e-05, - "loss": 0.0343, + "learning_rate": 2.6208634328241775e-05, + "loss": 0.0842, "step": 41500 }, { "epoch": 1.94, - "learning_rate": 1.6202240870095166e-05, - "loss": 0.1035, + "learning_rate": 2.6208166258405755e-05, + "loss": 0.0804, "step": 41505 }, { "epoch": 1.94, - "learning_rate": 1.620177206882003e-05, - "loss": 0.0586, + "learning_rate": 2.6207698188569735e-05, + "loss": 0.1078, "step": 41510 }, { "epoch": 1.94, - "learning_rate": 1.620130326754489e-05, - "loss": 0.1304, + "learning_rate": 2.620723011873372e-05, + "loss": 0.1337, "step": 41515 }, { "epoch": 1.94, - "learning_rate": 1.620083446626975e-05, - "loss": 0.0746, + "learning_rate": 2.6206762048897698e-05, + "loss": 0.1703, "step": 41520 }, { "epoch": 1.94, - "learning_rate": 1.6200365664994612e-05, - "loss": 0.2772, + "learning_rate": 2.6206293979061675e-05, + "loss": 0.2391, "step": 41525 }, { "epoch": 1.94, - "learning_rate": 1.6199896863719472e-05, - "loss": 0.3602, + "learning_rate": 2.6205825909225654e-05, + "loss": 0.3403, "step": 41530 }, { "epoch": 1.94, - "learning_rate": 1.6199428062444332e-05, - "loss": 0.2218, + "learning_rate": 2.6205357839389638e-05, + "loss": 0.2625, "step": 41535 }, { "epoch": 1.94, - "learning_rate": 1.6198959261169192e-05, - "loss": 0.0288, + "learning_rate": 2.6204889769553617e-05, + "loss": 0.1023, "step": 41540 }, { "epoch": 1.94, - "learning_rate": 1.6198490459894052e-05, - "loss": 0.0953, + "learning_rate": 2.6204421699717597e-05, + "loss": 0.0718, "step": 41545 }, { "epoch": 1.94, - "learning_rate": 1.6198021658618912e-05, - "loss": 0.0825, + "learning_rate": 2.6203953629881577e-05, + "loss": 0.0991, "step": 41550 }, { "epoch": 1.94, - "learning_rate": 1.6197552857343772e-05, - "loss": 0.0721, + "learning_rate": 2.620348556004556e-05, + "loss": 0.0729, "step": 41555 }, { "epoch": 1.94, - "learning_rate": 1.6197084056068635e-05, - "loss": 0.0856, + "learning_rate": 2.620301749020954e-05, + "loss": 0.1279, "step": 41560 }, { "epoch": 1.94, - "learning_rate": 1.6196615254793495e-05, - "loss": 0.1164, + "learning_rate": 2.620254942037352e-05, + "loss": 0.0683, "step": 41565 }, { "epoch": 1.94, - "learning_rate": 1.6196146453518355e-05, - "loss": 0.1362, + "learning_rate": 2.6202081350537503e-05, + "loss": 0.0898, "step": 41570 }, { "epoch": 1.94, - "learning_rate": 1.6195677652243215e-05, - "loss": 0.1411, + "learning_rate": 2.6201613280701483e-05, + "loss": 0.2204, "step": 41575 }, { "epoch": 1.94, - "learning_rate": 1.6195208850968075e-05, - "loss": 0.3744, + "learning_rate": 2.6201145210865463e-05, + "loss": 0.3399, "step": 41580 }, { "epoch": 1.94, - "learning_rate": 1.6194740049692935e-05, - "loss": 0.218, + "learning_rate": 2.6200677141029443e-05, + "loss": 0.244, "step": 41585 }, { "epoch": 1.94, - "learning_rate": 1.6194271248417798e-05, - "loss": 0.029, + "learning_rate": 2.6200209071193422e-05, + "loss": 0.0466, "step": 41590 }, { "epoch": 1.94, - "learning_rate": 1.6193802447142658e-05, - "loss": 0.0284, + "learning_rate": 2.6199741001357402e-05, + "loss": 0.0319, "step": 41595 }, { "epoch": 1.94, - "learning_rate": 1.6193333645867518e-05, - "loss": 0.1183, + "learning_rate": 2.6199272931521382e-05, + "loss": 0.0508, "step": 41600 }, { "epoch": 1.94, - "learning_rate": 1.6192864844592378e-05, - "loss": 0.1304, + "learning_rate": 2.6198804861685362e-05, + "loss": 0.0871, "step": 41605 }, { "epoch": 1.94, - "learning_rate": 1.619239604331724e-05, - "loss": 0.1123, + "learning_rate": 2.6198336791849345e-05, + "loss": 0.0987, "step": 41610 }, { "epoch": 1.94, - "learning_rate": 1.61919272420421e-05, - "loss": 0.1057, + "learning_rate": 2.6197868722013325e-05, + "loss": 0.1401, "step": 41615 }, { "epoch": 1.94, - "learning_rate": 1.619145844076696e-05, - "loss": 0.2111, + "learning_rate": 2.6197400652177305e-05, + "loss": 0.0965, "step": 41620 }, { "epoch": 1.94, - "learning_rate": 1.619098963949182e-05, - "loss": 0.1933, + "learning_rate": 2.6196932582341288e-05, + "loss": 0.1949, "step": 41625 }, { "epoch": 1.94, - "learning_rate": 1.619052083821668e-05, - "loss": 0.2101, + "learning_rate": 2.6196464512505268e-05, + "loss": 0.2828, "step": 41630 }, { "epoch": 1.94, - "learning_rate": 1.619005203694154e-05, - "loss": 0.4121, + "learning_rate": 2.6195996442669248e-05, + "loss": 0.2189, "step": 41635 }, { "epoch": 1.94, - "learning_rate": 1.61895832356664e-05, - "loss": 0.0507, + "learning_rate": 2.6195528372833227e-05, + "loss": 0.0161, "step": 41640 }, { "epoch": 1.94, - "learning_rate": 1.618911443439126e-05, - "loss": 0.0774, + "learning_rate": 2.619506030299721e-05, + "loss": 0.0631, "step": 41645 }, { "epoch": 1.94, - "learning_rate": 1.618864563311612e-05, - "loss": 0.0937, + "learning_rate": 2.619459223316119e-05, + "loss": 0.0599, "step": 41650 }, { "epoch": 1.94, - "learning_rate": 1.6188176831840984e-05, - "loss": 0.104, + "learning_rate": 2.6194124163325167e-05, + "loss": 0.103, "step": 41655 }, { "epoch": 1.94, - "learning_rate": 1.6187708030565844e-05, - "loss": 0.0718, + "learning_rate": 2.6193656093489147e-05, + "loss": 0.0704, "step": 41660 }, { "epoch": 1.94, - "learning_rate": 1.6187239229290704e-05, - "loss": 0.2121, + "learning_rate": 2.619318802365313e-05, + "loss": 0.1196, "step": 41665 }, { "epoch": 1.94, - "learning_rate": 1.6186770428015567e-05, - "loss": 0.1222, + "learning_rate": 2.619271995381711e-05, + "loss": 0.1629, "step": 41670 }, { "epoch": 1.94, - "learning_rate": 1.6186301626740427e-05, - "loss": 0.1626, + "learning_rate": 2.619225188398109e-05, + "loss": 0.1673, "step": 41675 }, { "epoch": 1.94, - "learning_rate": 1.6185832825465287e-05, - "loss": 0.2618, + "learning_rate": 2.6191783814145073e-05, + "loss": 0.284, "step": 41680 }, { "epoch": 1.95, - "learning_rate": 1.6185364024190147e-05, - "loss": 0.1554, + "learning_rate": 2.6191315744309052e-05, + "loss": 0.2082, "step": 41685 }, { "epoch": 1.95, - "learning_rate": 1.6184895222915007e-05, - "loss": 0.0277, + "learning_rate": 2.6190847674473032e-05, + "loss": 0.0637, "step": 41690 }, { "epoch": 1.95, - "learning_rate": 1.6184426421639867e-05, - "loss": 0.0889, + "learning_rate": 2.6190379604637012e-05, + "loss": 0.0804, "step": 41695 }, { "epoch": 1.95, - "learning_rate": 1.618395762036473e-05, - "loss": 0.0996, + "learning_rate": 2.6189911534800995e-05, + "loss": 0.0269, "step": 41700 }, { "epoch": 1.95, - "learning_rate": 1.618348881908959e-05, - "loss": 0.0605, + "learning_rate": 2.6189443464964975e-05, + "loss": 0.0865, "step": 41705 }, { "epoch": 1.95, - "learning_rate": 1.618302001781445e-05, - "loss": 0.1412, + "learning_rate": 2.6188975395128955e-05, + "loss": 0.0704, "step": 41710 }, { "epoch": 1.95, - "learning_rate": 1.618255121653931e-05, - "loss": 0.1696, + "learning_rate": 2.618850732529293e-05, + "loss": 0.1167, "step": 41715 }, { "epoch": 1.95, - "learning_rate": 1.618208241526417e-05, - "loss": 0.1668, + "learning_rate": 2.6188039255456915e-05, + "loss": 0.2401, "step": 41720 }, { "epoch": 1.95, - "learning_rate": 1.618161361398903e-05, - "loss": 0.198, + "learning_rate": 2.6187571185620894e-05, + "loss": 0.0924, "step": 41725 }, { "epoch": 1.95, - "learning_rate": 1.618114481271389e-05, - "loss": 0.3409, + "learning_rate": 2.6187103115784874e-05, + "loss": 0.222, "step": 41730 }, { "epoch": 1.95, - "learning_rate": 1.6180676011438753e-05, - "loss": 0.1873, + "learning_rate": 2.6186635045948854e-05, + "loss": 0.2267, "step": 41735 }, { "epoch": 1.95, - "learning_rate": 1.6180207210163613e-05, - "loss": 0.0503, + "learning_rate": 2.6186166976112837e-05, + "loss": 0.0438, "step": 41740 }, { "epoch": 1.95, - "learning_rate": 1.6179738408888473e-05, - "loss": 0.0428, + "learning_rate": 2.6185698906276817e-05, + "loss": 0.0657, "step": 41745 }, { "epoch": 1.95, - "learning_rate": 1.6179269607613336e-05, - "loss": 0.0873, + "learning_rate": 2.6185230836440797e-05, + "loss": 0.0662, "step": 41750 }, { "epoch": 1.95, - "learning_rate": 1.6178800806338196e-05, - "loss": 0.0582, + "learning_rate": 2.618476276660478e-05, + "loss": 0.0745, "step": 41755 }, { "epoch": 1.95, - "learning_rate": 1.6178332005063056e-05, - "loss": 0.1057, + "learning_rate": 2.618429469676876e-05, + "loss": 0.1449, "step": 41760 }, { "epoch": 1.95, - "learning_rate": 1.6177863203787916e-05, - "loss": 0.217, + "learning_rate": 2.618382662693274e-05, + "loss": 0.0906, "step": 41765 }, { "epoch": 1.95, - "learning_rate": 1.6177394402512776e-05, - "loss": 0.0945, + "learning_rate": 2.618335855709672e-05, + "loss": 0.1437, "step": 41770 }, { "epoch": 1.95, - "learning_rate": 1.6176925601237636e-05, - "loss": 0.2702, + "learning_rate": 2.6182890487260703e-05, + "loss": 0.1986, "step": 41775 }, { "epoch": 1.95, - "learning_rate": 1.6176456799962496e-05, - "loss": 0.2901, + "learning_rate": 2.618242241742468e-05, + "loss": 0.28, "step": 41780 }, { "epoch": 1.95, - "learning_rate": 1.6175987998687356e-05, - "loss": 0.2701, + "learning_rate": 2.618195434758866e-05, + "loss": 0.2747, "step": 41785 }, { "epoch": 1.95, - "learning_rate": 1.6175519197412216e-05, - "loss": 0.0123, + "learning_rate": 2.618148627775264e-05, + "loss": 0.0362, "step": 41790 }, { "epoch": 1.95, - "learning_rate": 1.617505039613708e-05, - "loss": 0.0755, + "learning_rate": 2.6181018207916622e-05, + "loss": 0.0989, "step": 41795 }, { "epoch": 1.95, - "learning_rate": 1.617458159486194e-05, - "loss": 0.058, + "learning_rate": 2.6180550138080602e-05, + "loss": 0.0329, "step": 41800 }, { "epoch": 1.95, - "learning_rate": 1.61741127935868e-05, - "loss": 0.0831, + "learning_rate": 2.618008206824458e-05, + "loss": 0.0739, "step": 41805 }, { "epoch": 1.95, - "learning_rate": 1.617364399231166e-05, - "loss": 0.0736, + "learning_rate": 2.6179613998408565e-05, + "loss": 0.1304, "step": 41810 }, { "epoch": 1.95, - "learning_rate": 1.6173175191036522e-05, - "loss": 0.1257, + "learning_rate": 2.6179145928572545e-05, + "loss": 0.0963, "step": 41815 }, { "epoch": 1.95, - "learning_rate": 1.6172706389761382e-05, - "loss": 0.147, + "learning_rate": 2.6178677858736524e-05, + "loss": 0.1589, "step": 41820 }, { "epoch": 1.95, - "learning_rate": 1.6172237588486242e-05, - "loss": 0.1401, + "learning_rate": 2.6178209788900504e-05, + "loss": 0.1445, "step": 41825 }, { "epoch": 1.95, - "learning_rate": 1.6171768787211102e-05, - "loss": 0.3274, + "learning_rate": 2.6177741719064488e-05, + "loss": 0.2208, "step": 41830 }, { "epoch": 1.95, - "learning_rate": 1.6171299985935962e-05, - "loss": 0.2624, + "learning_rate": 2.6177273649228467e-05, + "loss": 0.3257, "step": 41835 }, { "epoch": 1.95, - "learning_rate": 1.6170831184660825e-05, - "loss": 0.0651, + "learning_rate": 2.6176805579392447e-05, + "loss": 0.0379, "step": 41840 }, { "epoch": 1.95, - "learning_rate": 1.6170362383385685e-05, - "loss": 0.0283, + "learning_rate": 2.6176337509556424e-05, + "loss": 0.0409, "step": 41845 }, { "epoch": 1.95, - "learning_rate": 1.6169893582110545e-05, - "loss": 0.0477, + "learning_rate": 2.6175869439720407e-05, + "loss": 0.0238, "step": 41850 }, { "epoch": 1.95, - "learning_rate": 1.6169424780835405e-05, - "loss": 0.061, + "learning_rate": 2.6175401369884387e-05, + "loss": 0.1279, "step": 41855 }, { "epoch": 1.95, - "learning_rate": 1.6168955979560265e-05, - "loss": 0.1343, + "learning_rate": 2.6174933300048366e-05, + "loss": 0.122, "step": 41860 }, { "epoch": 1.95, - "learning_rate": 1.6168487178285125e-05, - "loss": 0.1538, + "learning_rate": 2.617446523021235e-05, + "loss": 0.1906, "step": 41865 }, { "epoch": 1.95, - "learning_rate": 1.6168018377009985e-05, - "loss": 0.21, + "learning_rate": 2.617399716037633e-05, + "loss": 0.1503, "step": 41870 }, { "epoch": 1.95, - "learning_rate": 1.6167549575734848e-05, - "loss": 0.1877, + "learning_rate": 2.617352909054031e-05, + "loss": 0.1955, "step": 41875 }, { "epoch": 1.95, - "learning_rate": 1.6167080774459708e-05, - "loss": 0.3532, + "learning_rate": 2.617306102070429e-05, + "loss": 0.3087, "step": 41880 }, { "epoch": 1.95, - "learning_rate": 1.6166611973184568e-05, - "loss": 0.3586, + "learning_rate": 2.6172592950868272e-05, + "loss": 0.4449, "step": 41885 }, { "epoch": 1.95, - "learning_rate": 1.6166143171909428e-05, - "loss": 0.0907, + "learning_rate": 2.6172124881032252e-05, + "loss": 0.0521, "step": 41890 }, { "epoch": 1.95, - "learning_rate": 1.616567437063429e-05, - "loss": 0.0542, + "learning_rate": 2.6171656811196232e-05, + "loss": 0.0868, "step": 41895 }, { "epoch": 1.96, - "learning_rate": 1.616520556935915e-05, - "loss": 0.0861, + "learning_rate": 2.6171188741360212e-05, + "loss": 0.0675, "step": 41900 }, { "epoch": 1.96, - "learning_rate": 1.616473676808401e-05, - "loss": 0.0796, + "learning_rate": 2.617072067152419e-05, + "loss": 0.0765, "step": 41905 }, { "epoch": 1.96, - "learning_rate": 1.616426796680887e-05, - "loss": 0.1031, + "learning_rate": 2.617025260168817e-05, + "loss": 0.0803, "step": 41910 }, { "epoch": 1.96, - "learning_rate": 1.616379916553373e-05, - "loss": 0.0803, + "learning_rate": 2.616978453185215e-05, + "loss": 0.0727, "step": 41915 }, { "epoch": 1.96, - "learning_rate": 1.616333036425859e-05, - "loss": 0.1137, + "learning_rate": 2.616931646201613e-05, + "loss": 0.1943, "step": 41920 }, { "epoch": 1.96, - "learning_rate": 1.616286156298345e-05, - "loss": 0.2324, + "learning_rate": 2.6168848392180114e-05, + "loss": 0.2615, "step": 41925 }, { "epoch": 1.96, - "learning_rate": 1.616239276170831e-05, - "loss": 0.2309, + "learning_rate": 2.6168380322344094e-05, + "loss": 0.2774, "step": 41930 }, { "epoch": 1.96, - "learning_rate": 1.6161923960433174e-05, - "loss": 0.2706, + "learning_rate": 2.6167912252508074e-05, + "loss": 0.2885, "step": 41935 }, { "epoch": 1.96, - "learning_rate": 1.6161455159158034e-05, - "loss": 0.0404, + "learning_rate": 2.6167444182672057e-05, + "loss": 0.0393, "step": 41940 }, { "epoch": 1.96, - "learning_rate": 1.6160986357882894e-05, - "loss": 0.0287, + "learning_rate": 2.6166976112836037e-05, + "loss": 0.052, "step": 41945 }, { "epoch": 1.96, - "learning_rate": 1.6160517556607754e-05, - "loss": 0.1108, + "learning_rate": 2.6166508043000017e-05, + "loss": 0.0562, "step": 41950 }, { "epoch": 1.96, - "learning_rate": 1.6160048755332617e-05, - "loss": 0.0581, + "learning_rate": 2.6166039973163997e-05, + "loss": 0.0304, "step": 41955 }, { "epoch": 1.96, - "learning_rate": 1.6159579954057477e-05, - "loss": 0.1313, + "learning_rate": 2.616557190332798e-05, + "loss": 0.0789, "step": 41960 }, { "epoch": 1.96, - "learning_rate": 1.6159111152782337e-05, - "loss": 0.1299, + "learning_rate": 2.616510383349196e-05, + "loss": 0.1289, "step": 41965 }, { "epoch": 1.96, - "learning_rate": 1.6158642351507197e-05, - "loss": 0.1494, + "learning_rate": 2.6164635763655936e-05, + "loss": 0.2104, "step": 41970 }, { "epoch": 1.96, - "learning_rate": 1.6158173550232057e-05, - "loss": 0.1915, + "learning_rate": 2.6164167693819916e-05, + "loss": 0.3554, "step": 41975 }, { "epoch": 1.96, - "learning_rate": 1.615770474895692e-05, - "loss": 0.3193, + "learning_rate": 2.61636996239839e-05, + "loss": 0.2247, "step": 41980 }, { "epoch": 1.96, - "learning_rate": 1.615723594768178e-05, - "loss": 0.1987, + "learning_rate": 2.616323155414788e-05, + "loss": 0.281, "step": 41985 }, { "epoch": 1.96, - "learning_rate": 1.615676714640664e-05, - "loss": 0.0334, + "learning_rate": 2.616276348431186e-05, + "loss": 0.0692, "step": 41990 }, { "epoch": 1.96, - "learning_rate": 1.61562983451315e-05, - "loss": 0.0295, + "learning_rate": 2.6162295414475842e-05, + "loss": 0.0418, "step": 41995 }, { "epoch": 1.96, - "learning_rate": 1.615582954385636e-05, - "loss": 0.0684, + "learning_rate": 2.616182734463982e-05, + "loss": 0.1275, "step": 42000 }, { "epoch": 1.96, - "learning_rate": 1.615536074258122e-05, - "loss": 0.1138, + "learning_rate": 2.61613592748038e-05, + "loss": 0.1626, "step": 42005 }, { "epoch": 1.96, - "learning_rate": 1.615489194130608e-05, - "loss": 0.1429, + "learning_rate": 2.616089120496778e-05, + "loss": 0.0906, "step": 42010 }, { "epoch": 1.96, - "learning_rate": 1.615442314003094e-05, - "loss": 0.0981, + "learning_rate": 2.6160423135131764e-05, + "loss": 0.1578, "step": 42015 }, { "epoch": 1.96, - "learning_rate": 1.6153954338755803e-05, - "loss": 0.1291, + "learning_rate": 2.6159955065295744e-05, + "loss": 0.1298, "step": 42020 }, { "epoch": 1.96, - "learning_rate": 1.6153485537480663e-05, - "loss": 0.1244, + "learning_rate": 2.6159486995459724e-05, + "loss": 0.1097, "step": 42025 }, { "epoch": 1.96, - "learning_rate": 1.6153016736205523e-05, - "loss": 0.2555, + "learning_rate": 2.61590189256237e-05, + "loss": 0.3597, "step": 42030 }, { "epoch": 1.96, - "learning_rate": 1.6152547934930386e-05, - "loss": 0.261, + "learning_rate": 2.6158550855787684e-05, + "loss": 0.2528, "step": 42035 }, { "epoch": 1.96, - "learning_rate": 1.6152079133655246e-05, - "loss": 0.0461, + "learning_rate": 2.6158082785951664e-05, + "loss": 0.0207, "step": 42040 }, { "epoch": 1.96, - "learning_rate": 1.6151610332380106e-05, - "loss": 0.0987, + "learning_rate": 2.6157614716115643e-05, + "loss": 0.0425, "step": 42045 }, { "epoch": 1.96, - "learning_rate": 1.6151141531104966e-05, - "loss": 0.0703, + "learning_rate": 2.6157146646279627e-05, + "loss": 0.0486, "step": 42050 }, { "epoch": 1.96, - "learning_rate": 1.6150672729829826e-05, - "loss": 0.0625, + "learning_rate": 2.6156678576443606e-05, + "loss": 0.0863, "step": 42055 }, { "epoch": 1.96, - "learning_rate": 1.6150203928554686e-05, - "loss": 0.0667, + "learning_rate": 2.6156210506607586e-05, + "loss": 0.1083, "step": 42060 }, { "epoch": 1.96, - "learning_rate": 1.6149735127279546e-05, - "loss": 0.1145, + "learning_rate": 2.6155742436771566e-05, + "loss": 0.1881, "step": 42065 }, { "epoch": 1.96, - "learning_rate": 1.614926632600441e-05, - "loss": 0.1158, + "learning_rate": 2.615527436693555e-05, + "loss": 0.2123, "step": 42070 }, { "epoch": 1.96, - "learning_rate": 1.614879752472927e-05, - "loss": 0.1947, + "learning_rate": 2.615480629709953e-05, + "loss": 0.2059, "step": 42075 }, { "epoch": 1.96, - "learning_rate": 1.614832872345413e-05, - "loss": 0.3018, + "learning_rate": 2.615433822726351e-05, + "loss": 0.2769, "step": 42080 }, { "epoch": 1.96, - "learning_rate": 1.614785992217899e-05, - "loss": 0.3088, + "learning_rate": 2.615387015742749e-05, + "loss": 0.2228, "step": 42085 }, { "epoch": 1.96, - "learning_rate": 1.614739112090385e-05, - "loss": 0.0916, + "learning_rate": 2.6153402087591472e-05, + "loss": 0.0218, "step": 42090 }, { "epoch": 1.96, - "learning_rate": 1.614692231962871e-05, - "loss": 0.0213, + "learning_rate": 2.615293401775545e-05, + "loss": 0.3974, "step": 42095 }, { "epoch": 1.96, - "learning_rate": 1.6146453518353572e-05, - "loss": 0.0558, + "learning_rate": 2.6152465947919428e-05, + "loss": 0.0647, "step": 42100 }, { "epoch": 1.96, - "learning_rate": 1.6145984717078432e-05, - "loss": 0.0763, + "learning_rate": 2.615199787808341e-05, + "loss": 0.0883, "step": 42105 }, { "epoch": 1.96, - "learning_rate": 1.6145515915803292e-05, - "loss": 0.1617, + "learning_rate": 2.615152980824739e-05, + "loss": 0.0682, "step": 42110 }, { "epoch": 1.97, - "learning_rate": 1.6145047114528152e-05, - "loss": 0.0917, + "learning_rate": 2.615106173841137e-05, + "loss": 0.1631, "step": 42115 }, { "epoch": 1.97, - "learning_rate": 1.6144578313253015e-05, - "loss": 0.2151, + "learning_rate": 2.615059366857535e-05, + "loss": 0.1816, "step": 42120 }, { "epoch": 1.97, - "learning_rate": 1.6144109511977875e-05, - "loss": 0.1872, + "learning_rate": 2.6150125598739334e-05, + "loss": 0.2507, "step": 42125 }, { "epoch": 1.97, - "learning_rate": 1.6143640710702735e-05, - "loss": 0.2114, + "learning_rate": 2.6149657528903314e-05, + "loss": 0.2652, "step": 42130 }, { "epoch": 1.97, - "learning_rate": 1.6143171909427595e-05, - "loss": 0.2838, + "learning_rate": 2.6149189459067294e-05, + "loss": 0.2137, "step": 42135 }, { "epoch": 1.97, - "learning_rate": 1.6142703108152455e-05, - "loss": 0.0481, + "learning_rate": 2.6148721389231273e-05, + "loss": 0.0332, "step": 42140 }, { "epoch": 1.97, - "learning_rate": 1.6142234306877315e-05, - "loss": 0.0809, + "learning_rate": 2.6148253319395257e-05, + "loss": 0.0513, "step": 42145 }, { "epoch": 1.97, - "learning_rate": 1.6141765505602175e-05, - "loss": 0.0956, + "learning_rate": 2.6147785249559236e-05, + "loss": 0.0946, "step": 42150 }, { "epoch": 1.97, - "learning_rate": 1.6141296704327035e-05, - "loss": 0.1684, + "learning_rate": 2.6147317179723216e-05, + "loss": 0.089, "step": 42155 }, { "epoch": 1.97, - "learning_rate": 1.6140827903051895e-05, - "loss": 0.0701, + "learning_rate": 2.6146849109887193e-05, + "loss": 0.0767, "step": 42160 }, { "epoch": 1.97, - "learning_rate": 1.6140359101776758e-05, - "loss": 0.1664, + "learning_rate": 2.6146381040051176e-05, + "loss": 0.0689, "step": 42165 }, { "epoch": 1.97, - "learning_rate": 1.6139890300501618e-05, - "loss": 0.0904, + "learning_rate": 2.6145912970215156e-05, + "loss": 0.1849, "step": 42170 }, { "epoch": 1.97, - "learning_rate": 1.6139421499226478e-05, - "loss": 0.1441, + "learning_rate": 2.6145444900379136e-05, + "loss": 0.2643, "step": 42175 }, { "epoch": 1.97, - "learning_rate": 1.613895269795134e-05, - "loss": 0.2687, + "learning_rate": 2.614497683054312e-05, + "loss": 0.387, "step": 42180 }, { "epoch": 1.97, - "learning_rate": 1.61384838966762e-05, - "loss": 0.184, + "learning_rate": 2.61445087607071e-05, + "loss": 0.2659, "step": 42185 }, { "epoch": 1.97, - "learning_rate": 1.613801509540106e-05, - "loss": 0.0325, + "learning_rate": 2.614404069087108e-05, + "loss": 0.0683, "step": 42190 }, { "epoch": 1.97, - "learning_rate": 1.613754629412592e-05, - "loss": 0.0627, + "learning_rate": 2.6143572621035058e-05, + "loss": 0.0357, "step": 42195 }, { "epoch": 1.97, - "learning_rate": 1.613707749285078e-05, - "loss": 0.0675, + "learning_rate": 2.614310455119904e-05, + "loss": 0.0824, "step": 42200 }, { "epoch": 1.97, - "learning_rate": 1.613660869157564e-05, - "loss": 0.0689, + "learning_rate": 2.614263648136302e-05, + "loss": 0.0815, "step": 42205 }, { "epoch": 1.97, - "learning_rate": 1.6136139890300504e-05, - "loss": 0.0447, + "learning_rate": 2.6142168411527e-05, + "loss": 0.1082, "step": 42210 }, { "epoch": 1.97, - "learning_rate": 1.6135671089025364e-05, - "loss": 0.1771, + "learning_rate": 2.614170034169098e-05, + "loss": 0.1158, "step": 42215 }, { "epoch": 1.97, - "learning_rate": 1.6135202287750224e-05, - "loss": 0.1027, + "learning_rate": 2.614123227185496e-05, + "loss": 0.1754, "step": 42220 }, { "epoch": 1.97, - "learning_rate": 1.6134733486475084e-05, - "loss": 0.1719, + "learning_rate": 2.614076420201894e-05, + "loss": 0.1782, "step": 42225 }, { "epoch": 1.97, - "learning_rate": 1.6134264685199944e-05, - "loss": 0.4006, + "learning_rate": 2.614029613218292e-05, + "loss": 0.2233, "step": 42230 }, { "epoch": 1.97, - "learning_rate": 1.6133795883924804e-05, - "loss": 0.4029, + "learning_rate": 2.6139828062346904e-05, + "loss": 0.2004, "step": 42235 }, { "epoch": 1.97, - "learning_rate": 1.6133327082649664e-05, - "loss": 0.0602, + "learning_rate": 2.6139359992510883e-05, + "loss": 0.0311, "step": 42240 }, { "epoch": 1.97, - "learning_rate": 1.6132858281374527e-05, - "loss": 0.1014, + "learning_rate": 2.6138891922674863e-05, + "loss": 0.0693, "step": 42245 }, { "epoch": 1.97, - "learning_rate": 1.6132389480099387e-05, - "loss": 0.1154, + "learning_rate": 2.6138423852838843e-05, + "loss": 0.0721, "step": 42250 }, { "epoch": 1.97, - "learning_rate": 1.6131920678824247e-05, - "loss": 0.0592, + "learning_rate": 2.6137955783002826e-05, + "loss": 0.072, "step": 42255 }, { "epoch": 1.97, - "learning_rate": 1.613145187754911e-05, - "loss": 0.1081, + "learning_rate": 2.6137487713166806e-05, + "loss": 0.1183, "step": 42260 }, { "epoch": 1.97, - "learning_rate": 1.613098307627397e-05, - "loss": 0.1918, + "learning_rate": 2.6137019643330786e-05, + "loss": 0.1451, "step": 42265 }, { "epoch": 1.97, - "learning_rate": 1.613051427499883e-05, - "loss": 0.14, + "learning_rate": 2.6136551573494766e-05, + "loss": 0.1398, "step": 42270 }, { "epoch": 1.97, - "learning_rate": 1.613004547372369e-05, - "loss": 0.1181, + "learning_rate": 2.613608350365875e-05, + "loss": 0.1883, "step": 42275 }, { "epoch": 1.97, - "learning_rate": 1.612957667244855e-05, - "loss": 0.3222, + "learning_rate": 2.613561543382273e-05, + "loss": 0.2863, "step": 42280 }, { "epoch": 1.97, - "learning_rate": 1.612910787117341e-05, - "loss": 0.1738, + "learning_rate": 2.6135147363986705e-05, + "loss": 0.2521, "step": 42285 }, { "epoch": 1.97, - "learning_rate": 1.612863906989827e-05, - "loss": 0.0451, + "learning_rate": 2.613467929415069e-05, + "loss": 0.0794, "step": 42290 }, { "epoch": 1.97, - "learning_rate": 1.612817026862313e-05, - "loss": 0.0387, + "learning_rate": 2.6134211224314668e-05, + "loss": 0.0666, "step": 42295 }, { "epoch": 1.97, - "learning_rate": 1.612770146734799e-05, - "loss": 0.062, + "learning_rate": 2.6133743154478648e-05, + "loss": 0.0405, "step": 42300 }, { "epoch": 1.97, - "learning_rate": 1.6127232666072853e-05, - "loss": 0.1021, + "learning_rate": 2.6133275084642628e-05, + "loss": 0.0851, "step": 42305 }, { "epoch": 1.97, - "learning_rate": 1.6126763864797713e-05, - "loss": 0.1734, + "learning_rate": 2.613280701480661e-05, + "loss": 0.0478, "step": 42310 }, { "epoch": 1.97, - "learning_rate": 1.6126295063522573e-05, - "loss": 0.1267, + "learning_rate": 2.613233894497059e-05, + "loss": 0.0851, "step": 42315 }, { "epoch": 1.97, - "learning_rate": 1.6125826262247433e-05, - "loss": 0.156, + "learning_rate": 2.613187087513457e-05, + "loss": 0.1475, "step": 42320 }, { "epoch": 1.97, - "learning_rate": 1.6125357460972296e-05, - "loss": 0.1571, + "learning_rate": 2.613140280529855e-05, + "loss": 0.1359, "step": 42325 }, { "epoch": 1.98, - "learning_rate": 1.6124888659697156e-05, - "loss": 0.3486, + "learning_rate": 2.6130934735462534e-05, + "loss": 0.3473, "step": 42330 }, { "epoch": 1.98, - "learning_rate": 1.6124419858422016e-05, - "loss": 0.2881, + "learning_rate": 2.6130466665626513e-05, + "loss": 0.3202, "step": 42335 }, { "epoch": 1.98, - "learning_rate": 1.6123951057146876e-05, - "loss": 0.0239, + "learning_rate": 2.6129998595790493e-05, + "loss": 0.037, "step": 42340 }, { "epoch": 1.98, - "learning_rate": 1.6123482255871736e-05, - "loss": 0.0531, + "learning_rate": 2.6129530525954473e-05, + "loss": 0.0496, "step": 42345 }, { "epoch": 1.98, - "learning_rate": 1.61230134545966e-05, - "loss": 0.0873, + "learning_rate": 2.6129062456118453e-05, + "loss": 0.0428, "step": 42350 }, { "epoch": 1.98, - "learning_rate": 1.612254465332146e-05, - "loss": 0.0827, + "learning_rate": 2.6128594386282433e-05, + "loss": 0.0577, "step": 42355 }, { "epoch": 1.98, - "learning_rate": 1.612207585204632e-05, - "loss": 0.0947, + "learning_rate": 2.6128126316446413e-05, + "loss": 0.102, "step": 42360 }, { "epoch": 1.98, - "learning_rate": 1.612160705077118e-05, - "loss": 0.0902, + "learning_rate": 2.6127658246610396e-05, + "loss": 0.1314, "step": 42365 }, { "epoch": 1.98, - "learning_rate": 1.612113824949604e-05, - "loss": 0.1753, + "learning_rate": 2.6127190176774376e-05, + "loss": 0.1403, "step": 42370 }, { "epoch": 1.98, - "learning_rate": 1.61206694482209e-05, - "loss": 0.1915, + "learning_rate": 2.6126722106938355e-05, + "loss": 0.1798, "step": 42375 }, { "epoch": 1.98, - "learning_rate": 1.612020064694576e-05, - "loss": 0.3008, + "learning_rate": 2.6126254037102335e-05, + "loss": 0.3162, "step": 42380 }, { "epoch": 1.98, - "learning_rate": 1.6119731845670622e-05, - "loss": 0.3702, + "learning_rate": 2.612578596726632e-05, + "loss": 0.1984, "step": 42385 }, { "epoch": 1.98, - "learning_rate": 1.6119263044395482e-05, - "loss": 0.0867, + "learning_rate": 2.6125317897430298e-05, + "loss": 0.0688, "step": 42390 }, { "epoch": 1.98, - "learning_rate": 1.6118794243120342e-05, - "loss": 0.0771, + "learning_rate": 2.6124849827594278e-05, + "loss": 0.1072, "step": 42395 }, { "epoch": 1.98, - "learning_rate": 1.6118325441845206e-05, - "loss": 0.0406, + "learning_rate": 2.6124381757758258e-05, + "loss": 0.0962, "step": 42400 }, { "epoch": 1.98, - "learning_rate": 1.6117856640570066e-05, - "loss": 0.0628, + "learning_rate": 2.612391368792224e-05, + "loss": 0.0709, "step": 42405 }, { "epoch": 1.98, - "learning_rate": 1.6117387839294926e-05, - "loss": 0.0704, + "learning_rate": 2.6123445618086218e-05, + "loss": 0.1281, "step": 42410 }, { "epoch": 1.98, - "learning_rate": 1.6116919038019785e-05, - "loss": 0.075, + "learning_rate": 2.6122977548250197e-05, + "loss": 0.1265, "step": 42415 }, { "epoch": 1.98, - "learning_rate": 1.6116450236744645e-05, - "loss": 0.1656, + "learning_rate": 2.612250947841418e-05, + "loss": 0.2309, "step": 42420 }, { "epoch": 1.98, - "learning_rate": 1.6115981435469505e-05, - "loss": 0.1958, + "learning_rate": 2.612204140857816e-05, + "loss": 0.1706, "step": 42425 }, { "epoch": 1.98, - "learning_rate": 1.6115512634194365e-05, - "loss": 0.2968, + "learning_rate": 2.612157333874214e-05, + "loss": 0.3254, "step": 42430 }, { "epoch": 1.98, - "learning_rate": 1.6115043832919225e-05, - "loss": 0.2463, + "learning_rate": 2.612110526890612e-05, + "loss": 0.2413, "step": 42435 }, { "epoch": 1.98, - "learning_rate": 1.6114575031644085e-05, - "loss": 0.0686, + "learning_rate": 2.6120637199070103e-05, + "loss": 0.0151, "step": 42440 }, { "epoch": 1.98, - "learning_rate": 1.611410623036895e-05, - "loss": 0.0637, + "learning_rate": 2.6120169129234083e-05, + "loss": 0.0524, "step": 42445 }, { "epoch": 1.98, - "learning_rate": 1.611363742909381e-05, - "loss": 0.0961, + "learning_rate": 2.6119701059398063e-05, + "loss": 0.0802, "step": 42450 }, { "epoch": 1.98, - "learning_rate": 1.611316862781867e-05, - "loss": 0.0605, + "learning_rate": 2.6119232989562043e-05, + "loss": 0.0699, "step": 42455 }, { "epoch": 1.98, - "learning_rate": 1.6112699826543528e-05, - "loss": 0.0825, + "learning_rate": 2.6118764919726026e-05, + "loss": 0.1366, "step": 42460 }, { "epoch": 1.98, - "learning_rate": 1.611223102526839e-05, - "loss": 0.1368, + "learning_rate": 2.6118296849890006e-05, + "loss": 0.1103, "step": 42465 }, { "epoch": 1.98, - "learning_rate": 1.611176222399325e-05, - "loss": 0.1426, + "learning_rate": 2.6117828780053985e-05, + "loss": 0.199, "step": 42470 }, { "epoch": 1.98, - "learning_rate": 1.611129342271811e-05, - "loss": 0.1738, + "learning_rate": 2.6117360710217965e-05, + "loss": 0.1311, "step": 42475 }, { "epoch": 1.98, - "learning_rate": 1.611082462144297e-05, - "loss": 0.271, + "learning_rate": 2.6116892640381945e-05, + "loss": 0.2063, "step": 42480 }, { "epoch": 1.98, - "learning_rate": 1.611035582016783e-05, - "loss": 0.2163, + "learning_rate": 2.6116424570545925e-05, + "loss": 0.1962, "step": 42485 }, { "epoch": 1.98, - "learning_rate": 1.6109887018892695e-05, - "loss": 0.048, + "learning_rate": 2.6115956500709905e-05, + "loss": 0.0497, "step": 42490 }, { "epoch": 1.98, - "learning_rate": 1.6109418217617555e-05, - "loss": 0.034, + "learning_rate": 2.6115488430873888e-05, + "loss": 0.0615, "step": 42495 }, { "epoch": 1.98, - "learning_rate": 1.6108949416342414e-05, - "loss": 0.0202, + "learning_rate": 2.6115020361037868e-05, + "loss": 0.1056, "step": 42500 }, { "epoch": 1.98, - "learning_rate": 1.6108480615067274e-05, - "loss": 0.0828, + "learning_rate": 2.6114552291201848e-05, + "loss": 0.0626, "step": 42505 }, { "epoch": 1.98, - "learning_rate": 1.6108011813792134e-05, - "loss": 0.0536, + "learning_rate": 2.6114084221365827e-05, + "loss": 0.0998, "step": 42510 }, { "epoch": 1.98, - "learning_rate": 1.6107543012516994e-05, - "loss": 0.084, + "learning_rate": 2.611361615152981e-05, + "loss": 0.1034, "step": 42515 }, { "epoch": 1.98, - "learning_rate": 1.6107074211241854e-05, - "loss": 0.2224, + "learning_rate": 2.611314808169379e-05, + "loss": 0.2097, "step": 42520 }, { "epoch": 1.98, - "learning_rate": 1.6106605409966714e-05, - "loss": 0.1419, + "learning_rate": 2.611268001185777e-05, + "loss": 0.273, "step": 42525 }, { "epoch": 1.98, - "learning_rate": 1.6106136608691577e-05, - "loss": 0.3436, + "learning_rate": 2.611221194202175e-05, + "loss": 0.2922, "step": 42530 }, { "epoch": 1.98, - "learning_rate": 1.6105667807416437e-05, - "loss": 0.2449, + "learning_rate": 2.611174387218573e-05, + "loss": 0.3163, "step": 42535 }, { "epoch": 1.98, - "learning_rate": 1.6105199006141297e-05, - "loss": 0.0223, + "learning_rate": 2.611127580234971e-05, + "loss": 0.0387, "step": 42540 }, { "epoch": 1.99, - "learning_rate": 1.610473020486616e-05, - "loss": 0.0835, + "learning_rate": 2.611080773251369e-05, + "loss": 0.0681, "step": 42545 }, { "epoch": 1.99, - "learning_rate": 1.610426140359102e-05, - "loss": 0.05, + "learning_rate": 2.6110339662677673e-05, + "loss": 0.0488, "step": 42550 }, { "epoch": 1.99, - "learning_rate": 1.610379260231588e-05, - "loss": 0.1297, + "learning_rate": 2.6109871592841653e-05, + "loss": 0.0721, "step": 42555 }, { "epoch": 1.99, - "learning_rate": 1.610332380104074e-05, - "loss": 0.0907, + "learning_rate": 2.6109403523005632e-05, + "loss": 0.0597, "step": 42560 }, { "epoch": 1.99, - "learning_rate": 1.61028549997656e-05, - "loss": 0.1403, + "learning_rate": 2.6108935453169612e-05, + "loss": 0.1119, "step": 42565 }, { "epoch": 1.99, - "learning_rate": 1.610238619849046e-05, - "loss": 0.1567, + "learning_rate": 2.6108467383333595e-05, + "loss": 0.149, "step": 42570 }, { "epoch": 1.99, - "learning_rate": 1.610191739721532e-05, - "loss": 0.096, + "learning_rate": 2.6107999313497575e-05, + "loss": 0.0987, "step": 42575 }, { "epoch": 1.99, - "learning_rate": 1.610144859594018e-05, - "loss": 0.3812, + "learning_rate": 2.6107531243661555e-05, + "loss": 0.3054, "step": 42580 }, { "epoch": 1.99, - "learning_rate": 1.6100979794665044e-05, - "loss": 0.2637, + "learning_rate": 2.6107063173825535e-05, + "loss": 0.2318, "step": 42585 }, { "epoch": 1.99, - "learning_rate": 1.6100510993389903e-05, - "loss": 0.0086, + "learning_rate": 2.6106595103989518e-05, + "loss": 0.0366, "step": 42590 }, { "epoch": 1.99, - "learning_rate": 1.6100042192114763e-05, - "loss": 0.0589, + "learning_rate": 2.6106127034153498e-05, + "loss": 0.1054, "step": 42595 }, { "epoch": 1.99, - "learning_rate": 1.6099573390839623e-05, - "loss": 0.1128, + "learning_rate": 2.6105658964317474e-05, + "loss": 0.0642, "step": 42600 }, { "epoch": 1.99, - "learning_rate": 1.6099104589564483e-05, - "loss": 0.117, + "learning_rate": 2.6105190894481458e-05, + "loss": 0.1019, "step": 42605 }, { "epoch": 1.99, - "learning_rate": 1.6098635788289347e-05, - "loss": 0.0931, + "learning_rate": 2.6104722824645437e-05, + "loss": 0.0302, "step": 42610 }, { "epoch": 1.99, - "learning_rate": 1.6098166987014207e-05, - "loss": 0.0807, + "learning_rate": 2.6104254754809417e-05, + "loss": 0.0899, "step": 42615 }, { "epoch": 1.99, - "learning_rate": 1.6097698185739066e-05, - "loss": 0.2223, + "learning_rate": 2.6103786684973397e-05, + "loss": 0.1203, "step": 42620 }, { "epoch": 1.99, - "learning_rate": 1.6097229384463926e-05, - "loss": 0.2597, + "learning_rate": 2.610331861513738e-05, + "loss": 0.2097, "step": 42625 }, { "epoch": 1.99, - "learning_rate": 1.609676058318879e-05, - "loss": 0.3632, + "learning_rate": 2.610285054530136e-05, + "loss": 0.2266, "step": 42630 }, { "epoch": 1.99, - "learning_rate": 1.609629178191365e-05, - "loss": 0.1469, + "learning_rate": 2.610238247546534e-05, + "loss": 0.2678, "step": 42635 }, { "epoch": 1.99, - "learning_rate": 1.609582298063851e-05, - "loss": 0.033, + "learning_rate": 2.610191440562932e-05, + "loss": 0.0429, "step": 42640 }, { "epoch": 1.99, - "learning_rate": 1.609535417936337e-05, - "loss": 0.0802, + "learning_rate": 2.6101446335793303e-05, + "loss": 0.0258, "step": 42645 }, { "epoch": 1.99, - "learning_rate": 1.609488537808823e-05, - "loss": 0.0277, + "learning_rate": 2.6100978265957283e-05, + "loss": 0.099, "step": 42650 }, { "epoch": 1.99, - "learning_rate": 1.609441657681309e-05, - "loss": 0.1024, + "learning_rate": 2.6100510196121262e-05, + "loss": 0.0877, "step": 42655 }, { "epoch": 1.99, - "learning_rate": 1.609394777553795e-05, - "loss": 0.0589, + "learning_rate": 2.6100042126285246e-05, + "loss": 0.1255, "step": 42660 }, { "epoch": 1.99, - "learning_rate": 1.609347897426281e-05, - "loss": 0.0769, + "learning_rate": 2.6099574056449222e-05, + "loss": 0.1109, "step": 42665 }, { "epoch": 1.99, - "learning_rate": 1.6093010172987673e-05, - "loss": 0.1042, + "learning_rate": 2.6099105986613202e-05, + "loss": 0.1523, "step": 42670 }, { "epoch": 1.99, - "learning_rate": 1.6092541371712533e-05, - "loss": 0.3239, + "learning_rate": 2.6098637916777182e-05, + "loss": 0.1558, "step": 42675 }, { "epoch": 1.99, - "learning_rate": 1.6092072570437392e-05, - "loss": 0.3277, + "learning_rate": 2.6098169846941165e-05, + "loss": 0.2726, "step": 42680 }, { "epoch": 1.99, - "learning_rate": 1.6091603769162252e-05, - "loss": 0.3928, + "learning_rate": 2.6097701777105145e-05, + "loss": 0.3082, "step": 42685 }, { "epoch": 1.99, - "learning_rate": 1.6091134967887116e-05, - "loss": 0.0315, + "learning_rate": 2.6097233707269125e-05, + "loss": 0.0495, "step": 42690 }, { "epoch": 1.99, - "learning_rate": 1.6090666166611976e-05, - "loss": 0.0425, + "learning_rate": 2.6096765637433104e-05, + "loss": 0.0402, "step": 42695 }, { "epoch": 1.99, - "learning_rate": 1.6090197365336836e-05, - "loss": 0.0487, + "learning_rate": 2.6096297567597088e-05, + "loss": 0.0397, "step": 42700 }, { "epoch": 1.99, - "learning_rate": 1.6089728564061695e-05, - "loss": 0.0903, + "learning_rate": 2.6095829497761067e-05, + "loss": 0.0766, "step": 42705 }, { "epoch": 1.99, - "learning_rate": 1.6089259762786555e-05, - "loss": 0.1076, + "learning_rate": 2.6095361427925047e-05, + "loss": 0.104, "step": 42710 }, { "epoch": 1.99, - "learning_rate": 1.6088790961511415e-05, - "loss": 0.169, + "learning_rate": 2.609489335808903e-05, + "loss": 0.1302, "step": 42715 }, { "epoch": 1.99, - "learning_rate": 1.608832216023628e-05, - "loss": 0.1955, + "learning_rate": 2.609442528825301e-05, + "loss": 0.1071, "step": 42720 }, { "epoch": 1.99, - "learning_rate": 1.608785335896114e-05, - "loss": 0.2111, + "learning_rate": 2.6093957218416987e-05, + "loss": 0.2285, "step": 42725 }, { "epoch": 1.99, - "learning_rate": 1.6087384557686e-05, - "loss": 0.2163, + "learning_rate": 2.6093489148580966e-05, + "loss": 0.4066, "step": 42730 }, { "epoch": 1.99, - "learning_rate": 1.608691575641086e-05, - "loss": 0.2275, + "learning_rate": 2.609302107874495e-05, + "loss": 0.1984, "step": 42735 }, { "epoch": 1.99, - "learning_rate": 1.608644695513572e-05, - "loss": 0.0338, + "learning_rate": 2.609255300890893e-05, + "loss": 0.0729, "step": 42740 }, { "epoch": 1.99, - "learning_rate": 1.608597815386058e-05, - "loss": 0.0571, + "learning_rate": 2.609208493907291e-05, + "loss": 0.0228, "step": 42745 }, { "epoch": 1.99, - "learning_rate": 1.608550935258544e-05, - "loss": 0.1106, + "learning_rate": 2.609161686923689e-05, + "loss": 0.0371, "step": 42750 }, { "epoch": 2.0, - "learning_rate": 1.60850405513103e-05, - "loss": 0.1291, + "learning_rate": 2.6091148799400872e-05, + "loss": 0.0475, "step": 42755 }, { "epoch": 2.0, - "learning_rate": 1.608457175003516e-05, - "loss": 0.0905, + "learning_rate": 2.6090680729564852e-05, + "loss": 0.0637, "step": 42760 }, { "epoch": 2.0, - "learning_rate": 1.608410294876002e-05, - "loss": 0.1497, + "learning_rate": 2.6090212659728832e-05, + "loss": 0.122, "step": 42765 }, { "epoch": 2.0, - "learning_rate": 1.6083634147484885e-05, - "loss": 0.1673, + "learning_rate": 2.6089744589892812e-05, + "loss": 0.1042, "step": 42770 }, { "epoch": 2.0, - "learning_rate": 1.6083165346209745e-05, - "loss": 0.2101, + "learning_rate": 2.6089276520056795e-05, + "loss": 0.1345, "step": 42775 }, { "epoch": 2.0, - "learning_rate": 1.6082696544934605e-05, - "loss": 0.4179, + "learning_rate": 2.6088808450220775e-05, + "loss": 0.3056, "step": 42780 }, { "epoch": 2.0, - "learning_rate": 1.6082227743659465e-05, - "loss": 0.274, + "learning_rate": 2.6088340380384755e-05, + "loss": 0.2785, "step": 42785 }, { "epoch": 2.0, - "learning_rate": 1.6081758942384325e-05, - "loss": 0.0273, + "learning_rate": 2.6087872310548734e-05, + "loss": 0.0451, "step": 42790 }, { "epoch": 2.0, - "learning_rate": 1.6081290141109184e-05, - "loss": 0.0521, + "learning_rate": 2.6087404240712714e-05, + "loss": 0.0623, "step": 42795 }, { "epoch": 2.0, - "learning_rate": 1.6080821339834044e-05, - "loss": 0.1252, + "learning_rate": 2.6086936170876694e-05, + "loss": 0.074, "step": 42800 }, { "epoch": 2.0, - "learning_rate": 1.6080352538558904e-05, - "loss": 0.0884, + "learning_rate": 2.6086468101040674e-05, + "loss": 0.0478, "step": 42805 }, { "epoch": 2.0, - "learning_rate": 1.6079883737283764e-05, - "loss": 0.1067, + "learning_rate": 2.6086000031204657e-05, + "loss": 0.0585, "step": 42810 }, { "epoch": 2.0, - "learning_rate": 1.6079414936008628e-05, - "loss": 0.1663, + "learning_rate": 2.6085531961368637e-05, + "loss": 0.0827, "step": 42815 }, { "epoch": 2.0, - "learning_rate": 1.6078946134733488e-05, - "loss": 0.0931, + "learning_rate": 2.6085063891532617e-05, + "loss": 0.1835, "step": 42820 }, { "epoch": 2.0, - "learning_rate": 1.6078477333458347e-05, - "loss": 0.2319, + "learning_rate": 2.6084595821696597e-05, + "loss": 0.2746, "step": 42825 }, { "epoch": 2.0, - "learning_rate": 1.607800853218321e-05, - "loss": 0.3311, + "learning_rate": 2.608412775186058e-05, + "loss": 0.3944, "step": 42830 }, { "epoch": 2.0, - "learning_rate": 1.607753973090807e-05, - "loss": 0.2856, + "learning_rate": 2.608365968202456e-05, + "loss": 0.221, "step": 42835 }, { "epoch": 2.0, - "learning_rate": 1.607707092963293e-05, - "loss": 0.0445, + "learning_rate": 2.608319161218854e-05, + "loss": 0.0433, "step": 42840 }, { "epoch": 2.0, - "learning_rate": 1.607660212835779e-05, - "loss": 0.07, + "learning_rate": 2.6082723542352523e-05, + "loss": 0.0439, "step": 42845 }, { "epoch": 2.0, - "learning_rate": 1.607613332708265e-05, - "loss": 0.0974, + "learning_rate": 2.6082255472516502e-05, + "loss": 0.1034, "step": 42850 }, { "epoch": 2.0, - "learning_rate": 1.607566452580751e-05, - "loss": 0.151, + "learning_rate": 2.608178740268048e-05, + "loss": 0.1014, "step": 42855 }, { "epoch": 2.0, - "learning_rate": 1.6075195724532374e-05, - "loss": 0.1649, + "learning_rate": 2.608131933284446e-05, + "loss": 0.2214, "step": 42860 }, { "epoch": 2.0, - "eval_cer": 0.013818165196324908, - "eval_loss": 0.07233013957738876, - "eval_runtime": 378.9809, - "eval_samples_per_second": 50.266, - "eval_steps_per_second": 12.568, - "eval_wer": 0.11786914335334316, + "eval_cer": 0.012627477835976975, + "eval_loss": 0.026034235954284668, + "eval_runtime": 396.3831, + "eval_samples_per_second": 48.06, + "eval_steps_per_second": 12.016, + "eval_wer": 0.10848767794275219, "step": 42862 }, { "epoch": 2.0, - "learning_rate": 1.6074726923257234e-05, - "loss": 0.2466, + "learning_rate": 2.6080851263008442e-05, + "loss": 0.3494, "step": 42865 }, { "epoch": 2.0, - "learning_rate": 1.6074258121982094e-05, - "loss": 0.0398, + "learning_rate": 2.6080383193172422e-05, + "loss": 0.1003, "step": 42870 }, { "epoch": 2.0, - "learning_rate": 1.6073789320706954e-05, - "loss": 0.0557, + "learning_rate": 2.60799151233364e-05, + "loss": 0.0811, "step": 42875 }, { "epoch": 2.0, - "learning_rate": 1.6073320519431813e-05, - "loss": 0.055, + "learning_rate": 2.607944705350038e-05, + "loss": 0.0584, "step": 42880 }, { "epoch": 2.0, - "learning_rate": 1.6072851718156673e-05, - "loss": 0.1247, + "learning_rate": 2.6078978983664365e-05, + "loss": 0.1147, "step": 42885 }, { "epoch": 2.0, - "learning_rate": 1.6072382916881533e-05, - "loss": 0.0794, + "learning_rate": 2.6078510913828344e-05, + "loss": 0.1141, "step": 42890 }, { "epoch": 2.0, - "learning_rate": 1.6071914115606397e-05, - "loss": 0.1031, + "learning_rate": 2.6078042843992324e-05, + "loss": 0.1389, "step": 42895 }, { "epoch": 2.0, - "learning_rate": 1.6071445314331257e-05, - "loss": 0.1369, + "learning_rate": 2.6077574774156307e-05, + "loss": 0.0897, "step": 42900 }, { "epoch": 2.0, - "learning_rate": 1.6070976513056117e-05, - "loss": 0.1704, + "learning_rate": 2.6077106704320287e-05, + "loss": 0.1826, "step": 42905 }, { "epoch": 2.0, - "learning_rate": 1.607050771178098e-05, - "loss": 0.2859, + "learning_rate": 2.6076638634484267e-05, + "loss": 0.2396, "step": 42910 }, { "epoch": 2.0, - "learning_rate": 1.607003891050584e-05, - "loss": 0.3003, + "learning_rate": 2.6076170564648243e-05, + "loss": 0.2953, "step": 42915 }, { "epoch": 2.0, - "learning_rate": 1.60695701092307e-05, - "loss": 0.0153, + "learning_rate": 2.6075702494812227e-05, + "loss": 0.0186, "step": 42920 }, { "epoch": 2.0, - "learning_rate": 1.606910130795556e-05, - "loss": 0.0411, + "learning_rate": 2.6075234424976206e-05, + "loss": 0.0479, "step": 42925 }, { "epoch": 2.0, - "learning_rate": 1.606863250668042e-05, - "loss": 0.038, + "learning_rate": 2.6074766355140186e-05, + "loss": 0.0722, "step": 42930 }, { "epoch": 2.0, - "learning_rate": 1.606816370540528e-05, - "loss": 0.0782, + "learning_rate": 2.6074298285304166e-05, + "loss": 0.1272, "step": 42935 }, { "epoch": 2.0, - "learning_rate": 1.606769490413014e-05, - "loss": 0.1823, + "learning_rate": 2.607383021546815e-05, + "loss": 0.1276, "step": 42940 }, { "epoch": 2.0, - "learning_rate": 1.6067226102855e-05, - "loss": 0.0842, + "learning_rate": 2.607336214563213e-05, + "loss": 0.0871, "step": 42945 }, { "epoch": 2.0, - "learning_rate": 1.606675730157986e-05, - "loss": 0.1666, + "learning_rate": 2.607289407579611e-05, + "loss": 0.1311, "step": 42950 }, { "epoch": 2.0, - "learning_rate": 1.6066288500304723e-05, - "loss": 0.2304, + "learning_rate": 2.607242600596009e-05, + "loss": 0.2117, "step": 42955 }, { "epoch": 2.0, - "learning_rate": 1.6065819699029583e-05, - "loss": 0.1845, + "learning_rate": 2.6071957936124072e-05, + "loss": 0.2268, "step": 42960 }, { "epoch": 2.0, - "learning_rate": 1.6065350897754443e-05, - "loss": 0.3603, + "learning_rate": 2.6071489866288052e-05, + "loss": 0.3229, "step": 42965 }, { "epoch": 2.01, - "learning_rate": 1.6064882096479302e-05, - "loss": 0.1044, + "learning_rate": 2.607102179645203e-05, + "loss": 0.0631, "step": 42970 }, { "epoch": 2.01, - "learning_rate": 1.6064413295204166e-05, - "loss": 0.0342, + "learning_rate": 2.6070553726616015e-05, + "loss": 0.0875, "step": 42975 }, { "epoch": 2.01, - "learning_rate": 1.6063944493929026e-05, - "loss": 0.0837, + "learning_rate": 2.607008565677999e-05, + "loss": 0.0383, "step": 42980 }, { "epoch": 2.01, - "learning_rate": 1.6063475692653886e-05, - "loss": 0.0801, + "learning_rate": 2.606961758694397e-05, + "loss": 0.0947, "step": 42985 }, { "epoch": 2.01, - "learning_rate": 1.6063006891378746e-05, - "loss": 0.1308, + "learning_rate": 2.606914951710795e-05, + "loss": 0.0477, "step": 42990 }, { "epoch": 2.01, - "learning_rate": 1.6062538090103606e-05, - "loss": 0.1231, + "learning_rate": 2.6068681447271934e-05, + "loss": 0.1214, "step": 42995 }, { "epoch": 2.01, - "learning_rate": 1.606206928882847e-05, - "loss": 0.1921, + "learning_rate": 2.6068213377435914e-05, + "loss": 0.1402, "step": 43000 }, { "epoch": 2.01, - "learning_rate": 1.606160048755333e-05, - "loss": 0.2517, + "learning_rate": 2.6067745307599894e-05, + "loss": 0.1877, "step": 43005 }, { "epoch": 2.01, - "learning_rate": 1.606113168627819e-05, - "loss": 0.2169, + "learning_rate": 2.6067277237763874e-05, + "loss": 0.2252, "step": 43010 }, { "epoch": 2.01, - "learning_rate": 1.606066288500305e-05, - "loss": 0.3231, + "learning_rate": 2.6066809167927857e-05, + "loss": 0.3702, "step": 43015 }, { "epoch": 2.01, - "learning_rate": 1.606019408372791e-05, - "loss": 0.049, + "learning_rate": 2.6066341098091837e-05, + "loss": 0.0727, "step": 43020 }, { "epoch": 2.01, - "learning_rate": 1.605972528245277e-05, - "loss": 0.0529, + "learning_rate": 2.6065873028255816e-05, + "loss": 0.0472, "step": 43025 }, { "epoch": 2.01, - "learning_rate": 1.605925648117763e-05, - "loss": 0.0778, + "learning_rate": 2.60654049584198e-05, + "loss": 0.0736, "step": 43030 }, { "epoch": 2.01, - "learning_rate": 1.605878767990249e-05, - "loss": 0.1165, + "learning_rate": 2.606493688858378e-05, + "loss": 0.073, "step": 43035 }, { "epoch": 2.01, - "learning_rate": 1.6058318878627352e-05, - "loss": 0.0635, + "learning_rate": 2.606446881874776e-05, + "loss": 0.1267, "step": 43040 }, { "epoch": 2.01, - "learning_rate": 1.605785007735221e-05, - "loss": 0.1498, + "learning_rate": 2.6064000748911736e-05, + "loss": 0.0941, "step": 43045 }, { "epoch": 2.01, - "learning_rate": 1.605738127607707e-05, - "loss": 0.1469, + "learning_rate": 2.606353267907572e-05, + "loss": 0.1539, "step": 43050 }, { "epoch": 2.01, - "learning_rate": 1.6056912474801935e-05, - "loss": 0.1534, + "learning_rate": 2.60630646092397e-05, + "loss": 0.1133, "step": 43055 }, { "epoch": 2.01, - "learning_rate": 1.6056443673526795e-05, - "loss": 0.1837, + "learning_rate": 2.606259653940368e-05, + "loss": 0.1859, "step": 43060 }, { "epoch": 2.01, - "learning_rate": 1.6055974872251655e-05, - "loss": 0.3012, + "learning_rate": 2.606212846956766e-05, + "loss": 0.4176, "step": 43065 }, { "epoch": 2.01, - "learning_rate": 1.6055506070976515e-05, - "loss": 0.0679, + "learning_rate": 2.606166039973164e-05, + "loss": 0.0344, "step": 43070 }, { "epoch": 2.01, - "learning_rate": 1.6055037269701375e-05, - "loss": 0.0562, + "learning_rate": 2.606119232989562e-05, + "loss": 0.0579, "step": 43075 }, { "epoch": 2.01, - "learning_rate": 1.6054568468426235e-05, - "loss": 0.1606, + "learning_rate": 2.60607242600596e-05, + "loss": 0.0737, "step": 43080 }, { "epoch": 2.01, - "learning_rate": 1.6054099667151094e-05, - "loss": 0.0786, + "learning_rate": 2.6060256190223584e-05, + "loss": 0.0629, "step": 43085 }, { "epoch": 2.01, - "learning_rate": 1.6053630865875954e-05, - "loss": 0.0877, + "learning_rate": 2.6059788120387564e-05, + "loss": 0.1527, "step": 43090 }, { "epoch": 2.01, - "learning_rate": 1.6053162064600818e-05, - "loss": 0.1015, + "learning_rate": 2.6059320050551544e-05, + "loss": 0.0979, "step": 43095 }, { "epoch": 2.01, - "learning_rate": 1.6052693263325678e-05, - "loss": 0.1345, + "learning_rate": 2.6058851980715524e-05, + "loss": 0.1139, "step": 43100 }, { "epoch": 2.01, - "learning_rate": 1.6052224462050538e-05, - "loss": 0.1603, + "learning_rate": 2.6058383910879504e-05, + "loss": 0.1651, "step": 43105 }, { "epoch": 2.01, - "learning_rate": 1.6051755660775398e-05, - "loss": 0.2164, + "learning_rate": 2.6057915841043483e-05, + "loss": 0.24, "step": 43110 }, { "epoch": 2.01, - "learning_rate": 1.6051286859500257e-05, - "loss": 0.2828, + "learning_rate": 2.6057447771207463e-05, + "loss": 0.3547, "step": 43115 }, { "epoch": 2.01, - "learning_rate": 1.605081805822512e-05, - "loss": 0.0067, + "learning_rate": 2.6056979701371443e-05, + "loss": 0.0932, "step": 43120 }, { "epoch": 2.01, - "learning_rate": 1.605034925694998e-05, - "loss": 0.0261, + "learning_rate": 2.6056511631535426e-05, + "loss": 0.0421, "step": 43125 }, { "epoch": 2.01, - "learning_rate": 1.604988045567484e-05, - "loss": 0.0532, + "learning_rate": 2.6056043561699406e-05, + "loss": 0.0528, "step": 43130 }, { "epoch": 2.01, - "learning_rate": 1.60494116543997e-05, - "loss": 0.1057, + "learning_rate": 2.6055575491863386e-05, + "loss": 0.0339, "step": 43135 }, { "epoch": 2.01, - "learning_rate": 1.6048942853124564e-05, - "loss": 0.119, + "learning_rate": 2.6055107422027366e-05, + "loss": 0.1307, "step": 43140 }, { "epoch": 2.01, - "learning_rate": 1.6048474051849424e-05, - "loss": 0.0828, + "learning_rate": 2.605463935219135e-05, + "loss": 0.0994, "step": 43145 }, { "epoch": 2.01, - "learning_rate": 1.6048005250574284e-05, - "loss": 0.1604, + "learning_rate": 2.605417128235533e-05, + "loss": 0.1795, "step": 43150 }, { "epoch": 2.01, - "learning_rate": 1.6047536449299144e-05, - "loss": 0.1541, + "learning_rate": 2.605370321251931e-05, + "loss": 0.1295, "step": 43155 }, { "epoch": 2.01, - "learning_rate": 1.6047067648024004e-05, - "loss": 0.3743, + "learning_rate": 2.6053235142683292e-05, + "loss": 0.2791, "step": 43160 }, { "epoch": 2.01, - "learning_rate": 1.6046598846748864e-05, - "loss": 0.3275, + "learning_rate": 2.605276707284727e-05, + "loss": 0.3165, "step": 43165 }, { "epoch": 2.01, - "learning_rate": 1.6046130045473724e-05, - "loss": 0.0713, + "learning_rate": 2.6052299003011248e-05, + "loss": 0.0509, "step": 43170 }, { "epoch": 2.01, - "learning_rate": 1.6045661244198583e-05, - "loss": 0.1079, + "learning_rate": 2.6051830933175228e-05, + "loss": 0.0227, "step": 43175 }, { "epoch": 2.01, - "learning_rate": 1.6045192442923447e-05, - "loss": 0.0531, + "learning_rate": 2.605136286333921e-05, + "loss": 0.0606, "step": 43180 }, { "epoch": 2.02, - "learning_rate": 1.6044723641648307e-05, - "loss": 0.0501, + "learning_rate": 2.605089479350319e-05, + "loss": 0.0552, "step": 43185 }, { "epoch": 2.02, - "learning_rate": 1.6044254840373167e-05, - "loss": 0.1146, + "learning_rate": 2.605042672366717e-05, + "loss": 0.1295, "step": 43190 }, { "epoch": 2.02, - "learning_rate": 1.6043786039098027e-05, - "loss": 0.1149, + "learning_rate": 2.604995865383115e-05, + "loss": 0.0784, "step": 43195 }, { "epoch": 2.02, - "learning_rate": 1.604331723782289e-05, - "loss": 0.2056, + "learning_rate": 2.6049490583995134e-05, + "loss": 0.1391, "step": 43200 }, { "epoch": 2.02, - "learning_rate": 1.604284843654775e-05, - "loss": 0.1313, + "learning_rate": 2.6049022514159114e-05, + "loss": 0.1368, "step": 43205 }, { "epoch": 2.02, - "learning_rate": 1.604237963527261e-05, - "loss": 0.2624, + "learning_rate": 2.6048554444323093e-05, + "loss": 0.2506, "step": 43210 }, { "epoch": 2.02, - "learning_rate": 1.604191083399747e-05, - "loss": 0.3201, + "learning_rate": 2.6048086374487077e-05, + "loss": 0.2382, "step": 43215 }, { "epoch": 2.02, - "learning_rate": 1.604144203272233e-05, - "loss": 0.0401, + "learning_rate": 2.6047618304651056e-05, + "loss": 0.0312, "step": 43220 }, { "epoch": 2.02, - "learning_rate": 1.604097323144719e-05, - "loss": 0.1277, + "learning_rate": 2.6047150234815036e-05, + "loss": 0.066, "step": 43225 }, { "epoch": 2.02, - "learning_rate": 1.604050443017205e-05, - "loss": 0.0803, + "learning_rate": 2.6046682164979016e-05, + "loss": 0.0797, "step": 43230 }, { "epoch": 2.02, - "learning_rate": 1.6040035628896913e-05, - "loss": 0.092, + "learning_rate": 2.6046214095142996e-05, + "loss": 0.0473, "step": 43235 }, { "epoch": 2.02, - "learning_rate": 1.6039566827621773e-05, - "loss": 0.1218, + "learning_rate": 2.6045746025306976e-05, + "loss": 0.1001, "step": 43240 }, { "epoch": 2.02, - "learning_rate": 1.6039098026346633e-05, - "loss": 0.0504, + "learning_rate": 2.6045277955470955e-05, + "loss": 0.0724, "step": 43245 }, { "epoch": 2.02, - "learning_rate": 1.6038629225071493e-05, - "loss": 0.1807, + "learning_rate": 2.6044809885634935e-05, + "loss": 0.1473, "step": 43250 }, { "epoch": 2.02, - "learning_rate": 1.6038160423796353e-05, - "loss": 0.1809, + "learning_rate": 2.604434181579892e-05, + "loss": 0.1675, "step": 43255 }, { "epoch": 2.02, - "learning_rate": 1.6037691622521216e-05, - "loss": 0.3912, + "learning_rate": 2.60438737459629e-05, + "loss": 0.1769, "step": 43260 }, { "epoch": 2.02, - "learning_rate": 1.6037222821246076e-05, - "loss": 0.2558, + "learning_rate": 2.6043405676126878e-05, + "loss": 0.262, "step": 43265 }, { "epoch": 2.02, - "learning_rate": 1.6036754019970936e-05, - "loss": 0.096, + "learning_rate": 2.604293760629086e-05, + "loss": 0.036, "step": 43270 }, { "epoch": 2.02, - "learning_rate": 1.6036285218695796e-05, - "loss": 0.0349, + "learning_rate": 2.604246953645484e-05, + "loss": 0.0645, "step": 43275 }, { "epoch": 2.02, - "learning_rate": 1.603581641742066e-05, - "loss": 0.0889, + "learning_rate": 2.604200146661882e-05, + "loss": 0.0808, "step": 43280 }, { "epoch": 2.02, - "learning_rate": 1.603534761614552e-05, - "loss": 0.0881, + "learning_rate": 2.60415333967828e-05, + "loss": 0.0532, "step": 43285 }, { "epoch": 2.02, - "learning_rate": 1.603487881487038e-05, - "loss": 0.1415, + "learning_rate": 2.6041065326946784e-05, + "loss": 0.063, "step": 43290 }, { "epoch": 2.02, - "learning_rate": 1.603441001359524e-05, - "loss": 0.1533, + "learning_rate": 2.604059725711076e-05, + "loss": 0.1389, "step": 43295 }, { "epoch": 2.02, - "learning_rate": 1.60339412123201e-05, - "loss": 0.2526, + "learning_rate": 2.604012918727474e-05, + "loss": 0.1972, "step": 43300 }, { "epoch": 2.02, - "learning_rate": 1.603347241104496e-05, - "loss": 0.2226, + "learning_rate": 2.603966111743872e-05, + "loss": 0.1335, "step": 43305 }, { "epoch": 2.02, - "learning_rate": 1.603300360976982e-05, - "loss": 0.2101, + "learning_rate": 2.6039193047602703e-05, + "loss": 0.3182, "step": 43310 }, { "epoch": 2.02, - "learning_rate": 1.603253480849468e-05, - "loss": 0.291, + "learning_rate": 2.6038724977766683e-05, + "loss": 0.2989, "step": 43315 }, { "epoch": 2.02, - "learning_rate": 1.603206600721954e-05, - "loss": 0.0347, + "learning_rate": 2.6038256907930663e-05, + "loss": 0.0236, "step": 43320 }, { "epoch": 2.02, - "learning_rate": 1.6031597205944402e-05, - "loss": 0.0621, + "learning_rate": 2.6037788838094643e-05, + "loss": 0.0543, "step": 43325 }, { "epoch": 2.02, - "learning_rate": 1.6031128404669262e-05, - "loss": 0.0558, + "learning_rate": 2.6037320768258626e-05, + "loss": 0.0413, "step": 43330 }, { "epoch": 2.02, - "learning_rate": 1.603065960339412e-05, - "loss": 0.0694, + "learning_rate": 2.6036852698422606e-05, + "loss": 0.0411, "step": 43335 }, { "epoch": 2.02, - "learning_rate": 1.6030190802118985e-05, - "loss": 0.1065, + "learning_rate": 2.6036384628586586e-05, + "loss": 0.0893, "step": 43340 }, { "epoch": 2.02, - "learning_rate": 1.6029722000843845e-05, - "loss": 0.0916, + "learning_rate": 2.603591655875057e-05, + "loss": 0.0652, "step": 43345 }, { "epoch": 2.02, - "learning_rate": 1.6029253199568705e-05, - "loss": 0.1291, + "learning_rate": 2.603544848891455e-05, + "loss": 0.102, "step": 43350 }, { "epoch": 2.02, - "learning_rate": 1.6028784398293565e-05, - "loss": 0.1963, + "learning_rate": 2.603498041907853e-05, + "loss": 0.1159, "step": 43355 }, { "epoch": 2.02, - "learning_rate": 1.6028315597018425e-05, - "loss": 0.2028, + "learning_rate": 2.6034512349242505e-05, + "loss": 0.1669, "step": 43360 }, { "epoch": 2.02, - "learning_rate": 1.6027846795743285e-05, - "loss": 0.2797, + "learning_rate": 2.6034044279406488e-05, + "loss": 0.3293, "step": 43365 }, { "epoch": 2.02, - "learning_rate": 1.6027377994468148e-05, - "loss": 0.082, + "learning_rate": 2.6033576209570468e-05, + "loss": 0.0544, "step": 43370 }, { "epoch": 2.02, - "learning_rate": 1.6026909193193008e-05, - "loss": 0.0206, + "learning_rate": 2.6033108139734448e-05, + "loss": 0.0811, "step": 43375 }, { "epoch": 2.02, - "learning_rate": 1.6026440391917868e-05, - "loss": 0.0824, + "learning_rate": 2.6032640069898427e-05, + "loss": 0.0443, "step": 43380 }, { "epoch": 2.02, - "learning_rate": 1.6025971590642728e-05, - "loss": 0.0229, + "learning_rate": 2.603217200006241e-05, + "loss": 0.0427, "step": 43385 }, { "epoch": 2.02, - "learning_rate": 1.6025502789367588e-05, - "loss": 0.0882, + "learning_rate": 2.603170393022639e-05, + "loss": 0.1606, "step": 43390 }, { "epoch": 2.02, - "learning_rate": 1.6025033988092448e-05, - "loss": 0.093, + "learning_rate": 2.603123586039037e-05, + "loss": 0.0655, "step": 43395 }, { "epoch": 2.03, - "learning_rate": 1.6024565186817308e-05, - "loss": 0.162, + "learning_rate": 2.6030767790554354e-05, + "loss": 0.1189, "step": 43400 }, { "epoch": 2.03, - "learning_rate": 1.602409638554217e-05, - "loss": 0.175, + "learning_rate": 2.6030299720718333e-05, + "loss": 0.2558, "step": 43405 }, { "epoch": 2.03, - "learning_rate": 1.602362758426703e-05, - "loss": 0.1903, + "learning_rate": 2.6029831650882313e-05, + "loss": 0.2996, "step": 43410 }, { "epoch": 2.03, - "learning_rate": 1.602315878299189e-05, - "loss": 0.2453, + "learning_rate": 2.6029363581046293e-05, + "loss": 0.3012, "step": 43415 }, { "epoch": 2.03, - "learning_rate": 1.6022689981716754e-05, - "loss": 0.0193, + "learning_rate": 2.6028895511210273e-05, + "loss": 0.0208, "step": 43420 }, { "epoch": 2.03, - "learning_rate": 1.6022221180441614e-05, - "loss": 0.0242, + "learning_rate": 2.6028427441374253e-05, + "loss": 0.0588, "step": 43425 }, { "epoch": 2.03, - "learning_rate": 1.6021752379166474e-05, - "loss": 0.0957, + "learning_rate": 2.6027959371538232e-05, + "loss": 0.0582, "step": 43430 }, { "epoch": 2.03, - "learning_rate": 1.6021283577891334e-05, - "loss": 0.0662, + "learning_rate": 2.6027491301702212e-05, + "loss": 0.1172, "step": 43435 }, { "epoch": 2.03, - "learning_rate": 1.6020814776616194e-05, - "loss": 0.1751, + "learning_rate": 2.6027023231866195e-05, + "loss": 0.0628, "step": 43440 }, { "epoch": 2.03, - "learning_rate": 1.6020345975341054e-05, - "loss": 0.0665, + "learning_rate": 2.6026555162030175e-05, + "loss": 0.0249, "step": 43445 }, { "epoch": 2.03, - "learning_rate": 1.6019877174065914e-05, - "loss": 0.1302, + "learning_rate": 2.6026087092194155e-05, + "loss": 0.1262, "step": 43450 }, { "epoch": 2.03, - "learning_rate": 1.6019408372790774e-05, - "loss": 0.1834, + "learning_rate": 2.602561902235814e-05, + "loss": 0.2652, "step": 43455 }, { "epoch": 2.03, - "learning_rate": 1.6018939571515634e-05, - "loss": 0.2339, + "learning_rate": 2.6025150952522118e-05, + "loss": 0.1766, "step": 43460 }, { "epoch": 2.03, - "learning_rate": 1.6018470770240497e-05, - "loss": 0.2381, + "learning_rate": 2.6024682882686098e-05, + "loss": 0.2257, "step": 43465 }, { "epoch": 2.03, - "learning_rate": 1.6018001968965357e-05, - "loss": 0.055, + "learning_rate": 2.6024214812850078e-05, + "loss": 0.0303, "step": 43470 }, { "epoch": 2.03, - "learning_rate": 1.6017533167690217e-05, - "loss": 0.0649, + "learning_rate": 2.602374674301406e-05, + "loss": 0.0402, "step": 43475 }, { "epoch": 2.03, - "learning_rate": 1.6017064366415077e-05, - "loss": 0.0983, + "learning_rate": 2.602327867317804e-05, + "loss": 0.0562, "step": 43480 }, { "epoch": 2.03, - "learning_rate": 1.601659556513994e-05, - "loss": 0.0965, + "learning_rate": 2.6022810603342017e-05, + "loss": 0.1124, "step": 43485 }, { "epoch": 2.03, - "learning_rate": 1.60161267638648e-05, - "loss": 0.133, + "learning_rate": 2.6022342533505997e-05, + "loss": 0.0864, "step": 43490 }, { "epoch": 2.03, - "learning_rate": 1.601565796258966e-05, - "loss": 0.0916, + "learning_rate": 2.602187446366998e-05, + "loss": 0.0988, "step": 43495 }, { "epoch": 2.03, - "learning_rate": 1.601518916131452e-05, - "loss": 0.1319, + "learning_rate": 2.602140639383396e-05, + "loss": 0.1198, "step": 43500 }, { "epoch": 2.03, - "learning_rate": 1.601472036003938e-05, - "loss": 0.2172, + "learning_rate": 2.602093832399794e-05, + "loss": 0.2089, "step": 43505 }, { "epoch": 2.03, - "learning_rate": 1.6014251558764243e-05, - "loss": 0.3607, + "learning_rate": 2.6020470254161923e-05, + "loss": 0.2975, "step": 43510 }, { "epoch": 2.03, - "learning_rate": 1.6013782757489103e-05, - "loss": 0.4852, + "learning_rate": 2.6020002184325903e-05, + "loss": 0.3667, "step": 43515 }, { "epoch": 2.03, - "learning_rate": 1.6013313956213963e-05, - "loss": 0.0733, + "learning_rate": 2.6019534114489883e-05, + "loss": 0.0904, "step": 43520 }, { "epoch": 2.03, - "learning_rate": 1.6012845154938823e-05, - "loss": 0.0488, + "learning_rate": 2.6019066044653863e-05, + "loss": 0.0354, "step": 43525 }, { "epoch": 2.03, - "learning_rate": 1.6012376353663683e-05, - "loss": 0.0537, + "learning_rate": 2.6018597974817846e-05, + "loss": 0.0737, "step": 43530 }, { "epoch": 2.03, - "learning_rate": 1.6011907552388543e-05, - "loss": 0.0909, + "learning_rate": 2.6018129904981826e-05, + "loss": 0.0782, "step": 43535 }, { "epoch": 2.03, - "learning_rate": 1.6011438751113403e-05, - "loss": 0.0844, + "learning_rate": 2.6017661835145805e-05, + "loss": 0.0542, "step": 43540 }, { "epoch": 2.03, - "learning_rate": 1.6010969949838263e-05, - "loss": 0.12, + "learning_rate": 2.6017193765309785e-05, + "loss": 0.1266, "step": 43545 }, { "epoch": 2.03, - "learning_rate": 1.6010501148563126e-05, - "loss": 0.1497, + "learning_rate": 2.6016725695473765e-05, + "loss": 0.1402, "step": 43550 }, { "epoch": 2.03, - "learning_rate": 1.6010032347287986e-05, - "loss": 0.231, + "learning_rate": 2.6016257625637745e-05, + "loss": 0.1939, "step": 43555 }, { "epoch": 2.03, - "learning_rate": 1.6009563546012846e-05, - "loss": 0.2282, + "learning_rate": 2.6015789555801725e-05, + "loss": 0.2771, "step": 43560 }, { "epoch": 2.03, - "learning_rate": 1.600909474473771e-05, - "loss": 0.3212, + "learning_rate": 2.6015321485965704e-05, + "loss": 0.2739, "step": 43565 }, { "epoch": 2.03, - "learning_rate": 1.600862594346257e-05, - "loss": 0.0352, + "learning_rate": 2.6014853416129688e-05, + "loss": 0.0758, "step": 43570 }, { "epoch": 2.03, - "learning_rate": 1.600815714218743e-05, - "loss": 0.0511, + "learning_rate": 2.6014385346293667e-05, + "loss": 0.0429, "step": 43575 }, { "epoch": 2.03, - "learning_rate": 1.600768834091229e-05, - "loss": 0.1067, + "learning_rate": 2.6013917276457647e-05, + "loss": 0.0594, "step": 43580 }, { "epoch": 2.03, - "learning_rate": 1.600721953963715e-05, - "loss": 0.0649, + "learning_rate": 2.601344920662163e-05, + "loss": 0.0997, "step": 43585 }, { "epoch": 2.03, - "learning_rate": 1.600675073836201e-05, - "loss": 0.0548, + "learning_rate": 2.601298113678561e-05, + "loss": 0.1139, "step": 43590 }, { "epoch": 2.03, - "learning_rate": 1.600628193708687e-05, - "loss": 0.0562, + "learning_rate": 2.601251306694959e-05, + "loss": 0.1391, "step": 43595 }, { "epoch": 2.03, - "learning_rate": 1.600581313581173e-05, - "loss": 0.1239, + "learning_rate": 2.601204499711357e-05, + "loss": 0.1522, "step": 43600 }, { "epoch": 2.03, - "learning_rate": 1.6005344334536592e-05, - "loss": 0.195, + "learning_rate": 2.6011576927277553e-05, + "loss": 0.1734, "step": 43605 }, { "epoch": 2.03, - "learning_rate": 1.6004875533261452e-05, - "loss": 0.3106, + "learning_rate": 2.601110885744153e-05, + "loss": 0.3617, "step": 43610 }, { "epoch": 2.04, - "learning_rate": 1.6004406731986312e-05, - "loss": 0.257, + "learning_rate": 2.601064078760551e-05, + "loss": 0.292, "step": 43615 }, { "epoch": 2.04, - "learning_rate": 1.6003937930711172e-05, - "loss": 0.0264, + "learning_rate": 2.601017271776949e-05, + "loss": 0.0504, "step": 43620 }, { "epoch": 2.04, - "learning_rate": 1.6003469129436032e-05, - "loss": 0.0908, + "learning_rate": 2.6009704647933472e-05, + "loss": 0.0315, "step": 43625 }, { "epoch": 2.04, - "learning_rate": 1.6003000328160895e-05, - "loss": 0.0849, + "learning_rate": 2.6009236578097452e-05, + "loss": 0.0434, "step": 43630 }, { "epoch": 2.04, - "learning_rate": 1.6002531526885755e-05, - "loss": 0.0908, + "learning_rate": 2.6008768508261432e-05, + "loss": 0.0796, "step": 43635 }, { "epoch": 2.04, - "learning_rate": 1.6002062725610615e-05, - "loss": 0.0562, + "learning_rate": 2.6008300438425415e-05, + "loss": 0.0695, "step": 43640 }, { "epoch": 2.04, - "learning_rate": 1.6001593924335475e-05, - "loss": 0.0963, + "learning_rate": 2.6007832368589395e-05, + "loss": 0.0673, "step": 43645 }, { "epoch": 2.04, - "learning_rate": 1.6001125123060338e-05, - "loss": 0.1417, + "learning_rate": 2.6007364298753375e-05, + "loss": 0.1279, "step": 43650 }, { "epoch": 2.04, - "learning_rate": 1.6000656321785198e-05, - "loss": 0.1321, + "learning_rate": 2.6006896228917355e-05, + "loss": 0.203, "step": 43655 }, { "epoch": 2.04, - "learning_rate": 1.6000187520510058e-05, - "loss": 0.2516, + "learning_rate": 2.6006428159081338e-05, + "loss": 0.2057, "step": 43660 }, { "epoch": 2.04, - "learning_rate": 1.5999718719234918e-05, - "loss": 0.2598, + "learning_rate": 2.6005960089245318e-05, + "loss": 0.2759, "step": 43665 }, { "epoch": 2.04, - "learning_rate": 1.5999249917959778e-05, - "loss": 0.0211, + "learning_rate": 2.6005492019409298e-05, + "loss": 0.0515, "step": 43670 }, { "epoch": 2.04, - "learning_rate": 1.5998781116684638e-05, - "loss": 0.1165, + "learning_rate": 2.6005023949573274e-05, + "loss": 0.0193, "step": 43675 }, { "epoch": 2.04, - "learning_rate": 1.5998312315409498e-05, - "loss": 0.0921, + "learning_rate": 2.6004555879737257e-05, + "loss": 0.0598, "step": 43680 }, { "epoch": 2.04, - "learning_rate": 1.5997843514134358e-05, - "loss": 0.087, + "learning_rate": 2.6004087809901237e-05, + "loss": 0.0771, "step": 43685 }, { "epoch": 2.04, - "learning_rate": 1.599737471285922e-05, - "loss": 0.0793, + "learning_rate": 2.6003619740065217e-05, + "loss": 0.0433, "step": 43690 }, { "epoch": 2.04, - "learning_rate": 1.599690591158408e-05, - "loss": 0.097, + "learning_rate": 2.60031516702292e-05, + "loss": 0.1658, "step": 43695 }, { "epoch": 2.04, - "learning_rate": 1.599643711030894e-05, - "loss": 0.1542, + "learning_rate": 2.600268360039318e-05, + "loss": 0.0976, "step": 43700 }, { "epoch": 2.04, - "learning_rate": 1.59959683090338e-05, - "loss": 0.2022, + "learning_rate": 2.600221553055716e-05, + "loss": 0.222, "step": 43705 }, { "epoch": 2.04, - "learning_rate": 1.5995499507758664e-05, - "loss": 0.292, + "learning_rate": 2.600174746072114e-05, + "loss": 0.2616, "step": 43710 }, { "epoch": 2.04, - "learning_rate": 1.5995030706483524e-05, - "loss": 0.3516, + "learning_rate": 2.6001279390885123e-05, + "loss": 0.2958, "step": 43715 }, { "epoch": 2.04, - "learning_rate": 1.5994561905208384e-05, - "loss": 0.0639, + "learning_rate": 2.6000811321049103e-05, + "loss": 0.0172, "step": 43720 }, { "epoch": 2.04, - "learning_rate": 1.5994093103933244e-05, - "loss": 0.0484, + "learning_rate": 2.6000343251213082e-05, + "loss": 0.0272, "step": 43725 }, { "epoch": 2.04, - "learning_rate": 1.5993624302658104e-05, - "loss": 0.0859, + "learning_rate": 2.5999875181377062e-05, + "loss": 0.0986, "step": 43730 }, { "epoch": 2.04, - "learning_rate": 1.5993155501382964e-05, - "loss": 0.0987, + "learning_rate": 2.5999407111541042e-05, + "loss": 0.0818, "step": 43735 }, { "epoch": 2.04, - "learning_rate": 1.5992686700107824e-05, - "loss": 0.0731, + "learning_rate": 2.5998939041705022e-05, + "loss": 0.099, "step": 43740 }, { "epoch": 2.04, - "learning_rate": 1.5992217898832687e-05, - "loss": 0.1355, + "learning_rate": 2.5998470971869e-05, + "loss": 0.0866, "step": 43745 }, { "epoch": 2.04, - "learning_rate": 1.5991749097557547e-05, - "loss": 0.1258, + "learning_rate": 2.599800290203298e-05, + "loss": 0.1462, "step": 43750 }, { "epoch": 2.04, - "learning_rate": 1.5991280296282407e-05, - "loss": 0.1247, + "learning_rate": 2.5997534832196965e-05, + "loss": 0.1062, "step": 43755 }, { "epoch": 2.04, - "learning_rate": 1.5990811495007267e-05, - "loss": 0.2308, + "learning_rate": 2.5997066762360944e-05, + "loss": 0.1921, "step": 43760 }, { "epoch": 2.04, - "learning_rate": 1.5990342693732127e-05, - "loss": 0.3262, + "learning_rate": 2.5996598692524924e-05, + "loss": 0.3237, "step": 43765 }, { "epoch": 2.04, - "learning_rate": 1.598987389245699e-05, - "loss": 0.0529, + "learning_rate": 2.5996130622688907e-05, + "loss": 0.0672, "step": 43770 }, { "epoch": 2.04, - "learning_rate": 1.598940509118185e-05, - "loss": 0.0394, + "learning_rate": 2.5995662552852887e-05, + "loss": 0.0397, "step": 43775 }, { "epoch": 2.04, - "learning_rate": 1.598893628990671e-05, - "loss": 0.0503, + "learning_rate": 2.5995194483016867e-05, + "loss": 0.048, "step": 43780 }, { "epoch": 2.04, - "learning_rate": 1.598846748863157e-05, - "loss": 0.0712, + "learning_rate": 2.5994726413180847e-05, + "loss": 0.0685, "step": 43785 }, { "epoch": 2.04, - "learning_rate": 1.5987998687356433e-05, - "loss": 0.0774, + "learning_rate": 2.599425834334483e-05, + "loss": 0.0745, "step": 43790 }, { "epoch": 2.04, - "learning_rate": 1.5987529886081293e-05, - "loss": 0.1446, + "learning_rate": 2.599379027350881e-05, + "loss": 0.0742, "step": 43795 }, { "epoch": 2.04, - "learning_rate": 1.5987061084806153e-05, - "loss": 0.1228, + "learning_rate": 2.5993322203672786e-05, + "loss": 0.1082, "step": 43800 }, { "epoch": 2.04, - "learning_rate": 1.5986592283531013e-05, - "loss": 0.2051, + "learning_rate": 2.5992854133836766e-05, + "loss": 0.2451, "step": 43805 }, { "epoch": 2.04, - "learning_rate": 1.5986123482255873e-05, - "loss": 0.2713, + "learning_rate": 2.599238606400075e-05, + "loss": 0.2064, "step": 43810 }, { "epoch": 2.04, - "learning_rate": 1.5985654680980733e-05, - "loss": 0.298, + "learning_rate": 2.599191799416473e-05, + "loss": 0.3373, "step": 43815 }, { "epoch": 2.04, - "learning_rate": 1.5985185879705593e-05, - "loss": 0.0534, + "learning_rate": 2.599144992432871e-05, + "loss": 0.0609, "step": 43820 }, { "epoch": 2.04, - "learning_rate": 1.5984717078430453e-05, - "loss": 0.039, + "learning_rate": 2.5990981854492692e-05, + "loss": 0.0868, "step": 43825 }, { "epoch": 2.05, - "learning_rate": 1.5984248277155313e-05, - "loss": 0.09, + "learning_rate": 2.5990513784656672e-05, + "loss": 0.0606, "step": 43830 }, { "epoch": 2.05, - "learning_rate": 1.5983779475880176e-05, - "loss": 0.1248, + "learning_rate": 2.5990045714820652e-05, + "loss": 0.1133, "step": 43835 }, { "epoch": 2.05, - "learning_rate": 1.5983310674605036e-05, - "loss": 0.1089, + "learning_rate": 2.5989577644984632e-05, + "loss": 0.0708, "step": 43840 }, { "epoch": 2.05, - "learning_rate": 1.5982841873329896e-05, - "loss": 0.1591, + "learning_rate": 2.5989109575148615e-05, + "loss": 0.1147, "step": 43845 }, { "epoch": 2.05, - "learning_rate": 1.598237307205476e-05, - "loss": 0.0722, + "learning_rate": 2.5988641505312595e-05, + "loss": 0.1057, "step": 43850 }, { "epoch": 2.05, - "learning_rate": 1.598190427077962e-05, - "loss": 0.2335, + "learning_rate": 2.5988173435476575e-05, + "loss": 0.1761, "step": 43855 }, { "epoch": 2.05, - "learning_rate": 1.598143546950448e-05, - "loss": 0.2227, + "learning_rate": 2.5987705365640554e-05, + "loss": 0.2206, "step": 43860 }, { "epoch": 2.05, - "learning_rate": 1.598096666822934e-05, - "loss": 0.4275, + "learning_rate": 2.5987237295804534e-05, + "loss": 0.4006, "step": 43865 }, { "epoch": 2.05, - "learning_rate": 1.59804978669542e-05, - "loss": 0.0902, + "learning_rate": 2.5986769225968514e-05, + "loss": 0.0626, "step": 43870 }, { "epoch": 2.05, - "learning_rate": 1.598002906567906e-05, - "loss": 0.0339, + "learning_rate": 2.5986301156132494e-05, + "loss": 0.0403, "step": 43875 }, { "epoch": 2.05, - "learning_rate": 1.597956026440392e-05, - "loss": 0.0849, + "learning_rate": 2.5985833086296477e-05, + "loss": 0.0535, "step": 43880 }, { "epoch": 2.05, - "learning_rate": 1.5979091463128782e-05, - "loss": 0.0877, + "learning_rate": 2.5985365016460457e-05, + "loss": 0.0547, "step": 43885 }, { "epoch": 2.05, - "learning_rate": 1.5978622661853642e-05, - "loss": 0.0713, + "learning_rate": 2.5984896946624437e-05, + "loss": 0.1163, "step": 43890 }, { "epoch": 2.05, - "learning_rate": 1.5978153860578502e-05, - "loss": 0.0822, + "learning_rate": 2.5984428876788416e-05, + "loss": 0.1051, "step": 43895 }, { "epoch": 2.05, - "learning_rate": 1.5977685059303362e-05, - "loss": 0.1345, + "learning_rate": 2.59839608069524e-05, + "loss": 0.1141, "step": 43900 }, { "epoch": 2.05, - "learning_rate": 1.5977216258028222e-05, - "loss": 0.1678, + "learning_rate": 2.598349273711638e-05, + "loss": 0.2341, "step": 43905 }, { "epoch": 2.05, - "learning_rate": 1.5976747456753082e-05, - "loss": 0.2043, + "learning_rate": 2.598302466728036e-05, + "loss": 0.2875, "step": 43910 }, { "epoch": 2.05, - "learning_rate": 1.5976278655477945e-05, - "loss": 0.2602, + "learning_rate": 2.598255659744434e-05, + "loss": 0.212, "step": 43915 }, { "epoch": 2.05, - "learning_rate": 1.5975809854202805e-05, - "loss": 0.0879, + "learning_rate": 2.5982088527608322e-05, + "loss": 0.0225, "step": 43920 }, { "epoch": 2.05, - "learning_rate": 1.5975341052927665e-05, - "loss": 0.0494, + "learning_rate": 2.59816204577723e-05, + "loss": 0.0303, "step": 43925 }, { "epoch": 2.05, - "learning_rate": 1.597487225165253e-05, - "loss": 0.0878, + "learning_rate": 2.598115238793628e-05, + "loss": 0.0887, "step": 43930 }, { "epoch": 2.05, - "learning_rate": 1.5974403450377388e-05, - "loss": 0.0827, + "learning_rate": 2.598068431810026e-05, + "loss": 0.0739, "step": 43935 }, { "epoch": 2.05, - "learning_rate": 1.5973934649102248e-05, - "loss": 0.1332, + "learning_rate": 2.598021624826424e-05, + "loss": 0.0731, "step": 43940 }, { "epoch": 2.05, - "learning_rate": 1.5973465847827108e-05, - "loss": 0.1587, + "learning_rate": 2.597974817842822e-05, + "loss": 0.1504, "step": 43945 }, { "epoch": 2.05, - "learning_rate": 1.5972997046551968e-05, - "loss": 0.1737, + "learning_rate": 2.59792801085922e-05, + "loss": 0.1396, "step": 43950 }, { "epoch": 2.05, - "learning_rate": 1.5972528245276828e-05, - "loss": 0.2788, + "learning_rate": 2.5978812038756184e-05, + "loss": 0.1312, "step": 43955 }, { "epoch": 2.05, - "learning_rate": 1.5972059444001688e-05, - "loss": 0.3036, + "learning_rate": 2.5978343968920164e-05, + "loss": 0.3022, "step": 43960 }, { "epoch": 2.05, - "learning_rate": 1.5971590642726548e-05, - "loss": 0.2977, + "learning_rate": 2.5977875899084144e-05, + "loss": 0.4683, "step": 43965 }, { "epoch": 2.05, - "learning_rate": 1.5971121841451408e-05, - "loss": 0.0365, + "learning_rate": 2.5977407829248124e-05, + "loss": 0.081, "step": 43970 }, { "epoch": 2.05, - "learning_rate": 1.597065304017627e-05, - "loss": 0.0257, + "learning_rate": 2.5976939759412107e-05, + "loss": 0.0326, "step": 43975 }, { "epoch": 2.05, - "learning_rate": 1.597018423890113e-05, - "loss": 0.0948, + "learning_rate": 2.5976471689576087e-05, + "loss": 0.0402, "step": 43980 }, { "epoch": 2.05, - "learning_rate": 1.596971543762599e-05, - "loss": 0.0868, + "learning_rate": 2.5976003619740067e-05, + "loss": 0.0648, "step": 43985 }, { "epoch": 2.05, - "learning_rate": 1.596924663635085e-05, - "loss": 0.0878, + "learning_rate": 2.5975535549904043e-05, + "loss": 0.1982, "step": 43990 }, { "epoch": 2.05, - "learning_rate": 1.5968777835075714e-05, - "loss": 0.0816, + "learning_rate": 2.5975067480068026e-05, + "loss": 0.1179, "step": 43995 }, { "epoch": 2.05, - "learning_rate": 1.5968309033800574e-05, - "loss": 0.0745, + "learning_rate": 2.5974599410232006e-05, + "loss": 0.127, "step": 44000 }, { "epoch": 2.05, - "learning_rate": 1.5967840232525434e-05, - "loss": 0.266, + "learning_rate": 2.5974131340395986e-05, + "loss": 0.1602, "step": 44005 }, { "epoch": 2.05, - "learning_rate": 1.5967371431250294e-05, - "loss": 0.178, + "learning_rate": 2.597366327055997e-05, + "loss": 0.2638, "step": 44010 }, { "epoch": 2.05, - "learning_rate": 1.5966902629975154e-05, - "loss": 0.2893, + "learning_rate": 2.597319520072395e-05, + "loss": 0.2213, "step": 44015 }, { "epoch": 2.05, - "learning_rate": 1.5966433828700017e-05, - "loss": 0.0461, + "learning_rate": 2.597272713088793e-05, + "loss": 0.0252, "step": 44020 }, { "epoch": 2.05, - "learning_rate": 1.5965965027424877e-05, - "loss": 0.0394, + "learning_rate": 2.597225906105191e-05, + "loss": 0.0487, "step": 44025 }, { "epoch": 2.05, - "learning_rate": 1.5965496226149737e-05, - "loss": 0.0646, + "learning_rate": 2.5971790991215892e-05, + "loss": 0.0386, "step": 44030 }, { "epoch": 2.05, - "learning_rate": 1.5965027424874597e-05, - "loss": 0.0966, + "learning_rate": 2.5971322921379872e-05, + "loss": 0.0931, "step": 44035 }, { "epoch": 2.05, - "learning_rate": 1.5964558623599457e-05, - "loss": 0.1131, + "learning_rate": 2.597085485154385e-05, + "loss": 0.1834, "step": 44040 }, { "epoch": 2.06, - "learning_rate": 1.5964089822324317e-05, - "loss": 0.1476, + "learning_rate": 2.597038678170783e-05, + "loss": 0.2191, "step": 44045 }, { "epoch": 2.06, - "learning_rate": 1.5963621021049177e-05, - "loss": 0.1769, + "learning_rate": 2.5969918711871815e-05, + "loss": 0.1914, "step": 44050 }, { "epoch": 2.06, - "learning_rate": 1.5963152219774037e-05, - "loss": 0.2321, + "learning_rate": 2.596945064203579e-05, + "loss": 0.1308, "step": 44055 }, { "epoch": 2.06, - "learning_rate": 1.59626834184989e-05, - "loss": 0.2395, + "learning_rate": 2.596898257219977e-05, + "loss": 0.3082, "step": 44060 }, { "epoch": 2.06, - "learning_rate": 1.596221461722376e-05, - "loss": 0.316, + "learning_rate": 2.5968514502363754e-05, + "loss": 0.2636, "step": 44065 }, { "epoch": 2.06, - "learning_rate": 1.596174581594862e-05, - "loss": 0.006, + "learning_rate": 2.5968046432527734e-05, + "loss": 0.1079, "step": 44070 }, { "epoch": 2.06, - "learning_rate": 1.5961277014673483e-05, - "loss": 0.0283, + "learning_rate": 2.5967578362691714e-05, + "loss": 0.0352, "step": 44075 }, { "epoch": 2.06, - "learning_rate": 1.5960808213398343e-05, - "loss": 0.0903, + "learning_rate": 2.5967110292855693e-05, + "loss": 0.057, "step": 44080 }, { "epoch": 2.06, - "learning_rate": 1.5960339412123203e-05, - "loss": 0.047, + "learning_rate": 2.5966642223019677e-05, + "loss": 0.0525, "step": 44085 }, { "epoch": 2.06, - "learning_rate": 1.5959870610848063e-05, - "loss": 0.1183, + "learning_rate": 2.5966174153183656e-05, + "loss": 0.1108, "step": 44090 }, { "epoch": 2.06, - "learning_rate": 1.5959401809572923e-05, - "loss": 0.1107, + "learning_rate": 2.5965706083347636e-05, + "loss": 0.1278, "step": 44095 }, { "epoch": 2.06, - "learning_rate": 1.5958933008297783e-05, - "loss": 0.1509, + "learning_rate": 2.5965238013511616e-05, + "loss": 0.1354, "step": 44100 }, { "epoch": 2.06, - "learning_rate": 1.5958464207022643e-05, - "loss": 0.2046, + "learning_rate": 2.59647699436756e-05, + "loss": 0.159, "step": 44105 }, { "epoch": 2.06, - "learning_rate": 1.5957995405747503e-05, - "loss": 0.3222, + "learning_rate": 2.596430187383958e-05, + "loss": 0.2614, "step": 44110 }, { "epoch": 2.06, - "learning_rate": 1.5957526604472366e-05, - "loss": 0.3044, + "learning_rate": 2.5963833804003556e-05, + "loss": 0.2367, "step": 44115 }, { "epoch": 2.06, - "learning_rate": 1.5957057803197226e-05, - "loss": 0.0688, + "learning_rate": 2.596336573416754e-05, + "loss": 0.053, "step": 44120 }, { "epoch": 2.06, - "learning_rate": 1.5956589001922086e-05, - "loss": 0.0558, + "learning_rate": 2.596289766433152e-05, + "loss": 0.0542, "step": 44125 }, { "epoch": 2.06, - "learning_rate": 1.5956120200646946e-05, - "loss": 0.073, + "learning_rate": 2.59624295944955e-05, + "loss": 0.0561, "step": 44130 }, { "epoch": 2.06, - "learning_rate": 1.595565139937181e-05, - "loss": 0.0916, + "learning_rate": 2.5961961524659478e-05, + "loss": 0.0369, "step": 44135 }, { "epoch": 2.06, - "learning_rate": 1.595518259809667e-05, - "loss": 0.1182, + "learning_rate": 2.596149345482346e-05, + "loss": 0.1458, "step": 44140 }, { "epoch": 2.06, - "learning_rate": 1.595471379682153e-05, - "loss": 0.0756, + "learning_rate": 2.596102538498744e-05, + "loss": 0.0595, "step": 44145 }, { "epoch": 2.06, - "learning_rate": 1.595424499554639e-05, - "loss": 0.1211, + "learning_rate": 2.596055731515142e-05, + "loss": 0.2433, "step": 44150 }, { "epoch": 2.06, - "learning_rate": 1.595377619427125e-05, - "loss": 0.1624, + "learning_rate": 2.59600892453154e-05, + "loss": 0.1633, "step": 44155 }, { "epoch": 2.06, - "learning_rate": 1.5953307392996112e-05, - "loss": 0.2841, + "learning_rate": 2.5959621175479384e-05, + "loss": 0.2184, "step": 44160 }, { "epoch": 2.06, - "learning_rate": 1.5952838591720972e-05, - "loss": 0.3485, + "learning_rate": 2.5959153105643364e-05, + "loss": 0.3487, "step": 44165 }, { "epoch": 2.06, - "learning_rate": 1.5952369790445832e-05, - "loss": 0.092, + "learning_rate": 2.5958685035807344e-05, + "loss": 0.0224, "step": 44170 }, { "epoch": 2.06, - "learning_rate": 1.5951900989170692e-05, - "loss": 0.0326, + "learning_rate": 2.5958216965971324e-05, + "loss": 0.0847, "step": 44175 }, { "epoch": 2.06, - "learning_rate": 1.5951432187895552e-05, - "loss": 0.0569, + "learning_rate": 2.5957748896135303e-05, + "loss": 0.0655, "step": 44180 }, { "epoch": 2.06, - "learning_rate": 1.5950963386620412e-05, - "loss": 0.1249, + "learning_rate": 2.5957280826299283e-05, + "loss": 0.0575, "step": 44185 }, { "epoch": 2.06, - "learning_rate": 1.5950494585345272e-05, - "loss": 0.0785, + "learning_rate": 2.5956812756463263e-05, + "loss": 0.0914, "step": 44190 }, { "epoch": 2.06, - "learning_rate": 1.5950025784070132e-05, - "loss": 0.0755, + "learning_rate": 2.5956344686627246e-05, + "loss": 0.1687, "step": 44195 }, { "epoch": 2.06, - "learning_rate": 1.5949556982794995e-05, - "loss": 0.2121, + "learning_rate": 2.5955876616791226e-05, + "loss": 0.1551, "step": 44200 }, { "epoch": 2.06, - "learning_rate": 1.5949088181519855e-05, - "loss": 0.2509, + "learning_rate": 2.5955408546955206e-05, + "loss": 0.1556, "step": 44205 }, { "epoch": 2.06, - "learning_rate": 1.5948619380244715e-05, - "loss": 0.2829, + "learning_rate": 2.5954940477119186e-05, + "loss": 0.234, "step": 44210 }, { "epoch": 2.06, - "learning_rate": 1.594815057896958e-05, - "loss": 0.3905, + "learning_rate": 2.595447240728317e-05, + "loss": 0.2907, "step": 44215 }, { "epoch": 2.06, - "learning_rate": 1.594768177769444e-05, - "loss": 0.0941, + "learning_rate": 2.595400433744715e-05, + "loss": 0.0634, "step": 44220 }, { "epoch": 2.06, - "learning_rate": 1.5947212976419298e-05, - "loss": 0.0883, + "learning_rate": 2.595353626761113e-05, + "loss": 0.057, "step": 44225 }, { "epoch": 2.06, - "learning_rate": 1.5946744175144158e-05, - "loss": 0.0664, + "learning_rate": 2.595306819777511e-05, + "loss": 0.0898, "step": 44230 }, { "epoch": 2.06, - "learning_rate": 1.5946275373869018e-05, - "loss": 0.048, + "learning_rate": 2.595260012793909e-05, + "loss": 0.0623, "step": 44235 }, { "epoch": 2.06, - "learning_rate": 1.5945806572593878e-05, - "loss": 0.1531, + "learning_rate": 2.595213205810307e-05, + "loss": 0.0834, "step": 44240 }, { "epoch": 2.06, - "learning_rate": 1.5945337771318738e-05, - "loss": 0.0949, + "learning_rate": 2.5951663988267048e-05, + "loss": 0.1182, "step": 44245 }, { "epoch": 2.06, - "learning_rate": 1.5944868970043598e-05, - "loss": 0.1509, + "learning_rate": 2.595119591843103e-05, + "loss": 0.1866, "step": 44250 }, { "epoch": 2.06, - "learning_rate": 1.594440016876846e-05, - "loss": 0.221, + "learning_rate": 2.595072784859501e-05, + "loss": 0.1783, "step": 44255 }, { "epoch": 2.07, - "learning_rate": 1.594393136749332e-05, - "loss": 0.2352, + "learning_rate": 2.595025977875899e-05, + "loss": 0.2101, "step": 44260 }, { "epoch": 2.07, - "learning_rate": 1.594346256621818e-05, - "loss": 0.1831, + "learning_rate": 2.594979170892297e-05, + "loss": 0.3375, "step": 44265 }, { "epoch": 2.07, - "learning_rate": 1.594299376494304e-05, - "loss": 0.0654, + "learning_rate": 2.5949323639086954e-05, + "loss": 0.0616, "step": 44270 }, { "epoch": 2.07, - "learning_rate": 1.59425249636679e-05, - "loss": 0.0374, + "learning_rate": 2.5948855569250933e-05, + "loss": 0.0168, "step": 44275 }, { "epoch": 2.07, - "learning_rate": 1.5942056162392764e-05, - "loss": 0.0869, + "learning_rate": 2.5948387499414913e-05, + "loss": 0.0396, "step": 44280 }, { "epoch": 2.07, - "learning_rate": 1.5941587361117624e-05, - "loss": 0.1089, + "learning_rate": 2.5947919429578893e-05, + "loss": 0.0761, "step": 44285 }, { "epoch": 2.07, - "learning_rate": 1.5941118559842484e-05, - "loss": 0.0679, + "learning_rate": 2.5947451359742876e-05, + "loss": 0.0863, "step": 44290 }, { "epoch": 2.07, - "learning_rate": 1.5940649758567344e-05, - "loss": 0.1117, + "learning_rate": 2.5946983289906856e-05, + "loss": 0.1398, "step": 44295 }, { "epoch": 2.07, - "learning_rate": 1.5940180957292207e-05, - "loss": 0.1786, + "learning_rate": 2.5946515220070836e-05, + "loss": 0.2071, "step": 44300 }, { "epoch": 2.07, - "learning_rate": 1.5939712156017067e-05, - "loss": 0.1472, + "learning_rate": 2.5946047150234816e-05, + "loss": 0.1878, "step": 44305 }, { "epoch": 2.07, - "learning_rate": 1.5939243354741927e-05, - "loss": 0.2769, + "learning_rate": 2.5945579080398796e-05, + "loss": 0.2988, "step": 44310 }, { "epoch": 2.07, - "learning_rate": 1.5938774553466787e-05, - "loss": 0.1834, + "learning_rate": 2.5945111010562775e-05, + "loss": 0.2748, "step": 44315 }, { "epoch": 2.07, - "learning_rate": 1.5938305752191647e-05, - "loss": 0.0698, + "learning_rate": 2.5944642940726755e-05, + "loss": 0.0561, "step": 44320 }, { "epoch": 2.07, - "learning_rate": 1.5937836950916507e-05, - "loss": 0.0685, + "learning_rate": 2.594417487089074e-05, + "loss": 0.0897, "step": 44325 }, { "epoch": 2.07, - "learning_rate": 1.5937368149641367e-05, - "loss": 0.0576, + "learning_rate": 2.5943706801054718e-05, + "loss": 0.025, "step": 44330 }, { "epoch": 2.07, - "learning_rate": 1.5936899348366227e-05, - "loss": 0.1033, + "learning_rate": 2.5943238731218698e-05, + "loss": 0.0864, "step": 44335 }, { "epoch": 2.07, - "learning_rate": 1.5936430547091087e-05, - "loss": 0.0882, + "learning_rate": 2.5942770661382678e-05, + "loss": 0.08, "step": 44340 }, { "epoch": 2.07, - "learning_rate": 1.593596174581595e-05, - "loss": 0.1051, + "learning_rate": 2.594230259154666e-05, + "loss": 0.0743, "step": 44345 }, { "epoch": 2.07, - "learning_rate": 1.593549294454081e-05, - "loss": 0.137, + "learning_rate": 2.594183452171064e-05, + "loss": 0.1179, "step": 44350 }, { "epoch": 2.07, - "learning_rate": 1.593502414326567e-05, - "loss": 0.1292, + "learning_rate": 2.594136645187462e-05, + "loss": 0.1469, "step": 44355 }, { "epoch": 2.07, - "learning_rate": 1.5934555341990533e-05, - "loss": 0.3229, + "learning_rate": 2.59408983820386e-05, + "loss": 0.2232, "step": 44360 }, { "epoch": 2.07, - "learning_rate": 1.5934086540715393e-05, - "loss": 0.4082, + "learning_rate": 2.5940430312202584e-05, + "loss": 0.223, "step": 44365 }, { "epoch": 2.07, - "learning_rate": 1.5933617739440253e-05, - "loss": 0.0691, + "learning_rate": 2.593996224236656e-05, + "loss": 0.0095, "step": 44370 }, { "epoch": 2.07, - "learning_rate": 1.5933148938165113e-05, - "loss": 0.0598, + "learning_rate": 2.593949417253054e-05, + "loss": 0.0184, "step": 44375 }, { "epoch": 2.07, - "learning_rate": 1.5932680136889973e-05, - "loss": 0.0883, + "learning_rate": 2.5939026102694523e-05, + "loss": 0.1017, "step": 44380 }, { "epoch": 2.07, - "learning_rate": 1.5932211335614833e-05, - "loss": 0.0679, + "learning_rate": 2.5938558032858503e-05, + "loss": 0.0862, "step": 44385 }, { "epoch": 2.07, - "learning_rate": 1.5931742534339693e-05, - "loss": 0.0568, + "learning_rate": 2.5938089963022483e-05, + "loss": 0.154, "step": 44390 }, { "epoch": 2.07, - "learning_rate": 1.5931273733064556e-05, - "loss": 0.1136, + "learning_rate": 2.5937621893186463e-05, + "loss": 0.1636, "step": 44395 }, { "epoch": 2.07, - "learning_rate": 1.5930804931789416e-05, - "loss": 0.1673, + "learning_rate": 2.5937153823350446e-05, + "loss": 0.1058, "step": 44400 }, { "epoch": 2.07, - "learning_rate": 1.5930336130514276e-05, - "loss": 0.151, + "learning_rate": 2.5936685753514426e-05, + "loss": 0.234, "step": 44405 }, { "epoch": 2.07, - "learning_rate": 1.5929867329239136e-05, - "loss": 0.2195, + "learning_rate": 2.5936217683678405e-05, + "loss": 0.2028, "step": 44410 }, { "epoch": 2.07, - "learning_rate": 1.5929398527963996e-05, - "loss": 0.3555, + "learning_rate": 2.5935749613842385e-05, + "loss": 0.3016, "step": 44415 }, { "epoch": 2.07, - "learning_rate": 1.5928929726688856e-05, - "loss": 0.0534, + "learning_rate": 2.593528154400637e-05, + "loss": 0.0569, "step": 44420 }, { "epoch": 2.07, - "learning_rate": 1.592846092541372e-05, - "loss": 0.0423, + "learning_rate": 2.5934813474170348e-05, + "loss": 0.0465, "step": 44425 }, { "epoch": 2.07, - "learning_rate": 1.592799212413858e-05, - "loss": 0.0974, + "learning_rate": 2.5934345404334328e-05, + "loss": 0.0423, "step": 44430 }, { "epoch": 2.07, - "learning_rate": 1.592752332286344e-05, - "loss": 0.1153, + "learning_rate": 2.5933877334498308e-05, + "loss": 0.0806, "step": 44435 }, { "epoch": 2.07, - "learning_rate": 1.5927054521588303e-05, - "loss": 0.1223, + "learning_rate": 2.5933409264662288e-05, + "loss": 0.1182, "step": 44440 }, { "epoch": 2.07, - "learning_rate": 1.5926585720313162e-05, - "loss": 0.0456, + "learning_rate": 2.5932941194826268e-05, + "loss": 0.1466, "step": 44445 }, { "epoch": 2.07, - "learning_rate": 1.5926116919038022e-05, - "loss": 0.24, + "learning_rate": 2.5932473124990247e-05, + "loss": 0.2372, "step": 44450 }, { "epoch": 2.07, - "learning_rate": 1.5925648117762882e-05, - "loss": 0.2007, + "learning_rate": 2.593200505515423e-05, + "loss": 0.2064, "step": 44455 }, { "epoch": 2.07, - "learning_rate": 1.5925179316487742e-05, - "loss": 0.3084, + "learning_rate": 2.593153698531821e-05, + "loss": 0.3178, "step": 44460 }, { "epoch": 2.07, - "learning_rate": 1.5924710515212602e-05, - "loss": 0.2493, + "learning_rate": 2.593106891548219e-05, + "loss": 0.3023, "step": 44465 }, { "epoch": 2.08, - "learning_rate": 1.5924241713937462e-05, - "loss": 0.0436, + "learning_rate": 2.593060084564617e-05, + "loss": 0.0771, "step": 44470 }, { "epoch": 2.08, - "learning_rate": 1.5923772912662322e-05, - "loss": 0.0289, + "learning_rate": 2.5930132775810153e-05, + "loss": 0.029, "step": 44475 }, { "epoch": 2.08, - "learning_rate": 1.5923304111387182e-05, - "loss": 0.0585, + "learning_rate": 2.5929664705974133e-05, + "loss": 0.0605, "step": 44480 }, { "epoch": 2.08, - "learning_rate": 1.5922835310112045e-05, - "loss": 0.0515, + "learning_rate": 2.5929196636138113e-05, + "loss": 0.0319, "step": 44485 }, { "epoch": 2.08, - "learning_rate": 1.5922366508836905e-05, - "loss": 0.1049, + "learning_rate": 2.5928728566302096e-05, + "loss": 0.065, "step": 44490 }, { "epoch": 2.08, - "learning_rate": 1.5921897707561765e-05, - "loss": 0.1265, + "learning_rate": 2.5928260496466073e-05, + "loss": 0.0691, "step": 44495 }, { "epoch": 2.08, - "learning_rate": 1.5921428906286625e-05, - "loss": 0.0542, + "learning_rate": 2.5927792426630052e-05, + "loss": 0.1597, "step": 44500 }, { "epoch": 2.08, - "learning_rate": 1.592096010501149e-05, - "loss": 0.3128, + "learning_rate": 2.5927324356794032e-05, + "loss": 0.1046, "step": 44505 }, { "epoch": 2.08, - "learning_rate": 1.592049130373635e-05, - "loss": 0.2474, + "learning_rate": 2.5926856286958015e-05, + "loss": 0.1856, "step": 44510 }, { "epoch": 2.08, - "learning_rate": 1.592002250246121e-05, - "loss": 0.3596, + "learning_rate": 2.5926388217121995e-05, + "loss": 0.3088, "step": 44515 }, { "epoch": 2.08, - "learning_rate": 1.5919553701186068e-05, - "loss": 0.1306, + "learning_rate": 2.5925920147285975e-05, + "loss": 0.0857, "step": 44520 }, { "epoch": 2.08, - "learning_rate": 1.5919084899910928e-05, - "loss": 0.0428, + "learning_rate": 2.5925452077449955e-05, + "loss": 0.0453, "step": 44525 }, { "epoch": 2.08, - "learning_rate": 1.5918616098635788e-05, - "loss": 0.0753, + "learning_rate": 2.5924984007613938e-05, + "loss": 0.0773, "step": 44530 }, { "epoch": 2.08, - "learning_rate": 1.591814729736065e-05, - "loss": 0.12, + "learning_rate": 2.5924515937777918e-05, + "loss": 0.0542, "step": 44535 }, { "epoch": 2.08, - "learning_rate": 1.591767849608551e-05, - "loss": 0.1302, + "learning_rate": 2.5924047867941898e-05, + "loss": 0.0752, "step": 44540 }, { "epoch": 2.08, - "learning_rate": 1.591720969481037e-05, - "loss": 0.1034, + "learning_rate": 2.5923579798105877e-05, + "loss": 0.119, "step": 44545 }, { "epoch": 2.08, - "learning_rate": 1.591674089353523e-05, - "loss": 0.1406, + "learning_rate": 2.592311172826986e-05, + "loss": 0.1554, "step": 44550 }, { "epoch": 2.08, - "learning_rate": 1.591627209226009e-05, - "loss": 0.1504, + "learning_rate": 2.592264365843384e-05, + "loss": 0.1366, "step": 44555 }, { "epoch": 2.08, - "learning_rate": 1.591580329098495e-05, - "loss": 0.3588, + "learning_rate": 2.5922175588597817e-05, + "loss": 0.2023, "step": 44560 }, { "epoch": 2.08, - "learning_rate": 1.5915334489709814e-05, - "loss": 0.3762, + "learning_rate": 2.59217075187618e-05, + "loss": 0.3999, "step": 44565 }, { "epoch": 2.08, - "learning_rate": 1.5914865688434674e-05, - "loss": 0.0344, + "learning_rate": 2.592123944892578e-05, + "loss": 0.0511, "step": 44570 }, { "epoch": 2.08, - "learning_rate": 1.5914396887159534e-05, - "loss": 0.0487, + "learning_rate": 2.592077137908976e-05, + "loss": 0.0905, "step": 44575 }, { "epoch": 2.08, - "learning_rate": 1.5913928085884394e-05, - "loss": 0.0769, + "learning_rate": 2.592030330925374e-05, + "loss": 0.0634, "step": 44580 }, { "epoch": 2.08, - "learning_rate": 1.5913459284609258e-05, - "loss": 0.0923, + "learning_rate": 2.5919835239417723e-05, + "loss": 0.1139, "step": 44585 }, { "epoch": 2.08, - "learning_rate": 1.5912990483334117e-05, - "loss": 0.1312, + "learning_rate": 2.5919367169581703e-05, + "loss": 0.1004, "step": 44590 }, { "epoch": 2.08, - "learning_rate": 1.5912521682058977e-05, - "loss": 0.0953, + "learning_rate": 2.5918899099745682e-05, + "loss": 0.1277, "step": 44595 }, { "epoch": 2.08, - "learning_rate": 1.5912052880783837e-05, - "loss": 0.2189, + "learning_rate": 2.5918431029909662e-05, + "loss": 0.1798, "step": 44600 }, { "epoch": 2.08, - "learning_rate": 1.5911584079508697e-05, - "loss": 0.3004, + "learning_rate": 2.5917962960073645e-05, + "loss": 0.2032, "step": 44605 }, { "epoch": 2.08, - "learning_rate": 1.5911115278233557e-05, - "loss": 0.2755, + "learning_rate": 2.5917494890237625e-05, + "loss": 0.2548, "step": 44610 }, { "epoch": 2.08, - "learning_rate": 1.5910646476958417e-05, - "loss": 0.3, + "learning_rate": 2.5917026820401605e-05, + "loss": 0.4322, "step": 44615 }, { "epoch": 2.08, - "learning_rate": 1.5910177675683277e-05, - "loss": 0.0662, + "learning_rate": 2.5916558750565585e-05, + "loss": 0.0485, "step": 44620 }, { "epoch": 2.08, - "learning_rate": 1.590970887440814e-05, - "loss": 0.0399, + "learning_rate": 2.5916090680729565e-05, + "loss": 0.063, "step": 44625 }, { "epoch": 2.08, - "learning_rate": 1.5909240073133e-05, - "loss": 0.0311, + "learning_rate": 2.5915622610893545e-05, + "loss": 0.0712, "step": 44630 }, { "epoch": 2.08, - "learning_rate": 1.590877127185786e-05, - "loss": 0.0786, + "learning_rate": 2.5915154541057524e-05, + "loss": 0.0526, "step": 44635 }, { "epoch": 2.08, - "learning_rate": 1.590830247058272e-05, - "loss": 0.1089, + "learning_rate": 2.5914686471221508e-05, + "loss": 0.1514, "step": 44640 }, { "epoch": 2.08, - "learning_rate": 1.5907833669307584e-05, - "loss": 0.1173, + "learning_rate": 2.5914218401385487e-05, + "loss": 0.058, "step": 44645 }, { "epoch": 2.08, - "learning_rate": 1.5907364868032443e-05, - "loss": 0.1884, + "learning_rate": 2.5913750331549467e-05, + "loss": 0.0908, "step": 44650 }, { "epoch": 2.08, - "learning_rate": 1.5906896066757303e-05, - "loss": 0.1855, + "learning_rate": 2.5913282261713447e-05, + "loss": 0.1926, "step": 44655 }, { "epoch": 2.08, - "learning_rate": 1.5906427265482163e-05, - "loss": 0.1961, + "learning_rate": 2.591281419187743e-05, + "loss": 0.3495, "step": 44660 }, { "epoch": 2.08, - "learning_rate": 1.5905958464207023e-05, - "loss": 0.3027, + "learning_rate": 2.591234612204141e-05, + "loss": 0.4036, "step": 44665 }, { "epoch": 2.08, - "learning_rate": 1.5905489662931887e-05, - "loss": 0.0615, + "learning_rate": 2.591187805220539e-05, + "loss": 0.0753, "step": 44670 }, { "epoch": 2.08, - "learning_rate": 1.5905020861656747e-05, - "loss": 0.0229, + "learning_rate": 2.5911409982369373e-05, + "loss": 0.0733, "step": 44675 }, { "epoch": 2.08, - "learning_rate": 1.5904552060381606e-05, - "loss": 0.0722, + "learning_rate": 2.5910941912533353e-05, + "loss": 0.0703, "step": 44680 }, { "epoch": 2.09, - "learning_rate": 1.5904083259106466e-05, - "loss": 0.0763, + "learning_rate": 2.591047384269733e-05, + "loss": 0.142, "step": 44685 }, { "epoch": 2.09, - "learning_rate": 1.5903614457831326e-05, - "loss": 0.0572, + "learning_rate": 2.591000577286131e-05, + "loss": 0.1062, "step": 44690 }, { "epoch": 2.09, - "learning_rate": 1.5903145656556186e-05, - "loss": 0.1163, + "learning_rate": 2.5909537703025292e-05, + "loss": 0.0661, "step": 44695 }, { "epoch": 2.09, - "learning_rate": 1.5902676855281046e-05, - "loss": 0.1347, + "learning_rate": 2.5909069633189272e-05, + "loss": 0.1498, "step": 44700 }, { "epoch": 2.09, - "learning_rate": 1.5902208054005906e-05, - "loss": 0.1086, + "learning_rate": 2.5908601563353252e-05, + "loss": 0.1408, "step": 44705 }, { "epoch": 2.09, - "learning_rate": 1.590173925273077e-05, - "loss": 0.2285, + "learning_rate": 2.5908133493517232e-05, + "loss": 0.2573, "step": 44710 }, { "epoch": 2.09, - "learning_rate": 1.590127045145563e-05, - "loss": 0.3338, + "learning_rate": 2.5907665423681215e-05, + "loss": 0.2723, "step": 44715 }, { "epoch": 2.09, - "learning_rate": 1.590080165018049e-05, - "loss": 0.0527, + "learning_rate": 2.5907197353845195e-05, + "loss": 0.0442, "step": 44720 }, { "epoch": 2.09, - "learning_rate": 1.5900332848905353e-05, - "loss": 0.0404, + "learning_rate": 2.5906729284009175e-05, + "loss": 0.0766, "step": 44725 }, { "epoch": 2.09, - "learning_rate": 1.5899864047630213e-05, - "loss": 0.031, + "learning_rate": 2.5906261214173154e-05, + "loss": 0.0274, "step": 44730 }, { "epoch": 2.09, - "learning_rate": 1.5899395246355072e-05, - "loss": 0.0831, + "learning_rate": 2.5905793144337138e-05, + "loss": 0.1058, "step": 44735 }, { "epoch": 2.09, - "learning_rate": 1.5898926445079932e-05, - "loss": 0.0364, + "learning_rate": 2.5905325074501117e-05, + "loss": 0.1708, "step": 44740 }, { "epoch": 2.09, - "learning_rate": 1.5898457643804792e-05, - "loss": 0.1629, + "learning_rate": 2.5904857004665097e-05, + "loss": 0.2116, "step": 44745 }, { "epoch": 2.09, - "learning_rate": 1.5897988842529652e-05, - "loss": 0.166, + "learning_rate": 2.5904388934829077e-05, + "loss": 0.1723, "step": 44750 }, { "epoch": 2.09, - "learning_rate": 1.5897520041254512e-05, - "loss": 0.2196, + "learning_rate": 2.5903920864993057e-05, + "loss": 0.1797, "step": 44755 }, { "epoch": 2.09, - "learning_rate": 1.5897051239979372e-05, - "loss": 0.2828, + "learning_rate": 2.5903452795157037e-05, + "loss": 0.2608, "step": 44760 }, { "epoch": 2.09, - "learning_rate": 1.5896582438704235e-05, - "loss": 0.2774, + "learning_rate": 2.5902984725321017e-05, + "loss": 0.2864, "step": 44765 }, { "epoch": 2.09, - "learning_rate": 1.5896113637429095e-05, - "loss": 0.0914, + "learning_rate": 2.5902516655485e-05, + "loss": 0.1087, "step": 44770 }, { "epoch": 2.09, - "learning_rate": 1.5895644836153955e-05, - "loss": 0.0176, + "learning_rate": 2.590204858564898e-05, + "loss": 0.0488, "step": 44775 }, { "epoch": 2.09, - "learning_rate": 1.5895176034878815e-05, - "loss": 0.1039, + "learning_rate": 2.590158051581296e-05, + "loss": 0.0883, "step": 44780 }, { "epoch": 2.09, - "learning_rate": 1.5894707233603675e-05, - "loss": 0.1057, + "learning_rate": 2.590111244597694e-05, + "loss": 0.0909, "step": 44785 }, { "epoch": 2.09, - "learning_rate": 1.589423843232854e-05, - "loss": 0.0865, + "learning_rate": 2.5900644376140922e-05, + "loss": 0.0817, "step": 44790 }, { "epoch": 2.09, - "learning_rate": 1.58937696310534e-05, - "loss": 0.1795, + "learning_rate": 2.5900176306304902e-05, + "loss": 0.1462, "step": 44795 }, { "epoch": 2.09, - "learning_rate": 1.589330082977826e-05, - "loss": 0.2326, + "learning_rate": 2.5899708236468882e-05, + "loss": 0.1889, "step": 44800 }, { "epoch": 2.09, - "learning_rate": 1.589283202850312e-05, - "loss": 0.1466, + "learning_rate": 2.5899240166632865e-05, + "loss": 0.1847, "step": 44805 }, { "epoch": 2.09, - "learning_rate": 1.589236322722798e-05, - "loss": 0.2534, + "learning_rate": 2.5898772096796842e-05, + "loss": 0.2511, "step": 44810 }, { "epoch": 2.09, - "learning_rate": 1.589189442595284e-05, - "loss": 0.2671, + "learning_rate": 2.589830402696082e-05, + "loss": 0.2548, "step": 44815 }, { "epoch": 2.09, - "learning_rate": 1.58914256246777e-05, - "loss": 0.0678, + "learning_rate": 2.58978359571248e-05, + "loss": 0.0699, "step": 44820 }, { "epoch": 2.09, - "learning_rate": 1.589095682340256e-05, - "loss": 0.043, + "learning_rate": 2.5897367887288785e-05, + "loss": 0.0488, "step": 44825 }, { "epoch": 2.09, - "learning_rate": 1.589048802212742e-05, - "loss": 0.0897, + "learning_rate": 2.5896899817452764e-05, + "loss": 0.0716, "step": 44830 }, { "epoch": 2.09, - "learning_rate": 1.589001922085228e-05, - "loss": 0.0819, + "learning_rate": 2.5896431747616744e-05, + "loss": 0.0266, "step": 44835 }, { "epoch": 2.09, - "learning_rate": 1.588955041957714e-05, - "loss": 0.0692, + "learning_rate": 2.5895963677780724e-05, + "loss": 0.1222, "step": 44840 }, { "epoch": 2.09, - "learning_rate": 1.5889081618302e-05, - "loss": 0.1084, + "learning_rate": 2.5895495607944707e-05, + "loss": 0.1125, "step": 44845 }, { "epoch": 2.09, - "learning_rate": 1.588861281702686e-05, - "loss": 0.1194, + "learning_rate": 2.5895027538108687e-05, + "loss": 0.1399, "step": 44850 }, { "epoch": 2.09, - "learning_rate": 1.5888144015751724e-05, - "loss": 0.1409, + "learning_rate": 2.5894559468272667e-05, + "loss": 0.2239, "step": 44855 }, { "epoch": 2.09, - "learning_rate": 1.5887675214476584e-05, - "loss": 0.3668, + "learning_rate": 2.589409139843665e-05, + "loss": 0.3015, "step": 44860 }, { "epoch": 2.09, - "learning_rate": 1.5887206413201444e-05, - "loss": 0.2747, + "learning_rate": 2.589362332860063e-05, + "loss": 0.2039, "step": 44865 }, { "epoch": 2.09, - "learning_rate": 1.5886737611926308e-05, - "loss": 0.0384, + "learning_rate": 2.589315525876461e-05, + "loss": 0.0715, "step": 44870 }, { "epoch": 2.09, - "learning_rate": 1.5886268810651168e-05, - "loss": 0.0351, + "learning_rate": 2.5892687188928586e-05, + "loss": 0.094, "step": 44875 }, { "epoch": 2.09, - "learning_rate": 1.5885800009376028e-05, - "loss": 0.1084, + "learning_rate": 2.589221911909257e-05, + "loss": 0.0492, "step": 44880 }, { "epoch": 2.09, - "learning_rate": 1.5885331208100887e-05, - "loss": 0.0684, + "learning_rate": 2.589175104925655e-05, + "loss": 0.0639, "step": 44885 }, { "epoch": 2.09, - "learning_rate": 1.5884862406825747e-05, - "loss": 0.0758, + "learning_rate": 2.589128297942053e-05, + "loss": 0.0756, "step": 44890 }, { "epoch": 2.09, - "learning_rate": 1.5884393605550607e-05, - "loss": 0.1907, + "learning_rate": 2.589081490958451e-05, + "loss": 0.1301, "step": 44895 }, { "epoch": 2.1, - "learning_rate": 1.5883924804275467e-05, - "loss": 0.1878, + "learning_rate": 2.5890346839748492e-05, + "loss": 0.1054, "step": 44900 }, { "epoch": 2.1, - "learning_rate": 1.588345600300033e-05, - "loss": 0.1311, + "learning_rate": 2.5889878769912472e-05, + "loss": 0.2198, "step": 44905 }, { "epoch": 2.1, - "learning_rate": 1.588298720172519e-05, - "loss": 0.2296, + "learning_rate": 2.588941070007645e-05, + "loss": 0.3217, "step": 44910 }, { "epoch": 2.1, - "learning_rate": 1.588251840045005e-05, - "loss": 0.2933, + "learning_rate": 2.5888942630240435e-05, + "loss": 0.1799, "step": 44915 }, { "epoch": 2.1, - "learning_rate": 1.588204959917491e-05, - "loss": 0.0965, + "learning_rate": 2.5888474560404415e-05, + "loss": 0.0618, "step": 44920 }, { "epoch": 2.1, - "learning_rate": 1.588158079789977e-05, - "loss": 0.0439, + "learning_rate": 2.5888006490568394e-05, + "loss": 0.0614, "step": 44925 }, { "epoch": 2.1, - "learning_rate": 1.588111199662463e-05, - "loss": 0.0551, + "learning_rate": 2.5887538420732374e-05, + "loss": 0.0542, "step": 44930 }, { "epoch": 2.1, - "learning_rate": 1.5880643195349494e-05, - "loss": 0.1475, + "learning_rate": 2.5887070350896354e-05, + "loss": 0.0665, "step": 44935 }, { "epoch": 2.1, - "learning_rate": 1.5880174394074353e-05, - "loss": 0.0817, + "learning_rate": 2.5886602281060334e-05, + "loss": 0.0419, "step": 44940 }, { "epoch": 2.1, - "learning_rate": 1.5879705592799213e-05, - "loss": 0.1122, + "learning_rate": 2.5886134211224314e-05, + "loss": 0.0984, "step": 44945 }, { "epoch": 2.1, - "learning_rate": 1.5879236791524077e-05, - "loss": 0.2288, + "learning_rate": 2.5885666141388294e-05, + "loss": 0.0959, "step": 44950 }, { "epoch": 2.1, - "learning_rate": 1.5878767990248937e-05, - "loss": 0.184, + "learning_rate": 2.5885198071552277e-05, + "loss": 0.1719, "step": 44955 }, { "epoch": 2.1, - "learning_rate": 1.5878299188973797e-05, - "loss": 0.2115, + "learning_rate": 2.5884730001716257e-05, + "loss": 0.1955, "step": 44960 }, { "epoch": 2.1, - "learning_rate": 1.5877830387698657e-05, - "loss": 0.3778, + "learning_rate": 2.5884261931880236e-05, + "loss": 0.199, "step": 44965 }, { "epoch": 2.1, - "learning_rate": 1.5877361586423516e-05, - "loss": 0.0626, + "learning_rate": 2.5883793862044216e-05, + "loss": 0.0628, "step": 44970 }, { "epoch": 2.1, - "learning_rate": 1.5876892785148376e-05, - "loss": 0.0159, + "learning_rate": 2.58833257922082e-05, + "loss": 0.0414, "step": 44975 }, { "epoch": 2.1, - "learning_rate": 1.5876423983873236e-05, - "loss": 0.0709, + "learning_rate": 2.588285772237218e-05, + "loss": 0.0432, "step": 44980 }, { "epoch": 2.1, - "learning_rate": 1.5875955182598096e-05, - "loss": 0.1002, + "learning_rate": 2.588238965253616e-05, + "loss": 0.1006, "step": 44985 }, { "epoch": 2.1, - "learning_rate": 1.5875486381322956e-05, - "loss": 0.0633, + "learning_rate": 2.5881921582700142e-05, + "loss": 0.1676, "step": 44990 }, { "epoch": 2.1, - "learning_rate": 1.587501758004782e-05, - "loss": 0.1328, + "learning_rate": 2.5881453512864122e-05, + "loss": 0.1348, "step": 44995 }, { "epoch": 2.1, - "learning_rate": 1.587454877877268e-05, - "loss": 0.1123, + "learning_rate": 2.58809854430281e-05, + "loss": 0.1127, "step": 45000 }, { "epoch": 2.1, - "learning_rate": 1.587407997749754e-05, - "loss": 0.258, + "learning_rate": 2.5880517373192078e-05, + "loss": 0.1484, "step": 45005 }, { "epoch": 2.1, - "learning_rate": 1.58736111762224e-05, - "loss": 0.167, + "learning_rate": 2.588004930335606e-05, + "loss": 0.2177, "step": 45010 }, { "epoch": 2.1, - "learning_rate": 1.5873142374947263e-05, - "loss": 0.263, + "learning_rate": 2.587958123352004e-05, + "loss": 0.3085, "step": 45015 }, { "epoch": 2.1, - "learning_rate": 1.5872673573672123e-05, - "loss": 0.0767, + "learning_rate": 2.587911316368402e-05, + "loss": 0.06, "step": 45020 }, { "epoch": 2.1, - "learning_rate": 1.5872204772396983e-05, - "loss": 0.107, + "learning_rate": 2.5878645093848e-05, + "loss": 0.064, "step": 45025 }, { "epoch": 2.1, - "learning_rate": 1.5871735971121842e-05, - "loss": 0.0827, + "learning_rate": 2.5878177024011984e-05, + "loss": 0.0564, "step": 45030 }, { "epoch": 2.1, - "learning_rate": 1.5871267169846702e-05, - "loss": 0.129, + "learning_rate": 2.5877708954175964e-05, + "loss": 0.0696, "step": 45035 }, { "epoch": 2.1, - "learning_rate": 1.5870798368571562e-05, - "loss": 0.0815, + "learning_rate": 2.5877240884339944e-05, + "loss": 0.0959, "step": 45040 }, { "epoch": 2.1, - "learning_rate": 1.5870329567296426e-05, - "loss": 0.1138, + "learning_rate": 2.5876772814503927e-05, + "loss": 0.1166, "step": 45045 }, { "epoch": 2.1, - "learning_rate": 1.5869860766021286e-05, - "loss": 0.0898, + "learning_rate": 2.5876304744667907e-05, + "loss": 0.0576, "step": 45050 }, { "epoch": 2.1, - "learning_rate": 1.5869391964746146e-05, - "loss": 0.2097, + "learning_rate": 2.5875836674831887e-05, + "loss": 0.1842, "step": 45055 }, { "epoch": 2.1, - "learning_rate": 1.5868923163471005e-05, - "loss": 0.3592, + "learning_rate": 2.5875368604995866e-05, + "loss": 0.148, "step": 45060 }, { "epoch": 2.1, - "learning_rate": 1.5868454362195865e-05, - "loss": 0.3279, + "learning_rate": 2.5874900535159846e-05, + "loss": 0.2105, "step": 45065 }, { "epoch": 2.1, - "learning_rate": 1.5867985560920725e-05, - "loss": 0.0615, + "learning_rate": 2.5874432465323826e-05, + "loss": 0.1142, "step": 45070 }, { "epoch": 2.1, - "learning_rate": 1.586751675964559e-05, - "loss": 0.0598, + "learning_rate": 2.5873964395487806e-05, + "loss": 0.0621, "step": 45075 }, { "epoch": 2.1, - "learning_rate": 1.586704795837045e-05, - "loss": 0.0647, + "learning_rate": 2.5873496325651786e-05, + "loss": 0.1044, "step": 45080 }, { "epoch": 2.1, - "learning_rate": 1.586657915709531e-05, - "loss": 0.0312, + "learning_rate": 2.587302825581577e-05, + "loss": 0.0424, "step": 45085 }, { "epoch": 2.1, - "learning_rate": 1.586611035582017e-05, - "loss": 0.0893, + "learning_rate": 2.587256018597975e-05, + "loss": 0.0724, "step": 45090 }, { "epoch": 2.1, - "learning_rate": 1.5865641554545032e-05, - "loss": 0.0462, + "learning_rate": 2.587209211614373e-05, + "loss": 0.0786, "step": 45095 }, { "epoch": 2.1, - "learning_rate": 1.5865172753269892e-05, - "loss": 0.1772, + "learning_rate": 2.5871624046307712e-05, + "loss": 0.2606, "step": 45100 }, { "epoch": 2.1, - "learning_rate": 1.586470395199475e-05, - "loss": 0.2507, + "learning_rate": 2.587115597647169e-05, + "loss": 0.2202, "step": 45105 }, { "epoch": 2.1, - "learning_rate": 1.586423515071961e-05, - "loss": 0.2887, + "learning_rate": 2.587068790663567e-05, + "loss": 0.2612, "step": 45110 }, { "epoch": 2.11, - "learning_rate": 1.586376634944447e-05, - "loss": 0.2688, + "learning_rate": 2.587021983679965e-05, + "loss": 0.3177, "step": 45115 }, { "epoch": 2.11, - "learning_rate": 1.586329754816933e-05, - "loss": 0.0636, + "learning_rate": 2.5869751766963634e-05, + "loss": 0.0566, "step": 45120 }, { "epoch": 2.11, - "learning_rate": 1.586282874689419e-05, - "loss": 0.051, + "learning_rate": 2.586928369712761e-05, + "loss": 0.017, "step": 45125 }, { "epoch": 2.11, - "learning_rate": 1.586235994561905e-05, - "loss": 0.0607, + "learning_rate": 2.586881562729159e-05, + "loss": 0.0516, "step": 45130 }, { "epoch": 2.11, - "learning_rate": 1.5861891144343915e-05, - "loss": 0.0952, + "learning_rate": 2.586834755745557e-05, + "loss": 0.0864, "step": 45135 }, { "epoch": 2.11, - "learning_rate": 1.5861422343068775e-05, - "loss": 0.0968, + "learning_rate": 2.5867879487619554e-05, + "loss": 0.0942, "step": 45140 }, { "epoch": 2.11, - "learning_rate": 1.5860953541793634e-05, - "loss": 0.0867, + "learning_rate": 2.5867411417783534e-05, + "loss": 0.1463, "step": 45145 }, { "epoch": 2.11, - "learning_rate": 1.5860484740518494e-05, - "loss": 0.0972, + "learning_rate": 2.5866943347947513e-05, + "loss": 0.1791, "step": 45150 }, { "epoch": 2.11, - "learning_rate": 1.5860015939243358e-05, - "loss": 0.2867, + "learning_rate": 2.5866475278111493e-05, + "loss": 0.1478, "step": 45155 }, { "epoch": 2.11, - "learning_rate": 1.5859547137968218e-05, - "loss": 0.1727, + "learning_rate": 2.5866007208275476e-05, + "loss": 0.139, "step": 45160 }, { "epoch": 2.11, - "learning_rate": 1.5859078336693078e-05, - "loss": 0.4698, + "learning_rate": 2.5865539138439456e-05, + "loss": 0.2919, "step": 45165 }, { "epoch": 2.11, - "learning_rate": 1.5858609535417938e-05, - "loss": 0.0394, + "learning_rate": 2.5865071068603436e-05, + "loss": 0.0715, "step": 45170 }, { "epoch": 2.11, - "learning_rate": 1.5858140734142797e-05, - "loss": 0.0625, + "learning_rate": 2.586460299876742e-05, + "loss": 0.0356, "step": 45175 }, { "epoch": 2.11, - "learning_rate": 1.5857671932867657e-05, - "loss": 0.0684, + "learning_rate": 2.58641349289314e-05, + "loss": 0.086, "step": 45180 }, { "epoch": 2.11, - "learning_rate": 1.585720313159252e-05, - "loss": 0.069, + "learning_rate": 2.586366685909538e-05, + "loss": 0.0643, "step": 45185 }, { "epoch": 2.11, - "learning_rate": 1.585673433031738e-05, - "loss": 0.0847, + "learning_rate": 2.5863198789259355e-05, + "loss": 0.079, "step": 45190 }, { "epoch": 2.11, - "learning_rate": 1.585626552904224e-05, - "loss": 0.1978, + "learning_rate": 2.586273071942334e-05, + "loss": 0.089, "step": 45195 }, { "epoch": 2.11, - "learning_rate": 1.58557967277671e-05, - "loss": 0.1196, + "learning_rate": 2.5862262649587318e-05, + "loss": 0.1203, "step": 45200 }, { "epoch": 2.11, - "learning_rate": 1.585532792649196e-05, - "loss": 0.2047, + "learning_rate": 2.5861794579751298e-05, + "loss": 0.2201, "step": 45205 }, { "epoch": 2.11, - "learning_rate": 1.585485912521682e-05, - "loss": 0.2853, + "learning_rate": 2.5861326509915278e-05, + "loss": 0.2835, "step": 45210 }, { "epoch": 2.11, - "learning_rate": 1.585439032394168e-05, - "loss": 0.2437, + "learning_rate": 2.586085844007926e-05, + "loss": 0.2557, "step": 45215 }, { "epoch": 2.11, - "learning_rate": 1.5853921522666544e-05, - "loss": 0.087, + "learning_rate": 2.586039037024324e-05, + "loss": 0.0483, "step": 45220 }, { "epoch": 2.11, - "learning_rate": 1.5853452721391404e-05, - "loss": 0.0486, + "learning_rate": 2.585992230040722e-05, + "loss": 0.028, "step": 45225 }, { "epoch": 2.11, - "learning_rate": 1.5852983920116264e-05, - "loss": 0.1025, + "learning_rate": 2.5859454230571204e-05, + "loss": 0.0995, "step": 45230 }, { "epoch": 2.11, - "learning_rate": 1.5852515118841127e-05, - "loss": 0.0609, + "learning_rate": 2.5858986160735184e-05, + "loss": 0.125, "step": 45235 }, { "epoch": 2.11, - "learning_rate": 1.5852046317565987e-05, - "loss": 0.11, + "learning_rate": 2.5858518090899164e-05, + "loss": 0.0796, "step": 45240 }, { "epoch": 2.11, - "learning_rate": 1.5851577516290847e-05, - "loss": 0.1482, + "learning_rate": 2.5858050021063143e-05, + "loss": 0.079, "step": 45245 }, { "epoch": 2.11, - "learning_rate": 1.5851108715015707e-05, - "loss": 0.1591, + "learning_rate": 2.5857581951227127e-05, + "loss": 0.1018, "step": 45250 }, { "epoch": 2.11, - "learning_rate": 1.5850639913740567e-05, - "loss": 0.2293, + "learning_rate": 2.5857113881391103e-05, + "loss": 0.1585, "step": 45255 }, { "epoch": 2.11, - "learning_rate": 1.5850171112465427e-05, - "loss": 0.2316, + "learning_rate": 2.5856645811555083e-05, + "loss": 0.2384, "step": 45260 }, { "epoch": 2.11, - "learning_rate": 1.5849702311190286e-05, - "loss": 0.3227, + "learning_rate": 2.5856177741719063e-05, + "loss": 0.3055, "step": 45265 }, { "epoch": 2.11, - "learning_rate": 1.5849233509915146e-05, - "loss": 0.0498, + "learning_rate": 2.5855709671883046e-05, + "loss": 0.0666, "step": 45270 }, { "epoch": 2.11, - "learning_rate": 1.584876470864001e-05, - "loss": 0.0601, + "learning_rate": 2.5855241602047026e-05, + "loss": 0.1191, "step": 45275 }, { "epoch": 2.11, - "learning_rate": 1.584829590736487e-05, - "loss": 0.0298, + "learning_rate": 2.5854773532211006e-05, + "loss": 0.083, "step": 45280 }, { "epoch": 2.11, - "learning_rate": 1.584782710608973e-05, - "loss": 0.0737, + "learning_rate": 2.585430546237499e-05, + "loss": 0.0824, "step": 45285 }, { "epoch": 2.11, - "learning_rate": 1.584735830481459e-05, - "loss": 0.0653, + "learning_rate": 2.585383739253897e-05, + "loss": 0.1073, "step": 45290 }, { "epoch": 2.11, - "learning_rate": 1.584688950353945e-05, - "loss": 0.1548, + "learning_rate": 2.585336932270295e-05, + "loss": 0.1634, "step": 45295 }, { "epoch": 2.11, - "learning_rate": 1.5846420702264313e-05, - "loss": 0.1249, + "learning_rate": 2.5852901252866928e-05, + "loss": 0.1676, "step": 45300 }, { "epoch": 2.11, - "learning_rate": 1.5845951900989173e-05, - "loss": 0.1399, + "learning_rate": 2.585243318303091e-05, + "loss": 0.2054, "step": 45305 }, { "epoch": 2.11, - "learning_rate": 1.5845483099714033e-05, - "loss": 0.2704, + "learning_rate": 2.585196511319489e-05, + "loss": 0.1974, "step": 45310 }, { "epoch": 2.11, - "learning_rate": 1.5845014298438893e-05, - "loss": 0.2491, + "learning_rate": 2.5851497043358868e-05, + "loss": 0.3973, "step": 45315 }, { "epoch": 2.11, - "learning_rate": 1.5844545497163756e-05, - "loss": 0.0577, + "learning_rate": 2.5851028973522847e-05, + "loss": 0.0979, "step": 45320 }, { "epoch": 2.11, - "learning_rate": 1.5844076695888616e-05, - "loss": 0.0291, + "learning_rate": 2.585056090368683e-05, + "loss": 0.0769, "step": 45325 }, { "epoch": 2.12, - "learning_rate": 1.5843607894613476e-05, - "loss": 0.0801, + "learning_rate": 2.585009283385081e-05, + "loss": 0.0697, "step": 45330 }, { "epoch": 2.12, - "learning_rate": 1.5843139093338336e-05, - "loss": 0.062, + "learning_rate": 2.584962476401479e-05, + "loss": 0.0937, "step": 45335 }, { "epoch": 2.12, - "learning_rate": 1.5842670292063196e-05, - "loss": 0.0672, + "learning_rate": 2.584915669417877e-05, + "loss": 0.0941, "step": 45340 }, { "epoch": 2.12, - "learning_rate": 1.5842201490788056e-05, - "loss": 0.1409, + "learning_rate": 2.5848688624342753e-05, + "loss": 0.075, "step": 45345 }, { "epoch": 2.12, - "learning_rate": 1.5841732689512915e-05, - "loss": 0.1184, + "learning_rate": 2.5848220554506733e-05, + "loss": 0.0991, "step": 45350 }, { "epoch": 2.12, - "learning_rate": 1.5841263888237775e-05, - "loss": 0.2517, + "learning_rate": 2.5847752484670713e-05, + "loss": 0.2013, "step": 45355 }, { "epoch": 2.12, - "learning_rate": 1.5840795086962635e-05, - "loss": 0.1505, + "learning_rate": 2.5847284414834696e-05, + "loss": 0.2069, "step": 45360 }, { "epoch": 2.12, - "learning_rate": 1.58403262856875e-05, - "loss": 0.318, + "learning_rate": 2.5846816344998676e-05, + "loss": 0.223, "step": 45365 }, { "epoch": 2.12, - "learning_rate": 1.583985748441236e-05, - "loss": 0.0626, + "learning_rate": 2.5846348275162656e-05, + "loss": 0.0948, "step": 45370 }, { "epoch": 2.12, - "learning_rate": 1.583938868313722e-05, - "loss": 0.0521, + "learning_rate": 2.5845880205326636e-05, + "loss": 0.0327, "step": 45375 }, { "epoch": 2.12, - "learning_rate": 1.5838919881862082e-05, - "loss": 0.1245, + "learning_rate": 2.5845412135490615e-05, + "loss": 0.0618, "step": 45380 }, { "epoch": 2.12, - "learning_rate": 1.5838451080586942e-05, - "loss": 0.0721, + "learning_rate": 2.5844944065654595e-05, + "loss": 0.2013, "step": 45385 }, { "epoch": 2.12, - "learning_rate": 1.5837982279311802e-05, - "loss": 0.1457, + "learning_rate": 2.5844475995818575e-05, + "loss": 0.0632, "step": 45390 }, { "epoch": 2.12, - "learning_rate": 1.583751347803666e-05, - "loss": 0.132, + "learning_rate": 2.5844007925982555e-05, + "loss": 0.1223, "step": 45395 }, { "epoch": 2.12, - "learning_rate": 1.583704467676152e-05, - "loss": 0.1566, + "learning_rate": 2.5843539856146538e-05, + "loss": 0.0794, "step": 45400 }, { "epoch": 2.12, - "learning_rate": 1.583657587548638e-05, - "loss": 0.2234, + "learning_rate": 2.5843071786310518e-05, + "loss": 0.1949, "step": 45405 }, { "epoch": 2.12, - "learning_rate": 1.583610707421124e-05, - "loss": 0.1806, + "learning_rate": 2.5842603716474498e-05, + "loss": 0.2036, "step": 45410 }, { "epoch": 2.12, - "learning_rate": 1.5835638272936105e-05, - "loss": 0.384, + "learning_rate": 2.584213564663848e-05, + "loss": 0.1645, "step": 45415 }, { "epoch": 2.12, - "learning_rate": 1.5835169471660965e-05, - "loss": 0.0281, + "learning_rate": 2.584166757680246e-05, + "loss": 0.0458, "step": 45420 }, { "epoch": 2.12, - "learning_rate": 1.5834700670385825e-05, - "loss": 0.0603, + "learning_rate": 2.584119950696644e-05, + "loss": 0.041, "step": 45425 }, { "epoch": 2.12, - "learning_rate": 1.5834231869110685e-05, - "loss": 0.0306, + "learning_rate": 2.584073143713042e-05, + "loss": 0.0514, "step": 45430 }, { "epoch": 2.12, - "learning_rate": 1.5833763067835545e-05, - "loss": 0.0526, + "learning_rate": 2.5840263367294404e-05, + "loss": 0.0592, "step": 45435 }, { "epoch": 2.12, - "learning_rate": 1.5833294266560404e-05, - "loss": 0.1193, + "learning_rate": 2.5839795297458383e-05, + "loss": 0.1285, "step": 45440 }, { "epoch": 2.12, - "learning_rate": 1.5832825465285268e-05, - "loss": 0.0691, + "learning_rate": 2.583932722762236e-05, + "loss": 0.089, "step": 45445 }, { "epoch": 2.12, - "learning_rate": 1.5832356664010128e-05, - "loss": 0.1326, + "learning_rate": 2.583885915778634e-05, + "loss": 0.1468, "step": 45450 }, { "epoch": 2.12, - "learning_rate": 1.5831887862734988e-05, - "loss": 0.1811, + "learning_rate": 2.5838391087950323e-05, + "loss": 0.2113, "step": 45455 }, { "epoch": 2.12, - "learning_rate": 1.583141906145985e-05, - "loss": 0.3118, + "learning_rate": 2.5837923018114303e-05, + "loss": 0.292, "step": 45460 }, { "epoch": 2.12, - "learning_rate": 1.583095026018471e-05, - "loss": 0.2734, + "learning_rate": 2.5837454948278283e-05, + "loss": 0.3104, "step": 45465 }, { "epoch": 2.12, - "learning_rate": 1.583048145890957e-05, - "loss": 0.0796, + "learning_rate": 2.5836986878442266e-05, + "loss": 0.115, "step": 45470 }, { "epoch": 2.12, - "learning_rate": 1.583001265763443e-05, - "loss": 0.0566, + "learning_rate": 2.5836518808606246e-05, + "loss": 0.0699, "step": 45475 }, { "epoch": 2.12, - "learning_rate": 1.582954385635929e-05, - "loss": 0.0649, + "learning_rate": 2.5836050738770225e-05, + "loss": 0.0519, "step": 45480 }, { "epoch": 2.12, - "learning_rate": 1.582907505508415e-05, - "loss": 0.07, + "learning_rate": 2.5835582668934205e-05, + "loss": 0.0238, "step": 45485 }, { "epoch": 2.12, - "learning_rate": 1.582860625380901e-05, - "loss": 0.1253, + "learning_rate": 2.583511459909819e-05, + "loss": 0.1441, "step": 45490 }, { "epoch": 2.12, - "learning_rate": 1.582813745253387e-05, - "loss": 0.0574, + "learning_rate": 2.5834646529262168e-05, + "loss": 0.0614, "step": 45495 }, { "epoch": 2.12, - "learning_rate": 1.582766865125873e-05, - "loss": 0.129, + "learning_rate": 2.5834178459426148e-05, + "loss": 0.1103, "step": 45500 }, { "epoch": 2.12, - "learning_rate": 1.5827199849983594e-05, - "loss": 0.2017, + "learning_rate": 2.5833710389590124e-05, + "loss": 0.1489, "step": 45505 }, { "epoch": 2.12, - "learning_rate": 1.5826731048708454e-05, - "loss": 0.2227, + "learning_rate": 2.5833242319754108e-05, + "loss": 0.3097, "step": 45510 }, { "epoch": 2.12, - "learning_rate": 1.5826262247433314e-05, - "loss": 0.3438, + "learning_rate": 2.5832774249918087e-05, + "loss": 0.365, "step": 45515 }, { "epoch": 2.12, - "learning_rate": 1.5825793446158177e-05, - "loss": 0.0246, + "learning_rate": 2.5832306180082067e-05, + "loss": 0.0619, "step": 45520 }, { "epoch": 2.12, - "learning_rate": 1.5825324644883037e-05, - "loss": 0.0709, + "learning_rate": 2.583183811024605e-05, + "loss": 0.0363, "step": 45525 }, { "epoch": 2.12, - "learning_rate": 1.5824855843607897e-05, - "loss": 0.0477, + "learning_rate": 2.583137004041003e-05, + "loss": 0.0892, "step": 45530 }, { "epoch": 2.12, - "learning_rate": 1.5824387042332757e-05, - "loss": 0.1571, + "learning_rate": 2.583090197057401e-05, + "loss": 0.108, "step": 45535 }, { "epoch": 2.12, - "learning_rate": 1.5823918241057617e-05, - "loss": 0.0867, + "learning_rate": 2.583043390073799e-05, + "loss": 0.0585, "step": 45540 }, { "epoch": 2.13, - "learning_rate": 1.5823449439782477e-05, - "loss": 0.1452, + "learning_rate": 2.5829965830901973e-05, + "loss": 0.0997, "step": 45545 }, { "epoch": 2.13, - "learning_rate": 1.5822980638507337e-05, - "loss": 0.1558, + "learning_rate": 2.5829497761065953e-05, + "loss": 0.1027, "step": 45550 }, { "epoch": 2.13, - "learning_rate": 1.58225118372322e-05, - "loss": 0.2085, + "learning_rate": 2.5829029691229933e-05, + "loss": 0.1712, "step": 45555 }, { "epoch": 2.13, - "learning_rate": 1.582204303595706e-05, - "loss": 0.2106, + "learning_rate": 2.5828561621393913e-05, + "loss": 0.2349, "step": 45560 }, { "epoch": 2.13, - "learning_rate": 1.582157423468192e-05, - "loss": 0.2487, + "learning_rate": 2.5828093551557896e-05, + "loss": 0.2188, "step": 45565 }, { "epoch": 2.13, - "learning_rate": 1.582110543340678e-05, - "loss": 0.0285, + "learning_rate": 2.5827625481721872e-05, + "loss": 0.0574, "step": 45570 }, { "epoch": 2.13, - "learning_rate": 1.582063663213164e-05, - "loss": 0.0829, + "learning_rate": 2.5827157411885852e-05, + "loss": 0.0386, "step": 45575 }, { "epoch": 2.13, - "learning_rate": 1.58201678308565e-05, - "loss": 0.1037, + "learning_rate": 2.5826689342049832e-05, + "loss": 0.0655, "step": 45580 }, { "epoch": 2.13, - "learning_rate": 1.5819699029581363e-05, - "loss": 0.1275, + "learning_rate": 2.5826221272213815e-05, + "loss": 0.0543, "step": 45585 }, { "epoch": 2.13, - "learning_rate": 1.5819230228306223e-05, - "loss": 0.0762, + "learning_rate": 2.5825753202377795e-05, + "loss": 0.123, "step": 45590 }, { "epoch": 2.13, - "learning_rate": 1.5818761427031083e-05, - "loss": 0.1647, + "learning_rate": 2.5825285132541775e-05, + "loss": 0.1047, "step": 45595 }, { "epoch": 2.13, - "learning_rate": 1.5818292625755946e-05, - "loss": 0.0876, + "learning_rate": 2.5824817062705758e-05, + "loss": 0.1185, "step": 45600 }, { "epoch": 2.13, - "learning_rate": 1.5817823824480806e-05, - "loss": 0.202, + "learning_rate": 2.5824348992869738e-05, + "loss": 0.2249, "step": 45605 }, { "epoch": 2.13, - "learning_rate": 1.5817355023205666e-05, - "loss": 0.2445, + "learning_rate": 2.5823880923033718e-05, + "loss": 0.2311, "step": 45610 }, { "epoch": 2.13, - "learning_rate": 1.5816886221930526e-05, - "loss": 0.2188, + "learning_rate": 2.5823412853197697e-05, + "loss": 0.2489, "step": 45615 }, { "epoch": 2.13, - "learning_rate": 1.5816417420655386e-05, - "loss": 0.0779, + "learning_rate": 2.582294478336168e-05, + "loss": 0.0881, "step": 45620 }, { "epoch": 2.13, - "learning_rate": 1.5815948619380246e-05, - "loss": 0.0369, + "learning_rate": 2.582247671352566e-05, + "loss": 0.0905, "step": 45625 }, { "epoch": 2.13, - "learning_rate": 1.5815479818105106e-05, - "loss": 0.0595, + "learning_rate": 2.582200864368964e-05, + "loss": 0.0707, "step": 45630 }, { "epoch": 2.13, - "learning_rate": 1.5815011016829966e-05, - "loss": 0.0622, + "learning_rate": 2.5821540573853617e-05, + "loss": 0.051, "step": 45635 }, { "epoch": 2.13, - "learning_rate": 1.5814542215554826e-05, - "loss": 0.0816, + "learning_rate": 2.58210725040176e-05, + "loss": 0.0836, "step": 45640 }, { "epoch": 2.13, - "learning_rate": 1.581407341427969e-05, - "loss": 0.0371, + "learning_rate": 2.582060443418158e-05, + "loss": 0.1078, "step": 45645 }, { "epoch": 2.13, - "learning_rate": 1.581360461300455e-05, - "loss": 0.2105, + "learning_rate": 2.582013636434556e-05, + "loss": 0.1175, "step": 45650 }, { "epoch": 2.13, - "learning_rate": 1.581313581172941e-05, - "loss": 0.2728, + "learning_rate": 2.5819668294509543e-05, + "loss": 0.1433, "step": 45655 }, { "epoch": 2.13, - "learning_rate": 1.581266701045427e-05, - "loss": 0.3118, + "learning_rate": 2.5819200224673523e-05, + "loss": 0.2411, "step": 45660 }, { "epoch": 2.13, - "learning_rate": 1.5812198209179132e-05, - "loss": 0.2977, + "learning_rate": 2.5818732154837502e-05, + "loss": 0.3004, "step": 45665 }, { "epoch": 2.13, - "learning_rate": 1.5811729407903992e-05, - "loss": 0.0961, + "learning_rate": 2.5818264085001482e-05, + "loss": 0.1811, "step": 45670 }, { "epoch": 2.13, - "learning_rate": 1.5811260606628852e-05, - "loss": 0.0623, + "learning_rate": 2.5817796015165465e-05, + "loss": 0.0373, "step": 45675 }, { "epoch": 2.13, - "learning_rate": 1.5810791805353712e-05, - "loss": 0.0298, + "learning_rate": 2.5817327945329445e-05, + "loss": 0.0428, "step": 45680 }, { "epoch": 2.13, - "learning_rate": 1.5810323004078572e-05, - "loss": 0.0836, + "learning_rate": 2.5816859875493425e-05, + "loss": 0.0723, "step": 45685 }, { "epoch": 2.13, - "learning_rate": 1.580985420280343e-05, - "loss": 0.0684, + "learning_rate": 2.5816391805657405e-05, + "loss": 0.1206, "step": 45690 }, { "epoch": 2.13, - "learning_rate": 1.5809385401528295e-05, - "loss": 0.0929, + "learning_rate": 2.5815923735821385e-05, + "loss": 0.082, "step": 45695 }, { "epoch": 2.13, - "learning_rate": 1.5808916600253155e-05, - "loss": 0.1611, + "learning_rate": 2.5815455665985364e-05, + "loss": 0.0632, "step": 45700 }, { "epoch": 2.13, - "learning_rate": 1.5808447798978015e-05, - "loss": 0.1149, + "learning_rate": 2.5814987596149344e-05, + "loss": 0.2105, "step": 45705 }, { "epoch": 2.13, - "learning_rate": 1.5807978997702875e-05, - "loss": 0.1897, + "learning_rate": 2.5814519526313327e-05, + "loss": 0.1711, "step": 45710 }, { "epoch": 2.13, - "learning_rate": 1.5807510196427735e-05, - "loss": 0.345, + "learning_rate": 2.5814051456477307e-05, + "loss": 0.2524, "step": 45715 }, { "epoch": 2.13, - "learning_rate": 1.5807041395152595e-05, - "loss": 0.0345, + "learning_rate": 2.5813583386641287e-05, + "loss": 0.0903, "step": 45720 }, { "epoch": 2.13, - "learning_rate": 1.5806572593877455e-05, - "loss": 0.0692, + "learning_rate": 2.5813115316805267e-05, + "loss": 0.0578, "step": 45725 }, { "epoch": 2.13, - "learning_rate": 1.5806103792602318e-05, - "loss": 0.0905, + "learning_rate": 2.581264724696925e-05, + "loss": 0.0888, "step": 45730 }, { "epoch": 2.13, - "learning_rate": 1.5805634991327178e-05, - "loss": 0.0561, + "learning_rate": 2.581217917713323e-05, + "loss": 0.0723, "step": 45735 }, { "epoch": 2.13, - "learning_rate": 1.5805166190052038e-05, - "loss": 0.064, + "learning_rate": 2.581171110729721e-05, + "loss": 0.1233, "step": 45740 }, { "epoch": 2.13, - "learning_rate": 1.58046973887769e-05, - "loss": 0.1136, + "learning_rate": 2.581124303746119e-05, + "loss": 0.0671, "step": 45745 }, { "epoch": 2.13, - "learning_rate": 1.580422858750176e-05, - "loss": 0.1444, + "learning_rate": 2.5810774967625173e-05, + "loss": 0.1057, "step": 45750 }, { "epoch": 2.13, - "learning_rate": 1.580375978622662e-05, - "loss": 0.226, + "learning_rate": 2.5810306897789153e-05, + "loss": 0.2059, "step": 45755 }, { "epoch": 2.14, - "learning_rate": 1.580329098495148e-05, - "loss": 0.4227, + "learning_rate": 2.580983882795313e-05, + "loss": 0.2111, "step": 45760 }, { "epoch": 2.14, - "learning_rate": 1.580282218367634e-05, - "loss": 0.3622, + "learning_rate": 2.580937075811711e-05, + "loss": 0.3121, "step": 45765 }, { "epoch": 2.14, - "learning_rate": 1.58023533824012e-05, - "loss": 0.0763, + "learning_rate": 2.5808902688281092e-05, + "loss": 0.097, "step": 45770 }, { "epoch": 2.14, - "learning_rate": 1.580188458112606e-05, - "loss": 0.0369, + "learning_rate": 2.5808434618445072e-05, + "loss": 0.0592, "step": 45775 }, { "epoch": 2.14, - "learning_rate": 1.580141577985092e-05, - "loss": 0.045, + "learning_rate": 2.580796654860905e-05, + "loss": 0.0329, "step": 45780 }, { "epoch": 2.14, - "learning_rate": 1.5800946978575784e-05, - "loss": 0.04, + "learning_rate": 2.5807498478773035e-05, + "loss": 0.0813, "step": 45785 }, { "epoch": 2.14, - "learning_rate": 1.5800478177300644e-05, - "loss": 0.0845, + "learning_rate": 2.5807030408937015e-05, + "loss": 0.1641, "step": 45790 }, { "epoch": 2.14, - "learning_rate": 1.5800009376025504e-05, - "loss": 0.126, + "learning_rate": 2.5806562339100995e-05, + "loss": 0.0567, "step": 45795 }, { "epoch": 2.14, - "learning_rate": 1.5799540574750364e-05, - "loss": 0.1245, + "learning_rate": 2.5806094269264974e-05, + "loss": 0.0832, "step": 45800 }, { "epoch": 2.14, - "learning_rate": 1.5799071773475224e-05, - "loss": 0.2071, + "learning_rate": 2.5805626199428958e-05, + "loss": 0.1632, "step": 45805 }, { "epoch": 2.14, - "learning_rate": 1.5798602972200087e-05, - "loss": 0.2901, + "learning_rate": 2.5805158129592937e-05, + "loss": 0.2241, "step": 45810 }, { "epoch": 2.14, - "learning_rate": 1.5798134170924947e-05, - "loss": 0.3112, + "learning_rate": 2.5804690059756917e-05, + "loss": 0.3609, "step": 45815 }, { "epoch": 2.14, - "learning_rate": 1.5797665369649807e-05, - "loss": 0.0668, + "learning_rate": 2.5804221989920897e-05, + "loss": 0.1113, "step": 45820 }, { "epoch": 2.14, - "learning_rate": 1.5797196568374667e-05, - "loss": 0.0743, + "learning_rate": 2.5803753920084877e-05, + "loss": 0.054, "step": 45825 }, { "epoch": 2.14, - "learning_rate": 1.5796727767099527e-05, - "loss": 0.0486, + "learning_rate": 2.5803285850248857e-05, + "loss": 0.0465, "step": 45830 }, { "epoch": 2.14, - "learning_rate": 1.579625896582439e-05, - "loss": 0.0764, + "learning_rate": 2.5802817780412836e-05, + "loss": 0.0529, "step": 45835 }, { "epoch": 2.14, - "learning_rate": 1.579579016454925e-05, - "loss": 0.1223, + "learning_rate": 2.580234971057682e-05, + "loss": 0.1233, "step": 45840 }, { "epoch": 2.14, - "learning_rate": 1.579532136327411e-05, - "loss": 0.1544, + "learning_rate": 2.58018816407408e-05, + "loss": 0.2243, "step": 45845 }, { "epoch": 2.14, - "learning_rate": 1.579485256199897e-05, - "loss": 0.0958, + "learning_rate": 2.580141357090478e-05, + "loss": 0.2473, "step": 45850 }, { "epoch": 2.14, - "learning_rate": 1.579438376072383e-05, - "loss": 0.2034, + "learning_rate": 2.580094550106876e-05, + "loss": 0.2115, "step": 45855 }, { "epoch": 2.14, - "learning_rate": 1.579391495944869e-05, - "loss": 0.2167, + "learning_rate": 2.5800477431232742e-05, + "loss": 0.1574, "step": 45860 }, { "epoch": 2.14, - "learning_rate": 1.579344615817355e-05, - "loss": 0.6983, + "learning_rate": 2.5800009361396722e-05, + "loss": 0.273, "step": 45865 }, { "epoch": 2.14, - "learning_rate": 1.5792977356898413e-05, - "loss": 0.0472, + "learning_rate": 2.5799541291560702e-05, + "loss": 0.0301, "step": 45870 }, { "epoch": 2.14, - "learning_rate": 1.5792508555623273e-05, - "loss": 0.0692, + "learning_rate": 2.5799073221724682e-05, + "loss": 0.0551, "step": 45875 }, { "epoch": 2.14, - "learning_rate": 1.5792039754348133e-05, - "loss": 0.0474, + "learning_rate": 2.5798605151888665e-05, + "loss": 0.0194, "step": 45880 }, { "epoch": 2.14, - "learning_rate": 1.5791570953072993e-05, - "loss": 0.0648, + "learning_rate": 2.579813708205264e-05, + "loss": 0.1256, "step": 45885 }, { "epoch": 2.14, - "learning_rate": 1.5791102151797856e-05, - "loss": 0.0829, + "learning_rate": 2.579766901221662e-05, + "loss": 0.1096, "step": 45890 }, { "epoch": 2.14, - "learning_rate": 1.5790633350522716e-05, - "loss": 0.0981, + "learning_rate": 2.5797200942380604e-05, + "loss": 0.1196, "step": 45895 }, { "epoch": 2.14, - "learning_rate": 1.5790164549247576e-05, - "loss": 0.1775, + "learning_rate": 2.5796732872544584e-05, + "loss": 0.1433, "step": 45900 }, { "epoch": 2.14, - "learning_rate": 1.5789695747972436e-05, - "loss": 0.2469, + "learning_rate": 2.5796264802708564e-05, + "loss": 0.1346, "step": 45905 }, { "epoch": 2.14, - "learning_rate": 1.5789226946697296e-05, - "loss": 0.2104, + "learning_rate": 2.5795796732872544e-05, + "loss": 0.2153, "step": 45910 }, { "epoch": 2.14, - "learning_rate": 1.5788758145422156e-05, - "loss": 0.4498, + "learning_rate": 2.5795328663036527e-05, + "loss": 0.2737, "step": 45915 }, { "epoch": 2.14, - "learning_rate": 1.5788289344147016e-05, - "loss": 0.0632, + "learning_rate": 2.5794860593200507e-05, + "loss": 0.0833, "step": 45920 }, { "epoch": 2.14, - "learning_rate": 1.578782054287188e-05, - "loss": 0.1408, + "learning_rate": 2.5794392523364487e-05, + "loss": 0.0502, "step": 45925 }, { "epoch": 2.14, - "learning_rate": 1.578735174159674e-05, - "loss": 0.0793, + "learning_rate": 2.5793924453528467e-05, + "loss": 0.0853, "step": 45930 }, { "epoch": 2.14, - "learning_rate": 1.57868829403216e-05, - "loss": 0.0435, + "learning_rate": 2.579345638369245e-05, + "loss": 0.0362, "step": 45935 }, { "epoch": 2.14, - "learning_rate": 1.578641413904646e-05, - "loss": 0.1076, + "learning_rate": 2.579298831385643e-05, + "loss": 0.0989, "step": 45940 }, { "epoch": 2.14, - "learning_rate": 1.578594533777132e-05, - "loss": 0.164, + "learning_rate": 2.579252024402041e-05, + "loss": 0.1137, "step": 45945 }, { "epoch": 2.14, - "learning_rate": 1.5785476536496182e-05, - "loss": 0.0789, + "learning_rate": 2.5792052174184386e-05, + "loss": 0.0982, "step": 45950 }, { "epoch": 2.14, - "learning_rate": 1.5785007735221042e-05, - "loss": 0.1608, + "learning_rate": 2.579158410434837e-05, + "loss": 0.2569, "step": 45955 }, { "epoch": 2.14, - "learning_rate": 1.5784538933945902e-05, - "loss": 0.2281, + "learning_rate": 2.579111603451235e-05, + "loss": 0.2473, "step": 45960 }, { "epoch": 2.14, - "learning_rate": 1.5784070132670762e-05, - "loss": 0.225, + "learning_rate": 2.579064796467633e-05, + "loss": 0.2652, "step": 45965 }, { "epoch": 2.15, - "learning_rate": 1.5783601331395625e-05, - "loss": 0.0594, + "learning_rate": 2.5790179894840312e-05, + "loss": 0.0475, "step": 45970 }, { "epoch": 2.15, - "learning_rate": 1.5783132530120485e-05, - "loss": 0.1095, + "learning_rate": 2.578971182500429e-05, + "loss": 0.0636, "step": 45975 }, { "epoch": 2.15, - "learning_rate": 1.5782663728845345e-05, - "loss": 0.0838, + "learning_rate": 2.578924375516827e-05, + "loss": 0.085, "step": 45980 }, { "epoch": 2.15, - "learning_rate": 1.5782194927570205e-05, - "loss": 0.0833, + "learning_rate": 2.578877568533225e-05, + "loss": 0.0648, "step": 45985 }, { "epoch": 2.15, - "learning_rate": 1.5781726126295065e-05, - "loss": 0.1167, + "learning_rate": 2.5788307615496235e-05, + "loss": 0.1436, "step": 45990 }, { "epoch": 2.15, - "learning_rate": 1.5781257325019925e-05, - "loss": 0.1286, + "learning_rate": 2.5787839545660214e-05, + "loss": 0.1249, "step": 45995 }, { "epoch": 2.15, - "learning_rate": 1.5780788523744785e-05, - "loss": 0.2014, + "learning_rate": 2.5787371475824194e-05, + "loss": 0.2969, "step": 46000 }, { "epoch": 2.15, - "learning_rate": 1.5780319722469645e-05, - "loss": 0.2011, + "learning_rate": 2.5786903405988174e-05, + "loss": 0.1492, "step": 46005 }, { "epoch": 2.15, - "learning_rate": 1.5779850921194505e-05, - "loss": 0.2846, + "learning_rate": 2.5786435336152154e-05, + "loss": 0.3294, "step": 46010 }, { "epoch": 2.15, - "learning_rate": 1.5779382119919368e-05, - "loss": 0.337, + "learning_rate": 2.5785967266316134e-05, + "loss": 0.3349, "step": 46015 }, { "epoch": 2.15, - "learning_rate": 1.5778913318644228e-05, - "loss": 0.0337, + "learning_rate": 2.5785499196480113e-05, + "loss": 0.0752, "step": 46020 }, { "epoch": 2.15, - "learning_rate": 1.5778444517369088e-05, - "loss": 0.0547, + "learning_rate": 2.5785031126644097e-05, + "loss": 0.023, "step": 46025 }, { "epoch": 2.15, - "learning_rate": 1.577797571609395e-05, - "loss": 0.0493, + "learning_rate": 2.5784563056808076e-05, + "loss": 0.0877, "step": 46030 }, { "epoch": 2.15, - "learning_rate": 1.577750691481881e-05, - "loss": 0.1682, + "learning_rate": 2.5784094986972056e-05, + "loss": 0.0426, "step": 46035 }, { "epoch": 2.15, - "learning_rate": 1.577703811354367e-05, - "loss": 0.1686, + "learning_rate": 2.5783626917136036e-05, + "loss": 0.0787, "step": 46040 }, { "epoch": 2.15, - "learning_rate": 1.577656931226853e-05, - "loss": 0.1238, + "learning_rate": 2.578315884730002e-05, + "loss": 0.1063, "step": 46045 }, { "epoch": 2.15, - "learning_rate": 1.577610051099339e-05, - "loss": 0.167, + "learning_rate": 2.5782690777464e-05, + "loss": 0.1187, "step": 46050 }, { "epoch": 2.15, - "learning_rate": 1.577563170971825e-05, - "loss": 0.1678, + "learning_rate": 2.578222270762798e-05, + "loss": 0.2699, "step": 46055 }, { "epoch": 2.15, - "learning_rate": 1.577516290844311e-05, - "loss": 0.2137, + "learning_rate": 2.578175463779196e-05, + "loss": 0.3069, "step": 46060 }, { "epoch": 2.15, - "learning_rate": 1.5774694107167974e-05, - "loss": 0.2462, + "learning_rate": 2.5781286567955942e-05, + "loss": 0.2714, "step": 46065 }, { "epoch": 2.15, - "learning_rate": 1.5774225305892834e-05, - "loss": 0.049, + "learning_rate": 2.5780818498119922e-05, + "loss": 0.0248, "step": 46070 }, { "epoch": 2.15, - "learning_rate": 1.5773756504617694e-05, - "loss": 0.0538, + "learning_rate": 2.5780350428283898e-05, + "loss": 0.0766, "step": 46075 }, { "epoch": 2.15, - "learning_rate": 1.5773287703342554e-05, - "loss": 0.1026, + "learning_rate": 2.577988235844788e-05, + "loss": 0.046, "step": 46080 }, { "epoch": 2.15, - "learning_rate": 1.5772818902067414e-05, - "loss": 0.0767, + "learning_rate": 2.577941428861186e-05, + "loss": 0.0724, "step": 46085 }, { "epoch": 2.15, - "learning_rate": 1.5772350100792274e-05, - "loss": 0.0594, + "learning_rate": 2.577894621877584e-05, + "loss": 0.0817, "step": 46090 }, { "epoch": 2.15, - "learning_rate": 1.5771881299517137e-05, - "loss": 0.1368, + "learning_rate": 2.577847814893982e-05, + "loss": 0.0572, "step": 46095 }, { "epoch": 2.15, - "learning_rate": 1.5771412498241997e-05, - "loss": 0.1787, + "learning_rate": 2.5778010079103804e-05, + "loss": 0.1032, "step": 46100 }, { "epoch": 2.15, - "learning_rate": 1.5770943696966857e-05, - "loss": 0.2133, + "learning_rate": 2.5777542009267784e-05, + "loss": 0.2213, "step": 46105 }, { "epoch": 2.15, - "learning_rate": 1.577047489569172e-05, - "loss": 0.1721, + "learning_rate": 2.5777073939431764e-05, + "loss": 0.2276, "step": 46110 }, { "epoch": 2.15, - "learning_rate": 1.577000609441658e-05, - "loss": 0.2872, + "learning_rate": 2.5776605869595744e-05, + "loss": 0.2599, "step": 46115 }, { "epoch": 2.15, - "learning_rate": 1.576953729314144e-05, - "loss": 0.061, + "learning_rate": 2.5776137799759727e-05, + "loss": 0.056, "step": 46120 }, { "epoch": 2.15, - "learning_rate": 1.57690684918663e-05, - "loss": 0.0356, + "learning_rate": 2.5775669729923707e-05, + "loss": 0.0643, "step": 46125 }, { "epoch": 2.15, - "learning_rate": 1.576859969059116e-05, - "loss": 0.0851, + "learning_rate": 2.5775201660087686e-05, + "loss": 0.1097, "step": 46130 }, { "epoch": 2.15, - "learning_rate": 1.576813088931602e-05, - "loss": 0.0768, + "learning_rate": 2.5774733590251666e-05, + "loss": 0.0898, "step": 46135 }, { "epoch": 2.15, - "learning_rate": 1.576766208804088e-05, - "loss": 0.0727, + "learning_rate": 2.5774265520415646e-05, + "loss": 0.0432, "step": 46140 }, { "epoch": 2.15, - "learning_rate": 1.576719328676574e-05, - "loss": 0.1419, + "learning_rate": 2.5773797450579626e-05, + "loss": 0.1249, "step": 46145 }, { "epoch": 2.15, - "learning_rate": 1.57667244854906e-05, - "loss": 0.0965, + "learning_rate": 2.5773329380743606e-05, + "loss": 0.1728, "step": 46150 }, { "epoch": 2.15, - "learning_rate": 1.576625568421546e-05, - "loss": 0.2987, + "learning_rate": 2.577286131090759e-05, + "loss": 0.1268, "step": 46155 }, { "epoch": 2.15, - "learning_rate": 1.5765786882940323e-05, - "loss": 0.2558, + "learning_rate": 2.577239324107157e-05, + "loss": 0.1939, "step": 46160 }, { "epoch": 2.15, - "learning_rate": 1.5765318081665183e-05, - "loss": 0.3177, + "learning_rate": 2.577192517123555e-05, + "loss": 0.2519, "step": 46165 }, { "epoch": 2.15, - "learning_rate": 1.5764849280390043e-05, - "loss": 0.0826, + "learning_rate": 2.5771457101399528e-05, + "loss": 0.4268, "step": 46170 }, { "epoch": 2.15, - "learning_rate": 1.5764380479114906e-05, - "loss": 0.0487, + "learning_rate": 2.577098903156351e-05, + "loss": 0.0604, "step": 46175 }, { "epoch": 2.15, - "learning_rate": 1.5763911677839766e-05, - "loss": 0.0609, + "learning_rate": 2.577052096172749e-05, + "loss": 0.0232, "step": 46180 }, { "epoch": 2.16, - "learning_rate": 1.5763442876564626e-05, - "loss": 0.0953, + "learning_rate": 2.577005289189147e-05, + "loss": 0.0735, "step": 46185 }, { "epoch": 2.16, - "learning_rate": 1.5762974075289486e-05, - "loss": 0.1276, + "learning_rate": 2.576958482205545e-05, + "loss": 0.0903, "step": 46190 }, { "epoch": 2.16, - "learning_rate": 1.5762505274014346e-05, - "loss": 0.1125, + "learning_rate": 2.5769116752219434e-05, + "loss": 0.0929, "step": 46195 }, { "epoch": 2.16, - "learning_rate": 1.5762036472739206e-05, - "loss": 0.19, + "learning_rate": 2.576864868238341e-05, + "loss": 0.1613, "step": 46200 }, { "epoch": 2.16, - "learning_rate": 1.576156767146407e-05, - "loss": 0.2064, + "learning_rate": 2.576818061254739e-05, + "loss": 0.145, "step": 46205 }, { "epoch": 2.16, - "learning_rate": 1.576109887018893e-05, - "loss": 0.1666, + "learning_rate": 2.5767712542711374e-05, + "loss": 0.3726, "step": 46210 }, { "epoch": 2.16, - "learning_rate": 1.576063006891379e-05, - "loss": 0.1832, + "learning_rate": 2.5767244472875353e-05, + "loss": 0.3217, "step": 46215 }, { "epoch": 2.16, - "learning_rate": 1.576016126763865e-05, - "loss": 0.0941, + "learning_rate": 2.5766776403039333e-05, + "loss": 0.0123, "step": 46220 }, { "epoch": 2.16, - "learning_rate": 1.575969246636351e-05, - "loss": 0.0844, + "learning_rate": 2.5766308333203313e-05, + "loss": 0.0311, "step": 46225 }, { "epoch": 2.16, - "learning_rate": 1.575922366508837e-05, - "loss": 0.0803, + "learning_rate": 2.5765840263367296e-05, + "loss": 0.0328, "step": 46230 }, { "epoch": 2.16, - "learning_rate": 1.575875486381323e-05, - "loss": 0.0812, + "learning_rate": 2.5765372193531276e-05, + "loss": 0.0435, "step": 46235 }, { "epoch": 2.16, - "learning_rate": 1.5758286062538092e-05, - "loss": 0.0936, + "learning_rate": 2.5764904123695256e-05, + "loss": 0.1408, "step": 46240 }, { "epoch": 2.16, - "learning_rate": 1.5757817261262952e-05, - "loss": 0.112, + "learning_rate": 2.5764436053859236e-05, + "loss": 0.1077, "step": 46245 }, { "epoch": 2.16, - "learning_rate": 1.5757348459987812e-05, - "loss": 0.1694, + "learning_rate": 2.576396798402322e-05, + "loss": 0.0843, "step": 46250 }, { "epoch": 2.16, - "learning_rate": 1.5756879658712675e-05, - "loss": 0.228, + "learning_rate": 2.57634999141872e-05, + "loss": 0.1736, "step": 46255 }, { "epoch": 2.16, - "learning_rate": 1.5756410857437535e-05, - "loss": 0.2182, + "learning_rate": 2.576303184435118e-05, + "loss": 0.2352, "step": 46260 }, { "epoch": 2.16, - "learning_rate": 1.5755942056162395e-05, - "loss": 0.2981, + "learning_rate": 2.576256377451516e-05, + "loss": 0.246, "step": 46265 }, { "epoch": 2.16, - "learning_rate": 1.5755473254887255e-05, - "loss": 0.0266, + "learning_rate": 2.5762095704679138e-05, + "loss": 0.0334, "step": 46270 }, { "epoch": 2.16, - "learning_rate": 1.5755004453612115e-05, - "loss": 0.0374, + "learning_rate": 2.5761627634843118e-05, + "loss": 0.0407, "step": 46275 }, { "epoch": 2.16, - "learning_rate": 1.5754535652336975e-05, - "loss": 0.0811, + "learning_rate": 2.5761159565007098e-05, + "loss": 0.0385, "step": 46280 }, { "epoch": 2.16, - "learning_rate": 1.5754066851061835e-05, - "loss": 0.0798, + "learning_rate": 2.576069149517108e-05, + "loss": 0.0832, "step": 46285 }, { "epoch": 2.16, - "learning_rate": 1.5753598049786695e-05, - "loss": 0.0449, + "learning_rate": 2.576022342533506e-05, + "loss": 0.0967, "step": 46290 }, { "epoch": 2.16, - "learning_rate": 1.5753129248511558e-05, - "loss": 0.1425, + "learning_rate": 2.575975535549904e-05, + "loss": 0.1262, "step": 46295 }, { "epoch": 2.16, - "learning_rate": 1.5752660447236418e-05, - "loss": 0.1629, + "learning_rate": 2.575928728566302e-05, + "loss": 0.0887, "step": 46300 }, { "epoch": 2.16, - "learning_rate": 1.5752191645961278e-05, - "loss": 0.2877, + "learning_rate": 2.5758819215827004e-05, + "loss": 0.1202, "step": 46305 }, { "epoch": 2.16, - "learning_rate": 1.5751722844686138e-05, - "loss": 0.3094, + "learning_rate": 2.5758351145990984e-05, + "loss": 0.3248, "step": 46310 }, { "epoch": 2.16, - "learning_rate": 1.5751254043410998e-05, - "loss": 0.2833, + "learning_rate": 2.5757883076154963e-05, + "loss": 0.1538, "step": 46315 }, { "epoch": 2.16, - "learning_rate": 1.575078524213586e-05, - "loss": 0.0353, + "learning_rate": 2.5757415006318947e-05, + "loss": 0.0732, "step": 46320 }, { "epoch": 2.16, - "learning_rate": 1.575031644086072e-05, - "loss": 0.0478, + "learning_rate": 2.5756946936482923e-05, + "loss": 0.0257, "step": 46325 }, { "epoch": 2.16, - "learning_rate": 1.574984763958558e-05, - "loss": 0.0581, + "learning_rate": 2.5756478866646903e-05, + "loss": 0.0388, "step": 46330 }, { "epoch": 2.16, - "learning_rate": 1.574937883831044e-05, - "loss": 0.0356, + "learning_rate": 2.5756010796810883e-05, + "loss": 0.0968, "step": 46335 }, { "epoch": 2.16, - "learning_rate": 1.57489100370353e-05, - "loss": 0.125, + "learning_rate": 2.5755542726974866e-05, + "loss": 0.0325, "step": 46340 }, { "epoch": 2.16, - "learning_rate": 1.5748441235760164e-05, - "loss": 0.1341, + "learning_rate": 2.5755074657138846e-05, + "loss": 0.0649, "step": 46345 }, { "epoch": 2.16, - "learning_rate": 1.5747972434485024e-05, - "loss": 0.1914, + "learning_rate": 2.5754606587302825e-05, + "loss": 0.1796, "step": 46350 }, { "epoch": 2.16, - "learning_rate": 1.5747503633209884e-05, - "loss": 0.2266, + "learning_rate": 2.5754138517466805e-05, + "loss": 0.1551, "step": 46355 }, { "epoch": 2.16, - "learning_rate": 1.5747034831934744e-05, - "loss": 0.2149, + "learning_rate": 2.575367044763079e-05, + "loss": 0.3733, "step": 46360 }, { "epoch": 2.16, - "learning_rate": 1.5746566030659604e-05, - "loss": 0.1703, + "learning_rate": 2.5753202377794768e-05, + "loss": 0.3383, "step": 46365 }, { "epoch": 2.16, - "learning_rate": 1.5746097229384464e-05, - "loss": 0.0314, + "learning_rate": 2.5752734307958748e-05, + "loss": 0.045, "step": 46370 }, { "epoch": 2.16, - "learning_rate": 1.5745628428109324e-05, - "loss": 0.0379, + "learning_rate": 2.5752266238122728e-05, + "loss": 0.0682, "step": 46375 }, { "epoch": 2.16, - "learning_rate": 1.5745159626834187e-05, - "loss": 0.0915, + "learning_rate": 2.575179816828671e-05, + "loss": 0.0732, "step": 46380 }, { "epoch": 2.16, - "learning_rate": 1.5744690825559047e-05, - "loss": 0.0746, + "learning_rate": 2.575133009845069e-05, + "loss": 0.0674, "step": 46385 }, { "epoch": 2.16, - "learning_rate": 1.5744222024283907e-05, - "loss": 0.12, + "learning_rate": 2.5750862028614667e-05, + "loss": 0.0772, "step": 46390 }, { "epoch": 2.16, - "learning_rate": 1.5743753223008767e-05, - "loss": 0.2889, + "learning_rate": 2.575039395877865e-05, + "loss": 0.0877, "step": 46395 }, { "epoch": 2.17, - "learning_rate": 1.574328442173363e-05, - "loss": 0.0758, + "learning_rate": 2.574992588894263e-05, + "loss": 0.1246, "step": 46400 }, { "epoch": 2.17, - "learning_rate": 1.574281562045849e-05, - "loss": 0.1937, + "learning_rate": 2.574945781910661e-05, + "loss": 0.1309, "step": 46405 }, { "epoch": 2.17, - "learning_rate": 1.574234681918335e-05, - "loss": 0.2275, + "learning_rate": 2.574898974927059e-05, + "loss": 0.1698, "step": 46410 }, { "epoch": 2.17, - "learning_rate": 1.574187801790821e-05, - "loss": 0.3093, + "learning_rate": 2.5748521679434573e-05, + "loss": 0.2194, "step": 46415 }, { "epoch": 2.17, - "learning_rate": 1.574140921663307e-05, - "loss": 0.0719, + "learning_rate": 2.5748053609598553e-05, + "loss": 0.038, "step": 46420 }, { "epoch": 2.17, - "learning_rate": 1.574094041535793e-05, - "loss": 0.0674, + "learning_rate": 2.5747585539762533e-05, + "loss": 0.0315, "step": 46425 }, { "epoch": 2.17, - "learning_rate": 1.574047161408279e-05, - "loss": 0.0495, + "learning_rate": 2.5747117469926513e-05, + "loss": 0.0625, "step": 46430 }, { "epoch": 2.17, - "learning_rate": 1.5740002812807653e-05, - "loss": 0.0669, + "learning_rate": 2.5746649400090496e-05, + "loss": 0.0699, "step": 46435 }, { "epoch": 2.17, - "learning_rate": 1.5739534011532513e-05, - "loss": 0.0784, + "learning_rate": 2.5746181330254476e-05, + "loss": 0.1504, "step": 46440 }, { "epoch": 2.17, - "learning_rate": 1.5739065210257373e-05, - "loss": 0.1035, + "learning_rate": 2.5745713260418456e-05, + "loss": 0.0954, "step": 46445 }, { "epoch": 2.17, - "learning_rate": 1.5738596408982233e-05, - "loss": 0.1268, + "learning_rate": 2.574524519058244e-05, + "loss": 0.1145, "step": 46450 }, { "epoch": 2.17, - "learning_rate": 1.5738127607707093e-05, - "loss": 0.1276, + "learning_rate": 2.5744777120746415e-05, + "loss": 0.1848, "step": 46455 }, { "epoch": 2.17, - "learning_rate": 1.5737658806431956e-05, - "loss": 0.2203, + "learning_rate": 2.5744309050910395e-05, + "loss": 0.169, "step": 46460 }, { "epoch": 2.17, - "learning_rate": 1.5737190005156816e-05, - "loss": 0.2587, + "learning_rate": 2.5743840981074375e-05, + "loss": 0.2501, "step": 46465 }, { "epoch": 2.17, - "learning_rate": 1.5736721203881676e-05, - "loss": 0.0469, + "learning_rate": 2.5743372911238358e-05, + "loss": 0.0285, "step": 46470 }, { "epoch": 2.17, - "learning_rate": 1.5736252402606536e-05, - "loss": 0.0494, + "learning_rate": 2.5742904841402338e-05, + "loss": 0.0967, "step": 46475 }, { "epoch": 2.17, - "learning_rate": 1.5735783601331396e-05, - "loss": 0.0247, + "learning_rate": 2.5742436771566318e-05, + "loss": 0.0184, "step": 46480 }, { "epoch": 2.17, - "learning_rate": 1.573531480005626e-05, - "loss": 0.1164, + "learning_rate": 2.5741968701730297e-05, + "loss": 0.0831, "step": 46485 }, { "epoch": 2.17, - "learning_rate": 1.573484599878112e-05, - "loss": 0.0751, + "learning_rate": 2.574150063189428e-05, + "loss": 0.0888, "step": 46490 }, { "epoch": 2.17, - "learning_rate": 1.573437719750598e-05, - "loss": 0.1269, + "learning_rate": 2.574103256205826e-05, + "loss": 0.072, "step": 46495 }, { "epoch": 2.17, - "learning_rate": 1.573390839623084e-05, - "loss": 0.146, + "learning_rate": 2.574056449222224e-05, + "loss": 0.2972, "step": 46500 }, { "epoch": 2.17, - "learning_rate": 1.57334395949557e-05, - "loss": 0.1816, + "learning_rate": 2.5740096422386224e-05, + "loss": 0.136, "step": 46505 }, { "epoch": 2.17, - "learning_rate": 1.573297079368056e-05, - "loss": 0.2958, + "learning_rate": 2.5739628352550203e-05, + "loss": 0.2372, "step": 46510 }, { "epoch": 2.17, - "learning_rate": 1.573250199240542e-05, - "loss": 0.2065, + "learning_rate": 2.573916028271418e-05, + "loss": 0.2938, "step": 46515 }, { "epoch": 2.17, - "learning_rate": 1.573203319113028e-05, - "loss": 0.0629, + "learning_rate": 2.573869221287816e-05, + "loss": 0.1196, "step": 46520 }, { "epoch": 2.17, - "learning_rate": 1.5731564389855142e-05, - "loss": 0.0846, + "learning_rate": 2.5738224143042143e-05, + "loss": 0.0345, "step": 46525 }, { "epoch": 2.17, - "learning_rate": 1.5731095588580002e-05, - "loss": 0.0455, + "learning_rate": 2.5737756073206123e-05, + "loss": 0.0601, "step": 46530 }, { "epoch": 2.17, - "learning_rate": 1.5730626787304862e-05, - "loss": 0.0692, + "learning_rate": 2.5737288003370102e-05, + "loss": 0.0258, "step": 46535 }, { "epoch": 2.17, - "learning_rate": 1.5730157986029725e-05, - "loss": 0.1178, + "learning_rate": 2.5736819933534082e-05, + "loss": 0.1341, "step": 46540 }, { "epoch": 2.17, - "learning_rate": 1.5729689184754585e-05, - "loss": 0.1149, + "learning_rate": 2.5736351863698065e-05, + "loss": 0.1137, "step": 46545 }, { "epoch": 2.17, - "learning_rate": 1.5729220383479445e-05, - "loss": 0.1512, + "learning_rate": 2.5735883793862045e-05, + "loss": 0.1336, "step": 46550 }, { "epoch": 2.17, - "learning_rate": 1.5728751582204305e-05, - "loss": 0.1548, + "learning_rate": 2.5735415724026025e-05, + "loss": 0.2547, "step": 46555 }, { "epoch": 2.17, - "learning_rate": 1.5728282780929165e-05, - "loss": 0.177, + "learning_rate": 2.5734947654190005e-05, + "loss": 0.3075, "step": 46560 }, { "epoch": 2.17, - "learning_rate": 1.5727813979654025e-05, - "loss": 0.2508, + "learning_rate": 2.5734479584353988e-05, + "loss": 0.3183, "step": 46565 }, { "epoch": 2.17, - "learning_rate": 1.5727345178378885e-05, - "loss": 0.0904, + "learning_rate": 2.5734011514517968e-05, + "loss": 0.0344, "step": 46570 }, { "epoch": 2.17, - "learning_rate": 1.5726876377103748e-05, - "loss": 0.0596, + "learning_rate": 2.5733543444681948e-05, + "loss": 0.0464, "step": 46575 }, { "epoch": 2.17, - "learning_rate": 1.5726407575828608e-05, - "loss": 0.0812, + "learning_rate": 2.5733075374845928e-05, + "loss": 0.0964, "step": 46580 }, { "epoch": 2.17, - "learning_rate": 1.5725938774553468e-05, - "loss": 0.0603, + "learning_rate": 2.5732607305009907e-05, + "loss": 0.1135, "step": 46585 }, { "epoch": 2.17, - "learning_rate": 1.5725469973278328e-05, - "loss": 0.0903, + "learning_rate": 2.5732139235173887e-05, + "loss": 0.0418, "step": 46590 }, { "epoch": 2.17, - "learning_rate": 1.5725001172003188e-05, - "loss": 0.1102, + "learning_rate": 2.5731671165337867e-05, + "loss": 0.1447, "step": 46595 }, { "epoch": 2.17, - "learning_rate": 1.5724532370728048e-05, - "loss": 0.129, + "learning_rate": 2.573120309550185e-05, + "loss": 0.1324, "step": 46600 }, { "epoch": 2.17, - "learning_rate": 1.572406356945291e-05, - "loss": 0.2716, + "learning_rate": 2.573073502566583e-05, + "loss": 0.1428, "step": 46605 }, { "epoch": 2.17, - "learning_rate": 1.572359476817777e-05, - "loss": 0.2493, + "learning_rate": 2.573026695582981e-05, + "loss": 0.1874, "step": 46610 }, { "epoch": 2.18, - "learning_rate": 1.572312596690263e-05, - "loss": 0.2971, + "learning_rate": 2.572979888599379e-05, + "loss": 0.3622, "step": 46615 }, { "epoch": 2.18, - "learning_rate": 1.5722657165627494e-05, - "loss": 0.0593, + "learning_rate": 2.5729330816157773e-05, + "loss": 0.0439, "step": 46620 }, { "epoch": 2.18, - "learning_rate": 1.5722188364352354e-05, - "loss": 0.0314, + "learning_rate": 2.5728862746321753e-05, + "loss": 0.0388, "step": 46625 }, { "epoch": 2.18, - "learning_rate": 1.5721719563077214e-05, - "loss": 0.0768, + "learning_rate": 2.5728394676485733e-05, + "loss": 0.0778, "step": 46630 }, { "epoch": 2.18, - "learning_rate": 1.5721250761802074e-05, - "loss": 0.0952, + "learning_rate": 2.5727926606649716e-05, + "loss": 0.0736, "step": 46635 }, { "epoch": 2.18, - "learning_rate": 1.5720781960526934e-05, - "loss": 0.0829, + "learning_rate": 2.5727458536813696e-05, + "loss": 0.0724, "step": 46640 }, { "epoch": 2.18, - "learning_rate": 1.5720313159251794e-05, - "loss": 0.0766, + "learning_rate": 2.5726990466977672e-05, + "loss": 0.0939, "step": 46645 }, { "epoch": 2.18, - "learning_rate": 1.5719844357976654e-05, - "loss": 0.0711, + "learning_rate": 2.5726522397141652e-05, + "loss": 0.2064, "step": 46650 }, { "epoch": 2.18, - "learning_rate": 1.5719375556701514e-05, - "loss": 0.1738, + "learning_rate": 2.5726054327305635e-05, + "loss": 0.2022, "step": 46655 }, { "epoch": 2.18, - "learning_rate": 1.5718906755426374e-05, - "loss": 0.1849, + "learning_rate": 2.5725586257469615e-05, + "loss": 0.201, "step": 46660 }, { "epoch": 2.18, - "learning_rate": 1.5718437954151234e-05, - "loss": 0.362, + "learning_rate": 2.5725118187633595e-05, + "loss": 0.3758, "step": 46665 }, { "epoch": 2.18, - "learning_rate": 1.5717969152876097e-05, - "loss": 0.0848, + "learning_rate": 2.5724650117797574e-05, + "loss": 0.0136, "step": 46670 }, { "epoch": 2.18, - "learning_rate": 1.5717500351600957e-05, - "loss": 0.0446, + "learning_rate": 2.5724182047961558e-05, + "loss": 0.0379, "step": 46675 }, { "epoch": 2.18, - "learning_rate": 1.5717031550325817e-05, - "loss": 0.0797, + "learning_rate": 2.5723713978125537e-05, + "loss": 0.0547, "step": 46680 }, { "epoch": 2.18, - "learning_rate": 1.571656274905068e-05, - "loss": 0.107, + "learning_rate": 2.5723245908289517e-05, + "loss": 0.0536, "step": 46685 }, { "epoch": 2.18, - "learning_rate": 1.571609394777554e-05, - "loss": 0.1182, + "learning_rate": 2.57227778384535e-05, + "loss": 0.119, "step": 46690 }, { "epoch": 2.18, - "learning_rate": 1.57156251465004e-05, - "loss": 0.1912, + "learning_rate": 2.572230976861748e-05, + "loss": 0.1019, "step": 46695 }, { "epoch": 2.18, - "learning_rate": 1.571515634522526e-05, - "loss": 0.1293, + "learning_rate": 2.572184169878146e-05, + "loss": 0.23, "step": 46700 }, { "epoch": 2.18, - "learning_rate": 1.571468754395012e-05, - "loss": 0.1855, + "learning_rate": 2.5721373628945437e-05, + "loss": 0.2845, "step": 46705 }, { "epoch": 2.18, - "learning_rate": 1.571421874267498e-05, - "loss": 0.2209, + "learning_rate": 2.572090555910942e-05, + "loss": 0.3395, "step": 46710 }, { "epoch": 2.18, - "learning_rate": 1.5713749941399843e-05, - "loss": 0.4509, + "learning_rate": 2.57204374892734e-05, + "loss": 0.462, "step": 46715 }, { "epoch": 2.18, - "learning_rate": 1.5713281140124703e-05, - "loss": 0.0601, + "learning_rate": 2.571996941943738e-05, + "loss": 0.0359, "step": 46720 }, { "epoch": 2.18, - "learning_rate": 1.5712812338849563e-05, - "loss": 0.0555, + "learning_rate": 2.571950134960136e-05, + "loss": 0.0462, "step": 46725 }, { "epoch": 2.18, - "learning_rate": 1.5712343537574423e-05, - "loss": 0.0454, + "learning_rate": 2.5719033279765342e-05, + "loss": 0.0381, "step": 46730 }, { "epoch": 2.18, - "learning_rate": 1.5711874736299283e-05, - "loss": 0.0598, + "learning_rate": 2.5718565209929322e-05, + "loss": 0.0372, "step": 46735 }, { "epoch": 2.18, - "learning_rate": 1.5711405935024143e-05, - "loss": 0.0963, + "learning_rate": 2.5718097140093302e-05, + "loss": 0.0884, "step": 46740 }, { "epoch": 2.18, - "learning_rate": 1.5710937133749003e-05, - "loss": 0.1205, + "learning_rate": 2.5717629070257282e-05, + "loss": 0.1161, "step": 46745 }, { "epoch": 2.18, - "learning_rate": 1.5710468332473866e-05, - "loss": 0.1481, + "learning_rate": 2.5717161000421265e-05, + "loss": 0.1198, "step": 46750 }, { "epoch": 2.18, - "learning_rate": 1.5709999531198726e-05, - "loss": 0.2012, + "learning_rate": 2.5716692930585245e-05, + "loss": 0.1816, "step": 46755 }, { "epoch": 2.18, - "learning_rate": 1.5709530729923586e-05, - "loss": 0.2832, + "learning_rate": 2.5716224860749225e-05, + "loss": 0.2607, "step": 46760 }, { "epoch": 2.18, - "learning_rate": 1.570906192864845e-05, - "loss": 0.3036, + "learning_rate": 2.5715756790913208e-05, + "loss": 0.2049, "step": 46765 }, { "epoch": 2.18, - "learning_rate": 1.570859312737331e-05, - "loss": 0.0773, + "learning_rate": 2.5715288721077184e-05, + "loss": 0.0805, "step": 46770 }, { "epoch": 2.18, - "learning_rate": 1.570812432609817e-05, - "loss": 0.077, + "learning_rate": 2.5714820651241164e-05, + "loss": 0.0338, "step": 46775 }, { "epoch": 2.18, - "learning_rate": 1.570765552482303e-05, - "loss": 0.054, + "learning_rate": 2.5714352581405144e-05, + "loss": 0.0759, "step": 46780 }, { "epoch": 2.18, - "learning_rate": 1.570718672354789e-05, - "loss": 0.112, + "learning_rate": 2.5713884511569127e-05, + "loss": 0.0604, "step": 46785 }, { "epoch": 2.18, - "learning_rate": 1.570671792227275e-05, - "loss": 0.0699, + "learning_rate": 2.5713416441733107e-05, + "loss": 0.1115, "step": 46790 }, { "epoch": 2.18, - "learning_rate": 1.570624912099761e-05, - "loss": 0.1254, + "learning_rate": 2.5712948371897087e-05, + "loss": 0.129, "step": 46795 }, { "epoch": 2.18, - "learning_rate": 1.570578031972247e-05, - "loss": 0.1808, + "learning_rate": 2.5712480302061067e-05, + "loss": 0.1771, "step": 46800 }, { "epoch": 2.18, - "learning_rate": 1.570531151844733e-05, - "loss": 0.1349, + "learning_rate": 2.571201223222505e-05, + "loss": 0.2118, "step": 46805 }, { "epoch": 2.18, - "learning_rate": 1.5704842717172192e-05, - "loss": 0.2154, + "learning_rate": 2.571154416238903e-05, + "loss": 0.3582, "step": 46810 }, { "epoch": 2.18, - "learning_rate": 1.5704373915897052e-05, - "loss": 0.223, + "learning_rate": 2.571107609255301e-05, + "loss": 0.1941, "step": 46815 }, { "epoch": 2.18, - "learning_rate": 1.5703905114621912e-05, - "loss": 0.0263, + "learning_rate": 2.5710608022716993e-05, + "loss": 0.0445, "step": 46820 }, { "epoch": 2.18, - "learning_rate": 1.5703436313346772e-05, - "loss": 0.0534, + "learning_rate": 2.5710139952880972e-05, + "loss": 0.0163, "step": 46825 }, { "epoch": 2.19, - "learning_rate": 1.5702967512071635e-05, - "loss": 0.0935, + "learning_rate": 2.5709671883044952e-05, + "loss": 0.082, "step": 46830 }, { "epoch": 2.19, - "learning_rate": 1.5702498710796495e-05, - "loss": 0.1136, + "learning_rate": 2.570920381320893e-05, + "loss": 0.0987, "step": 46835 }, { "epoch": 2.19, - "learning_rate": 1.5702029909521355e-05, - "loss": 0.1008, + "learning_rate": 2.5708735743372912e-05, + "loss": 0.0951, "step": 46840 }, { "epoch": 2.19, - "learning_rate": 1.5701561108246215e-05, - "loss": 0.2067, + "learning_rate": 2.5708267673536892e-05, + "loss": 0.1261, "step": 46845 }, { "epoch": 2.19, - "learning_rate": 1.5701092306971075e-05, - "loss": 0.1734, + "learning_rate": 2.570779960370087e-05, + "loss": 0.081, "step": 46850 }, { "epoch": 2.19, - "learning_rate": 1.570062350569594e-05, - "loss": 0.1801, + "learning_rate": 2.570733153386485e-05, + "loss": 0.1243, "step": 46855 }, { "epoch": 2.19, - "learning_rate": 1.57001547044208e-05, - "loss": 0.269, + "learning_rate": 2.5706863464028835e-05, + "loss": 0.2111, "step": 46860 }, { "epoch": 2.19, - "learning_rate": 1.569968590314566e-05, - "loss": 0.2084, + "learning_rate": 2.5706395394192814e-05, + "loss": 0.3578, "step": 46865 }, { "epoch": 2.19, - "learning_rate": 1.5699217101870518e-05, - "loss": 0.0311, + "learning_rate": 2.5705927324356794e-05, + "loss": 0.0699, "step": 46870 }, { "epoch": 2.19, - "learning_rate": 1.5698748300595378e-05, - "loss": 0.0641, + "learning_rate": 2.5705459254520777e-05, + "loss": 0.0366, "step": 46875 }, { "epoch": 2.19, - "learning_rate": 1.5698279499320238e-05, - "loss": 0.0508, + "learning_rate": 2.5704991184684757e-05, + "loss": 0.0836, "step": 46880 }, { "epoch": 2.19, - "learning_rate": 1.5697810698045098e-05, - "loss": 0.0901, + "learning_rate": 2.5704523114848737e-05, + "loss": 0.1054, "step": 46885 }, { "epoch": 2.19, - "learning_rate": 1.569734189676996e-05, - "loss": 0.1451, + "learning_rate": 2.5704055045012717e-05, + "loss": 0.1063, "step": 46890 }, { "epoch": 2.19, - "learning_rate": 1.569687309549482e-05, - "loss": 0.0871, + "learning_rate": 2.5703586975176697e-05, + "loss": 0.1112, "step": 46895 }, { "epoch": 2.19, - "learning_rate": 1.569640429421968e-05, - "loss": 0.1306, + "learning_rate": 2.5703118905340677e-05, + "loss": 0.1248, "step": 46900 }, { "epoch": 2.19, - "learning_rate": 1.569593549294454e-05, - "loss": 0.1422, + "learning_rate": 2.5702650835504656e-05, + "loss": 0.1293, "step": 46905 }, { "epoch": 2.19, - "learning_rate": 1.5695466691669405e-05, - "loss": 0.2327, + "learning_rate": 2.5702182765668636e-05, + "loss": 0.2318, "step": 46910 }, { "epoch": 2.19, - "learning_rate": 1.5694997890394264e-05, - "loss": 0.2564, + "learning_rate": 2.570171469583262e-05, + "loss": 0.3429, "step": 46915 }, { "epoch": 2.19, - "learning_rate": 1.5694529089119124e-05, - "loss": 0.0679, + "learning_rate": 2.57012466259966e-05, + "loss": 0.0605, "step": 46920 }, { "epoch": 2.19, - "learning_rate": 1.5694060287843984e-05, - "loss": 0.0566, + "learning_rate": 2.570077855616058e-05, + "loss": 0.0198, "step": 46925 }, { "epoch": 2.19, - "learning_rate": 1.5693591486568844e-05, - "loss": 0.0412, + "learning_rate": 2.5700310486324562e-05, + "loss": 0.0185, "step": 46930 }, { "epoch": 2.19, - "learning_rate": 1.5693122685293704e-05, - "loss": 0.0899, + "learning_rate": 2.5699842416488542e-05, + "loss": 0.0859, "step": 46935 }, { "epoch": 2.19, - "learning_rate": 1.5692653884018564e-05, - "loss": 0.0676, + "learning_rate": 2.5699374346652522e-05, + "loss": 0.0596, "step": 46940 }, { "epoch": 2.19, - "learning_rate": 1.5692185082743427e-05, - "loss": 0.0635, + "learning_rate": 2.56989062768165e-05, + "loss": 0.1072, "step": 46945 }, { "epoch": 2.19, - "learning_rate": 1.5691716281468287e-05, - "loss": 0.1505, + "learning_rate": 2.5698438206980485e-05, + "loss": 0.2226, "step": 46950 }, { "epoch": 2.19, - "learning_rate": 1.5691247480193147e-05, - "loss": 0.138, + "learning_rate": 2.5697970137144465e-05, + "loss": 0.0692, "step": 46955 }, { "epoch": 2.19, - "learning_rate": 1.5690778678918007e-05, - "loss": 0.3916, + "learning_rate": 2.569750206730844e-05, + "loss": 0.2887, "step": 46960 }, { "epoch": 2.19, - "learning_rate": 1.5690309877642867e-05, - "loss": 0.3525, + "learning_rate": 2.569703399747242e-05, + "loss": 0.2476, "step": 46965 }, { "epoch": 2.19, - "learning_rate": 1.568984107636773e-05, - "loss": 0.0513, + "learning_rate": 2.5696565927636404e-05, + "loss": 0.0231, "step": 46970 }, { "epoch": 2.19, - "learning_rate": 1.568937227509259e-05, - "loss": 0.0724, + "learning_rate": 2.5696097857800384e-05, + "loss": 0.0317, "step": 46975 }, { "epoch": 2.19, - "learning_rate": 1.568890347381745e-05, - "loss": 0.0959, + "learning_rate": 2.5695629787964364e-05, + "loss": 0.1597, "step": 46980 }, { "epoch": 2.19, - "learning_rate": 1.568843467254231e-05, - "loss": 0.1125, + "learning_rate": 2.5695161718128344e-05, + "loss": 0.0719, "step": 46985 }, { "epoch": 2.19, - "learning_rate": 1.568796587126717e-05, - "loss": 0.081, + "learning_rate": 2.5694693648292327e-05, + "loss": 0.1164, "step": 46990 }, { "epoch": 2.19, - "learning_rate": 1.5687497069992034e-05, - "loss": 0.2037, + "learning_rate": 2.5694225578456307e-05, + "loss": 0.0615, "step": 46995 }, { "epoch": 2.19, - "learning_rate": 1.5687028268716893e-05, - "loss": 0.1913, + "learning_rate": 2.5693757508620286e-05, + "loss": 0.1833, "step": 47000 }, { "epoch": 2.19, - "learning_rate": 1.5686559467441753e-05, - "loss": 0.126, + "learning_rate": 2.569328943878427e-05, + "loss": 0.2171, "step": 47005 }, { "epoch": 2.19, - "learning_rate": 1.5686090666166613e-05, - "loss": 0.2837, + "learning_rate": 2.569282136894825e-05, + "loss": 0.1349, "step": 47010 }, { "epoch": 2.19, - "learning_rate": 1.5685621864891473e-05, - "loss": 0.2889, + "learning_rate": 2.569235329911223e-05, + "loss": 0.3012, "step": 47015 }, { "epoch": 2.19, - "learning_rate": 1.5685153063616333e-05, - "loss": 0.0453, + "learning_rate": 2.569188522927621e-05, + "loss": 0.0959, "step": 47020 }, { "epoch": 2.19, - "learning_rate": 1.5684684262341193e-05, - "loss": 0.0343, + "learning_rate": 2.569141715944019e-05, + "loss": 0.0605, "step": 47025 }, { "epoch": 2.19, - "learning_rate": 1.5684215461066053e-05, - "loss": 0.0998, + "learning_rate": 2.569094908960417e-05, + "loss": 0.0373, "step": 47030 }, { "epoch": 2.19, - "learning_rate": 1.5683746659790916e-05, - "loss": 0.0703, + "learning_rate": 2.569048101976815e-05, + "loss": 0.048, "step": 47035 }, { "epoch": 2.19, - "learning_rate": 1.5683277858515776e-05, - "loss": 0.0683, + "learning_rate": 2.569001294993213e-05, + "loss": 0.1739, "step": 47040 }, { "epoch": 2.2, - "learning_rate": 1.5682809057240636e-05, - "loss": 0.1142, + "learning_rate": 2.568954488009611e-05, + "loss": 0.0755, "step": 47045 }, { "epoch": 2.2, - "learning_rate": 1.56823402559655e-05, - "loss": 0.1156, + "learning_rate": 2.568907681026009e-05, + "loss": 0.1085, "step": 47050 }, { "epoch": 2.2, - "learning_rate": 1.568187145469036e-05, - "loss": 0.0922, + "learning_rate": 2.568860874042407e-05, + "loss": 0.0949, "step": 47055 }, { "epoch": 2.2, - "learning_rate": 1.568140265341522e-05, - "loss": 0.1995, + "learning_rate": 2.5688140670588054e-05, + "loss": 0.2284, "step": 47060 }, { "epoch": 2.2, - "learning_rate": 1.568093385214008e-05, - "loss": 0.3706, + "learning_rate": 2.5687672600752034e-05, + "loss": 0.2739, "step": 47065 }, { "epoch": 2.2, - "learning_rate": 1.568046505086494e-05, - "loss": 0.0824, + "learning_rate": 2.5687204530916014e-05, + "loss": 0.0467, "step": 47070 }, { "epoch": 2.2, - "learning_rate": 1.56799962495898e-05, - "loss": 0.0288, + "learning_rate": 2.5686736461079994e-05, + "loss": 0.0681, "step": 47075 }, { "epoch": 2.2, - "learning_rate": 1.567952744831466e-05, - "loss": 0.0757, + "learning_rate": 2.5686268391243977e-05, + "loss": 0.0443, "step": 47080 }, { "epoch": 2.2, - "learning_rate": 1.5679058647039523e-05, - "loss": 0.1502, + "learning_rate": 2.5685800321407954e-05, + "loss": 0.0386, "step": 47085 }, { "epoch": 2.2, - "learning_rate": 1.5678589845764382e-05, - "loss": 0.1346, + "learning_rate": 2.5685332251571933e-05, + "loss": 0.0573, "step": 47090 }, { "epoch": 2.2, - "learning_rate": 1.5678121044489242e-05, - "loss": 0.2792, + "learning_rate": 2.5684864181735913e-05, + "loss": 0.0854, "step": 47095 }, { "epoch": 2.2, - "learning_rate": 1.5677652243214102e-05, - "loss": 0.1159, + "learning_rate": 2.5684396111899896e-05, + "loss": 0.1618, "step": 47100 }, { "epoch": 2.2, - "learning_rate": 1.5677183441938962e-05, - "loss": 0.0856, + "learning_rate": 2.5683928042063876e-05, + "loss": 0.2264, "step": 47105 }, { "epoch": 2.2, - "learning_rate": 1.5676714640663822e-05, - "loss": 0.2502, + "learning_rate": 2.5683459972227856e-05, + "loss": 0.2645, "step": 47110 }, { "epoch": 2.2, - "learning_rate": 1.5676245839388686e-05, - "loss": 0.3666, + "learning_rate": 2.568299190239184e-05, + "loss": 0.2872, "step": 47115 }, { "epoch": 2.2, - "learning_rate": 1.5675777038113545e-05, - "loss": 0.0493, + "learning_rate": 2.568252383255582e-05, + "loss": 0.0214, "step": 47120 }, { "epoch": 2.2, - "learning_rate": 1.5675308236838405e-05, - "loss": 0.0624, + "learning_rate": 2.56820557627198e-05, + "loss": 0.0342, "step": 47125 }, { "epoch": 2.2, - "learning_rate": 1.5674839435563265e-05, - "loss": 0.0632, + "learning_rate": 2.568158769288378e-05, + "loss": 0.1035, "step": 47130 }, { "epoch": 2.2, - "learning_rate": 1.567437063428813e-05, - "loss": 0.0216, + "learning_rate": 2.5681119623047762e-05, + "loss": 0.0843, "step": 47135 }, { "epoch": 2.2, - "learning_rate": 1.567390183301299e-05, - "loss": 0.0983, + "learning_rate": 2.568065155321174e-05, + "loss": 0.1107, "step": 47140 }, { "epoch": 2.2, - "learning_rate": 1.567343303173785e-05, - "loss": 0.1241, + "learning_rate": 2.568018348337572e-05, + "loss": 0.1401, "step": 47145 }, { "epoch": 2.2, - "learning_rate": 1.567296423046271e-05, - "loss": 0.1748, + "learning_rate": 2.5679715413539698e-05, + "loss": 0.1624, "step": 47150 }, { "epoch": 2.2, - "learning_rate": 1.567249542918757e-05, - "loss": 0.1428, + "learning_rate": 2.567924734370368e-05, + "loss": 0.1962, "step": 47155 }, { "epoch": 2.2, - "learning_rate": 1.5672026627912428e-05, - "loss": 0.1892, + "learning_rate": 2.567877927386766e-05, + "loss": 0.3318, "step": 47160 }, { "epoch": 2.2, - "learning_rate": 1.5671557826637288e-05, - "loss": 0.2356, + "learning_rate": 2.567831120403164e-05, + "loss": 0.1882, "step": 47165 }, { "epoch": 2.2, - "learning_rate": 1.5671089025362148e-05, - "loss": 0.0544, + "learning_rate": 2.567784313419562e-05, + "loss": 0.1179, "step": 47170 }, { "epoch": 2.2, - "learning_rate": 1.5670620224087008e-05, - "loss": 0.0387, + "learning_rate": 2.5677375064359604e-05, + "loss": 0.0591, "step": 47175 }, { "epoch": 2.2, - "learning_rate": 1.567015142281187e-05, - "loss": 0.0373, + "learning_rate": 2.5676906994523584e-05, + "loss": 0.0799, "step": 47180 }, { "epoch": 2.2, - "learning_rate": 1.566968262153673e-05, - "loss": 0.0462, + "learning_rate": 2.5676438924687563e-05, + "loss": 0.1135, "step": 47185 }, { "epoch": 2.2, - "learning_rate": 1.566921382026159e-05, - "loss": 0.0844, + "learning_rate": 2.5675970854851547e-05, + "loss": 0.0873, "step": 47190 }, { "epoch": 2.2, - "learning_rate": 1.5668745018986455e-05, - "loss": 0.1622, + "learning_rate": 2.5675502785015526e-05, + "loss": 0.0543, "step": 47195 }, { "epoch": 2.2, - "learning_rate": 1.5668276217711315e-05, - "loss": 0.1004, + "learning_rate": 2.5675034715179506e-05, + "loss": 0.1805, "step": 47200 }, { "epoch": 2.2, - "learning_rate": 1.5667807416436174e-05, - "loss": 0.1573, + "learning_rate": 2.5674566645343486e-05, + "loss": 0.0892, "step": 47205 }, { "epoch": 2.2, - "learning_rate": 1.5667338615161034e-05, - "loss": 0.235, + "learning_rate": 2.5674098575507466e-05, + "loss": 0.2918, "step": 47210 }, { "epoch": 2.2, - "learning_rate": 1.5666869813885894e-05, - "loss": 0.3367, + "learning_rate": 2.5673630505671446e-05, + "loss": 0.221, "step": 47215 }, { "epoch": 2.2, - "learning_rate": 1.5666401012610754e-05, - "loss": 0.0501, + "learning_rate": 2.5673162435835426e-05, + "loss": 0.1082, "step": 47220 }, { "epoch": 2.2, - "learning_rate": 1.5665932211335618e-05, - "loss": 0.0445, + "learning_rate": 2.5672694365999405e-05, + "loss": 0.0489, "step": 47225 }, { "epoch": 2.2, - "learning_rate": 1.5665463410060478e-05, - "loss": 0.0511, + "learning_rate": 2.567222629616339e-05, + "loss": 0.0402, "step": 47230 }, { "epoch": 2.2, - "learning_rate": 1.5664994608785337e-05, - "loss": 0.1131, + "learning_rate": 2.567175822632737e-05, + "loss": 0.048, "step": 47235 }, { "epoch": 2.2, - "learning_rate": 1.5664525807510197e-05, - "loss": 0.0836, + "learning_rate": 2.5671290156491348e-05, + "loss": 0.0809, "step": 47240 }, { "epoch": 2.2, - "learning_rate": 1.5664057006235057e-05, - "loss": 0.0873, + "learning_rate": 2.567082208665533e-05, + "loss": 0.1109, "step": 47245 }, { "epoch": 2.2, - "learning_rate": 1.5663588204959917e-05, - "loss": 0.092, + "learning_rate": 2.567035401681931e-05, + "loss": 0.0536, "step": 47250 }, { "epoch": 2.2, - "learning_rate": 1.5663119403684777e-05, - "loss": 0.1788, + "learning_rate": 2.566988594698329e-05, + "loss": 0.1372, "step": 47255 }, { "epoch": 2.21, - "learning_rate": 1.566265060240964e-05, - "loss": 0.1563, + "learning_rate": 2.566941787714727e-05, + "loss": 0.2469, "step": 47260 }, { "epoch": 2.21, - "learning_rate": 1.56621818011345e-05, - "loss": 0.3004, + "learning_rate": 2.5668949807311254e-05, + "loss": 0.2443, "step": 47265 }, { "epoch": 2.21, - "learning_rate": 1.566171299985936e-05, - "loss": 0.0335, + "learning_rate": 2.5668481737475234e-05, + "loss": 0.045, "step": 47270 }, { "epoch": 2.21, - "learning_rate": 1.5661244198584224e-05, - "loss": 0.053, + "learning_rate": 2.566801366763921e-05, + "loss": 0.0592, "step": 47275 }, { "epoch": 2.21, - "learning_rate": 1.5660775397309084e-05, - "loss": 0.0392, + "learning_rate": 2.566754559780319e-05, + "loss": 0.0674, "step": 47280 }, { "epoch": 2.21, - "learning_rate": 1.5660306596033944e-05, - "loss": 0.056, + "learning_rate": 2.5667077527967173e-05, + "loss": 0.0417, "step": 47285 }, { "epoch": 2.21, - "learning_rate": 1.5659837794758804e-05, - "loss": 0.1362, + "learning_rate": 2.5666609458131153e-05, + "loss": 0.1509, "step": 47290 }, { "epoch": 2.21, - "learning_rate": 1.5659368993483663e-05, - "loss": 0.1154, + "learning_rate": 2.5666141388295133e-05, + "loss": 0.1222, "step": 47295 }, { "epoch": 2.21, - "learning_rate": 1.5658900192208523e-05, - "loss": 0.1525, + "learning_rate": 2.5665673318459116e-05, + "loss": 0.0784, "step": 47300 }, { "epoch": 2.21, - "learning_rate": 1.5658431390933383e-05, - "loss": 0.247, + "learning_rate": 2.5665205248623096e-05, + "loss": 0.0995, "step": 47305 }, { "epoch": 2.21, - "learning_rate": 1.5657962589658243e-05, - "loss": 0.2236, + "learning_rate": 2.5664737178787076e-05, + "loss": 0.2392, "step": 47310 }, { "epoch": 2.21, - "learning_rate": 1.5657493788383103e-05, - "loss": 0.2756, + "learning_rate": 2.5664269108951056e-05, + "loss": 0.2065, "step": 47315 }, { "epoch": 2.21, - "learning_rate": 1.5657024987107967e-05, - "loss": 0.0349, + "learning_rate": 2.566380103911504e-05, + "loss": 0.0554, "step": 47320 }, { "epoch": 2.21, - "learning_rate": 1.5656556185832826e-05, - "loss": 0.0435, + "learning_rate": 2.566333296927902e-05, + "loss": 0.1014, "step": 47325 }, { "epoch": 2.21, - "learning_rate": 1.5656087384557686e-05, - "loss": 0.0459, + "learning_rate": 2.5662864899443e-05, + "loss": 0.0283, "step": 47330 }, { "epoch": 2.21, - "learning_rate": 1.565561858328255e-05, - "loss": 0.1041, + "learning_rate": 2.5662396829606978e-05, + "loss": 0.0521, "step": 47335 }, { "epoch": 2.21, - "learning_rate": 1.565514978200741e-05, - "loss": 0.0852, + "learning_rate": 2.5661928759770958e-05, + "loss": 0.0955, "step": 47340 }, { "epoch": 2.21, - "learning_rate": 1.565468098073227e-05, - "loss": 0.1001, + "learning_rate": 2.5661460689934938e-05, + "loss": 0.074, "step": 47345 }, { "epoch": 2.21, - "learning_rate": 1.565421217945713e-05, - "loss": 0.1142, + "learning_rate": 2.5660992620098918e-05, + "loss": 0.1441, "step": 47350 }, { "epoch": 2.21, - "learning_rate": 1.565374337818199e-05, - "loss": 0.0887, + "learning_rate": 2.5660524550262898e-05, + "loss": 0.1411, "step": 47355 }, { "epoch": 2.21, - "learning_rate": 1.565327457690685e-05, - "loss": 0.2389, + "learning_rate": 2.566005648042688e-05, + "loss": 0.2362, "step": 47360 }, { "epoch": 2.21, - "learning_rate": 1.5652805775631713e-05, - "loss": 0.2464, + "learning_rate": 2.565958841059086e-05, + "loss": 0.2412, "step": 47365 }, { "epoch": 2.21, - "learning_rate": 1.5652336974356573e-05, - "loss": 0.032, + "learning_rate": 2.565912034075484e-05, + "loss": 0.0503, "step": 47370 }, { "epoch": 2.21, - "learning_rate": 1.5651868173081433e-05, - "loss": 0.0374, + "learning_rate": 2.5658652270918824e-05, + "loss": 0.0417, "step": 47375 }, { "epoch": 2.21, - "learning_rate": 1.5651399371806292e-05, - "loss": 0.0476, + "learning_rate": 2.5658184201082803e-05, + "loss": 0.0409, "step": 47380 }, { "epoch": 2.21, - "learning_rate": 1.5650930570531152e-05, - "loss": 0.0691, + "learning_rate": 2.5657716131246783e-05, + "loss": 0.0817, "step": 47385 }, { "epoch": 2.21, - "learning_rate": 1.5650461769256012e-05, - "loss": 0.1024, + "learning_rate": 2.5657248061410763e-05, + "loss": 0.0825, "step": 47390 }, { "epoch": 2.21, - "learning_rate": 1.5649992967980872e-05, - "loss": 0.0723, + "learning_rate": 2.5656779991574746e-05, + "loss": 0.1566, "step": 47395 }, { "epoch": 2.21, - "learning_rate": 1.5649524166705736e-05, - "loss": 0.1205, + "learning_rate": 2.5656311921738723e-05, + "loss": 0.0994, "step": 47400 }, { "epoch": 2.21, - "learning_rate": 1.5649055365430596e-05, - "loss": 0.1828, + "learning_rate": 2.5655843851902702e-05, + "loss": 0.1846, "step": 47405 }, { "epoch": 2.21, - "learning_rate": 1.5648586564155455e-05, - "loss": 0.2641, + "learning_rate": 2.5655375782066682e-05, + "loss": 0.2791, "step": 47410 }, { "epoch": 2.21, - "learning_rate": 1.564811776288032e-05, - "loss": 0.217, + "learning_rate": 2.5654907712230666e-05, + "loss": 0.3791, "step": 47415 }, { "epoch": 2.21, - "learning_rate": 1.564764896160518e-05, - "loss": 0.0275, + "learning_rate": 2.5654439642394645e-05, + "loss": 0.0784, "step": 47420 }, { "epoch": 2.21, - "learning_rate": 1.564718016033004e-05, - "loss": 0.0234, + "learning_rate": 2.5653971572558625e-05, + "loss": 0.0635, "step": 47425 }, { "epoch": 2.21, - "learning_rate": 1.56467113590549e-05, - "loss": 0.019, + "learning_rate": 2.565350350272261e-05, + "loss": 0.1326, "step": 47430 }, { "epoch": 2.21, - "learning_rate": 1.564624255777976e-05, - "loss": 0.0693, + "learning_rate": 2.5653035432886588e-05, + "loss": 0.1016, "step": 47435 }, { "epoch": 2.21, - "learning_rate": 1.564577375650462e-05, - "loss": 0.1484, + "learning_rate": 2.5652567363050568e-05, + "loss": 0.0786, "step": 47440 }, { "epoch": 2.21, - "learning_rate": 1.564530495522948e-05, - "loss": 0.0962, + "learning_rate": 2.5652099293214548e-05, + "loss": 0.1149, "step": 47445 }, { "epoch": 2.21, - "learning_rate": 1.564483615395434e-05, - "loss": 0.1063, + "learning_rate": 2.565163122337853e-05, + "loss": 0.1575, "step": 47450 }, { "epoch": 2.21, - "learning_rate": 1.5644367352679198e-05, - "loss": 0.1541, + "learning_rate": 2.565116315354251e-05, + "loss": 0.2128, "step": 47455 }, { "epoch": 2.21, - "learning_rate": 1.564389855140406e-05, - "loss": 0.2506, + "learning_rate": 2.565069508370649e-05, + "loss": 0.2799, "step": 47460 }, { "epoch": 2.21, - "learning_rate": 1.564342975012892e-05, - "loss": 0.2357, + "learning_rate": 2.5650227013870467e-05, + "loss": 0.3339, "step": 47465 }, { "epoch": 2.22, - "learning_rate": 1.564296094885378e-05, - "loss": 0.043, + "learning_rate": 2.564975894403445e-05, + "loss": 0.0249, "step": 47470 }, { "epoch": 2.22, - "learning_rate": 1.564249214757864e-05, - "loss": 0.0458, + "learning_rate": 2.564929087419843e-05, + "loss": 0.0835, "step": 47475 }, { "epoch": 2.22, - "learning_rate": 1.5642023346303505e-05, - "loss": 0.0539, + "learning_rate": 2.564882280436241e-05, + "loss": 0.032, "step": 47480 }, { "epoch": 2.22, - "learning_rate": 1.5641554545028365e-05, - "loss": 0.0616, + "learning_rate": 2.5648354734526393e-05, + "loss": 0.0497, "step": 47485 }, { "epoch": 2.22, - "learning_rate": 1.5641085743753225e-05, - "loss": 0.15, + "learning_rate": 2.5647886664690373e-05, + "loss": 0.0919, "step": 47490 }, { "epoch": 2.22, - "learning_rate": 1.5640616942478085e-05, - "loss": 0.1106, + "learning_rate": 2.5647418594854353e-05, + "loss": 0.061, "step": 47495 }, { "epoch": 2.22, - "learning_rate": 1.5640148141202944e-05, - "loss": 0.1981, + "learning_rate": 2.5646950525018333e-05, + "loss": 0.1598, "step": 47500 }, { "epoch": 2.22, - "learning_rate": 1.5639679339927808e-05, - "loss": 0.1841, + "learning_rate": 2.5646482455182316e-05, + "loss": 0.1173, "step": 47505 }, { "epoch": 2.22, - "learning_rate": 1.5639210538652668e-05, - "loss": 0.1291, + "learning_rate": 2.5646014385346296e-05, + "loss": 0.3099, "step": 47510 }, { "epoch": 2.22, - "learning_rate": 1.5638741737377528e-05, - "loss": 0.4161, + "learning_rate": 2.5645546315510275e-05, + "loss": 0.3127, "step": 47515 }, { "epoch": 2.22, - "learning_rate": 1.5638272936102388e-05, - "loss": 0.0439, + "learning_rate": 2.5645078245674255e-05, + "loss": 0.0806, "step": 47520 }, { "epoch": 2.22, - "learning_rate": 1.5637804134827248e-05, - "loss": 0.0376, + "learning_rate": 2.5644610175838235e-05, + "loss": 0.0177, "step": 47525 }, { "epoch": 2.22, - "learning_rate": 1.5637335333552107e-05, - "loss": 0.0504, + "learning_rate": 2.5644142106002215e-05, + "loss": 0.0359, "step": 47530 }, { "epoch": 2.22, - "learning_rate": 1.5636866532276967e-05, - "loss": 0.0765, + "learning_rate": 2.5643674036166195e-05, + "loss": 0.0904, "step": 47535 }, { "epoch": 2.22, - "learning_rate": 1.5636397731001827e-05, - "loss": 0.0547, + "learning_rate": 2.5643205966330178e-05, + "loss": 0.1223, "step": 47540 }, { "epoch": 2.22, - "learning_rate": 1.563592892972669e-05, - "loss": 0.0697, + "learning_rate": 2.5642737896494158e-05, + "loss": 0.1329, "step": 47545 }, { "epoch": 2.22, - "learning_rate": 1.563546012845155e-05, - "loss": 0.1192, + "learning_rate": 2.5642269826658138e-05, + "loss": 0.1225, "step": 47550 }, { "epoch": 2.22, - "learning_rate": 1.563499132717641e-05, - "loss": 0.1758, + "learning_rate": 2.5641801756822117e-05, + "loss": 0.196, "step": 47555 }, { "epoch": 2.22, - "learning_rate": 1.5634522525901274e-05, - "loss": 0.3129, + "learning_rate": 2.56413336869861e-05, + "loss": 0.2821, "step": 47560 }, { "epoch": 2.22, - "learning_rate": 1.5634053724626134e-05, - "loss": 0.2956, + "learning_rate": 2.564086561715008e-05, + "loss": 0.3675, "step": 47565 }, { "epoch": 2.22, - "learning_rate": 1.5633584923350994e-05, - "loss": 0.1004, + "learning_rate": 2.564039754731406e-05, + "loss": 0.1089, "step": 47570 }, { "epoch": 2.22, - "learning_rate": 1.5633116122075854e-05, - "loss": 0.033, + "learning_rate": 2.563992947747804e-05, + "loss": 0.0502, "step": 47575 }, { "epoch": 2.22, - "learning_rate": 1.5632647320800714e-05, - "loss": 0.0694, + "learning_rate": 2.5639461407642023e-05, + "loss": 0.04, "step": 47580 }, { "epoch": 2.22, - "learning_rate": 1.5632178519525573e-05, - "loss": 0.0848, + "learning_rate": 2.5638993337806003e-05, + "loss": 0.1266, "step": 47585 }, { "epoch": 2.22, - "learning_rate": 1.5631709718250433e-05, - "loss": 0.1193, + "learning_rate": 2.563852526796998e-05, + "loss": 0.069, "step": 47590 }, { "epoch": 2.22, - "learning_rate": 1.5631240916975297e-05, - "loss": 0.1105, + "learning_rate": 2.563805719813396e-05, + "loss": 0.0625, "step": 47595 }, { "epoch": 2.22, - "learning_rate": 1.5630772115700157e-05, - "loss": 0.2483, + "learning_rate": 2.5637589128297942e-05, + "loss": 0.1356, "step": 47600 }, { "epoch": 2.22, - "learning_rate": 1.5630303314425017e-05, - "loss": 0.186, + "learning_rate": 2.5637121058461922e-05, + "loss": 0.1755, "step": 47605 }, { "epoch": 2.22, - "learning_rate": 1.5629834513149877e-05, - "loss": 0.243, + "learning_rate": 2.5636652988625902e-05, + "loss": 0.2815, "step": 47610 }, { "epoch": 2.22, - "learning_rate": 1.5629365711874736e-05, - "loss": 0.2054, + "learning_rate": 2.5636184918789885e-05, + "loss": 0.3773, "step": 47615 }, { "epoch": 2.22, - "learning_rate": 1.5628896910599596e-05, - "loss": 0.0592, + "learning_rate": 2.5635716848953865e-05, + "loss": 0.0562, "step": 47620 }, { "epoch": 2.22, - "learning_rate": 1.562842810932446e-05, - "loss": 0.0247, + "learning_rate": 2.5635248779117845e-05, + "loss": 0.0617, "step": 47625 }, { "epoch": 2.22, - "learning_rate": 1.562795930804932e-05, - "loss": 0.0993, + "learning_rate": 2.5634780709281825e-05, + "loss": 0.0379, "step": 47630 }, { "epoch": 2.22, - "learning_rate": 1.562749050677418e-05, - "loss": 0.0961, + "learning_rate": 2.5634312639445808e-05, + "loss": 0.0643, "step": 47635 }, { "epoch": 2.22, - "learning_rate": 1.562702170549904e-05, - "loss": 0.1097, + "learning_rate": 2.5633844569609788e-05, + "loss": 0.0903, "step": 47640 }, { "epoch": 2.22, - "learning_rate": 1.5626552904223903e-05, - "loss": 0.0517, + "learning_rate": 2.5633376499773768e-05, + "loss": 0.1456, "step": 47645 }, { "epoch": 2.22, - "learning_rate": 1.5626084102948763e-05, - "loss": 0.103, + "learning_rate": 2.5632908429937747e-05, + "loss": 0.165, "step": 47650 }, { "epoch": 2.22, - "learning_rate": 1.5625615301673623e-05, - "loss": 0.1968, + "learning_rate": 2.5632440360101727e-05, + "loss": 0.133, "step": 47655 }, { "epoch": 2.22, - "learning_rate": 1.5625146500398483e-05, - "loss": 0.4059, + "learning_rate": 2.5631972290265707e-05, + "loss": 0.1501, "step": 47660 }, { "epoch": 2.22, - "learning_rate": 1.5624677699123343e-05, - "loss": 0.3454, + "learning_rate": 2.5631504220429687e-05, + "loss": 0.1967, "step": 47665 }, { "epoch": 2.22, - "learning_rate": 1.5624208897848203e-05, - "loss": 0.07, + "learning_rate": 2.563103615059367e-05, + "loss": 0.0856, "step": 47670 }, { "epoch": 2.22, - "learning_rate": 1.5623740096573062e-05, - "loss": 0.0739, + "learning_rate": 2.563056808075765e-05, + "loss": 0.0795, "step": 47675 }, { "epoch": 2.22, - "learning_rate": 1.5623271295297922e-05, - "loss": 0.0511, + "learning_rate": 2.563010001092163e-05, + "loss": 0.0539, "step": 47680 }, { "epoch": 2.23, - "learning_rate": 1.5622802494022786e-05, - "loss": 0.0457, + "learning_rate": 2.562963194108561e-05, + "loss": 0.0669, "step": 47685 }, { "epoch": 2.23, - "learning_rate": 1.5622333692747646e-05, - "loss": 0.0793, + "learning_rate": 2.5629163871249593e-05, + "loss": 0.0773, "step": 47690 }, { "epoch": 2.23, - "learning_rate": 1.5621864891472506e-05, - "loss": 0.1131, + "learning_rate": 2.5628695801413573e-05, + "loss": 0.0621, "step": 47695 }, { "epoch": 2.23, - "learning_rate": 1.5621396090197366e-05, - "loss": 0.1825, + "learning_rate": 2.5628227731577552e-05, + "loss": 0.1681, "step": 47700 }, { "epoch": 2.23, - "learning_rate": 1.562092728892223e-05, - "loss": 0.1641, + "learning_rate": 2.5627759661741532e-05, + "loss": 0.1648, "step": 47705 }, { "epoch": 2.23, - "learning_rate": 1.562045848764709e-05, - "loss": 0.3252, + "learning_rate": 2.5627291591905515e-05, + "loss": 0.374, "step": 47710 }, { "epoch": 2.23, - "learning_rate": 1.561998968637195e-05, - "loss": 0.2352, + "learning_rate": 2.5626823522069492e-05, + "loss": 0.2538, "step": 47715 }, { "epoch": 2.23, - "learning_rate": 1.561952088509681e-05, - "loss": 0.0826, + "learning_rate": 2.562635545223347e-05, + "loss": 0.0902, "step": 47720 }, { "epoch": 2.23, - "learning_rate": 1.561905208382167e-05, - "loss": 0.0546, + "learning_rate": 2.5625887382397455e-05, + "loss": 0.045, "step": 47725 }, { "epoch": 2.23, - "learning_rate": 1.561858328254653e-05, - "loss": 0.1073, + "learning_rate": 2.5625419312561435e-05, + "loss": 0.1089, "step": 47730 }, { "epoch": 2.23, - "learning_rate": 1.5618114481271392e-05, - "loss": 0.0686, + "learning_rate": 2.5624951242725415e-05, + "loss": 0.0462, "step": 47735 }, { "epoch": 2.23, - "learning_rate": 1.5617645679996252e-05, - "loss": 0.118, + "learning_rate": 2.5624483172889394e-05, + "loss": 0.1399, "step": 47740 }, { "epoch": 2.23, - "learning_rate": 1.561717687872111e-05, - "loss": 0.132, + "learning_rate": 2.5624015103053378e-05, + "loss": 0.0925, "step": 47745 }, { "epoch": 2.23, - "learning_rate": 1.561670807744597e-05, - "loss": 0.1841, + "learning_rate": 2.5623547033217357e-05, + "loss": 0.094, "step": 47750 }, { "epoch": 2.23, - "learning_rate": 1.561623927617083e-05, - "loss": 0.1671, + "learning_rate": 2.5623078963381337e-05, + "loss": 0.2146, "step": 47755 }, { "epoch": 2.23, - "learning_rate": 1.561577047489569e-05, - "loss": 0.3364, + "learning_rate": 2.5622610893545317e-05, + "loss": 0.2848, "step": 47760 }, { "epoch": 2.23, - "learning_rate": 1.5615301673620555e-05, - "loss": 0.2793, + "learning_rate": 2.56221428237093e-05, + "loss": 0.3323, "step": 47765 }, { "epoch": 2.23, - "learning_rate": 1.5614832872345415e-05, - "loss": 0.0919, + "learning_rate": 2.562167475387328e-05, + "loss": 0.0185, "step": 47770 }, { "epoch": 2.23, - "learning_rate": 1.5614364071070275e-05, - "loss": 0.0524, + "learning_rate": 2.562120668403726e-05, + "loss": 0.0359, "step": 47775 }, { "epoch": 2.23, - "learning_rate": 1.5613895269795135e-05, - "loss": 0.107, + "learning_rate": 2.5620738614201236e-05, + "loss": 0.0438, "step": 47780 }, { "epoch": 2.23, - "learning_rate": 1.5613426468519998e-05, - "loss": 0.0647, + "learning_rate": 2.562027054436522e-05, + "loss": 0.103, "step": 47785 }, { "epoch": 2.23, - "learning_rate": 1.5612957667244858e-05, - "loss": 0.1325, + "learning_rate": 2.56198024745292e-05, + "loss": 0.1085, "step": 47790 }, { "epoch": 2.23, - "learning_rate": 1.5612488865969718e-05, - "loss": 0.1216, + "learning_rate": 2.561933440469318e-05, + "loss": 0.1381, "step": 47795 }, { "epoch": 2.23, - "learning_rate": 1.5612020064694578e-05, - "loss": 0.1204, + "learning_rate": 2.5618866334857162e-05, + "loss": 0.1167, "step": 47800 }, { "epoch": 2.23, - "learning_rate": 1.5611551263419438e-05, - "loss": 0.1161, + "learning_rate": 2.5618398265021142e-05, + "loss": 0.1696, "step": 47805 }, { "epoch": 2.23, - "learning_rate": 1.5611082462144298e-05, - "loss": 0.1865, + "learning_rate": 2.5617930195185122e-05, + "loss": 0.2581, "step": 47810 }, { "epoch": 2.23, - "learning_rate": 1.5610613660869158e-05, - "loss": 0.2131, + "learning_rate": 2.5617462125349102e-05, + "loss": 0.2795, "step": 47815 }, { "epoch": 2.23, - "learning_rate": 1.5610144859594017e-05, - "loss": 0.0688, + "learning_rate": 2.5616994055513085e-05, + "loss": 0.0395, "step": 47820 }, { "epoch": 2.23, - "learning_rate": 1.5609676058318877e-05, - "loss": 0.0357, + "learning_rate": 2.5616525985677065e-05, + "loss": 0.0924, "step": 47825 }, { "epoch": 2.23, - "learning_rate": 1.560920725704374e-05, - "loss": 0.0453, + "learning_rate": 2.5616057915841045e-05, + "loss": 0.117, "step": 47830 }, { "epoch": 2.23, - "learning_rate": 1.56087384557686e-05, - "loss": 0.1079, + "learning_rate": 2.5615589846005024e-05, + "loss": 0.0537, "step": 47835 }, { "epoch": 2.23, - "learning_rate": 1.560826965449346e-05, - "loss": 0.0614, + "learning_rate": 2.5615121776169008e-05, + "loss": 0.0896, "step": 47840 }, { "epoch": 2.23, - "learning_rate": 1.5607800853218324e-05, - "loss": 0.1924, + "learning_rate": 2.5614653706332984e-05, + "loss": 0.1674, "step": 47845 }, { "epoch": 2.23, - "learning_rate": 1.5607332051943184e-05, - "loss": 0.1522, + "learning_rate": 2.5614185636496964e-05, + "loss": 0.1446, "step": 47850 }, { "epoch": 2.23, - "learning_rate": 1.5606863250668044e-05, - "loss": 0.1627, + "learning_rate": 2.5613717566660947e-05, + "loss": 0.1778, "step": 47855 }, { "epoch": 2.23, - "learning_rate": 1.5606394449392904e-05, - "loss": 0.28, + "learning_rate": 2.5613249496824927e-05, + "loss": 0.2172, "step": 47860 }, { "epoch": 2.23, - "learning_rate": 1.5605925648117764e-05, - "loss": 0.2299, + "learning_rate": 2.5612781426988907e-05, + "loss": 0.2915, "step": 47865 }, { "epoch": 2.23, - "learning_rate": 1.5605456846842624e-05, - "loss": 0.087, + "learning_rate": 2.5612313357152887e-05, + "loss": 0.042, "step": 47870 }, { "epoch": 2.23, - "learning_rate": 1.5604988045567487e-05, - "loss": 0.0141, + "learning_rate": 2.561184528731687e-05, + "loss": 0.0512, "step": 47875 }, { "epoch": 2.23, - "learning_rate": 1.5604519244292347e-05, - "loss": 0.0714, + "learning_rate": 2.561137721748085e-05, + "loss": 0.0285, "step": 47880 }, { "epoch": 2.23, - "learning_rate": 1.5604050443017207e-05, - "loss": 0.0633, + "learning_rate": 2.561090914764483e-05, + "loss": 0.0876, "step": 47885 }, { "epoch": 2.23, - "learning_rate": 1.5603581641742067e-05, - "loss": 0.1126, + "learning_rate": 2.561044107780881e-05, + "loss": 0.0687, "step": 47890 }, { "epoch": 2.23, - "learning_rate": 1.5603112840466927e-05, - "loss": 0.0534, + "learning_rate": 2.5609973007972792e-05, + "loss": 0.0614, "step": 47895 }, { "epoch": 2.24, - "learning_rate": 1.5602644039191787e-05, - "loss": 0.1151, + "learning_rate": 2.5609504938136772e-05, + "loss": 0.1214, "step": 47900 }, { "epoch": 2.24, - "learning_rate": 1.5602175237916647e-05, - "loss": 0.2126, + "learning_rate": 2.560903686830075e-05, + "loss": 0.162, "step": 47905 }, { "epoch": 2.24, - "learning_rate": 1.560170643664151e-05, - "loss": 0.1806, + "learning_rate": 2.5608568798464732e-05, + "loss": 0.2478, "step": 47910 }, { "epoch": 2.24, - "learning_rate": 1.560123763536637e-05, - "loss": 0.355, + "learning_rate": 2.560810072862871e-05, + "loss": 0.3183, "step": 47915 }, { "epoch": 2.24, - "learning_rate": 1.560076883409123e-05, - "loss": 0.0739, + "learning_rate": 2.560763265879269e-05, + "loss": 0.0465, "step": 47920 }, { "epoch": 2.24, - "learning_rate": 1.5600300032816093e-05, - "loss": 0.0643, + "learning_rate": 2.560716458895667e-05, + "loss": 0.0585, "step": 47925 }, { "epoch": 2.24, - "learning_rate": 1.5599831231540953e-05, - "loss": 0.0717, + "learning_rate": 2.5606696519120655e-05, + "loss": 0.0294, "step": 47930 }, { "epoch": 2.24, - "learning_rate": 1.5599362430265813e-05, - "loss": 0.0994, + "learning_rate": 2.5606228449284634e-05, + "loss": 0.0236, "step": 47935 }, { "epoch": 2.24, - "learning_rate": 1.5598893628990673e-05, - "loss": 0.1345, + "learning_rate": 2.5605760379448614e-05, + "loss": 0.1196, "step": 47940 }, { "epoch": 2.24, - "learning_rate": 1.5598424827715533e-05, - "loss": 0.1039, + "learning_rate": 2.5605292309612594e-05, + "loss": 0.1423, "step": 47945 }, { "epoch": 2.24, - "learning_rate": 1.5597956026440393e-05, - "loss": 0.1926, + "learning_rate": 2.5604824239776577e-05, + "loss": 0.1468, "step": 47950 }, { "epoch": 2.24, - "learning_rate": 1.5597487225165253e-05, - "loss": 0.1395, + "learning_rate": 2.5604356169940557e-05, + "loss": 0.2614, "step": 47955 }, { "epoch": 2.24, - "learning_rate": 1.5597018423890113e-05, - "loss": 0.3645, + "learning_rate": 2.5603888100104537e-05, + "loss": 0.1917, "step": 47960 }, { "epoch": 2.24, - "learning_rate": 1.5596549622614972e-05, - "loss": 0.3706, + "learning_rate": 2.5603420030268517e-05, + "loss": 0.244, "step": 47965 }, { "epoch": 2.24, - "learning_rate": 1.5596080821339836e-05, - "loss": 0.0203, + "learning_rate": 2.5602951960432496e-05, + "loss": 0.0856, "step": 47970 }, { "epoch": 2.24, - "learning_rate": 1.5595612020064696e-05, - "loss": 0.0991, + "learning_rate": 2.5602483890596476e-05, + "loss": 0.0494, "step": 47975 }, { "epoch": 2.24, - "learning_rate": 1.5595143218789556e-05, - "loss": 0.0464, + "learning_rate": 2.5602015820760456e-05, + "loss": 0.0748, "step": 47980 }, { "epoch": 2.24, - "learning_rate": 1.5594674417514416e-05, - "loss": 0.036, + "learning_rate": 2.560154775092444e-05, + "loss": 0.0276, "step": 47985 }, { "epoch": 2.24, - "learning_rate": 1.559420561623928e-05, - "loss": 0.1018, + "learning_rate": 2.560107968108842e-05, + "loss": 0.1039, "step": 47990 }, { "epoch": 2.24, - "learning_rate": 1.559373681496414e-05, - "loss": 0.1252, + "learning_rate": 2.56006116112524e-05, + "loss": 0.113, "step": 47995 }, { "epoch": 2.24, - "learning_rate": 1.5593268013689e-05, - "loss": 0.093, + "learning_rate": 2.560014354141638e-05, + "loss": 0.1198, "step": 48000 }, { "epoch": 2.24, - "learning_rate": 1.559279921241386e-05, - "loss": 0.1209, + "learning_rate": 2.5599675471580362e-05, + "loss": 0.1373, "step": 48005 }, { "epoch": 2.24, - "learning_rate": 1.559233041113872e-05, - "loss": 0.3554, + "learning_rate": 2.5599207401744342e-05, + "loss": 0.3318, "step": 48010 }, { "epoch": 2.24, - "learning_rate": 1.5591861609863582e-05, - "loss": 0.3472, + "learning_rate": 2.559873933190832e-05, + "loss": 0.2747, "step": 48015 }, { "epoch": 2.24, - "learning_rate": 1.5591392808588442e-05, - "loss": 0.0574, + "learning_rate": 2.55982712620723e-05, + "loss": 0.0327, "step": 48020 }, { "epoch": 2.24, - "learning_rate": 1.5590924007313302e-05, - "loss": 0.074, + "learning_rate": 2.5597803192236285e-05, + "loss": 0.0317, "step": 48025 }, { "epoch": 2.24, - "learning_rate": 1.5590455206038162e-05, - "loss": 0.1075, + "learning_rate": 2.5597335122400264e-05, + "loss": 0.0481, "step": 48030 }, { "epoch": 2.24, - "learning_rate": 1.5589986404763022e-05, - "loss": 0.0526, + "learning_rate": 2.559686705256424e-05, + "loss": 0.0458, "step": 48035 }, { "epoch": 2.24, - "learning_rate": 1.558951760348788e-05, - "loss": 0.0969, + "learning_rate": 2.5596398982728224e-05, + "loss": 0.0291, "step": 48040 }, { "epoch": 2.24, - "learning_rate": 1.558904880221274e-05, - "loss": 0.1061, + "learning_rate": 2.5595930912892204e-05, + "loss": 0.1611, "step": 48045 }, { "epoch": 2.24, - "learning_rate": 1.55885800009376e-05, - "loss": 0.2251, + "learning_rate": 2.5595462843056184e-05, + "loss": 0.1481, "step": 48050 }, { "epoch": 2.24, - "learning_rate": 1.5588111199662465e-05, - "loss": 0.2318, + "learning_rate": 2.5594994773220163e-05, + "loss": 0.1921, "step": 48055 }, { "epoch": 2.24, - "learning_rate": 1.5587642398387325e-05, - "loss": 0.1552, + "learning_rate": 2.5594526703384147e-05, + "loss": 0.2832, "step": 48060 }, { "epoch": 2.24, - "learning_rate": 1.5587173597112185e-05, - "loss": 0.3252, + "learning_rate": 2.5594058633548127e-05, + "loss": 0.38, "step": 48065 }, { "epoch": 2.24, - "learning_rate": 1.5586704795837048e-05, - "loss": 0.1035, + "learning_rate": 2.5593590563712106e-05, + "loss": 0.0514, "step": 48070 }, { "epoch": 2.24, - "learning_rate": 1.5586235994561908e-05, - "loss": 0.0411, + "learning_rate": 2.5593122493876086e-05, + "loss": 0.0453, "step": 48075 }, { "epoch": 2.24, - "learning_rate": 1.5585767193286768e-05, - "loss": 0.0677, + "learning_rate": 2.559265442404007e-05, + "loss": 0.074, "step": 48080 }, { "epoch": 2.24, - "learning_rate": 1.5585298392011628e-05, - "loss": 0.0637, + "learning_rate": 2.559218635420405e-05, + "loss": 0.055, "step": 48085 }, { "epoch": 2.24, - "learning_rate": 1.5584829590736488e-05, - "loss": 0.1163, + "learning_rate": 2.559171828436803e-05, + "loss": 0.0755, "step": 48090 }, { "epoch": 2.24, - "learning_rate": 1.5584360789461348e-05, - "loss": 0.0817, + "learning_rate": 2.559125021453201e-05, + "loss": 0.0403, "step": 48095 }, { "epoch": 2.24, - "learning_rate": 1.5583891988186208e-05, - "loss": 0.1213, + "learning_rate": 2.559078214469599e-05, + "loss": 0.2185, "step": 48100 }, { "epoch": 2.24, - "learning_rate": 1.5583423186911068e-05, - "loss": 0.2463, + "learning_rate": 2.559031407485997e-05, + "loss": 0.2096, "step": 48105 }, { "epoch": 2.24, - "learning_rate": 1.558295438563593e-05, - "loss": 0.2425, + "learning_rate": 2.5589846005023948e-05, + "loss": 0.3046, "step": 48110 }, { "epoch": 2.25, - "learning_rate": 1.558248558436079e-05, - "loss": 0.2606, + "learning_rate": 2.558937793518793e-05, + "loss": 0.281, "step": 48115 }, { "epoch": 2.25, - "learning_rate": 1.558201678308565e-05, - "loss": 0.0782, + "learning_rate": 2.558890986535191e-05, + "loss": 0.0864, "step": 48120 }, { "epoch": 2.25, - "learning_rate": 1.558154798181051e-05, - "loss": 0.0525, + "learning_rate": 2.558844179551589e-05, + "loss": 0.0291, "step": 48125 }, { "epoch": 2.25, - "learning_rate": 1.558107918053537e-05, - "loss": 0.0192, + "learning_rate": 2.558797372567987e-05, + "loss": 0.0645, "step": 48130 }, { "epoch": 2.25, - "learning_rate": 1.5580610379260234e-05, - "loss": 0.0877, + "learning_rate": 2.5587505655843854e-05, + "loss": 0.0875, "step": 48135 }, { "epoch": 2.25, - "learning_rate": 1.5580141577985094e-05, - "loss": 0.1597, + "learning_rate": 2.5587037586007834e-05, + "loss": 0.1075, "step": 48140 }, { "epoch": 2.25, - "learning_rate": 1.5579672776709954e-05, - "loss": 0.0772, + "learning_rate": 2.5586569516171814e-05, + "loss": 0.0819, "step": 48145 }, { "epoch": 2.25, - "learning_rate": 1.5579203975434814e-05, - "loss": 0.1402, + "learning_rate": 2.5586101446335794e-05, + "loss": 0.1165, "step": 48150 }, { "epoch": 2.25, - "learning_rate": 1.5578735174159677e-05, - "loss": 0.1657, + "learning_rate": 2.5585633376499777e-05, + "loss": 0.1718, "step": 48155 }, { "epoch": 2.25, - "learning_rate": 1.5578266372884537e-05, - "loss": 0.3195, + "learning_rate": 2.5585165306663753e-05, + "loss": 0.127, "step": 48160 }, { "epoch": 2.25, - "learning_rate": 1.5577797571609397e-05, - "loss": 0.2577, + "learning_rate": 2.5584697236827733e-05, + "loss": 0.3313, "step": 48165 }, { "epoch": 2.25, - "learning_rate": 1.5577328770334257e-05, - "loss": 0.0815, + "learning_rate": 2.5584229166991716e-05, + "loss": 0.0502, "step": 48170 }, { "epoch": 2.25, - "learning_rate": 1.5576859969059117e-05, - "loss": 0.0641, + "learning_rate": 2.5583761097155696e-05, + "loss": 0.0626, "step": 48175 }, { "epoch": 2.25, - "learning_rate": 1.5576391167783977e-05, - "loss": 0.0169, + "learning_rate": 2.5583293027319676e-05, + "loss": 0.0683, "step": 48180 }, { "epoch": 2.25, - "learning_rate": 1.5575922366508837e-05, - "loss": 0.1095, + "learning_rate": 2.5582824957483656e-05, + "loss": 0.0778, "step": 48185 }, { "epoch": 2.25, - "learning_rate": 1.5575453565233697e-05, - "loss": 0.0848, + "learning_rate": 2.558235688764764e-05, + "loss": 0.1569, "step": 48190 }, { "epoch": 2.25, - "learning_rate": 1.557498476395856e-05, - "loss": 0.0564, + "learning_rate": 2.558188881781162e-05, + "loss": 0.1026, "step": 48195 }, { "epoch": 2.25, - "learning_rate": 1.557451596268342e-05, - "loss": 0.2123, + "learning_rate": 2.55814207479756e-05, + "loss": 0.1475, "step": 48200 }, { "epoch": 2.25, - "learning_rate": 1.557404716140828e-05, - "loss": 0.154, + "learning_rate": 2.558095267813958e-05, + "loss": 0.0808, "step": 48205 }, { "epoch": 2.25, - "learning_rate": 1.557357836013314e-05, - "loss": 0.2392, + "learning_rate": 2.558048460830356e-05, + "loss": 0.3092, "step": 48210 }, { "epoch": 2.25, - "learning_rate": 1.5573109558858003e-05, - "loss": 0.3414, + "learning_rate": 2.558001653846754e-05, + "loss": 0.32, "step": 48215 }, { "epoch": 2.25, - "learning_rate": 1.5572640757582863e-05, - "loss": 0.094, + "learning_rate": 2.557954846863152e-05, + "loss": 0.0459, "step": 48220 }, { "epoch": 2.25, - "learning_rate": 1.5572171956307723e-05, - "loss": 0.0229, + "learning_rate": 2.55790803987955e-05, + "loss": 0.0482, "step": 48225 }, { "epoch": 2.25, - "learning_rate": 1.5571703155032583e-05, - "loss": 0.0471, + "learning_rate": 2.557861232895948e-05, + "loss": 0.1041, "step": 48230 }, { "epoch": 2.25, - "learning_rate": 1.5571234353757443e-05, - "loss": 0.1453, + "learning_rate": 2.557814425912346e-05, + "loss": 0.0626, "step": 48235 }, { "epoch": 2.25, - "learning_rate": 1.5570765552482303e-05, - "loss": 0.0759, + "learning_rate": 2.557767618928744e-05, + "loss": 0.1226, "step": 48240 }, { "epoch": 2.25, - "learning_rate": 1.5570296751207166e-05, - "loss": 0.1313, + "learning_rate": 2.5577208119451424e-05, + "loss": 0.1336, "step": 48245 }, { "epoch": 2.25, - "learning_rate": 1.5569827949932026e-05, - "loss": 0.1588, + "learning_rate": 2.5576740049615403e-05, + "loss": 0.1735, "step": 48250 }, { "epoch": 2.25, - "learning_rate": 1.5569359148656886e-05, - "loss": 0.1738, + "learning_rate": 2.5576271979779383e-05, + "loss": 0.1934, "step": 48255 }, { "epoch": 2.25, - "learning_rate": 1.5568890347381746e-05, - "loss": 0.2611, + "learning_rate": 2.5575803909943363e-05, + "loss": 0.3538, "step": 48260 }, { "epoch": 2.25, - "learning_rate": 1.5568421546106606e-05, - "loss": 0.2127, + "learning_rate": 2.5575335840107346e-05, + "loss": 0.223, "step": 48265 }, { "epoch": 2.25, - "learning_rate": 1.5567952744831466e-05, - "loss": 0.0951, + "learning_rate": 2.5574867770271326e-05, + "loss": 0.0087, "step": 48270 }, { "epoch": 2.25, - "learning_rate": 1.556748394355633e-05, - "loss": 0.0701, + "learning_rate": 2.5574399700435306e-05, + "loss": 0.1056, "step": 48275 }, { "epoch": 2.25, - "learning_rate": 1.556701514228119e-05, - "loss": 0.0602, + "learning_rate": 2.557393163059929e-05, + "loss": 0.0458, "step": 48280 }, { "epoch": 2.25, - "learning_rate": 1.556654634100605e-05, - "loss": 0.1011, + "learning_rate": 2.5573463560763266e-05, + "loss": 0.0967, "step": 48285 }, { "epoch": 2.25, - "learning_rate": 1.556607753973091e-05, - "loss": 0.075, + "learning_rate": 2.5572995490927245e-05, + "loss": 0.0967, "step": 48290 }, { "epoch": 2.25, - "learning_rate": 1.5565608738455772e-05, - "loss": 0.0798, + "learning_rate": 2.5572527421091225e-05, + "loss": 0.071, "step": 48295 }, { "epoch": 2.25, - "learning_rate": 1.5565139937180632e-05, - "loss": 0.1493, + "learning_rate": 2.557205935125521e-05, + "loss": 0.1769, "step": 48300 }, { "epoch": 2.25, - "learning_rate": 1.5564671135905492e-05, - "loss": 0.2064, + "learning_rate": 2.5571591281419188e-05, + "loss": 0.1909, "step": 48305 }, { "epoch": 2.25, - "learning_rate": 1.5564202334630352e-05, - "loss": 0.2354, + "learning_rate": 2.5571123211583168e-05, + "loss": 0.1933, "step": 48310 }, { "epoch": 2.25, - "learning_rate": 1.5563733533355212e-05, - "loss": 0.3015, + "learning_rate": 2.5570655141747148e-05, + "loss": 0.2301, "step": 48315 }, { "epoch": 2.25, - "learning_rate": 1.5563264732080072e-05, - "loss": 0.0693, + "learning_rate": 2.557018707191113e-05, + "loss": 0.0581, "step": 48320 }, { "epoch": 2.25, - "learning_rate": 1.5562795930804932e-05, - "loss": 0.0577, + "learning_rate": 2.556971900207511e-05, + "loss": 0.0651, "step": 48325 }, { "epoch": 2.26, - "learning_rate": 1.556232712952979e-05, - "loss": 0.0701, + "learning_rate": 2.556925093223909e-05, + "loss": 0.066, "step": 48330 }, { "epoch": 2.26, - "learning_rate": 1.556185832825465e-05, - "loss": 0.0724, + "learning_rate": 2.5568782862403074e-05, + "loss": 0.0422, "step": 48335 }, { "epoch": 2.26, - "learning_rate": 1.5561389526979515e-05, - "loss": 0.0785, + "learning_rate": 2.5568314792567054e-05, + "loss": 0.098, "step": 48340 }, { "epoch": 2.26, - "learning_rate": 1.5560920725704375e-05, - "loss": 0.1078, + "learning_rate": 2.5567846722731034e-05, + "loss": 0.1011, "step": 48345 }, { "epoch": 2.26, - "learning_rate": 1.5560451924429235e-05, - "loss": 0.1763, + "learning_rate": 2.556737865289501e-05, + "loss": 0.1688, "step": 48350 }, { "epoch": 2.26, - "learning_rate": 1.5559983123154098e-05, - "loss": 0.182, + "learning_rate": 2.5566910583058993e-05, + "loss": 0.1351, "step": 48355 }, { "epoch": 2.26, - "learning_rate": 1.5559514321878958e-05, - "loss": 0.2734, + "learning_rate": 2.5566442513222973e-05, + "loss": 0.1699, "step": 48360 }, { "epoch": 2.26, - "learning_rate": 1.5559045520603818e-05, - "loss": 0.4282, + "learning_rate": 2.5565974443386953e-05, + "loss": 0.3377, "step": 48365 }, { "epoch": 2.26, - "learning_rate": 1.5558576719328678e-05, - "loss": 0.0159, + "learning_rate": 2.5565506373550933e-05, + "loss": 0.0536, "step": 48370 }, { "epoch": 2.26, - "learning_rate": 1.5558107918053538e-05, - "loss": 0.0192, + "learning_rate": 2.5565038303714916e-05, + "loss": 0.0161, "step": 48375 }, { "epoch": 2.26, - "learning_rate": 1.5557639116778398e-05, - "loss": 0.0352, + "learning_rate": 2.5564570233878896e-05, + "loss": 0.0467, "step": 48380 }, { "epoch": 2.26, - "learning_rate": 1.555717031550326e-05, - "loss": 0.0958, + "learning_rate": 2.5564102164042876e-05, + "loss": 0.0788, "step": 48385 }, { "epoch": 2.26, - "learning_rate": 1.555670151422812e-05, - "loss": 0.0457, + "learning_rate": 2.5563634094206855e-05, + "loss": 0.1354, "step": 48390 }, { "epoch": 2.26, - "learning_rate": 1.555623271295298e-05, - "loss": 0.0711, + "learning_rate": 2.556316602437084e-05, + "loss": 0.088, "step": 48395 }, { "epoch": 2.26, - "learning_rate": 1.555576391167784e-05, - "loss": 0.0788, + "learning_rate": 2.556269795453482e-05, + "loss": 0.1352, "step": 48400 }, { "epoch": 2.26, - "learning_rate": 1.55552951104027e-05, - "loss": 0.1404, + "learning_rate": 2.5562229884698798e-05, + "loss": 0.1948, "step": 48405 }, { "epoch": 2.26, - "learning_rate": 1.555482630912756e-05, - "loss": 0.1612, + "learning_rate": 2.5561761814862778e-05, + "loss": 0.2968, "step": 48410 }, { "epoch": 2.26, - "learning_rate": 1.555435750785242e-05, - "loss": 0.1604, + "learning_rate": 2.5561293745026758e-05, + "loss": 0.2943, "step": 48415 }, { "epoch": 2.26, - "learning_rate": 1.5553888706577284e-05, - "loss": 0.0949, + "learning_rate": 2.5560825675190738e-05, + "loss": 0.0413, "step": 48420 }, { "epoch": 2.26, - "learning_rate": 1.5553419905302144e-05, - "loss": 0.0609, + "learning_rate": 2.5560357605354717e-05, + "loss": 0.0396, "step": 48425 }, { "epoch": 2.26, - "learning_rate": 1.5552951104027004e-05, - "loss": 0.0533, + "learning_rate": 2.55598895355187e-05, + "loss": 0.0293, "step": 48430 }, { "epoch": 2.26, - "learning_rate": 1.5552482302751867e-05, - "loss": 0.0413, + "learning_rate": 2.555942146568268e-05, + "loss": 0.0459, "step": 48435 }, { "epoch": 2.26, - "learning_rate": 1.5552013501476727e-05, - "loss": 0.0969, + "learning_rate": 2.555895339584666e-05, + "loss": 0.1337, "step": 48440 }, { "epoch": 2.26, - "learning_rate": 1.5551544700201587e-05, - "loss": 0.082, + "learning_rate": 2.555848532601064e-05, + "loss": 0.0624, "step": 48445 }, { "epoch": 2.26, - "learning_rate": 1.5551075898926447e-05, - "loss": 0.125, + "learning_rate": 2.5558017256174623e-05, + "loss": 0.1602, "step": 48450 }, { "epoch": 2.26, - "learning_rate": 1.5550607097651307e-05, - "loss": 0.1438, + "learning_rate": 2.5557549186338603e-05, + "loss": 0.1678, "step": 48455 }, { "epoch": 2.26, - "learning_rate": 1.5550138296376167e-05, - "loss": 0.2595, + "learning_rate": 2.5557081116502583e-05, + "loss": 0.3125, "step": 48460 }, { "epoch": 2.26, - "learning_rate": 1.5549669495101027e-05, - "loss": 0.3857, + "learning_rate": 2.5556613046666566e-05, + "loss": 0.1925, "step": 48465 }, { "epoch": 2.26, - "learning_rate": 1.5549200693825887e-05, - "loss": 0.0179, + "learning_rate": 2.5556144976830546e-05, + "loss": 0.0865, "step": 48470 }, { "epoch": 2.26, - "learning_rate": 1.5548731892550747e-05, + "learning_rate": 2.5555676906994522e-05, "loss": 0.0542, "step": 48475 }, { "epoch": 2.26, - "learning_rate": 1.554826309127561e-05, - "loss": 0.0466, + "learning_rate": 2.5555208837158502e-05, + "loss": 0.0358, "step": 48480 }, { "epoch": 2.26, - "learning_rate": 1.554779429000047e-05, - "loss": 0.0854, + "learning_rate": 2.5554740767322485e-05, + "loss": 0.0612, "step": 48485 }, { "epoch": 2.26, - "learning_rate": 1.554732548872533e-05, - "loss": 0.1496, + "learning_rate": 2.5554272697486465e-05, + "loss": 0.0953, "step": 48490 }, { "epoch": 2.26, - "learning_rate": 1.554685668745019e-05, - "loss": 0.106, + "learning_rate": 2.5553804627650445e-05, + "loss": 0.071, "step": 48495 }, { "epoch": 2.26, - "learning_rate": 1.5546387886175053e-05, - "loss": 0.2021, + "learning_rate": 2.5553336557814425e-05, + "loss": 0.1023, "step": 48500 }, { "epoch": 2.26, - "learning_rate": 1.5545919084899913e-05, - "loss": 0.1652, + "learning_rate": 2.5552868487978408e-05, + "loss": 0.1225, "step": 48505 }, { "epoch": 2.26, - "learning_rate": 1.5545450283624773e-05, - "loss": 0.2523, + "learning_rate": 2.5552400418142388e-05, + "loss": 0.185, "step": 48510 }, { "epoch": 2.26, - "learning_rate": 1.5544981482349633e-05, - "loss": 0.3244, + "learning_rate": 2.5551932348306368e-05, + "loss": 0.3224, "step": 48515 }, { "epoch": 2.26, - "learning_rate": 1.5544512681074493e-05, - "loss": 0.1141, + "learning_rate": 2.555146427847035e-05, + "loss": 0.0792, "step": 48520 }, { "epoch": 2.26, - "learning_rate": 1.5544043879799356e-05, - "loss": 0.0334, + "learning_rate": 2.555099620863433e-05, + "loss": 0.0224, "step": 48525 }, { "epoch": 2.26, - "learning_rate": 1.5543575078524216e-05, - "loss": 0.0672, + "learning_rate": 2.555052813879831e-05, + "loss": 0.0744, "step": 48530 }, { "epoch": 2.26, - "learning_rate": 1.5543106277249076e-05, - "loss": 0.0809, + "learning_rate": 2.555006006896229e-05, + "loss": 0.0628, "step": 48535 }, { "epoch": 2.26, - "learning_rate": 1.5542637475973936e-05, - "loss": 0.1522, + "learning_rate": 2.554959199912627e-05, + "loss": 0.079, "step": 48540 }, { "epoch": 2.27, - "learning_rate": 1.5542168674698796e-05, - "loss": 0.0487, + "learning_rate": 2.554912392929025e-05, + "loss": 0.1051, "step": 48545 }, { "epoch": 2.27, - "learning_rate": 1.5541699873423656e-05, - "loss": 0.1164, + "learning_rate": 2.554865585945423e-05, + "loss": 0.1271, "step": 48550 }, { "epoch": 2.27, - "learning_rate": 1.5541231072148516e-05, - "loss": 0.2902, + "learning_rate": 2.554818778961821e-05, + "loss": 0.1691, "step": 48555 }, { "epoch": 2.27, - "learning_rate": 1.5540762270873376e-05, - "loss": 0.2218, + "learning_rate": 2.5547719719782193e-05, + "loss": 0.2027, "step": 48560 }, { "epoch": 2.27, - "learning_rate": 1.554029346959824e-05, - "loss": 0.2843, + "learning_rate": 2.5547251649946173e-05, + "loss": 0.2881, "step": 48565 }, { "epoch": 2.27, - "learning_rate": 1.55398246683231e-05, - "loss": 0.0109, + "learning_rate": 2.5546783580110152e-05, + "loss": 0.0433, "step": 48570 }, { "epoch": 2.27, - "learning_rate": 1.553935586704796e-05, - "loss": 0.0523, + "learning_rate": 2.5546315510274132e-05, + "loss": 0.0822, "step": 48575 }, { "epoch": 2.27, - "learning_rate": 1.5538887065772822e-05, - "loss": 0.0514, + "learning_rate": 2.5545847440438116e-05, + "loss": 0.039, "step": 48580 }, { "epoch": 2.27, - "learning_rate": 1.5538418264497682e-05, - "loss": 0.0571, + "learning_rate": 2.5545379370602095e-05, + "loss": 0.0476, "step": 48585 }, { "epoch": 2.27, - "learning_rate": 1.5537949463222542e-05, - "loss": 0.1356, + "learning_rate": 2.5544911300766075e-05, + "loss": 0.1121, "step": 48590 }, { "epoch": 2.27, - "learning_rate": 1.5537480661947402e-05, - "loss": 0.129, + "learning_rate": 2.554444323093006e-05, + "loss": 0.0562, "step": 48595 }, { "epoch": 2.27, - "learning_rate": 1.5537011860672262e-05, - "loss": 0.1741, + "learning_rate": 2.5543975161094035e-05, + "loss": 0.1988, "step": 48600 }, { "epoch": 2.27, - "learning_rate": 1.5536543059397122e-05, - "loss": 0.1214, + "learning_rate": 2.5543507091258015e-05, + "loss": 0.1998, "step": 48605 }, { "epoch": 2.27, - "learning_rate": 1.5536074258121982e-05, - "loss": 0.3035, + "learning_rate": 2.5543039021421994e-05, + "loss": 0.1557, "step": 48610 }, { "epoch": 2.27, - "learning_rate": 1.5535605456846842e-05, - "loss": 0.4002, + "learning_rate": 2.5542570951585978e-05, + "loss": 0.2661, "step": 48615 }, { "epoch": 2.27, - "learning_rate": 1.5535136655571705e-05, - "loss": 0.0531, + "learning_rate": 2.5542102881749957e-05, + "loss": 0.0776, "step": 48620 }, { "epoch": 2.27, - "learning_rate": 1.5534667854296565e-05, - "loss": 0.0398, + "learning_rate": 2.5541634811913937e-05, + "loss": 0.0704, "step": 48625 }, { "epoch": 2.27, - "learning_rate": 1.5534199053021425e-05, - "loss": 0.0443, + "learning_rate": 2.5541166742077917e-05, + "loss": 0.0453, "step": 48630 }, { "epoch": 2.27, - "learning_rate": 1.5533730251746285e-05, - "loss": 0.0927, + "learning_rate": 2.55406986722419e-05, + "loss": 0.0816, "step": 48635 }, { "epoch": 2.27, - "learning_rate": 1.5533261450471145e-05, - "loss": 0.177, + "learning_rate": 2.554023060240588e-05, + "loss": 0.0485, "step": 48640 }, { "epoch": 2.27, - "learning_rate": 1.5532792649196008e-05, - "loss": 0.0691, + "learning_rate": 2.553976253256986e-05, + "loss": 0.14, "step": 48645 }, { "epoch": 2.27, - "learning_rate": 1.5532323847920868e-05, - "loss": 0.1293, + "learning_rate": 2.5539294462733843e-05, + "loss": 0.1704, "step": 48650 }, { "epoch": 2.27, - "learning_rate": 1.5531855046645728e-05, - "loss": 0.1106, + "learning_rate": 2.5538826392897823e-05, + "loss": 0.0825, "step": 48655 }, { "epoch": 2.27, - "learning_rate": 1.5531386245370588e-05, - "loss": 0.2808, + "learning_rate": 2.5538358323061803e-05, + "loss": 0.2581, "step": 48660 }, { "epoch": 2.27, - "learning_rate": 1.553091744409545e-05, - "loss": 0.1944, + "learning_rate": 2.553789025322578e-05, + "loss": 0.2511, "step": 48665 }, { "epoch": 2.27, - "learning_rate": 1.553044864282031e-05, - "loss": 0.1225, + "learning_rate": 2.5537422183389762e-05, + "loss": 0.0491, "step": 48670 }, { "epoch": 2.27, - "learning_rate": 1.552997984154517e-05, - "loss": 0.0264, + "learning_rate": 2.5536954113553742e-05, + "loss": 0.0602, "step": 48675 }, { "epoch": 2.27, - "learning_rate": 1.552951104027003e-05, - "loss": 0.0261, + "learning_rate": 2.5536486043717722e-05, + "loss": 0.0596, "step": 48680 }, { "epoch": 2.27, - "learning_rate": 1.552904223899489e-05, - "loss": 0.036, + "learning_rate": 2.5536017973881702e-05, + "loss": 0.0686, "step": 48685 }, { "epoch": 2.27, - "learning_rate": 1.552857343771975e-05, - "loss": 0.1202, + "learning_rate": 2.5535549904045685e-05, + "loss": 0.0731, "step": 48690 }, { "epoch": 2.27, - "learning_rate": 1.552810463644461e-05, - "loss": 0.1904, + "learning_rate": 2.5535081834209665e-05, + "loss": 0.1049, "step": 48695 }, { "epoch": 2.27, - "learning_rate": 1.552763583516947e-05, - "loss": 0.1372, + "learning_rate": 2.5534613764373645e-05, + "loss": 0.1095, "step": 48700 }, { "epoch": 2.27, - "learning_rate": 1.5527167033894334e-05, - "loss": 0.1831, + "learning_rate": 2.5534145694537628e-05, + "loss": 0.1277, "step": 48705 }, { "epoch": 2.27, - "learning_rate": 1.5526698232619194e-05, - "loss": 0.4118, + "learning_rate": 2.5533677624701608e-05, + "loss": 0.3545, "step": 48710 }, { "epoch": 2.27, - "learning_rate": 1.5526229431344054e-05, - "loss": 0.2792, + "learning_rate": 2.5533209554865588e-05, + "loss": 0.3105, "step": 48715 }, { "epoch": 2.27, - "learning_rate": 1.5525760630068917e-05, - "loss": 0.0797, + "learning_rate": 2.5532741485029567e-05, + "loss": 0.0461, "step": 48720 }, { "epoch": 2.27, - "learning_rate": 1.5525291828793777e-05, - "loss": 0.0872, + "learning_rate": 2.553227341519355e-05, + "loss": 0.0943, "step": 48725 }, { "epoch": 2.27, - "learning_rate": 1.5524823027518637e-05, - "loss": 0.0749, + "learning_rate": 2.5531805345357527e-05, + "loss": 0.0464, "step": 48730 }, { "epoch": 2.27, - "learning_rate": 1.5524354226243497e-05, - "loss": 0.0277, + "learning_rate": 2.5531337275521507e-05, + "loss": 0.0793, "step": 48735 }, { "epoch": 2.27, - "learning_rate": 1.5523885424968357e-05, - "loss": 0.1316, + "learning_rate": 2.5530869205685487e-05, + "loss": 0.1217, "step": 48740 }, { "epoch": 2.27, - "learning_rate": 1.5523416623693217e-05, - "loss": 0.0827, + "learning_rate": 2.553040113584947e-05, + "loss": 0.1033, "step": 48745 }, { "epoch": 2.27, - "learning_rate": 1.5522947822418077e-05, - "loss": 0.0588, + "learning_rate": 2.552993306601345e-05, + "loss": 0.1486, "step": 48750 }, { "epoch": 2.27, - "learning_rate": 1.5522479021142937e-05, - "loss": 0.1939, + "learning_rate": 2.552946499617743e-05, + "loss": 0.185, "step": 48755 }, { "epoch": 2.28, - "learning_rate": 1.55220102198678e-05, - "loss": 0.1868, + "learning_rate": 2.552899692634141e-05, + "loss": 0.2401, "step": 48760 }, { "epoch": 2.28, - "learning_rate": 1.552154141859266e-05, - "loss": 0.2015, + "learning_rate": 2.5528528856505392e-05, + "loss": 0.2602, "step": 48765 }, { "epoch": 2.28, - "learning_rate": 1.552107261731752e-05, - "loss": 0.1162, + "learning_rate": 2.5528060786669372e-05, + "loss": 0.0412, "step": 48770 }, { "epoch": 2.28, - "learning_rate": 1.552060381604238e-05, - "loss": 0.098, + "learning_rate": 2.5527592716833352e-05, + "loss": 0.0727, "step": 48775 }, { "epoch": 2.28, - "learning_rate": 1.552013501476724e-05, - "loss": 0.0625, + "learning_rate": 2.5527124646997335e-05, + "loss": 0.061, "step": 48780 }, { "epoch": 2.28, - "learning_rate": 1.5519666213492103e-05, - "loss": 0.0749, + "learning_rate": 2.5526656577161315e-05, + "loss": 0.0596, "step": 48785 }, { "epoch": 2.28, - "learning_rate": 1.5519197412216963e-05, - "loss": 0.0543, + "learning_rate": 2.552618850732529e-05, + "loss": 0.0897, "step": 48790 }, { "epoch": 2.28, - "learning_rate": 1.5518728610941823e-05, - "loss": 0.1783, + "learning_rate": 2.552572043748927e-05, + "loss": 0.1228, "step": 48795 }, { "epoch": 2.28, - "learning_rate": 1.5518259809666683e-05, - "loss": 0.1378, + "learning_rate": 2.5525252367653255e-05, + "loss": 0.1323, "step": 48800 }, { "epoch": 2.28, - "learning_rate": 1.5517791008391546e-05, - "loss": 0.1705, + "learning_rate": 2.5524784297817234e-05, + "loss": 0.1526, "step": 48805 }, { "epoch": 2.28, - "learning_rate": 1.5517322207116406e-05, - "loss": 0.3341, + "learning_rate": 2.5524316227981214e-05, + "loss": 0.1709, "step": 48810 }, { "epoch": 2.28, - "learning_rate": 1.5516853405841266e-05, - "loss": 0.3355, + "learning_rate": 2.5523848158145194e-05, + "loss": 0.2357, "step": 48815 }, { "epoch": 2.28, - "learning_rate": 1.5516384604566126e-05, - "loss": 0.0187, + "learning_rate": 2.5523380088309177e-05, + "loss": 0.0479, "step": 48820 }, { "epoch": 2.28, - "learning_rate": 1.5515915803290986e-05, - "loss": 0.0375, + "learning_rate": 2.5522912018473157e-05, + "loss": 0.1354, "step": 48825 }, { "epoch": 2.28, - "learning_rate": 1.5515447002015846e-05, - "loss": 0.0405, + "learning_rate": 2.5522443948637137e-05, + "loss": 0.0579, "step": 48830 }, { "epoch": 2.28, - "learning_rate": 1.5514978200740706e-05, - "loss": 0.0343, + "learning_rate": 2.552197587880112e-05, + "loss": 0.0505, "step": 48835 }, { "epoch": 2.28, - "learning_rate": 1.5514509399465566e-05, - "loss": 0.0258, + "learning_rate": 2.55215078089651e-05, + "loss": 0.1908, "step": 48840 }, { "epoch": 2.28, - "learning_rate": 1.5514040598190426e-05, - "loss": 0.125, + "learning_rate": 2.552103973912908e-05, + "loss": 0.0595, "step": 48845 }, { "epoch": 2.28, - "learning_rate": 1.551357179691529e-05, - "loss": 0.1763, + "learning_rate": 2.552057166929306e-05, + "loss": 0.2049, "step": 48850 }, { "epoch": 2.28, - "learning_rate": 1.551310299564015e-05, - "loss": 0.1754, + "learning_rate": 2.552010359945704e-05, + "loss": 0.1395, "step": 48855 }, { "epoch": 2.28, - "learning_rate": 1.551263419436501e-05, - "loss": 0.2588, + "learning_rate": 2.551963552962102e-05, + "loss": 0.162, "step": 48860 }, { "epoch": 2.28, - "learning_rate": 1.5512165393089872e-05, - "loss": 0.3456, + "learning_rate": 2.5519167459785e-05, + "loss": 0.1792, "step": 48865 }, { "epoch": 2.28, - "learning_rate": 1.5511696591814732e-05, - "loss": 0.04, + "learning_rate": 2.551869938994898e-05, + "loss": 0.0393, "step": 48870 }, { "epoch": 2.28, - "learning_rate": 1.5511227790539592e-05, - "loss": 0.0311, + "learning_rate": 2.5518231320112962e-05, + "loss": 0.0813, "step": 48875 }, { "epoch": 2.28, - "learning_rate": 1.5510758989264452e-05, - "loss": 0.0711, + "learning_rate": 2.5517763250276942e-05, + "loss": 0.019, "step": 48880 }, { "epoch": 2.28, - "learning_rate": 1.5510290187989312e-05, - "loss": 0.0476, + "learning_rate": 2.551729518044092e-05, + "loss": 0.0596, "step": 48885 }, { "epoch": 2.28, - "learning_rate": 1.5509821386714172e-05, - "loss": 0.0521, + "learning_rate": 2.5516827110604905e-05, + "loss": 0.1122, "step": 48890 }, { "epoch": 2.28, - "learning_rate": 1.5509352585439035e-05, - "loss": 0.1187, + "learning_rate": 2.5516359040768885e-05, + "loss": 0.084, "step": 48895 }, { "epoch": 2.28, - "learning_rate": 1.5508883784163895e-05, - "loss": 0.2008, + "learning_rate": 2.5515890970932864e-05, + "loss": 0.1043, "step": 48900 }, { "epoch": 2.28, - "learning_rate": 1.5508414982888755e-05, - "loss": 0.1893, + "learning_rate": 2.5515422901096844e-05, + "loss": 0.1767, "step": 48905 }, { "epoch": 2.28, - "learning_rate": 1.5507946181613615e-05, - "loss": 0.2853, + "learning_rate": 2.5514954831260828e-05, + "loss": 0.265, "step": 48910 }, { "epoch": 2.28, - "learning_rate": 1.5507477380338475e-05, - "loss": 0.364, + "learning_rate": 2.5514486761424804e-05, + "loss": 0.2737, "step": 48915 }, { "epoch": 2.28, - "learning_rate": 1.5507008579063335e-05, - "loss": 0.0612, + "learning_rate": 2.5514018691588784e-05, + "loss": 0.0207, "step": 48920 }, { "epoch": 2.28, - "learning_rate": 1.5506539777788195e-05, - "loss": 0.0777, + "learning_rate": 2.5513550621752764e-05, + "loss": 0.0435, "step": 48925 }, { "epoch": 2.28, - "learning_rate": 1.5506070976513058e-05, - "loss": 0.0366, + "learning_rate": 2.5513082551916747e-05, + "loss": 0.0996, "step": 48930 }, { "epoch": 2.28, - "learning_rate": 1.5505602175237918e-05, - "loss": 0.0794, + "learning_rate": 2.5512614482080727e-05, + "loss": 0.0856, "step": 48935 }, { "epoch": 2.28, - "learning_rate": 1.5505133373962778e-05, - "loss": 0.0836, + "learning_rate": 2.5512146412244706e-05, + "loss": 0.085, "step": 48940 }, { "epoch": 2.28, - "learning_rate": 1.550466457268764e-05, - "loss": 0.0946, + "learning_rate": 2.551167834240869e-05, + "loss": 0.0692, "step": 48945 }, { "epoch": 2.28, - "learning_rate": 1.55041957714125e-05, - "loss": 0.1121, + "learning_rate": 2.551121027257267e-05, + "loss": 0.1815, "step": 48950 }, { "epoch": 2.28, - "learning_rate": 1.550372697013736e-05, - "loss": 0.1959, + "learning_rate": 2.551074220273665e-05, + "loss": 0.2231, "step": 48955 }, { "epoch": 2.28, - "learning_rate": 1.550325816886222e-05, - "loss": 0.359, + "learning_rate": 2.551027413290063e-05, + "loss": 0.1885, "step": 48960 }, { "epoch": 2.28, - "learning_rate": 1.550278936758708e-05, - "loss": 0.2793, + "learning_rate": 2.5509806063064612e-05, + "loss": 0.2617, "step": 48965 }, { "epoch": 2.29, - "learning_rate": 1.550232056631194e-05, - "loss": 0.0281, + "learning_rate": 2.5509337993228592e-05, + "loss": 0.0374, "step": 48970 }, { "epoch": 2.29, - "learning_rate": 1.55018517650368e-05, - "loss": 0.0542, + "learning_rate": 2.5508869923392572e-05, + "loss": 0.0236, "step": 48975 }, { "epoch": 2.29, - "learning_rate": 1.550138296376166e-05, - "loss": 0.0876, + "learning_rate": 2.550840185355655e-05, + "loss": 0.0527, "step": 48980 }, { "epoch": 2.29, - "learning_rate": 1.550091416248652e-05, - "loss": 0.086, + "learning_rate": 2.550793378372053e-05, + "loss": 0.0911, "step": 48985 }, { "epoch": 2.29, - "learning_rate": 1.5500445361211384e-05, - "loss": 0.108, + "learning_rate": 2.550746571388451e-05, + "loss": 0.1677, "step": 48990 }, { "epoch": 2.29, - "learning_rate": 1.5499976559936244e-05, - "loss": 0.1401, + "learning_rate": 2.550699764404849e-05, + "loss": 0.0754, "step": 48995 }, { "epoch": 2.29, - "learning_rate": 1.5499507758661104e-05, - "loss": 0.191, + "learning_rate": 2.550652957421247e-05, + "loss": 0.1093, "step": 49000 }, { "epoch": 2.29, - "learning_rate": 1.5499038957385964e-05, - "loss": 0.1504, + "learning_rate": 2.5506061504376454e-05, + "loss": 0.1782, "step": 49005 }, { "epoch": 2.29, - "learning_rate": 1.5498570156110827e-05, - "loss": 0.2, + "learning_rate": 2.5505593434540434e-05, + "loss": 0.3286, "step": 49010 }, { "epoch": 2.29, - "learning_rate": 1.5498101354835687e-05, - "loss": 0.3442, + "learning_rate": 2.5505125364704414e-05, + "loss": 0.3655, "step": 49015 }, { "epoch": 2.29, - "learning_rate": 1.5497632553560547e-05, - "loss": 0.072, + "learning_rate": 2.5504657294868397e-05, + "loss": 0.0308, "step": 49020 }, { "epoch": 2.29, - "learning_rate": 1.5497163752285407e-05, - "loss": 0.0782, + "learning_rate": 2.5504189225032377e-05, + "loss": 0.0628, "step": 49025 }, { "epoch": 2.29, - "learning_rate": 1.5496694951010267e-05, - "loss": 0.0862, + "learning_rate": 2.5503721155196357e-05, + "loss": 0.0821, "step": 49030 }, { "epoch": 2.29, - "learning_rate": 1.549622614973513e-05, - "loss": 0.056, + "learning_rate": 2.5503253085360337e-05, + "loss": 0.1082, "step": 49035 }, { "epoch": 2.29, - "learning_rate": 1.549575734845999e-05, - "loss": 0.0957, + "learning_rate": 2.550278501552432e-05, + "loss": 0.0796, "step": 49040 }, { "epoch": 2.29, - "learning_rate": 1.549528854718485e-05, - "loss": 0.1298, + "learning_rate": 2.5502316945688296e-05, + "loss": 0.0901, "step": 49045 }, { "epoch": 2.29, - "learning_rate": 1.549481974590971e-05, - "loss": 0.0739, + "learning_rate": 2.5501848875852276e-05, + "loss": 0.1315, "step": 49050 }, { "epoch": 2.29, - "learning_rate": 1.549435094463457e-05, - "loss": 0.1974, + "learning_rate": 2.5501380806016256e-05, + "loss": 0.174, "step": 49055 }, { "epoch": 2.29, - "learning_rate": 1.549388214335943e-05, - "loss": 0.2498, + "learning_rate": 2.550091273618024e-05, + "loss": 0.2425, "step": 49060 }, { "epoch": 2.29, - "learning_rate": 1.549341334208429e-05, - "loss": 0.3423, + "learning_rate": 2.550044466634422e-05, + "loss": 0.3346, "step": 49065 }, { "epoch": 2.29, - "learning_rate": 1.5492944540809153e-05, - "loss": 0.0346, + "learning_rate": 2.54999765965082e-05, + "loss": 0.0782, "step": 49070 }, { "epoch": 2.29, - "learning_rate": 1.5492475739534013e-05, - "loss": 0.0509, + "learning_rate": 2.5499508526672182e-05, + "loss": 0.0316, "step": 49075 }, { "epoch": 2.29, - "learning_rate": 1.5492006938258873e-05, - "loss": 0.0455, + "learning_rate": 2.549904045683616e-05, + "loss": 0.1175, "step": 49080 }, { "epoch": 2.29, - "learning_rate": 1.5491538136983733e-05, - "loss": 0.0597, + "learning_rate": 2.549857238700014e-05, + "loss": 0.0944, "step": 49085 }, { "epoch": 2.29, - "learning_rate": 1.5491069335708596e-05, - "loss": 0.0448, + "learning_rate": 2.549810431716412e-05, + "loss": 0.0893, "step": 49090 }, { "epoch": 2.29, - "learning_rate": 1.5490600534433456e-05, - "loss": 0.0904, + "learning_rate": 2.5497636247328104e-05, + "loss": 0.0829, "step": 49095 }, { "epoch": 2.29, - "learning_rate": 1.5490131733158316e-05, - "loss": 0.1396, + "learning_rate": 2.5497168177492084e-05, + "loss": 0.1089, "step": 49100 }, { "epoch": 2.29, - "learning_rate": 1.5489662931883176e-05, - "loss": 0.1287, + "learning_rate": 2.549670010765606e-05, + "loss": 0.2078, "step": 49105 }, { "epoch": 2.29, - "learning_rate": 1.5489194130608036e-05, - "loss": 0.2406, + "learning_rate": 2.549623203782004e-05, + "loss": 0.3748, "step": 49110 }, { "epoch": 2.29, - "learning_rate": 1.5488725329332896e-05, - "loss": 0.2453, + "learning_rate": 2.5495763967984024e-05, + "loss": 0.2284, "step": 49115 }, { "epoch": 2.29, - "learning_rate": 1.5488256528057756e-05, - "loss": 0.0674, + "learning_rate": 2.5495295898148004e-05, + "loss": 0.0441, "step": 49120 }, { "epoch": 2.29, - "learning_rate": 1.5487787726782616e-05, - "loss": 0.0443, + "learning_rate": 2.5494827828311983e-05, + "loss": 0.058, "step": 49125 }, { "epoch": 2.29, - "learning_rate": 1.548731892550748e-05, - "loss": 0.1009, + "learning_rate": 2.5494359758475967e-05, + "loss": 0.0295, "step": 49130 }, { "epoch": 2.29, - "learning_rate": 1.548685012423234e-05, - "loss": 0.0738, + "learning_rate": 2.5493891688639946e-05, + "loss": 0.0434, "step": 49135 }, { "epoch": 2.29, - "learning_rate": 1.54863813229572e-05, - "loss": 0.1236, + "learning_rate": 2.5493423618803926e-05, + "loss": 0.1006, "step": 49140 }, { "epoch": 2.29, - "learning_rate": 1.548591252168206e-05, - "loss": 0.1246, + "learning_rate": 2.5492955548967906e-05, + "loss": 0.1513, "step": 49145 }, { "epoch": 2.29, - "learning_rate": 1.5485443720406922e-05, - "loss": 0.117, + "learning_rate": 2.549248747913189e-05, + "loss": 0.0728, "step": 49150 }, { "epoch": 2.29, - "learning_rate": 1.5484974919131782e-05, - "loss": 0.1785, + "learning_rate": 2.549201940929587e-05, + "loss": 0.1477, "step": 49155 }, { "epoch": 2.29, - "learning_rate": 1.5484506117856642e-05, - "loss": 0.2126, + "learning_rate": 2.549155133945985e-05, + "loss": 0.2181, "step": 49160 }, { "epoch": 2.29, - "learning_rate": 1.5484037316581502e-05, - "loss": 0.3216, + "learning_rate": 2.549108326962383e-05, + "loss": 0.2554, "step": 49165 }, { "epoch": 2.29, - "learning_rate": 1.5483568515306362e-05, - "loss": 0.0795, + "learning_rate": 2.549061519978781e-05, + "loss": 0.0275, "step": 49170 }, { "epoch": 2.29, - "learning_rate": 1.5483099714031226e-05, - "loss": 0.021, + "learning_rate": 2.549014712995179e-05, + "loss": 0.0503, "step": 49175 }, { "epoch": 2.29, - "learning_rate": 1.5482630912756085e-05, - "loss": 0.045, + "learning_rate": 2.5489679060115768e-05, + "loss": 0.074, "step": 49180 }, { "epoch": 2.3, - "learning_rate": 1.5482162111480945e-05, - "loss": 0.0943, + "learning_rate": 2.5489210990279748e-05, + "loss": 0.0854, "step": 49185 }, { "epoch": 2.3, - "learning_rate": 1.5481693310205805e-05, - "loss": 0.116, + "learning_rate": 2.548874292044373e-05, + "loss": 0.1153, "step": 49190 }, { "epoch": 2.3, - "learning_rate": 1.5481224508930665e-05, - "loss": 0.0777, + "learning_rate": 2.548827485060771e-05, + "loss": 0.1457, "step": 49195 }, { "epoch": 2.3, - "learning_rate": 1.5480755707655525e-05, - "loss": 0.1557, + "learning_rate": 2.548780678077169e-05, + "loss": 0.1199, "step": 49200 }, { "epoch": 2.3, - "learning_rate": 1.5480286906380385e-05, - "loss": 0.1894, + "learning_rate": 2.5487338710935674e-05, + "loss": 0.1506, "step": 49205 }, { "epoch": 2.3, - "learning_rate": 1.5479818105105245e-05, - "loss": 0.317, + "learning_rate": 2.5486870641099654e-05, + "loss": 0.2608, "step": 49210 }, { "epoch": 2.3, - "learning_rate": 1.547934930383011e-05, - "loss": 0.3218, + "learning_rate": 2.5486402571263634e-05, + "loss": 0.2974, "step": 49215 }, { "epoch": 2.3, - "learning_rate": 1.5478880502554968e-05, - "loss": 0.0294, + "learning_rate": 2.5485934501427613e-05, + "loss": 0.04, "step": 49220 }, { "epoch": 2.3, - "learning_rate": 1.5478411701279828e-05, - "loss": 0.0354, + "learning_rate": 2.5485466431591597e-05, + "loss": 0.0455, "step": 49225 }, { "epoch": 2.3, - "learning_rate": 1.547794290000469e-05, - "loss": 0.0338, + "learning_rate": 2.5484998361755577e-05, + "loss": 0.0973, "step": 49230 }, { "epoch": 2.3, - "learning_rate": 1.547747409872955e-05, - "loss": 0.1213, + "learning_rate": 2.5484530291919553e-05, + "loss": 0.0707, "step": 49235 }, { "epoch": 2.3, - "learning_rate": 1.547700529745441e-05, - "loss": 0.0969, + "learning_rate": 2.5484062222083533e-05, + "loss": 0.0664, "step": 49240 }, { "epoch": 2.3, - "learning_rate": 1.547653649617927e-05, - "loss": 0.0449, + "learning_rate": 2.5483594152247516e-05, + "loss": 0.1586, "step": 49245 }, { "epoch": 2.3, - "learning_rate": 1.547606769490413e-05, - "loss": 0.0821, + "learning_rate": 2.5483126082411496e-05, + "loss": 0.1245, "step": 49250 }, { "epoch": 2.3, - "learning_rate": 1.547559889362899e-05, - "loss": 0.2227, + "learning_rate": 2.5482658012575476e-05, + "loss": 0.1546, "step": 49255 }, { "epoch": 2.3, - "learning_rate": 1.547513009235385e-05, - "loss": 0.2439, + "learning_rate": 2.548218994273946e-05, + "loss": 0.2112, "step": 49260 }, { "epoch": 2.3, - "learning_rate": 1.547466129107871e-05, - "loss": 0.1495, + "learning_rate": 2.548172187290344e-05, + "loss": 0.2794, "step": 49265 }, { "epoch": 2.3, - "learning_rate": 1.5474192489803574e-05, - "loss": 0.034, + "learning_rate": 2.548125380306742e-05, + "loss": 0.0569, "step": 49270 }, { "epoch": 2.3, - "learning_rate": 1.5473723688528434e-05, - "loss": 0.0257, + "learning_rate": 2.5480785733231398e-05, + "loss": 0.0636, "step": 49275 }, { "epoch": 2.3, - "learning_rate": 1.5473254887253294e-05, - "loss": 0.1189, + "learning_rate": 2.548031766339538e-05, + "loss": 0.0337, "step": 49280 }, { "epoch": 2.3, - "learning_rate": 1.5472786085978154e-05, - "loss": 0.0593, + "learning_rate": 2.547984959355936e-05, + "loss": 0.1443, "step": 49285 }, { "epoch": 2.3, - "learning_rate": 1.5472317284703014e-05, - "loss": 0.0709, + "learning_rate": 2.547938152372334e-05, + "loss": 0.0839, "step": 49290 }, { "epoch": 2.3, - "learning_rate": 1.5471848483427877e-05, - "loss": 0.1199, + "learning_rate": 2.5478913453887318e-05, + "loss": 0.0944, "step": 49295 }, { "epoch": 2.3, - "learning_rate": 1.5471379682152737e-05, - "loss": 0.1563, + "learning_rate": 2.54784453840513e-05, + "loss": 0.1429, "step": 49300 }, { "epoch": 2.3, - "learning_rate": 1.5470910880877597e-05, - "loss": 0.1083, + "learning_rate": 2.547797731421528e-05, + "loss": 0.132, "step": 49305 }, { "epoch": 2.3, - "learning_rate": 1.5470442079602457e-05, - "loss": 0.3005, + "learning_rate": 2.547750924437926e-05, + "loss": 0.2637, "step": 49310 }, { "epoch": 2.3, - "learning_rate": 1.546997327832732e-05, - "loss": 0.2143, + "learning_rate": 2.5477041174543244e-05, + "loss": 0.4144, "step": 49315 }, { "epoch": 2.3, - "learning_rate": 1.546950447705218e-05, - "loss": 0.0602, + "learning_rate": 2.5476573104707223e-05, + "loss": 0.076, "step": 49320 }, { "epoch": 2.3, - "learning_rate": 1.546903567577704e-05, - "loss": 0.021, + "learning_rate": 2.5476105034871203e-05, + "loss": 0.0528, "step": 49325 }, { "epoch": 2.3, - "learning_rate": 1.54685668745019e-05, - "loss": 0.1049, + "learning_rate": 2.5475636965035183e-05, + "loss": 0.0759, "step": 49330 }, { "epoch": 2.3, - "learning_rate": 1.546809807322676e-05, - "loss": 0.1159, + "learning_rate": 2.5475168895199166e-05, + "loss": 0.0648, "step": 49335 }, { "epoch": 2.3, - "learning_rate": 1.546762927195162e-05, - "loss": 0.0707, + "learning_rate": 2.5474700825363146e-05, + "loss": 0.0784, "step": 49340 }, { "epoch": 2.3, - "learning_rate": 1.546716047067648e-05, - "loss": 0.1285, + "learning_rate": 2.5474232755527126e-05, + "loss": 0.1186, "step": 49345 }, { "epoch": 2.3, - "learning_rate": 1.546669166940134e-05, - "loss": 0.154, + "learning_rate": 2.5473764685691106e-05, + "loss": 0.0815, "step": 49350 }, { "epoch": 2.3, - "learning_rate": 1.54662228681262e-05, - "loss": 0.1687, + "learning_rate": 2.547329661585509e-05, + "loss": 0.1659, "step": 49355 }, { "epoch": 2.3, - "learning_rate": 1.5465754066851063e-05, - "loss": 0.3047, + "learning_rate": 2.5472828546019065e-05, + "loss": 0.3107, "step": 49360 }, { "epoch": 2.3, - "learning_rate": 1.5465285265575923e-05, - "loss": 0.2414, + "learning_rate": 2.5472360476183045e-05, + "loss": 0.3417, "step": 49365 }, { "epoch": 2.3, - "learning_rate": 1.5464816464300783e-05, - "loss": 0.0811, + "learning_rate": 2.5471892406347025e-05, + "loss": 0.0458, "step": 49370 }, { "epoch": 2.3, - "learning_rate": 1.5464347663025647e-05, - "loss": 0.0398, + "learning_rate": 2.5471424336511008e-05, + "loss": 0.0509, "step": 49375 }, { "epoch": 2.3, - "learning_rate": 1.5463878861750507e-05, - "loss": 0.071, + "learning_rate": 2.5470956266674988e-05, + "loss": 0.0771, "step": 49380 }, { "epoch": 2.3, - "learning_rate": 1.5463410060475366e-05, - "loss": 0.0606, + "learning_rate": 2.5470488196838968e-05, + "loss": 0.1004, "step": 49385 }, { "epoch": 2.3, - "learning_rate": 1.5462941259200226e-05, - "loss": 0.2161, + "learning_rate": 2.547002012700295e-05, + "loss": 0.1161, "step": 49390 }, { "epoch": 2.3, - "learning_rate": 1.5462472457925086e-05, - "loss": 0.1271, + "learning_rate": 2.546955205716693e-05, + "loss": 0.2007, "step": 49395 }, { "epoch": 2.31, - "learning_rate": 1.5462003656649946e-05, - "loss": 0.084, + "learning_rate": 2.546908398733091e-05, + "loss": 0.1251, "step": 49400 }, { "epoch": 2.31, - "learning_rate": 1.5461534855374806e-05, - "loss": 0.2245, + "learning_rate": 2.546861591749489e-05, + "loss": 0.1663, "step": 49405 }, { "epoch": 2.31, - "learning_rate": 1.546106605409967e-05, - "loss": 0.3096, + "learning_rate": 2.5468147847658874e-05, + "loss": 0.2334, "step": 49410 }, { "epoch": 2.31, - "learning_rate": 1.546059725282453e-05, - "loss": 0.2058, + "learning_rate": 2.5467679777822853e-05, + "loss": 0.3047, "step": 49415 }, { "epoch": 2.31, - "learning_rate": 1.546012845154939e-05, - "loss": 0.041, + "learning_rate": 2.5467211707986833e-05, + "loss": 0.0639, "step": 49420 }, { "epoch": 2.31, - "learning_rate": 1.545965965027425e-05, - "loss": 0.0828, + "learning_rate": 2.546674363815081e-05, + "loss": 0.0752, "step": 49425 }, { "epoch": 2.31, - "learning_rate": 1.545919084899911e-05, - "loss": 0.0496, + "learning_rate": 2.5466275568314793e-05, + "loss": 0.0783, "step": 49430 }, { "epoch": 2.31, - "learning_rate": 1.545872204772397e-05, - "loss": 0.0631, + "learning_rate": 2.5465807498478773e-05, + "loss": 0.0544, "step": 49435 }, { "epoch": 2.31, - "learning_rate": 1.5458253246448832e-05, - "loss": 0.0788, + "learning_rate": 2.5465339428642753e-05, + "loss": 0.034, "step": 49440 }, { "epoch": 2.31, - "learning_rate": 1.5457784445173692e-05, - "loss": 0.1385, + "learning_rate": 2.5464871358806736e-05, + "loss": 0.1552, "step": 49445 }, { "epoch": 2.31, - "learning_rate": 1.5457315643898552e-05, - "loss": 0.2284, + "learning_rate": 2.5464403288970716e-05, + "loss": 0.1129, "step": 49450 }, { "epoch": 2.31, - "learning_rate": 1.5456846842623416e-05, - "loss": 0.2322, + "learning_rate": 2.5463935219134695e-05, + "loss": 0.2289, "step": 49455 }, { "epoch": 2.31, - "learning_rate": 1.5456378041348276e-05, - "loss": 0.3846, + "learning_rate": 2.5463467149298675e-05, + "loss": 0.2528, "step": 49460 }, { "epoch": 2.31, - "learning_rate": 1.5455909240073136e-05, - "loss": 0.2495, + "learning_rate": 2.546299907946266e-05, + "loss": 0.3527, "step": 49465 }, { "epoch": 2.31, - "learning_rate": 1.5455440438797995e-05, - "loss": 0.0653, + "learning_rate": 2.5462531009626638e-05, + "loss": 0.0871, "step": 49470 }, { "epoch": 2.31, - "learning_rate": 1.5454971637522855e-05, - "loss": 0.0465, + "learning_rate": 2.5462062939790618e-05, + "loss": 0.0753, "step": 49475 }, { "epoch": 2.31, - "learning_rate": 1.5454502836247715e-05, - "loss": 0.091, + "learning_rate": 2.5461594869954598e-05, + "loss": 0.0513, "step": 49480 }, { "epoch": 2.31, - "learning_rate": 1.5454034034972575e-05, - "loss": 0.0576, + "learning_rate": 2.5461126800118578e-05, + "loss": 0.1227, "step": 49485 }, { "epoch": 2.31, - "learning_rate": 1.5453565233697435e-05, - "loss": 0.0574, + "learning_rate": 2.5460658730282558e-05, + "loss": 0.1106, "step": 49490 }, { "epoch": 2.31, - "learning_rate": 1.5453096432422295e-05, - "loss": 0.0683, + "learning_rate": 2.5460190660446537e-05, + "loss": 0.0785, "step": 49495 }, { "epoch": 2.31, - "learning_rate": 1.545262763114716e-05, - "loss": 0.1513, + "learning_rate": 2.545972259061052e-05, + "loss": 0.0876, "step": 49500 }, { "epoch": 2.31, - "learning_rate": 1.545215882987202e-05, - "loss": 0.1737, + "learning_rate": 2.54592545207745e-05, + "loss": 0.2105, "step": 49505 }, { "epoch": 2.31, - "learning_rate": 1.545169002859688e-05, - "loss": 0.2737, + "learning_rate": 2.545878645093848e-05, + "loss": 0.1714, "step": 49510 }, { "epoch": 2.31, - "learning_rate": 1.5451221227321738e-05, - "loss": 0.2102, + "learning_rate": 2.545831838110246e-05, + "loss": 0.4019, "step": 49515 }, { "epoch": 2.31, - "learning_rate": 1.54507524260466e-05, - "loss": 0.0284, + "learning_rate": 2.5457850311266443e-05, + "loss": 0.0654, "step": 49520 }, { "epoch": 2.31, - "learning_rate": 1.545028362477146e-05, - "loss": 0.0773, + "learning_rate": 2.5457382241430423e-05, + "loss": 0.0716, "step": 49525 }, { "epoch": 2.31, - "learning_rate": 1.544981482349632e-05, - "loss": 0.0148, + "learning_rate": 2.5456914171594403e-05, + "loss": 0.06, "step": 49530 }, { "epoch": 2.31, - "learning_rate": 1.544934602222118e-05, - "loss": 0.062, + "learning_rate": 2.5456446101758383e-05, + "loss": 0.052, "step": 49535 }, { "epoch": 2.31, - "learning_rate": 1.544887722094604e-05, - "loss": 0.0813, + "learning_rate": 2.5455978031922366e-05, + "loss": 0.0881, "step": 49540 }, { "epoch": 2.31, - "learning_rate": 1.5448408419670905e-05, - "loss": 0.0957, + "learning_rate": 2.5455509962086346e-05, + "loss": 0.0607, "step": 49545 }, { "epoch": 2.31, - "learning_rate": 1.5447939618395765e-05, - "loss": 0.1741, + "learning_rate": 2.5455041892250322e-05, + "loss": 0.0982, "step": 49550 }, { "epoch": 2.31, - "learning_rate": 1.5447470817120625e-05, - "loss": 0.1808, + "learning_rate": 2.5454573822414302e-05, + "loss": 0.1946, "step": 49555 }, { "epoch": 2.31, - "learning_rate": 1.5447002015845484e-05, - "loss": 0.3045, + "learning_rate": 2.5454105752578285e-05, + "loss": 0.2121, "step": 49560 }, { "epoch": 2.31, - "learning_rate": 1.5446533214570344e-05, - "loss": 0.2576, + "learning_rate": 2.5453637682742265e-05, + "loss": 0.3449, "step": 49565 }, { "epoch": 2.31, - "learning_rate": 1.5446064413295204e-05, - "loss": 0.0866, + "learning_rate": 2.5453169612906245e-05, + "loss": 0.1156, "step": 49570 }, { "epoch": 2.31, - "learning_rate": 1.5445595612020064e-05, - "loss": 0.0214, + "learning_rate": 2.5452701543070228e-05, + "loss": 0.047, "step": 49575 }, { "epoch": 2.31, - "learning_rate": 1.5445126810744928e-05, - "loss": 0.0444, + "learning_rate": 2.5452233473234208e-05, + "loss": 0.06, "step": 49580 }, { "epoch": 2.31, - "learning_rate": 1.5444658009469788e-05, - "loss": 0.046, + "learning_rate": 2.5451765403398188e-05, + "loss": 0.0634, "step": 49585 }, { "epoch": 2.31, - "learning_rate": 1.5444189208194647e-05, - "loss": 0.1132, + "learning_rate": 2.5451297333562167e-05, + "loss": 0.0328, "step": 49590 }, { "epoch": 2.31, - "learning_rate": 1.5443720406919507e-05, - "loss": 0.0736, + "learning_rate": 2.545082926372615e-05, + "loss": 0.0912, "step": 49595 }, { "epoch": 2.31, - "learning_rate": 1.544325160564437e-05, - "loss": 0.0877, + "learning_rate": 2.545036119389013e-05, + "loss": 0.1614, "step": 49600 }, { "epoch": 2.31, - "learning_rate": 1.544278280436923e-05, - "loss": 0.1461, + "learning_rate": 2.544989312405411e-05, + "loss": 0.1363, "step": 49605 }, { "epoch": 2.31, - "learning_rate": 1.544231400309409e-05, - "loss": 0.2255, + "learning_rate": 2.544942505421809e-05, + "loss": 0.1532, "step": 49610 }, { "epoch": 2.32, - "learning_rate": 1.544184520181895e-05, - "loss": 0.2188, + "learning_rate": 2.544895698438207e-05, + "loss": 0.3348, "step": 49615 }, { "epoch": 2.32, - "learning_rate": 1.544137640054381e-05, - "loss": 0.0241, + "learning_rate": 2.544848891454605e-05, + "loss": 0.0651, "step": 49620 }, { "epoch": 2.32, - "learning_rate": 1.544090759926867e-05, - "loss": 0.0118, + "learning_rate": 2.544802084471003e-05, + "loss": 0.0531, "step": 49625 }, { "epoch": 2.32, - "learning_rate": 1.544043879799353e-05, - "loss": 0.0784, + "learning_rate": 2.5447552774874013e-05, + "loss": 0.0337, "step": 49630 }, { "epoch": 2.32, - "learning_rate": 1.543996999671839e-05, - "loss": 0.1511, + "learning_rate": 2.5447084705037993e-05, + "loss": 0.1521, "step": 49635 }, { "epoch": 2.32, - "learning_rate": 1.5439501195443254e-05, - "loss": 0.1046, + "learning_rate": 2.5446616635201972e-05, + "loss": 0.1037, "step": 49640 }, { "epoch": 2.32, - "learning_rate": 1.5439032394168113e-05, - "loss": 0.1218, + "learning_rate": 2.5446148565365952e-05, + "loss": 0.0829, "step": 49645 }, { "epoch": 2.32, - "learning_rate": 1.5438563592892973e-05, - "loss": 0.1129, + "learning_rate": 2.5445680495529935e-05, + "loss": 0.1731, "step": 49650 }, { "epoch": 2.32, - "learning_rate": 1.5438094791617833e-05, - "loss": 0.1344, + "learning_rate": 2.5445212425693915e-05, + "loss": 0.1272, "step": 49655 }, { "epoch": 2.32, - "learning_rate": 1.5437625990342697e-05, - "loss": 0.2268, + "learning_rate": 2.5444744355857895e-05, + "loss": 0.3034, "step": 49660 }, { "epoch": 2.32, - "learning_rate": 1.5437157189067557e-05, - "loss": 0.3166, + "learning_rate": 2.5444276286021875e-05, + "loss": 0.2454, "step": 49665 }, { "epoch": 2.32, - "learning_rate": 1.5436688387792417e-05, - "loss": 0.0485, + "learning_rate": 2.5443808216185858e-05, + "loss": 0.0299, "step": 49670 }, { "epoch": 2.32, - "learning_rate": 1.5436219586517276e-05, - "loss": 0.0671, + "learning_rate": 2.5443340146349834e-05, + "loss": 0.0292, "step": 49675 }, { "epoch": 2.32, - "learning_rate": 1.5435750785242136e-05, - "loss": 0.059, + "learning_rate": 2.5442872076513814e-05, + "loss": 0.0221, "step": 49680 }, { "epoch": 2.32, - "learning_rate": 1.5435281983967e-05, - "loss": 0.0861, + "learning_rate": 2.5442404006677798e-05, + "loss": 0.0613, "step": 49685 }, { "epoch": 2.32, - "learning_rate": 1.543481318269186e-05, - "loss": 0.0956, + "learning_rate": 2.5441935936841777e-05, + "loss": 0.1138, "step": 49690 }, { "epoch": 2.32, - "learning_rate": 1.543434438141672e-05, - "loss": 0.1467, + "learning_rate": 2.5441467867005757e-05, + "loss": 0.1598, "step": 49695 }, { "epoch": 2.32, - "learning_rate": 1.543387558014158e-05, - "loss": 0.1193, + "learning_rate": 2.5440999797169737e-05, + "loss": 0.1373, "step": 49700 }, { "epoch": 2.32, - "learning_rate": 1.543340677886644e-05, - "loss": 0.1714, + "learning_rate": 2.544053172733372e-05, + "loss": 0.1314, "step": 49705 }, { "epoch": 2.32, - "learning_rate": 1.54329379775913e-05, - "loss": 0.229, + "learning_rate": 2.54400636574977e-05, + "loss": 0.1474, "step": 49710 }, { "epoch": 2.32, - "learning_rate": 1.543246917631616e-05, - "loss": 0.2369, + "learning_rate": 2.543959558766168e-05, + "loss": 0.2503, "step": 49715 }, { "epoch": 2.32, - "learning_rate": 1.543200037504102e-05, - "loss": 0.0291, + "learning_rate": 2.543912751782566e-05, + "loss": 0.0241, "step": 49720 }, { "epoch": 2.32, - "learning_rate": 1.5431531573765883e-05, - "loss": 0.0872, + "learning_rate": 2.5438659447989643e-05, + "loss": 0.0413, "step": 49725 }, { "epoch": 2.32, - "learning_rate": 1.5431062772490743e-05, - "loss": 0.1216, + "learning_rate": 2.5438191378153623e-05, + "loss": 0.0351, "step": 49730 }, { "epoch": 2.32, - "learning_rate": 1.5430593971215602e-05, - "loss": 0.0756, + "learning_rate": 2.5437723308317602e-05, + "loss": 0.0802, "step": 49735 }, { "epoch": 2.32, - "learning_rate": 1.5430125169940466e-05, - "loss": 0.1129, + "learning_rate": 2.5437255238481582e-05, + "loss": 0.115, "step": 49740 }, { "epoch": 2.32, - "learning_rate": 1.5429656368665326e-05, - "loss": 0.1254, + "learning_rate": 2.5436787168645562e-05, + "loss": 0.0864, "step": 49745 }, { "epoch": 2.32, - "learning_rate": 1.5429187567390186e-05, - "loss": 0.1671, + "learning_rate": 2.5436319098809542e-05, + "loss": 0.0862, "step": 49750 }, { "epoch": 2.32, - "learning_rate": 1.5428718766115046e-05, - "loss": 0.1629, + "learning_rate": 2.5435851028973522e-05, + "loss": 0.1779, "step": 49755 }, { "epoch": 2.32, - "learning_rate": 1.5428249964839906e-05, - "loss": 0.2814, + "learning_rate": 2.5435382959137505e-05, + "loss": 0.2722, "step": 49760 }, { "epoch": 2.32, - "learning_rate": 1.5427781163564765e-05, - "loss": 0.2398, + "learning_rate": 2.5434914889301485e-05, + "loss": 0.2308, "step": 49765 }, { "epoch": 2.32, - "learning_rate": 1.5427312362289625e-05, - "loss": 0.0732, + "learning_rate": 2.5434446819465465e-05, + "loss": 0.0952, "step": 49770 }, { "epoch": 2.32, - "learning_rate": 1.5426843561014485e-05, - "loss": 0.0563, + "learning_rate": 2.5433978749629444e-05, + "loss": 0.0569, "step": 49775 }, { "epoch": 2.32, - "learning_rate": 1.542637475973935e-05, - "loss": 0.0592, + "learning_rate": 2.5433510679793428e-05, + "loss": 0.0189, "step": 49780 }, { "epoch": 2.32, - "learning_rate": 1.542590595846421e-05, - "loss": 0.0781, + "learning_rate": 2.5433042609957407e-05, + "loss": 0.0628, "step": 49785 }, { "epoch": 2.32, - "learning_rate": 1.542543715718907e-05, - "loss": 0.0921, + "learning_rate": 2.5432574540121387e-05, + "loss": 0.0487, "step": 49790 }, { "epoch": 2.32, - "learning_rate": 1.542496835591393e-05, - "loss": 0.1204, + "learning_rate": 2.5432106470285367e-05, + "loss": 0.0649, "step": 49795 }, { "epoch": 2.32, - "learning_rate": 1.542449955463879e-05, - "loss": 0.1716, + "learning_rate": 2.5431638400449347e-05, + "loss": 0.1791, "step": 49800 }, { "epoch": 2.32, - "learning_rate": 1.542403075336365e-05, - "loss": 0.208, + "learning_rate": 2.5431170330613327e-05, + "loss": 0.1248, "step": 49805 }, { "epoch": 2.32, - "learning_rate": 1.542356195208851e-05, - "loss": 0.3305, + "learning_rate": 2.5430702260777307e-05, + "loss": 0.1678, "step": 49810 }, { "epoch": 2.32, - "learning_rate": 1.542309315081337e-05, - "loss": 0.2823, + "learning_rate": 2.543023419094129e-05, + "loss": 0.2428, "step": 49815 }, { "epoch": 2.32, - "learning_rate": 1.542262434953823e-05, - "loss": 0.0389, + "learning_rate": 2.542976612110527e-05, + "loss": 0.0325, "step": 49820 }, { "epoch": 2.32, - "learning_rate": 1.5422155548263095e-05, - "loss": 0.0554, + "learning_rate": 2.542929805126925e-05, + "loss": 0.0578, "step": 49825 }, { "epoch": 2.33, - "learning_rate": 1.5421686746987955e-05, - "loss": 0.0942, + "learning_rate": 2.542882998143323e-05, + "loss": 0.0784, "step": 49830 }, { "epoch": 2.33, - "learning_rate": 1.5421217945712815e-05, - "loss": 0.0994, + "learning_rate": 2.5428361911597212e-05, + "loss": 0.0647, "step": 49835 }, { "epoch": 2.33, - "learning_rate": 1.5420749144437675e-05, - "loss": 0.097, + "learning_rate": 2.5427893841761192e-05, + "loss": 0.1084, "step": 49840 }, { "epoch": 2.33, - "learning_rate": 1.5420280343162535e-05, - "loss": 0.1022, + "learning_rate": 2.5427425771925172e-05, + "loss": 0.1054, "step": 49845 }, { "epoch": 2.33, - "learning_rate": 1.5419811541887394e-05, - "loss": 0.0807, + "learning_rate": 2.5426957702089152e-05, + "loss": 0.1604, "step": 49850 }, { "epoch": 2.33, - "learning_rate": 1.5419342740612254e-05, - "loss": 0.1672, + "learning_rate": 2.5426489632253135e-05, + "loss": 0.1043, "step": 49855 }, { "epoch": 2.33, - "learning_rate": 1.5418873939337114e-05, - "loss": 0.2764, + "learning_rate": 2.5426021562417115e-05, + "loss": 0.2644, "step": 49860 }, { "epoch": 2.33, - "learning_rate": 1.5418405138061974e-05, - "loss": 0.3174, + "learning_rate": 2.542555349258109e-05, + "loss": 0.2072, "step": 49865 }, { "epoch": 2.33, - "learning_rate": 1.5417936336786838e-05, - "loss": 0.1249, + "learning_rate": 2.5425085422745074e-05, + "loss": 0.0799, "step": 49870 }, { "epoch": 2.33, - "learning_rate": 1.5417467535511698e-05, - "loss": 0.0402, + "learning_rate": 2.5424617352909054e-05, + "loss": 0.1019, "step": 49875 }, { "epoch": 2.33, - "learning_rate": 1.5416998734236557e-05, - "loss": 0.0741, + "learning_rate": 2.5424149283073034e-05, + "loss": 0.0481, "step": 49880 }, { "epoch": 2.33, - "learning_rate": 1.541652993296142e-05, - "loss": 0.1277, + "learning_rate": 2.5423681213237014e-05, + "loss": 0.0452, "step": 49885 }, { "epoch": 2.33, - "learning_rate": 1.541606113168628e-05, - "loss": 0.1288, + "learning_rate": 2.5423213143400997e-05, + "loss": 0.0779, "step": 49890 }, { "epoch": 2.33, - "learning_rate": 1.541559233041114e-05, - "loss": 0.1828, + "learning_rate": 2.5422745073564977e-05, + "loss": 0.0588, "step": 49895 }, { "epoch": 2.33, - "learning_rate": 1.5415123529136e-05, - "loss": 0.1366, + "learning_rate": 2.5422277003728957e-05, + "loss": 0.1759, "step": 49900 }, { "epoch": 2.33, - "learning_rate": 1.541465472786086e-05, - "loss": 0.2104, + "learning_rate": 2.5421808933892937e-05, + "loss": 0.1676, "step": 49905 }, { "epoch": 2.33, - "learning_rate": 1.541418592658572e-05, - "loss": 0.1688, + "learning_rate": 2.542134086405692e-05, + "loss": 0.2049, "step": 49910 }, { "epoch": 2.33, - "learning_rate": 1.541371712531058e-05, - "loss": 0.2507, + "learning_rate": 2.54208727942209e-05, + "loss": 0.2557, "step": 49915 }, { "epoch": 2.33, - "learning_rate": 1.5413248324035444e-05, - "loss": 0.0244, + "learning_rate": 2.542040472438488e-05, + "loss": 0.0453, "step": 49920 }, { "epoch": 2.33, - "learning_rate": 1.5412779522760304e-05, - "loss": 0.0705, + "learning_rate": 2.5419936654548863e-05, + "loss": 0.0367, "step": 49925 }, { "epoch": 2.33, - "learning_rate": 1.5412310721485164e-05, - "loss": 0.0406, + "learning_rate": 2.541946858471284e-05, + "loss": 0.0789, "step": 49930 }, { "epoch": 2.33, - "learning_rate": 1.5411841920210024e-05, - "loss": 0.0665, + "learning_rate": 2.541900051487682e-05, + "loss": 0.0582, "step": 49935 }, { "epoch": 2.33, - "learning_rate": 1.5411373118934883e-05, - "loss": 0.044, + "learning_rate": 2.54185324450408e-05, + "loss": 0.168, "step": 49940 }, { "epoch": 2.33, - "learning_rate": 1.5410904317659743e-05, - "loss": 0.0675, + "learning_rate": 2.5418064375204782e-05, + "loss": 0.0575, "step": 49945 }, { "epoch": 2.33, - "learning_rate": 1.5410435516384607e-05, - "loss": 0.0992, + "learning_rate": 2.5417596305368762e-05, + "loss": 0.1285, "step": 49950 }, { "epoch": 2.33, - "learning_rate": 1.5409966715109467e-05, - "loss": 0.1772, + "learning_rate": 2.541712823553274e-05, + "loss": 0.1581, "step": 49955 }, { "epoch": 2.33, - "learning_rate": 1.5409497913834327e-05, - "loss": 0.1362, + "learning_rate": 2.541666016569672e-05, + "loss": 0.1517, "step": 49960 }, { "epoch": 2.33, - "learning_rate": 1.540902911255919e-05, - "loss": 0.2604, + "learning_rate": 2.5416192095860705e-05, + "loss": 0.3418, "step": 49965 }, { "epoch": 2.33, - "learning_rate": 1.540856031128405e-05, - "loss": 0.0627, + "learning_rate": 2.5415724026024684e-05, + "loss": 0.0279, "step": 49970 }, { "epoch": 2.33, - "learning_rate": 1.540809151000891e-05, - "loss": 0.0178, + "learning_rate": 2.5415255956188664e-05, + "loss": 0.0606, "step": 49975 }, { "epoch": 2.33, - "learning_rate": 1.540762270873377e-05, - "loss": 0.0792, + "learning_rate": 2.5414787886352644e-05, + "loss": 0.0848, "step": 49980 }, { "epoch": 2.33, - "learning_rate": 1.540715390745863e-05, - "loss": 0.0241, + "learning_rate": 2.5414319816516627e-05, + "loss": 0.0611, "step": 49985 }, { "epoch": 2.33, - "learning_rate": 1.540668510618349e-05, - "loss": 0.1072, + "learning_rate": 2.5413851746680604e-05, + "loss": 0.078, "step": 49990 }, { "epoch": 2.33, - "learning_rate": 1.540621630490835e-05, - "loss": 0.0948, + "learning_rate": 2.5413383676844583e-05, + "loss": 0.1221, "step": 49995 }, { "epoch": 2.33, - "learning_rate": 1.540574750363321e-05, - "loss": 0.1007, + "learning_rate": 2.5412915607008567e-05, + "loss": 0.3131, "step": 50000 }, { "epoch": 2.33, - "learning_rate": 1.540527870235807e-05, - "loss": 0.2221, + "learning_rate": 2.5412447537172546e-05, + "loss": 0.1315, "step": 50005 }, { "epoch": 2.33, - "learning_rate": 1.5404809901082933e-05, - "loss": 0.3763, + "learning_rate": 2.5411979467336526e-05, + "loss": 0.1607, "step": 50010 }, { "epoch": 2.33, - "learning_rate": 1.5404341099807793e-05, - "loss": 0.2037, + "learning_rate": 2.5411511397500506e-05, + "loss": 0.2701, "step": 50015 }, { "epoch": 2.33, - "learning_rate": 1.5403872298532653e-05, - "loss": 0.0519, + "learning_rate": 2.541104332766449e-05, + "loss": 0.0967, "step": 50020 }, { "epoch": 2.33, - "learning_rate": 1.5403403497257512e-05, - "loss": 0.0638, + "learning_rate": 2.541057525782847e-05, + "loss": 0.0494, "step": 50025 }, { "epoch": 2.33, - "learning_rate": 1.5402934695982376e-05, - "loss": 0.106, + "learning_rate": 2.541010718799245e-05, + "loss": 0.1049, "step": 50030 }, { "epoch": 2.33, - "learning_rate": 1.5402465894707236e-05, - "loss": 0.0515, + "learning_rate": 2.540963911815643e-05, + "loss": 0.0495, "step": 50035 }, { "epoch": 2.33, - "learning_rate": 1.5401997093432096e-05, - "loss": 0.0843, + "learning_rate": 2.5409171048320412e-05, + "loss": 0.1027, "step": 50040 }, { "epoch": 2.34, - "learning_rate": 1.5401528292156956e-05, - "loss": 0.1201, + "learning_rate": 2.5408702978484392e-05, + "loss": 0.0669, "step": 50045 }, { "epoch": 2.34, - "learning_rate": 1.5401059490881816e-05, - "loss": 0.1472, + "learning_rate": 2.540823490864837e-05, + "loss": 0.1393, "step": 50050 }, { "epoch": 2.34, - "learning_rate": 1.5400590689606675e-05, - "loss": 0.1796, + "learning_rate": 2.540776683881235e-05, + "loss": 0.2202, "step": 50055 }, { "epoch": 2.34, - "learning_rate": 1.540012188833154e-05, - "loss": 0.2752, + "learning_rate": 2.540729876897633e-05, + "loss": 0.2238, "step": 50060 }, { "epoch": 2.34, - "learning_rate": 1.53996530870564e-05, - "loss": 0.191, + "learning_rate": 2.540683069914031e-05, + "loss": 0.182, "step": 50065 }, { "epoch": 2.34, - "learning_rate": 1.539918428578126e-05, - "loss": 0.023, + "learning_rate": 2.540636262930429e-05, + "loss": 0.0626, "step": 50070 }, { "epoch": 2.34, - "learning_rate": 1.539871548450612e-05, - "loss": 0.0786, + "learning_rate": 2.5405894559468274e-05, + "loss": 0.0237, "step": 50075 }, { "epoch": 2.34, - "learning_rate": 1.539824668323098e-05, - "loss": 0.0499, + "learning_rate": 2.5405426489632254e-05, + "loss": 0.066, "step": 50080 }, { "epoch": 2.34, - "learning_rate": 1.539777788195584e-05, - "loss": 0.06, + "learning_rate": 2.5404958419796234e-05, + "loss": 0.0602, "step": 50085 }, { "epoch": 2.34, - "learning_rate": 1.5397309080680702e-05, - "loss": 0.0837, + "learning_rate": 2.5404490349960214e-05, + "loss": 0.0647, "step": 50090 }, { "epoch": 2.34, - "learning_rate": 1.5396840279405562e-05, - "loss": 0.1011, + "learning_rate": 2.5404022280124197e-05, + "loss": 0.0931, "step": 50095 }, { "epoch": 2.34, - "learning_rate": 1.539637147813042e-05, - "loss": 0.2519, + "learning_rate": 2.5403554210288177e-05, + "loss": 0.1107, "step": 50100 }, { "epoch": 2.34, - "learning_rate": 1.5395902676855285e-05, - "loss": 0.2367, + "learning_rate": 2.5403086140452156e-05, + "loss": 0.1569, "step": 50105 }, { "epoch": 2.34, - "learning_rate": 1.5395433875580145e-05, - "loss": 0.2337, + "learning_rate": 2.540261807061614e-05, + "loss": 0.1128, "step": 50110 }, { "epoch": 2.34, - "learning_rate": 1.5394965074305005e-05, - "loss": 0.3048, + "learning_rate": 2.540215000078012e-05, + "loss": 0.3365, "step": 50115 }, { "epoch": 2.34, - "learning_rate": 1.5394496273029865e-05, - "loss": 0.1067, + "learning_rate": 2.5401681930944096e-05, + "loss": 0.0456, "step": 50120 }, { "epoch": 2.34, - "learning_rate": 1.5394027471754725e-05, - "loss": 0.0296, + "learning_rate": 2.5401213861108076e-05, + "loss": 0.0748, "step": 50125 }, { "epoch": 2.34, - "learning_rate": 1.5393558670479585e-05, - "loss": 0.0417, + "learning_rate": 2.540074579127206e-05, + "loss": 0.0557, "step": 50130 }, { "epoch": 2.34, - "learning_rate": 1.5393089869204445e-05, - "loss": 0.0882, + "learning_rate": 2.540027772143604e-05, + "loss": 0.0999, "step": 50135 }, { "epoch": 2.34, - "learning_rate": 1.5392621067929305e-05, - "loss": 0.1051, + "learning_rate": 2.539980965160002e-05, + "loss": 0.0658, "step": 50140 }, { "epoch": 2.34, - "learning_rate": 1.5392152266654164e-05, - "loss": 0.118, + "learning_rate": 2.5399341581764e-05, + "loss": 0.1675, "step": 50145 }, { "epoch": 2.34, - "learning_rate": 1.5391683465379028e-05, - "loss": 0.1182, + "learning_rate": 2.539887351192798e-05, + "loss": 0.2124, "step": 50150 }, { "epoch": 2.34, - "learning_rate": 1.5391214664103888e-05, - "loss": 0.1522, + "learning_rate": 2.539840544209196e-05, + "loss": 0.1499, "step": 50155 }, { "epoch": 2.34, - "learning_rate": 1.5390745862828748e-05, - "loss": 0.2011, + "learning_rate": 2.539793737225594e-05, + "loss": 0.3174, "step": 50160 }, { "epoch": 2.34, - "learning_rate": 1.5390277061553608e-05, - "loss": 0.2483, + "learning_rate": 2.539746930241992e-05, + "loss": 0.2783, "step": 50165 }, { "epoch": 2.34, - "learning_rate": 1.538980826027847e-05, - "loss": 0.0936, + "learning_rate": 2.5397001232583904e-05, + "loss": 0.0268, "step": 50170 }, { "epoch": 2.34, - "learning_rate": 1.538933945900333e-05, - "loss": 0.0621, + "learning_rate": 2.5396533162747884e-05, + "loss": 0.0272, "step": 50175 }, { "epoch": 2.34, - "learning_rate": 1.538887065772819e-05, - "loss": 0.0768, + "learning_rate": 2.539606509291186e-05, + "loss": 0.0436, "step": 50180 }, { "epoch": 2.34, - "learning_rate": 1.538840185645305e-05, - "loss": 0.1631, + "learning_rate": 2.5395597023075844e-05, + "loss": 0.0458, "step": 50185 }, { "epoch": 2.34, - "learning_rate": 1.538793305517791e-05, - "loss": 0.0566, + "learning_rate": 2.5395128953239823e-05, + "loss": 0.1333, "step": 50190 }, { "epoch": 2.34, - "learning_rate": 1.538746425390277e-05, - "loss": 0.1157, + "learning_rate": 2.5394660883403803e-05, + "loss": 0.068, "step": 50195 }, { "epoch": 2.34, - "learning_rate": 1.5386995452627634e-05, - "loss": 0.1238, + "learning_rate": 2.5394192813567783e-05, + "loss": 0.0794, "step": 50200 }, { "epoch": 2.34, - "learning_rate": 1.5386526651352494e-05, - "loss": 0.1757, + "learning_rate": 2.5393724743731766e-05, + "loss": 0.1917, "step": 50205 }, { "epoch": 2.34, - "learning_rate": 1.5386057850077354e-05, - "loss": 0.2642, + "learning_rate": 2.5393256673895746e-05, + "loss": 0.4107, "step": 50210 }, { "epoch": 2.34, - "learning_rate": 1.5385589048802214e-05, - "loss": 0.3062, + "learning_rate": 2.5392788604059726e-05, + "loss": 0.2252, "step": 50215 }, { "epoch": 2.34, - "learning_rate": 1.5385120247527074e-05, - "loss": 0.0431, + "learning_rate": 2.5392320534223706e-05, + "loss": 0.0406, "step": 50220 }, { "epoch": 2.34, - "learning_rate": 1.5384651446251934e-05, - "loss": 0.0448, + "learning_rate": 2.539185246438769e-05, + "loss": 0.0432, "step": 50225 }, { "epoch": 2.34, - "learning_rate": 1.5384182644976793e-05, - "loss": 0.0536, + "learning_rate": 2.539138439455167e-05, + "loss": 0.0594, "step": 50230 }, { "epoch": 2.34, - "learning_rate": 1.5383713843701657e-05, - "loss": 0.0219, + "learning_rate": 2.539091632471565e-05, + "loss": 0.1008, "step": 50235 }, { "epoch": 2.34, - "learning_rate": 1.5383245042426517e-05, - "loss": 0.0811, + "learning_rate": 2.5390448254879632e-05, + "loss": 0.0651, "step": 50240 }, { "epoch": 2.34, - "learning_rate": 1.5382776241151377e-05, - "loss": 0.124, + "learning_rate": 2.5389980185043608e-05, + "loss": 0.1193, "step": 50245 }, { "epoch": 2.34, - "learning_rate": 1.538230743987624e-05, - "loss": 0.1731, + "learning_rate": 2.5389512115207588e-05, + "loss": 0.1278, "step": 50250 }, { "epoch": 2.34, - "learning_rate": 1.53818386386011e-05, - "loss": 0.1248, + "learning_rate": 2.5389044045371568e-05, + "loss": 0.1976, "step": 50255 }, { "epoch": 2.35, - "learning_rate": 1.538136983732596e-05, - "loss": 0.211, + "learning_rate": 2.538857597553555e-05, + "loss": 0.1723, "step": 50260 }, { "epoch": 2.35, - "learning_rate": 1.538090103605082e-05, - "loss": 0.2802, + "learning_rate": 2.538810790569953e-05, + "loss": 0.2009, "step": 50265 }, { "epoch": 2.35, - "learning_rate": 1.538043223477568e-05, - "loss": 0.0651, + "learning_rate": 2.538763983586351e-05, + "loss": 0.0442, "step": 50270 }, { "epoch": 2.35, - "learning_rate": 1.537996343350054e-05, - "loss": 0.0739, + "learning_rate": 2.538717176602749e-05, + "loss": 0.0261, "step": 50275 }, { "epoch": 2.35, - "learning_rate": 1.53794946322254e-05, - "loss": 0.0697, + "learning_rate": 2.5386703696191474e-05, + "loss": 0.0849, "step": 50280 }, { "epoch": 2.35, - "learning_rate": 1.537902583095026e-05, - "loss": 0.066, + "learning_rate": 2.5386235626355454e-05, + "loss": 0.0594, "step": 50285 }, { "epoch": 2.35, - "learning_rate": 1.5378557029675123e-05, - "loss": 0.0599, + "learning_rate": 2.5385767556519433e-05, + "loss": 0.0961, "step": 50290 }, { "epoch": 2.35, - "learning_rate": 1.5378088228399983e-05, - "loss": 0.0544, + "learning_rate": 2.5385299486683417e-05, + "loss": 0.0355, "step": 50295 }, { "epoch": 2.35, - "learning_rate": 1.5377619427124843e-05, - "loss": 0.0874, + "learning_rate": 2.5384831416847396e-05, + "loss": 0.089, "step": 50300 }, { "epoch": 2.35, - "learning_rate": 1.5377150625849703e-05, - "loss": 0.1567, + "learning_rate": 2.5384363347011373e-05, + "loss": 0.1026, "step": 50305 }, { "epoch": 2.35, - "learning_rate": 1.5376681824574563e-05, - "loss": 0.2568, + "learning_rate": 2.5383895277175353e-05, + "loss": 0.1942, "step": 50310 }, { "epoch": 2.35, - "learning_rate": 1.5376213023299426e-05, - "loss": 0.2765, + "learning_rate": 2.5383427207339336e-05, + "loss": 0.3554, "step": 50315 }, { "epoch": 2.35, - "learning_rate": 1.5375744222024286e-05, - "loss": 0.0519, + "learning_rate": 2.5382959137503316e-05, + "loss": 0.0324, "step": 50320 }, { "epoch": 2.35, - "learning_rate": 1.5375275420749146e-05, - "loss": 0.0839, + "learning_rate": 2.5382491067667295e-05, + "loss": 0.0389, "step": 50325 }, { "epoch": 2.35, - "learning_rate": 1.5374806619474006e-05, - "loss": 0.0378, + "learning_rate": 2.5382022997831275e-05, + "loss": 0.0259, "step": 50330 }, { "epoch": 2.35, - "learning_rate": 1.537433781819887e-05, - "loss": 0.0958, + "learning_rate": 2.538155492799526e-05, + "loss": 0.0538, "step": 50335 }, { "epoch": 2.35, - "learning_rate": 1.537386901692373e-05, - "loss": 0.0688, + "learning_rate": 2.538108685815924e-05, + "loss": 0.0534, "step": 50340 }, { "epoch": 2.35, - "learning_rate": 1.537340021564859e-05, - "loss": 0.1647, + "learning_rate": 2.5380618788323218e-05, + "loss": 0.1144, "step": 50345 }, { "epoch": 2.35, - "learning_rate": 1.537293141437345e-05, - "loss": 0.1321, + "learning_rate": 2.53801507184872e-05, + "loss": 0.175, "step": 50350 }, { "epoch": 2.35, - "learning_rate": 1.537246261309831e-05, - "loss": 0.1933, + "learning_rate": 2.537968264865118e-05, + "loss": 0.1183, "step": 50355 }, { "epoch": 2.35, - "learning_rate": 1.537199381182317e-05, - "loss": 0.2768, + "learning_rate": 2.537921457881516e-05, + "loss": 0.2255, "step": 50360 }, { "epoch": 2.35, - "learning_rate": 1.537152501054803e-05, - "loss": 0.3416, + "learning_rate": 2.537874650897914e-05, + "loss": 0.1964, "step": 50365 }, { "epoch": 2.35, - "learning_rate": 1.537105620927289e-05, - "loss": 0.0425, + "learning_rate": 2.537827843914312e-05, + "loss": 0.052, "step": 50370 }, { "epoch": 2.35, - "learning_rate": 1.537058740799775e-05, - "loss": 0.0616, + "learning_rate": 2.53778103693071e-05, + "loss": 0.0467, "step": 50375 }, { "epoch": 2.35, - "learning_rate": 1.5370118606722612e-05, - "loss": 0.0403, + "learning_rate": 2.537734229947108e-05, + "loss": 0.0439, "step": 50380 }, { "epoch": 2.35, - "learning_rate": 1.5369649805447472e-05, - "loss": 0.0957, + "learning_rate": 2.537687422963506e-05, + "loss": 0.0687, "step": 50385 }, { "epoch": 2.35, - "learning_rate": 1.536918100417233e-05, - "loss": 0.0697, + "learning_rate": 2.5376406159799043e-05, + "loss": 0.1015, "step": 50390 }, { "epoch": 2.35, - "learning_rate": 1.5368712202897195e-05, - "loss": 0.0837, + "learning_rate": 2.5375938089963023e-05, + "loss": 0.0946, "step": 50395 }, { "epoch": 2.35, - "learning_rate": 1.5368243401622055e-05, - "loss": 0.1271, + "learning_rate": 2.5375470020127003e-05, + "loss": 0.1287, "step": 50400 }, { "epoch": 2.35, - "learning_rate": 1.5367774600346915e-05, - "loss": 0.1135, + "learning_rate": 2.5375001950290983e-05, + "loss": 0.1982, "step": 50405 }, { "epoch": 2.35, - "learning_rate": 1.5367305799071775e-05, - "loss": 0.2313, + "learning_rate": 2.5374533880454966e-05, + "loss": 0.2237, "step": 50410 }, { "epoch": 2.35, - "learning_rate": 1.5366836997796635e-05, - "loss": 0.2247, + "learning_rate": 2.5374065810618946e-05, + "loss": 0.2411, "step": 50415 }, { "epoch": 2.35, - "learning_rate": 1.5366368196521495e-05, - "loss": 0.0565, + "learning_rate": 2.5373597740782926e-05, + "loss": 0.0149, "step": 50420 }, { "epoch": 2.35, - "learning_rate": 1.5365899395246355e-05, - "loss": 0.0344, + "learning_rate": 2.537312967094691e-05, + "loss": 0.0509, "step": 50425 }, { "epoch": 2.35, - "learning_rate": 1.5365430593971218e-05, - "loss": 0.0838, + "learning_rate": 2.537266160111089e-05, + "loss": 0.0873, "step": 50430 }, { "epoch": 2.35, - "learning_rate": 1.5364961792696078e-05, - "loss": 0.0572, + "learning_rate": 2.5372193531274865e-05, + "loss": 0.092, "step": 50435 }, { "epoch": 2.35, - "learning_rate": 1.5364492991420938e-05, - "loss": 0.0976, + "learning_rate": 2.5371725461438845e-05, + "loss": 0.0939, "step": 50440 }, { "epoch": 2.35, - "learning_rate": 1.5364024190145798e-05, - "loss": 0.1598, + "learning_rate": 2.5371257391602828e-05, + "loss": 0.1211, "step": 50445 }, { "epoch": 2.35, - "learning_rate": 1.5363555388870658e-05, - "loss": 0.1353, + "learning_rate": 2.5370789321766808e-05, + "loss": 0.191, "step": 50450 }, { "epoch": 2.35, - "learning_rate": 1.536308658759552e-05, - "loss": 0.2411, + "learning_rate": 2.5370321251930788e-05, + "loss": 0.1858, "step": 50455 }, { "epoch": 2.35, - "learning_rate": 1.536261778632038e-05, - "loss": 0.2845, + "learning_rate": 2.5369853182094768e-05, + "loss": 0.3049, "step": 50460 }, { "epoch": 2.35, - "learning_rate": 1.536214898504524e-05, - "loss": 0.3948, + "learning_rate": 2.536938511225875e-05, + "loss": 0.2958, "step": 50465 }, { "epoch": 2.35, - "learning_rate": 1.53616801837701e-05, - "loss": 0.0147, + "learning_rate": 2.536891704242273e-05, + "loss": 0.0345, "step": 50470 }, { "epoch": 2.36, - "learning_rate": 1.5361211382494964e-05, - "loss": 0.065, + "learning_rate": 2.536844897258671e-05, + "loss": 0.0568, "step": 50475 }, { "epoch": 2.36, - "learning_rate": 1.5360742581219824e-05, - "loss": 0.0895, + "learning_rate": 2.5367980902750694e-05, + "loss": 0.0802, "step": 50480 }, { "epoch": 2.36, - "learning_rate": 1.5360273779944684e-05, - "loss": 0.0594, + "learning_rate": 2.5367512832914673e-05, + "loss": 0.0865, "step": 50485 }, { "epoch": 2.36, - "learning_rate": 1.5359804978669544e-05, - "loss": 0.1196, + "learning_rate": 2.5367044763078653e-05, + "loss": 0.0524, "step": 50490 }, { "epoch": 2.36, - "learning_rate": 1.5359336177394404e-05, - "loss": 0.1975, + "learning_rate": 2.536657669324263e-05, + "loss": 0.0844, "step": 50495 }, { "epoch": 2.36, - "learning_rate": 1.5358867376119264e-05, - "loss": 0.1263, + "learning_rate": 2.5366108623406613e-05, + "loss": 0.0929, "step": 50500 }, { "epoch": 2.36, - "learning_rate": 1.5358398574844124e-05, - "loss": 0.2439, + "learning_rate": 2.5365640553570593e-05, + "loss": 0.1287, "step": 50505 }, { "epoch": 2.36, - "learning_rate": 1.5357929773568984e-05, - "loss": 0.1744, + "learning_rate": 2.5365172483734572e-05, + "loss": 0.1441, "step": 50510 }, { "epoch": 2.36, - "learning_rate": 1.5357460972293844e-05, - "loss": 0.2682, + "learning_rate": 2.5364704413898552e-05, + "loss": 0.2264, "step": 50515 }, { "epoch": 2.36, - "learning_rate": 1.5356992171018707e-05, - "loss": 0.0798, + "learning_rate": 2.5364236344062535e-05, + "loss": 0.0316, "step": 50520 }, { "epoch": 2.36, - "learning_rate": 1.5356523369743567e-05, - "loss": 0.025, + "learning_rate": 2.5363768274226515e-05, + "loss": 0.0592, "step": 50525 }, { "epoch": 2.36, - "learning_rate": 1.5356054568468427e-05, - "loss": 0.0204, + "learning_rate": 2.5363300204390495e-05, + "loss": 0.0379, "step": 50530 }, { "epoch": 2.36, - "learning_rate": 1.535558576719329e-05, - "loss": 0.0742, + "learning_rate": 2.536283213455448e-05, + "loss": 0.0985, "step": 50535 }, { "epoch": 2.36, - "learning_rate": 1.535511696591815e-05, - "loss": 0.0824, + "learning_rate": 2.5362364064718458e-05, + "loss": 0.1048, "step": 50540 }, { "epoch": 2.36, - "learning_rate": 1.535464816464301e-05, - "loss": 0.1327, + "learning_rate": 2.5361895994882438e-05, + "loss": 0.1704, "step": 50545 }, { "epoch": 2.36, - "learning_rate": 1.535417936336787e-05, - "loss": 0.0983, + "learning_rate": 2.5361427925046418e-05, + "loss": 0.1961, "step": 50550 }, { "epoch": 2.36, - "learning_rate": 1.535371056209273e-05, - "loss": 0.1686, + "learning_rate": 2.53609598552104e-05, + "loss": 0.1636, "step": 50555 }, { "epoch": 2.36, - "learning_rate": 1.535324176081759e-05, - "loss": 0.5493, + "learning_rate": 2.5360491785374377e-05, + "loss": 0.3128, "step": 50560 }, { "epoch": 2.36, - "learning_rate": 1.535277295954245e-05, - "loss": 0.2809, + "learning_rate": 2.5360023715538357e-05, + "loss": 0.2527, "step": 50565 }, { "epoch": 2.36, - "learning_rate": 1.5352304158267313e-05, - "loss": 0.084, + "learning_rate": 2.5359555645702337e-05, + "loss": 0.0768, "step": 50570 }, { "epoch": 2.36, - "learning_rate": 1.5351835356992173e-05, - "loss": 0.0162, + "learning_rate": 2.535908757586632e-05, + "loss": 0.0512, "step": 50575 }, { "epoch": 2.36, - "learning_rate": 1.5351366555717033e-05, - "loss": 0.1348, + "learning_rate": 2.53586195060303e-05, + "loss": 0.0392, "step": 50580 }, { "epoch": 2.36, - "learning_rate": 1.5350897754441893e-05, - "loss": 0.0913, + "learning_rate": 2.535815143619428e-05, + "loss": 0.0605, "step": 50585 }, { "epoch": 2.36, - "learning_rate": 1.5350428953166753e-05, - "loss": 0.0912, + "learning_rate": 2.535768336635826e-05, + "loss": 0.1337, "step": 50590 }, { "epoch": 2.36, - "learning_rate": 1.5349960151891613e-05, - "loss": 0.1417, + "learning_rate": 2.5357215296522243e-05, + "loss": 0.1132, "step": 50595 }, { "epoch": 2.36, - "learning_rate": 1.5349491350616476e-05, - "loss": 0.0634, + "learning_rate": 2.5356747226686223e-05, + "loss": 0.0603, "step": 50600 }, { "epoch": 2.36, - "learning_rate": 1.5349022549341336e-05, - "loss": 0.2053, + "learning_rate": 2.5356279156850203e-05, + "loss": 0.1236, "step": 50605 }, { "epoch": 2.36, - "learning_rate": 1.5348553748066196e-05, - "loss": 0.3073, + "learning_rate": 2.5355811087014186e-05, + "loss": 0.1894, "step": 50610 }, { "epoch": 2.36, - "learning_rate": 1.534808494679106e-05, - "loss": 0.2726, + "learning_rate": 2.5355343017178166e-05, + "loss": 0.31, "step": 50615 }, { "epoch": 2.36, - "learning_rate": 1.534761614551592e-05, - "loss": 0.0641, + "learning_rate": 2.5354874947342145e-05, + "loss": 0.0427, "step": 50620 }, { "epoch": 2.36, - "learning_rate": 1.534714734424078e-05, - "loss": 0.0314, + "learning_rate": 2.5354406877506122e-05, + "loss": 0.0668, "step": 50625 }, { "epoch": 2.36, - "learning_rate": 1.534667854296564e-05, - "loss": 0.0893, + "learning_rate": 2.5353938807670105e-05, + "loss": 0.0399, "step": 50630 }, { "epoch": 2.36, - "learning_rate": 1.53462097416905e-05, - "loss": 0.062, + "learning_rate": 2.5353470737834085e-05, + "loss": 0.016, "step": 50635 }, { "epoch": 2.36, - "learning_rate": 1.534574094041536e-05, - "loss": 0.0985, + "learning_rate": 2.5353002667998065e-05, + "loss": 0.1485, "step": 50640 }, { "epoch": 2.36, - "learning_rate": 1.534527213914022e-05, - "loss": 0.1125, + "learning_rate": 2.5352534598162044e-05, + "loss": 0.0868, "step": 50645 }, { "epoch": 2.36, - "learning_rate": 1.534480333786508e-05, - "loss": 0.0805, + "learning_rate": 2.5352066528326028e-05, + "loss": 0.065, "step": 50650 }, { "epoch": 2.36, - "learning_rate": 1.534433453658994e-05, - "loss": 0.1704, + "learning_rate": 2.5351598458490007e-05, + "loss": 0.1806, "step": 50655 }, { "epoch": 2.36, - "learning_rate": 1.5343865735314802e-05, - "loss": 0.3254, + "learning_rate": 2.5351130388653987e-05, + "loss": 0.1838, "step": 50660 }, { "epoch": 2.36, - "learning_rate": 1.5343396934039662e-05, - "loss": 0.1932, + "learning_rate": 2.535066231881797e-05, + "loss": 0.4168, "step": 50665 }, { "epoch": 2.36, - "learning_rate": 1.5342928132764522e-05, - "loss": 0.0345, + "learning_rate": 2.535019424898195e-05, + "loss": 0.0335, "step": 50670 }, { "epoch": 2.36, - "learning_rate": 1.5342459331489382e-05, - "loss": 0.0975, + "learning_rate": 2.534972617914593e-05, + "loss": 0.0152, "step": 50675 }, { "epoch": 2.36, - "learning_rate": 1.5341990530214245e-05, - "loss": 0.0179, + "learning_rate": 2.534925810930991e-05, + "loss": 0.0493, "step": 50680 }, { "epoch": 2.37, - "learning_rate": 1.5341521728939105e-05, - "loss": 0.1288, + "learning_rate": 2.534879003947389e-05, + "loss": 0.0674, "step": 50685 }, { "epoch": 2.37, - "learning_rate": 1.5341052927663965e-05, - "loss": 0.135, + "learning_rate": 2.534832196963787e-05, + "loss": 0.0883, "step": 50690 }, { "epoch": 2.37, - "learning_rate": 1.5340584126388825e-05, - "loss": 0.1499, + "learning_rate": 2.534785389980185e-05, + "loss": 0.1207, "step": 50695 }, { "epoch": 2.37, - "learning_rate": 1.5340115325113685e-05, - "loss": 0.1307, + "learning_rate": 2.534738582996583e-05, + "loss": 0.1926, "step": 50700 }, { "epoch": 2.37, - "learning_rate": 1.5339646523838545e-05, - "loss": 0.1184, + "learning_rate": 2.5346917760129812e-05, + "loss": 0.1596, "step": 50705 }, { "epoch": 2.37, - "learning_rate": 1.5339177722563408e-05, - "loss": 0.2781, + "learning_rate": 2.5346449690293792e-05, + "loss": 0.1482, "step": 50710 }, { "epoch": 2.37, - "learning_rate": 1.5338708921288268e-05, - "loss": 0.2641, + "learning_rate": 2.5345981620457772e-05, + "loss": 0.3368, "step": 50715 }, { "epoch": 2.37, - "learning_rate": 1.5338240120013128e-05, - "loss": 0.0174, + "learning_rate": 2.5345513550621755e-05, + "loss": 0.0387, "step": 50720 }, { "epoch": 2.37, - "learning_rate": 1.5337771318737988e-05, - "loss": 0.0609, + "learning_rate": 2.5345045480785735e-05, + "loss": 0.0467, "step": 50725 }, { "epoch": 2.37, - "learning_rate": 1.5337302517462848e-05, - "loss": 0.0497, + "learning_rate": 2.5344577410949715e-05, + "loss": 0.0813, "step": 50730 }, { "epoch": 2.37, - "learning_rate": 1.5336833716187708e-05, - "loss": 0.1138, + "learning_rate": 2.5344109341113695e-05, + "loss": 0.0819, "step": 50735 }, { "epoch": 2.37, - "learning_rate": 1.5336364914912568e-05, - "loss": 0.0964, + "learning_rate": 2.5343641271277678e-05, + "loss": 0.1093, "step": 50740 }, { "epoch": 2.37, - "learning_rate": 1.533589611363743e-05, - "loss": 0.0858, + "learning_rate": 2.5343173201441658e-05, + "loss": 0.097, "step": 50745 }, { "epoch": 2.37, - "learning_rate": 1.533542731236229e-05, - "loss": 0.1338, + "learning_rate": 2.5342705131605634e-05, + "loss": 0.302, "step": 50750 }, { "epoch": 2.37, - "learning_rate": 1.533495851108715e-05, - "loss": 0.0712, + "learning_rate": 2.5342237061769614e-05, + "loss": 0.1494, "step": 50755 }, { "epoch": 2.37, - "learning_rate": 1.5334489709812014e-05, - "loss": 0.1415, + "learning_rate": 2.5341768991933597e-05, + "loss": 0.1882, "step": 50760 }, { "epoch": 2.37, - "learning_rate": 1.5334020908536874e-05, - "loss": 0.3659, + "learning_rate": 2.5341300922097577e-05, + "loss": 0.2207, "step": 50765 }, { "epoch": 2.37, - "learning_rate": 1.5333552107261734e-05, - "loss": 0.0337, + "learning_rate": 2.5340832852261557e-05, + "loss": 0.0713, "step": 50770 }, { "epoch": 2.37, - "learning_rate": 1.5333083305986594e-05, - "loss": 0.0486, + "learning_rate": 2.5340364782425537e-05, + "loss": 0.0471, "step": 50775 }, { "epoch": 2.37, - "learning_rate": 1.5332614504711454e-05, - "loss": 0.0634, + "learning_rate": 2.533989671258952e-05, + "loss": 0.0565, "step": 50780 }, { "epoch": 2.37, - "learning_rate": 1.5332145703436314e-05, - "loss": 0.0815, + "learning_rate": 2.53394286427535e-05, + "loss": 0.0745, "step": 50785 }, { "epoch": 2.37, - "learning_rate": 1.5331676902161174e-05, - "loss": 0.0665, + "learning_rate": 2.533896057291748e-05, + "loss": 0.1431, "step": 50790 }, { "epoch": 2.37, - "learning_rate": 1.5331208100886034e-05, - "loss": 0.0991, + "learning_rate": 2.5338492503081463e-05, + "loss": 0.177, "step": 50795 }, { "epoch": 2.37, - "learning_rate": 1.5330739299610897e-05, - "loss": 0.1177, + "learning_rate": 2.5338024433245443e-05, + "loss": 0.2157, "step": 50800 }, { "epoch": 2.37, - "learning_rate": 1.5330270498335757e-05, - "loss": 0.2121, + "learning_rate": 2.5337556363409422e-05, + "loss": 0.1742, "step": 50805 }, { "epoch": 2.37, - "learning_rate": 1.5329801697060617e-05, - "loss": 0.2237, + "learning_rate": 2.5337088293573402e-05, + "loss": 0.3254, "step": 50810 }, { "epoch": 2.37, - "learning_rate": 1.5329332895785477e-05, - "loss": 0.3753, + "learning_rate": 2.5336620223737382e-05, + "loss": 0.3474, "step": 50815 }, { "epoch": 2.37, - "learning_rate": 1.5328864094510337e-05, - "loss": 0.0427, + "learning_rate": 2.5336152153901362e-05, + "loss": 0.103, "step": 50820 }, { "epoch": 2.37, - "learning_rate": 1.53283952932352e-05, - "loss": 0.0655, + "learning_rate": 2.533568408406534e-05, + "loss": 0.082, "step": 50825 }, { "epoch": 2.37, - "learning_rate": 1.532792649196006e-05, - "loss": 0.0963, + "learning_rate": 2.533521601422932e-05, + "loss": 0.0972, "step": 50830 }, { "epoch": 2.37, - "learning_rate": 1.532745769068492e-05, - "loss": 0.0541, + "learning_rate": 2.5334747944393305e-05, + "loss": 0.0583, "step": 50835 }, { "epoch": 2.37, - "learning_rate": 1.532698888940978e-05, - "loss": 0.0904, + "learning_rate": 2.5334279874557284e-05, + "loss": 0.1032, "step": 50840 }, { "epoch": 2.37, - "learning_rate": 1.532652008813464e-05, - "loss": 0.1261, + "learning_rate": 2.5333811804721264e-05, + "loss": 0.1137, "step": 50845 }, { "epoch": 2.37, - "learning_rate": 1.5326051286859503e-05, - "loss": 0.0886, + "learning_rate": 2.5333343734885247e-05, + "loss": 0.1596, "step": 50850 }, { "epoch": 2.37, - "learning_rate": 1.5325582485584363e-05, - "loss": 0.175, + "learning_rate": 2.5332875665049227e-05, + "loss": 0.2884, "step": 50855 }, { "epoch": 2.37, - "learning_rate": 1.5325113684309223e-05, - "loss": 0.2556, + "learning_rate": 2.5332407595213207e-05, + "loss": 0.249, "step": 50860 }, { "epoch": 2.37, - "learning_rate": 1.5324644883034083e-05, - "loss": 0.1925, + "learning_rate": 2.5331939525377187e-05, + "loss": 0.3164, "step": 50865 }, { "epoch": 2.37, - "learning_rate": 1.5324176081758943e-05, - "loss": 0.101, + "learning_rate": 2.533147145554117e-05, + "loss": 0.0429, "step": 50870 }, { "epoch": 2.37, - "learning_rate": 1.5323707280483803e-05, - "loss": 0.0538, + "learning_rate": 2.5331003385705147e-05, + "loss": 0.0177, "step": 50875 }, { "epoch": 2.37, - "learning_rate": 1.5323238479208663e-05, - "loss": 0.0709, + "learning_rate": 2.5330535315869126e-05, + "loss": 0.0341, "step": 50880 }, { "epoch": 2.37, - "learning_rate": 1.5322769677933526e-05, - "loss": 0.1137, + "learning_rate": 2.5330067246033106e-05, + "loss": 0.0251, "step": 50885 }, { "epoch": 2.37, - "learning_rate": 1.5322300876658386e-05, - "loss": 0.0497, + "learning_rate": 2.532959917619709e-05, + "loss": 0.1125, "step": 50890 }, { "epoch": 2.37, - "learning_rate": 1.5321832075383246e-05, - "loss": 0.0955, + "learning_rate": 2.532913110636107e-05, + "loss": 0.0925, "step": 50895 }, { "epoch": 2.38, - "learning_rate": 1.5321363274108106e-05, - "loss": 0.0704, + "learning_rate": 2.532866303652505e-05, + "loss": 0.1246, "step": 50900 }, { "epoch": 2.38, - "learning_rate": 1.532089447283297e-05, - "loss": 0.101, + "learning_rate": 2.5328194966689032e-05, + "loss": 0.2222, "step": 50905 }, { "epoch": 2.38, - "learning_rate": 1.532042567155783e-05, - "loss": 0.221, + "learning_rate": 2.5327726896853012e-05, + "loss": 0.3999, "step": 50910 }, { "epoch": 2.38, - "learning_rate": 1.531995687028269e-05, - "loss": 0.3579, + "learning_rate": 2.5327258827016992e-05, + "loss": 0.2956, "step": 50915 }, { "epoch": 2.38, - "learning_rate": 1.531948806900755e-05, - "loss": 0.051, + "learning_rate": 2.5326790757180972e-05, + "loss": 0.0303, "step": 50920 }, { "epoch": 2.38, - "learning_rate": 1.531901926773241e-05, - "loss": 0.0345, + "learning_rate": 2.5326322687344955e-05, + "loss": 0.0555, "step": 50925 }, { "epoch": 2.38, - "learning_rate": 1.531855046645727e-05, - "loss": 0.0612, + "learning_rate": 2.5325854617508935e-05, + "loss": 0.0436, "step": 50930 }, { "epoch": 2.38, - "learning_rate": 1.531808166518213e-05, - "loss": 0.1305, + "learning_rate": 2.5325386547672915e-05, + "loss": 0.0464, "step": 50935 }, { "epoch": 2.38, - "learning_rate": 1.5317612863906992e-05, - "loss": 0.0428, + "learning_rate": 2.532491847783689e-05, + "loss": 0.1225, "step": 50940 }, { "epoch": 2.38, - "learning_rate": 1.5317144062631852e-05, - "loss": 0.1188, + "learning_rate": 2.5324450408000874e-05, + "loss": 0.0913, "step": 50945 }, { "epoch": 2.38, - "learning_rate": 1.5316675261356712e-05, - "loss": 0.1338, + "learning_rate": 2.5323982338164854e-05, + "loss": 0.1366, "step": 50950 }, { "epoch": 2.38, - "learning_rate": 1.5316206460081572e-05, - "loss": 0.1286, + "learning_rate": 2.5323514268328834e-05, + "loss": 0.1804, "step": 50955 }, { "epoch": 2.38, - "learning_rate": 1.5315737658806432e-05, - "loss": 0.2555, + "learning_rate": 2.5323046198492814e-05, + "loss": 0.1268, "step": 50960 }, { "epoch": 2.38, - "learning_rate": 1.5315268857531295e-05, - "loss": 0.3541, + "learning_rate": 2.5322578128656797e-05, + "loss": 0.1835, "step": 50965 }, { "epoch": 2.38, - "learning_rate": 1.5314800056256155e-05, - "loss": 0.0705, + "learning_rate": 2.5322110058820777e-05, + "loss": 0.0251, "step": 50970 }, { "epoch": 2.38, - "learning_rate": 1.5314331254981015e-05, - "loss": 0.0908, + "learning_rate": 2.5321641988984756e-05, + "loss": 0.0554, "step": 50975 }, { "epoch": 2.38, - "learning_rate": 1.5313862453705875e-05, - "loss": 0.0534, + "learning_rate": 2.532117391914874e-05, + "loss": 0.0826, "step": 50980 }, { "epoch": 2.38, - "learning_rate": 1.531339365243074e-05, - "loss": 0.1644, + "learning_rate": 2.532070584931272e-05, + "loss": 0.1398, "step": 50985 }, { "epoch": 2.38, - "learning_rate": 1.5312924851155598e-05, - "loss": 0.0605, + "learning_rate": 2.53202377794767e-05, + "loss": 0.0734, "step": 50990 }, { "epoch": 2.38, - "learning_rate": 1.5312456049880458e-05, - "loss": 0.1191, + "learning_rate": 2.531976970964068e-05, + "loss": 0.0862, "step": 50995 }, { "epoch": 2.38, - "learning_rate": 1.5311987248605318e-05, - "loss": 0.0936, + "learning_rate": 2.531930163980466e-05, + "loss": 0.1195, "step": 51000 }, { "epoch": 2.38, - "learning_rate": 1.5311518447330178e-05, - "loss": 0.2088, + "learning_rate": 2.531883356996864e-05, + "loss": 0.2509, "step": 51005 }, { "epoch": 2.38, - "learning_rate": 1.5311049646055038e-05, - "loss": 0.1665, + "learning_rate": 2.531836550013262e-05, + "loss": 0.1815, "step": 51010 }, { "epoch": 2.38, - "learning_rate": 1.5310580844779898e-05, - "loss": 0.3294, + "learning_rate": 2.53178974302966e-05, + "loss": 0.1515, "step": 51015 }, { "epoch": 2.38, - "learning_rate": 1.5310112043504758e-05, - "loss": 0.0376, + "learning_rate": 2.531742936046058e-05, + "loss": 0.0767, "step": 51020 }, { "epoch": 2.38, - "learning_rate": 1.5309643242229618e-05, - "loss": 0.0529, + "learning_rate": 2.531696129062456e-05, + "loss": 0.0138, "step": 51025 }, { "epoch": 2.38, - "learning_rate": 1.530917444095448e-05, - "loss": 0.0384, + "learning_rate": 2.531649322078854e-05, + "loss": 0.0405, "step": 51030 }, { "epoch": 2.38, - "learning_rate": 1.530870563967934e-05, - "loss": 0.0483, + "learning_rate": 2.5316025150952524e-05, + "loss": 0.0609, "step": 51035 }, { "epoch": 2.38, - "learning_rate": 1.53082368384042e-05, - "loss": 0.0941, + "learning_rate": 2.5315557081116504e-05, + "loss": 0.1235, "step": 51040 }, { "epoch": 2.38, - "learning_rate": 1.5307768037129064e-05, - "loss": 0.1144, + "learning_rate": 2.5315089011280484e-05, + "loss": 0.114, "step": 51045 }, { "epoch": 2.38, - "learning_rate": 1.5307299235853924e-05, - "loss": 0.1497, + "learning_rate": 2.5314620941444464e-05, + "loss": 0.111, "step": 51050 }, { "epoch": 2.38, - "learning_rate": 1.5306830434578784e-05, - "loss": 0.0734, + "learning_rate": 2.5314152871608447e-05, + "loss": 0.1615, "step": 51055 }, { "epoch": 2.38, - "learning_rate": 1.5306361633303644e-05, - "loss": 0.3083, + "learning_rate": 2.5313684801772427e-05, + "loss": 0.3308, "step": 51060 }, { "epoch": 2.38, - "learning_rate": 1.5305892832028504e-05, - "loss": 0.2441, + "learning_rate": 2.5313216731936403e-05, + "loss": 0.3028, "step": 51065 }, { "epoch": 2.38, - "learning_rate": 1.5305424030753364e-05, - "loss": 0.0149, + "learning_rate": 2.5312748662100383e-05, + "loss": 0.0886, "step": 51070 }, { "epoch": 2.38, - "learning_rate": 1.5304955229478224e-05, - "loss": 0.0412, + "learning_rate": 2.5312280592264366e-05, + "loss": 0.0574, "step": 51075 }, { "epoch": 2.38, - "learning_rate": 1.5304486428203087e-05, - "loss": 0.0896, + "learning_rate": 2.5311812522428346e-05, + "loss": 0.0315, "step": 51080 }, { "epoch": 2.38, - "learning_rate": 1.5304017626927947e-05, - "loss": 0.0703, + "learning_rate": 2.5311344452592326e-05, + "loss": 0.086, "step": 51085 }, { "epoch": 2.38, - "learning_rate": 1.5303548825652807e-05, - "loss": 0.0497, + "learning_rate": 2.531087638275631e-05, + "loss": 0.0702, "step": 51090 }, { "epoch": 2.38, - "learning_rate": 1.5303080024377667e-05, - "loss": 0.083, + "learning_rate": 2.531040831292029e-05, + "loss": 0.1331, "step": 51095 }, { "epoch": 2.38, - "learning_rate": 1.5302611223102527e-05, - "loss": 0.2109, + "learning_rate": 2.530994024308427e-05, + "loss": 0.0748, "step": 51100 }, { "epoch": 2.38, - "learning_rate": 1.5302142421827387e-05, - "loss": 0.2021, + "learning_rate": 2.530947217324825e-05, + "loss": 0.1176, "step": 51105 }, { "epoch": 2.38, - "learning_rate": 1.530167362055225e-05, - "loss": 0.3419, + "learning_rate": 2.5309004103412232e-05, + "loss": 0.3016, "step": 51110 }, { "epoch": 2.39, - "learning_rate": 1.530120481927711e-05, - "loss": 0.3248, + "learning_rate": 2.5308536033576212e-05, + "loss": 0.2768, "step": 51115 }, { "epoch": 2.39, - "learning_rate": 1.530073601800197e-05, - "loss": 0.096, + "learning_rate": 2.530806796374019e-05, + "loss": 0.0692, "step": 51120 }, { "epoch": 2.39, - "learning_rate": 1.5300267216726833e-05, - "loss": 0.064, + "learning_rate": 2.530759989390417e-05, + "loss": 0.053, "step": 51125 }, { "epoch": 2.39, - "learning_rate": 1.5299798415451693e-05, - "loss": 0.0324, + "learning_rate": 2.530713182406815e-05, + "loss": 0.0446, "step": 51130 }, { "epoch": 2.39, - "learning_rate": 1.5299329614176553e-05, - "loss": 0.0749, + "learning_rate": 2.530666375423213e-05, + "loss": 0.09, "step": 51135 }, { "epoch": 2.39, - "learning_rate": 1.5298860812901413e-05, - "loss": 0.0687, + "learning_rate": 2.530619568439611e-05, + "loss": 0.0447, "step": 51140 }, { "epoch": 2.39, - "learning_rate": 1.5298392011626273e-05, - "loss": 0.0519, + "learning_rate": 2.5305727614560094e-05, + "loss": 0.0908, "step": 51145 }, { "epoch": 2.39, - "learning_rate": 1.5297923210351133e-05, - "loss": 0.1116, + "learning_rate": 2.5305259544724074e-05, + "loss": 0.1345, "step": 51150 }, { "epoch": 2.39, - "learning_rate": 1.5297454409075993e-05, - "loss": 0.2051, + "learning_rate": 2.5304791474888054e-05, + "loss": 0.1828, "step": 51155 }, { "epoch": 2.39, - "learning_rate": 1.5296985607800853e-05, - "loss": 0.1371, + "learning_rate": 2.5304323405052033e-05, + "loss": 0.3409, "step": 51160 }, { "epoch": 2.39, - "learning_rate": 1.5296516806525713e-05, - "loss": 0.3723, + "learning_rate": 2.5303855335216017e-05, + "loss": 0.1768, "step": 51165 }, { "epoch": 2.39, - "learning_rate": 1.5296048005250573e-05, - "loss": 0.0814, + "learning_rate": 2.5303387265379996e-05, + "loss": 0.0451, "step": 51170 }, { "epoch": 2.39, - "learning_rate": 1.5295579203975436e-05, - "loss": 0.0467, + "learning_rate": 2.5302919195543976e-05, + "loss": 0.0485, "step": 51175 }, { "epoch": 2.39, - "learning_rate": 1.5295110402700296e-05, - "loss": 0.0746, + "learning_rate": 2.5302451125707956e-05, + "loss": 0.0633, "step": 51180 }, { "epoch": 2.39, - "learning_rate": 1.5294641601425156e-05, - "loss": 0.0812, + "learning_rate": 2.530198305587194e-05, + "loss": 0.0743, "step": 51185 }, { "epoch": 2.39, - "learning_rate": 1.529417280015002e-05, - "loss": 0.0522, + "learning_rate": 2.5301514986035916e-05, + "loss": 0.0756, "step": 51190 }, { "epoch": 2.39, - "learning_rate": 1.529370399887488e-05, - "loss": 0.0694, + "learning_rate": 2.5301046916199896e-05, + "loss": 0.1049, "step": 51195 }, { "epoch": 2.39, - "learning_rate": 1.529323519759974e-05, - "loss": 0.1579, + "learning_rate": 2.5300578846363875e-05, + "loss": 0.1493, "step": 51200 }, { "epoch": 2.39, - "learning_rate": 1.52927663963246e-05, - "loss": 0.2847, + "learning_rate": 2.530011077652786e-05, + "loss": 0.1933, "step": 51205 }, { "epoch": 2.39, - "learning_rate": 1.529229759504946e-05, - "loss": 0.2189, + "learning_rate": 2.529964270669184e-05, + "loss": 0.3533, "step": 51210 }, { "epoch": 2.39, - "learning_rate": 1.529182879377432e-05, - "loss": 0.3801, + "learning_rate": 2.5299174636855818e-05, + "loss": 0.4012, "step": 51215 }, { "epoch": 2.39, - "learning_rate": 1.5291359992499182e-05, - "loss": 0.0602, + "learning_rate": 2.52987065670198e-05, + "loss": 0.0426, "step": 51220 }, { "epoch": 2.39, - "learning_rate": 1.5290891191224042e-05, - "loss": 0.0781, + "learning_rate": 2.529823849718378e-05, + "loss": 0.0591, "step": 51225 }, { "epoch": 2.39, - "learning_rate": 1.5290422389948902e-05, - "loss": 0.0435, + "learning_rate": 2.529777042734776e-05, + "loss": 0.0662, "step": 51230 }, { "epoch": 2.39, - "learning_rate": 1.5289953588673762e-05, - "loss": 0.0791, + "learning_rate": 2.529730235751174e-05, + "loss": 0.0435, "step": 51235 }, { "epoch": 2.39, - "learning_rate": 1.5289484787398622e-05, - "loss": 0.1357, + "learning_rate": 2.5296834287675724e-05, + "loss": 0.0817, "step": 51240 }, { "epoch": 2.39, - "learning_rate": 1.5289015986123482e-05, - "loss": 0.0868, + "learning_rate": 2.5296366217839704e-05, + "loss": 0.0782, "step": 51245 }, { "epoch": 2.39, - "learning_rate": 1.5288547184848342e-05, - "loss": 0.1032, + "learning_rate": 2.5295898148003684e-05, + "loss": 0.0707, "step": 51250 }, { "epoch": 2.39, - "learning_rate": 1.5288078383573205e-05, - "loss": 0.1667, + "learning_rate": 2.529543007816766e-05, + "loss": 0.156, "step": 51255 }, { "epoch": 2.39, - "learning_rate": 1.5287609582298065e-05, - "loss": 0.3281, + "learning_rate": 2.5294962008331643e-05, + "loss": 0.2861, "step": 51260 }, { "epoch": 2.39, - "learning_rate": 1.5287140781022925e-05, - "loss": 0.3135, + "learning_rate": 2.5294493938495623e-05, + "loss": 0.19, "step": 51265 }, { "epoch": 2.39, - "learning_rate": 1.528667197974779e-05, - "loss": 0.0099, + "learning_rate": 2.5294025868659603e-05, + "loss": 0.0505, "step": 51270 }, { "epoch": 2.39, - "learning_rate": 1.528620317847265e-05, - "loss": 0.0385, + "learning_rate": 2.5293557798823586e-05, + "loss": 0.0584, "step": 51275 }, { "epoch": 2.39, - "learning_rate": 1.5285734377197508e-05, - "loss": 0.0753, + "learning_rate": 2.5293089728987566e-05, + "loss": 0.0735, "step": 51280 }, { "epoch": 2.39, - "learning_rate": 1.5285265575922368e-05, - "loss": 0.0719, + "learning_rate": 2.5292621659151546e-05, + "loss": 0.0549, "step": 51285 }, { "epoch": 2.39, - "learning_rate": 1.5284796774647228e-05, - "loss": 0.0609, + "learning_rate": 2.5292153589315526e-05, + "loss": 0.1312, "step": 51290 }, { "epoch": 2.39, - "learning_rate": 1.5284327973372088e-05, - "loss": 0.1466, + "learning_rate": 2.529168551947951e-05, + "loss": 0.0389, "step": 51295 }, { "epoch": 2.39, - "learning_rate": 1.5283859172096948e-05, - "loss": 0.1317, + "learning_rate": 2.529121744964349e-05, + "loss": 0.1786, "step": 51300 }, { "epoch": 2.39, - "learning_rate": 1.5283390370821808e-05, - "loss": 0.2492, + "learning_rate": 2.529074937980747e-05, + "loss": 0.1736, "step": 51305 }, { "epoch": 2.39, - "learning_rate": 1.528292156954667e-05, - "loss": 0.2003, + "learning_rate": 2.529028130997145e-05, + "loss": 0.138, "step": 51310 }, { "epoch": 2.39, - "learning_rate": 1.528245276827153e-05, - "loss": 0.3231, + "learning_rate": 2.528981324013543e-05, + "loss": 0.2678, "step": 51315 }, { "epoch": 2.39, - "learning_rate": 1.528198396699639e-05, - "loss": 0.0387, + "learning_rate": 2.5289345170299408e-05, + "loss": 0.0584, "step": 51320 }, { "epoch": 2.39, - "learning_rate": 1.528151516572125e-05, - "loss": 0.0167, + "learning_rate": 2.5288877100463388e-05, + "loss": 0.0534, "step": 51325 }, { "epoch": 2.4, - "learning_rate": 1.528104636444611e-05, - "loss": 0.0384, + "learning_rate": 2.528840903062737e-05, + "loss": 0.0327, "step": 51330 }, { "epoch": 2.4, - "learning_rate": 1.5280577563170974e-05, - "loss": 0.0847, + "learning_rate": 2.528794096079135e-05, + "loss": 0.0561, "step": 51335 }, { "epoch": 2.4, - "learning_rate": 1.5280108761895834e-05, - "loss": 0.0887, + "learning_rate": 2.528747289095533e-05, + "loss": 0.1063, "step": 51340 }, { "epoch": 2.4, - "learning_rate": 1.5279639960620694e-05, - "loss": 0.1089, + "learning_rate": 2.528700482111931e-05, + "loss": 0.1915, "step": 51345 }, { "epoch": 2.4, - "learning_rate": 1.5279171159345554e-05, - "loss": 0.1063, + "learning_rate": 2.5286536751283294e-05, + "loss": 0.1177, "step": 51350 }, { "epoch": 2.4, - "learning_rate": 1.5278702358070414e-05, - "loss": 0.2167, + "learning_rate": 2.5286068681447273e-05, + "loss": 0.1729, "step": 51355 }, { "epoch": 2.4, - "learning_rate": 1.5278233556795277e-05, - "loss": 0.2051, + "learning_rate": 2.5285600611611253e-05, + "loss": 0.1774, "step": 51360 }, { "epoch": 2.4, - "learning_rate": 1.5277764755520137e-05, - "loss": 0.2115, + "learning_rate": 2.5285132541775233e-05, + "loss": 0.2218, "step": 51365 }, { "epoch": 2.4, - "learning_rate": 1.5277295954244997e-05, - "loss": 0.04, + "learning_rate": 2.5284664471939216e-05, + "loss": 0.063, "step": 51370 }, { "epoch": 2.4, - "learning_rate": 1.5276827152969857e-05, - "loss": 0.0965, + "learning_rate": 2.5284196402103196e-05, + "loss": 0.0544, "step": 51375 }, { "epoch": 2.4, - "learning_rate": 1.5276358351694717e-05, - "loss": 0.0714, + "learning_rate": 2.5283728332267173e-05, + "loss": 0.0402, "step": 51380 }, { "epoch": 2.4, - "learning_rate": 1.5275889550419577e-05, - "loss": 0.0565, + "learning_rate": 2.5283260262431152e-05, + "loss": 0.0721, "step": 51385 }, { "epoch": 2.4, - "learning_rate": 1.5275420749144437e-05, - "loss": 0.0819, + "learning_rate": 2.5282792192595136e-05, + "loss": 0.0583, "step": 51390 }, { "epoch": 2.4, - "learning_rate": 1.52749519478693e-05, - "loss": 0.1281, + "learning_rate": 2.5282324122759115e-05, + "loss": 0.1504, "step": 51395 }, { "epoch": 2.4, - "learning_rate": 1.527448314659416e-05, - "loss": 0.1891, + "learning_rate": 2.5281856052923095e-05, + "loss": 0.1562, "step": 51400 }, { "epoch": 2.4, - "learning_rate": 1.527401434531902e-05, - "loss": 0.2188, + "learning_rate": 2.528138798308708e-05, + "loss": 0.12, "step": 51405 }, { "epoch": 2.4, - "learning_rate": 1.527354554404388e-05, - "loss": 0.2091, + "learning_rate": 2.5280919913251058e-05, + "loss": 0.3152, "step": 51410 }, { "epoch": 2.4, - "learning_rate": 1.5273076742768743e-05, - "loss": 0.2994, + "learning_rate": 2.5280451843415038e-05, + "loss": 0.1733, "step": 51415 }, { "epoch": 2.4, - "learning_rate": 1.5272607941493603e-05, - "loss": 0.0559, + "learning_rate": 2.5279983773579018e-05, + "loss": 0.0389, "step": 51420 }, { "epoch": 2.4, - "learning_rate": 1.5272139140218463e-05, - "loss": 0.0707, + "learning_rate": 2.5279515703743e-05, + "loss": 0.0556, "step": 51425 }, { "epoch": 2.4, - "learning_rate": 1.5271670338943323e-05, - "loss": 0.0289, + "learning_rate": 2.527904763390698e-05, + "loss": 0.0541, "step": 51430 }, { "epoch": 2.4, - "learning_rate": 1.5271201537668183e-05, - "loss": 0.1167, + "learning_rate": 2.527857956407096e-05, + "loss": 0.0539, "step": 51435 }, { "epoch": 2.4, - "learning_rate": 1.5270732736393043e-05, - "loss": 0.0636, + "learning_rate": 2.527811149423494e-05, + "loss": 0.0457, "step": 51440 }, { "epoch": 2.4, - "learning_rate": 1.5270263935117903e-05, - "loss": 0.1197, + "learning_rate": 2.527764342439892e-05, + "loss": 0.1715, "step": 51445 }, { "epoch": 2.4, - "learning_rate": 1.5269795133842766e-05, - "loss": 0.1567, + "learning_rate": 2.52771753545629e-05, + "loss": 0.1252, "step": 51450 }, { "epoch": 2.4, - "learning_rate": 1.5269326332567626e-05, - "loss": 0.1569, + "learning_rate": 2.527670728472688e-05, + "loss": 0.1371, "step": 51455 }, { "epoch": 2.4, - "learning_rate": 1.5268857531292486e-05, - "loss": 0.1988, + "learning_rate": 2.5276239214890863e-05, + "loss": 0.1839, "step": 51460 }, { "epoch": 2.4, - "learning_rate": 1.5268388730017346e-05, - "loss": 0.2972, + "learning_rate": 2.5275771145054843e-05, + "loss": 0.2957, "step": 51465 }, { "epoch": 2.4, - "learning_rate": 1.5267919928742206e-05, - "loss": 0.0234, + "learning_rate": 2.5275303075218823e-05, + "loss": 0.0435, "step": 51470 }, { "epoch": 2.4, - "learning_rate": 1.526745112746707e-05, - "loss": 0.0534, + "learning_rate": 2.5274835005382803e-05, + "loss": 0.0607, "step": 51475 }, { "epoch": 2.4, - "learning_rate": 1.526698232619193e-05, - "loss": 0.1281, + "learning_rate": 2.5274366935546786e-05, + "loss": 0.1073, "step": 51480 }, { "epoch": 2.4, - "learning_rate": 1.526651352491679e-05, - "loss": 0.0836, + "learning_rate": 2.5273898865710766e-05, + "loss": 0.0364, "step": 51485 }, { "epoch": 2.4, - "learning_rate": 1.526604472364165e-05, - "loss": 0.0708, + "learning_rate": 2.5273430795874745e-05, + "loss": 0.0529, "step": 51490 }, { "epoch": 2.4, - "learning_rate": 1.526557592236651e-05, - "loss": 0.0852, + "learning_rate": 2.5272962726038725e-05, + "loss": 0.1174, "step": 51495 }, { "epoch": 2.4, - "learning_rate": 1.5265107121091372e-05, - "loss": 0.1376, + "learning_rate": 2.527249465620271e-05, + "loss": 0.1237, "step": 51500 }, { "epoch": 2.4, - "learning_rate": 1.5264638319816232e-05, - "loss": 0.1672, + "learning_rate": 2.5272026586366685e-05, + "loss": 0.1694, "step": 51505 }, { "epoch": 2.4, - "learning_rate": 1.5264169518541092e-05, - "loss": 0.31, + "learning_rate": 2.5271558516530665e-05, + "loss": 0.2309, "step": 51510 }, { "epoch": 2.4, - "learning_rate": 1.5263700717265952e-05, - "loss": 0.2633, + "learning_rate": 2.5271090446694648e-05, + "loss": 0.2679, "step": 51515 }, { "epoch": 2.4, - "learning_rate": 1.5263231915990812e-05, - "loss": 0.0777, + "learning_rate": 2.5270622376858628e-05, + "loss": 0.0264, "step": 51520 }, { "epoch": 2.4, - "learning_rate": 1.5262763114715672e-05, - "loss": 0.0532, + "learning_rate": 2.5270154307022608e-05, + "loss": 0.0459, "step": 51525 }, { "epoch": 2.4, - "learning_rate": 1.5262294313440532e-05, - "loss": 0.0739, + "learning_rate": 2.5269686237186587e-05, + "loss": 0.0516, "step": 51530 }, { "epoch": 2.4, - "learning_rate": 1.5261825512165392e-05, - "loss": 0.2191, + "learning_rate": 2.526921816735057e-05, + "loss": 0.0762, "step": 51535 }, { "epoch": 2.4, - "learning_rate": 1.5261356710890255e-05, - "loss": 0.0397, + "learning_rate": 2.526875009751455e-05, + "loss": 0.1188, "step": 51540 }, { "epoch": 2.41, - "learning_rate": 1.5260887909615115e-05, - "loss": 0.0308, + "learning_rate": 2.526828202767853e-05, + "loss": 0.1007, "step": 51545 }, { "epoch": 2.41, - "learning_rate": 1.5260419108339975e-05, - "loss": 0.1165, + "learning_rate": 2.526781395784251e-05, + "loss": 0.11, "step": 51550 }, { "epoch": 2.41, - "learning_rate": 1.525995030706484e-05, - "loss": 0.1375, + "learning_rate": 2.5267345888006493e-05, + "loss": 0.1335, "step": 51555 }, { "epoch": 2.41, - "learning_rate": 1.52594815057897e-05, - "loss": 0.3485, + "learning_rate": 2.5266877818170473e-05, + "loss": 0.1747, "step": 51560 }, { "epoch": 2.41, - "learning_rate": 1.525901270451456e-05, - "loss": 0.4187, + "learning_rate": 2.5266409748334453e-05, + "loss": 0.4072, "step": 51565 }, { "epoch": 2.41, - "learning_rate": 1.5258543903239418e-05, - "loss": 0.0766, + "learning_rate": 2.526594167849843e-05, + "loss": 0.0689, "step": 51570 }, { "epoch": 2.41, - "learning_rate": 1.5258075101964278e-05, - "loss": 0.0273, + "learning_rate": 2.5265473608662413e-05, + "loss": 0.0277, "step": 51575 }, { "epoch": 2.41, - "learning_rate": 1.5257606300689138e-05, - "loss": 0.0801, + "learning_rate": 2.5265005538826392e-05, + "loss": 0.0869, "step": 51580 }, { "epoch": 2.41, - "learning_rate": 1.5257137499413998e-05, - "loss": 0.0657, + "learning_rate": 2.5264537468990372e-05, + "loss": 0.0741, "step": 51585 }, { "epoch": 2.41, - "learning_rate": 1.5256668698138861e-05, - "loss": 0.0234, + "learning_rate": 2.5264069399154355e-05, + "loss": 0.1226, "step": 51590 }, { "epoch": 2.41, - "learning_rate": 1.5256199896863721e-05, - "loss": 0.1634, + "learning_rate": 2.5263601329318335e-05, + "loss": 0.1205, "step": 51595 }, { "epoch": 2.41, - "learning_rate": 1.5255731095588581e-05, - "loss": 0.1124, + "learning_rate": 2.5263133259482315e-05, + "loss": 0.0689, "step": 51600 }, { "epoch": 2.41, - "learning_rate": 1.5255262294313441e-05, - "loss": 0.2432, + "learning_rate": 2.5262665189646295e-05, + "loss": 0.1892, "step": 51605 }, { "epoch": 2.41, - "learning_rate": 1.5254793493038303e-05, - "loss": 0.3987, + "learning_rate": 2.5262197119810278e-05, + "loss": 0.329, "step": 51610 }, { "epoch": 2.41, - "learning_rate": 1.5254324691763163e-05, - "loss": 0.2696, + "learning_rate": 2.5261729049974258e-05, + "loss": 0.2043, "step": 51615 }, { "epoch": 2.41, - "learning_rate": 1.5253855890488023e-05, - "loss": 0.0878, + "learning_rate": 2.5261260980138238e-05, + "loss": 0.034, "step": 51620 }, { "epoch": 2.41, - "learning_rate": 1.5253387089212883e-05, - "loss": 0.0419, + "learning_rate": 2.5260792910302217e-05, + "loss": 0.0212, "step": 51625 }, { "epoch": 2.41, - "learning_rate": 1.5252918287937744e-05, - "loss": 0.0225, + "learning_rate": 2.52603248404662e-05, + "loss": 0.0976, "step": 51630 }, { "epoch": 2.41, - "learning_rate": 1.5252449486662606e-05, - "loss": 0.0671, + "learning_rate": 2.5259856770630177e-05, + "loss": 0.134, "step": 51635 }, { "epoch": 2.41, - "learning_rate": 1.5251980685387466e-05, - "loss": 0.1203, + "learning_rate": 2.5259388700794157e-05, + "loss": 0.1299, "step": 51640 }, { "epoch": 2.41, - "learning_rate": 1.5251511884112326e-05, - "loss": 0.0863, + "learning_rate": 2.525892063095814e-05, + "loss": 0.1233, "step": 51645 }, { "epoch": 2.41, - "learning_rate": 1.5251043082837187e-05, - "loss": 0.0896, + "learning_rate": 2.525845256112212e-05, + "loss": 0.047, "step": 51650 }, { "epoch": 2.41, - "learning_rate": 1.5250574281562047e-05, - "loss": 0.179, + "learning_rate": 2.52579844912861e-05, + "loss": 0.1237, "step": 51655 }, { "epoch": 2.41, - "learning_rate": 1.5250105480286907e-05, - "loss": 0.1446, + "learning_rate": 2.525751642145008e-05, + "loss": 0.2555, "step": 51660 }, { "epoch": 2.41, - "learning_rate": 1.5249636679011767e-05, - "loss": 0.2598, + "learning_rate": 2.5257048351614063e-05, + "loss": 0.3221, "step": 51665 }, { "epoch": 2.41, - "learning_rate": 1.5249167877736629e-05, - "loss": 0.0394, + "learning_rate": 2.5256580281778043e-05, + "loss": 0.0669, "step": 51670 }, { "epoch": 2.41, - "learning_rate": 1.5248699076461489e-05, - "loss": 0.044, + "learning_rate": 2.5256112211942022e-05, + "loss": 0.0397, "step": 51675 }, { "epoch": 2.41, - "learning_rate": 1.5248230275186349e-05, - "loss": 0.1548, + "learning_rate": 2.5255644142106002e-05, + "loss": 0.1037, "step": 51680 }, { "epoch": 2.41, - "learning_rate": 1.524776147391121e-05, - "loss": 0.0878, + "learning_rate": 2.5255176072269985e-05, + "loss": 0.049, "step": 51685 }, { "epoch": 2.41, - "learning_rate": 1.5247292672636072e-05, - "loss": 0.1237, + "learning_rate": 2.5254708002433965e-05, + "loss": 0.045, "step": 51690 }, { "epoch": 2.41, - "learning_rate": 1.5246823871360932e-05, - "loss": 0.1141, + "learning_rate": 2.5254239932597942e-05, + "loss": 0.0432, "step": 51695 }, { "epoch": 2.41, - "learning_rate": 1.5246355070085792e-05, - "loss": 0.0894, + "learning_rate": 2.5253771862761925e-05, + "loss": 0.0717, "step": 51700 }, { "epoch": 2.41, - "learning_rate": 1.5245886268810652e-05, - "loss": 0.291, + "learning_rate": 2.5253303792925905e-05, + "loss": 0.2429, "step": 51705 }, { "epoch": 2.41, - "learning_rate": 1.5245417467535513e-05, - "loss": 0.2835, + "learning_rate": 2.5252835723089885e-05, + "loss": 0.2468, "step": 51710 }, { "epoch": 2.41, - "learning_rate": 1.5244948666260373e-05, - "loss": 0.3245, + "learning_rate": 2.5252367653253864e-05, + "loss": 0.3373, "step": 51715 }, { "epoch": 2.41, - "learning_rate": 1.5244479864985233e-05, - "loss": 0.0313, + "learning_rate": 2.5251899583417848e-05, + "loss": 0.0418, "step": 51720 }, { "epoch": 2.41, - "learning_rate": 1.5244011063710093e-05, - "loss": 0.0367, + "learning_rate": 2.5251431513581827e-05, + "loss": 0.0174, "step": 51725 }, { "epoch": 2.41, - "learning_rate": 1.5243542262434957e-05, - "loss": 0.068, + "learning_rate": 2.5250963443745807e-05, + "loss": 0.0545, "step": 51730 }, { "epoch": 2.41, - "learning_rate": 1.5243073461159816e-05, - "loss": 0.0796, + "learning_rate": 2.5250495373909787e-05, + "loss": 0.0718, "step": 51735 }, { "epoch": 2.41, - "learning_rate": 1.5242604659884676e-05, - "loss": 0.0945, + "learning_rate": 2.525002730407377e-05, + "loss": 0.1581, "step": 51740 }, { "epoch": 2.41, - "learning_rate": 1.5242135858609536e-05, - "loss": 0.1982, + "learning_rate": 2.524955923423775e-05, + "loss": 0.101, "step": 51745 }, { "epoch": 2.41, - "learning_rate": 1.5241667057334398e-05, - "loss": 0.1924, + "learning_rate": 2.524909116440173e-05, + "loss": 0.1302, "step": 51750 }, { "epoch": 2.41, - "learning_rate": 1.5241198256059258e-05, - "loss": 0.1331, + "learning_rate": 2.5248623094565713e-05, + "loss": 0.1135, "step": 51755 }, { "epoch": 2.42, - "learning_rate": 1.5240729454784118e-05, - "loss": 0.1925, + "learning_rate": 2.524815502472969e-05, + "loss": 0.2549, "step": 51760 }, { "epoch": 2.42, - "learning_rate": 1.5240260653508978e-05, - "loss": 0.2428, + "learning_rate": 2.524768695489367e-05, + "loss": 0.4319, "step": 51765 }, { "epoch": 2.42, - "learning_rate": 1.5239791852233838e-05, - "loss": 0.0137, + "learning_rate": 2.524721888505765e-05, + "loss": 0.055, "step": 51770 }, { "epoch": 2.42, - "learning_rate": 1.5239323050958701e-05, - "loss": 0.0859, + "learning_rate": 2.5246750815221632e-05, + "loss": 0.0365, "step": 51775 }, { "epoch": 2.42, - "learning_rate": 1.5238854249683561e-05, - "loss": 0.0738, + "learning_rate": 2.5246282745385612e-05, + "loss": 0.0658, "step": 51780 }, { "epoch": 2.42, - "learning_rate": 1.5238385448408421e-05, - "loss": 0.0828, + "learning_rate": 2.5245814675549592e-05, + "loss": 0.084, "step": 51785 }, { "epoch": 2.42, - "learning_rate": 1.5237916647133283e-05, - "loss": 0.0725, + "learning_rate": 2.5245346605713572e-05, + "loss": 0.1265, "step": 51790 }, { "epoch": 2.42, - "learning_rate": 1.5237447845858142e-05, - "loss": 0.0752, + "learning_rate": 2.5244878535877555e-05, + "loss": 0.0726, "step": 51795 }, { "epoch": 2.42, - "learning_rate": 1.5236979044583002e-05, - "loss": 0.0732, + "learning_rate": 2.5244410466041535e-05, + "loss": 0.1055, "step": 51800 }, { "epoch": 2.42, - "learning_rate": 1.5236510243307862e-05, - "loss": 0.2306, + "learning_rate": 2.5243942396205515e-05, + "loss": 0.1952, "step": 51805 }, { "epoch": 2.42, - "learning_rate": 1.5236041442032722e-05, - "loss": 0.2798, + "learning_rate": 2.5243474326369494e-05, + "loss": 0.1421, "step": 51810 }, { "epoch": 2.42, - "learning_rate": 1.5235572640757584e-05, - "loss": 0.2878, + "learning_rate": 2.5243006256533478e-05, + "loss": 0.3485, "step": 51815 }, { "epoch": 2.42, - "learning_rate": 1.5235103839482444e-05, - "loss": 0.0305, + "learning_rate": 2.5242538186697457e-05, + "loss": 0.0908, "step": 51820 }, { "epoch": 2.42, - "learning_rate": 1.5234635038207305e-05, - "loss": 0.051, + "learning_rate": 2.5242070116861434e-05, + "loss": 0.0349, "step": 51825 }, { "epoch": 2.42, - "learning_rate": 1.5234166236932167e-05, - "loss": 0.0365, + "learning_rate": 2.5241602047025417e-05, + "loss": 0.0974, "step": 51830 }, { "epoch": 2.42, - "learning_rate": 1.5233697435657027e-05, - "loss": 0.0645, + "learning_rate": 2.5241133977189397e-05, + "loss": 0.0189, "step": 51835 }, { "epoch": 2.42, - "learning_rate": 1.5233228634381887e-05, - "loss": 0.1311, + "learning_rate": 2.5240665907353377e-05, + "loss": 0.0704, "step": 51840 }, { "epoch": 2.42, - "learning_rate": 1.5232759833106747e-05, - "loss": 0.1698, + "learning_rate": 2.5240197837517357e-05, + "loss": 0.1367, "step": 51845 }, { "epoch": 2.42, - "learning_rate": 1.5232291031831607e-05, - "loss": 0.0807, + "learning_rate": 2.523972976768134e-05, + "loss": 0.1309, "step": 51850 }, { "epoch": 2.42, - "learning_rate": 1.5231822230556468e-05, - "loss": 0.1707, + "learning_rate": 2.523926169784532e-05, + "loss": 0.2016, "step": 51855 }, { "epoch": 2.42, - "learning_rate": 1.5231353429281328e-05, - "loss": 0.192, + "learning_rate": 2.52387936280093e-05, + "loss": 0.1476, "step": 51860 }, { "epoch": 2.42, - "learning_rate": 1.5230884628006188e-05, - "loss": 0.2625, + "learning_rate": 2.523832555817328e-05, + "loss": 0.3493, "step": 51865 }, { "epoch": 2.42, - "learning_rate": 1.5230415826731052e-05, - "loss": 0.03, + "learning_rate": 2.5237857488337262e-05, + "loss": 0.0545, "step": 51870 }, { "epoch": 2.42, - "learning_rate": 1.5229947025455912e-05, - "loss": 0.0394, + "learning_rate": 2.5237389418501242e-05, + "loss": 0.0496, "step": 51875 }, { "epoch": 2.42, - "learning_rate": 1.5229478224180771e-05, - "loss": 0.0815, + "learning_rate": 2.5236921348665222e-05, + "loss": 0.0513, "step": 51880 }, { "epoch": 2.42, - "learning_rate": 1.5229009422905631e-05, - "loss": 0.0712, + "learning_rate": 2.5236453278829202e-05, + "loss": 0.0263, "step": 51885 }, { "epoch": 2.42, - "learning_rate": 1.5228540621630491e-05, - "loss": 0.0508, + "learning_rate": 2.5235985208993182e-05, + "loss": 0.0704, "step": 51890 }, { "epoch": 2.42, - "learning_rate": 1.5228071820355353e-05, - "loss": 0.1297, + "learning_rate": 2.523551713915716e-05, + "loss": 0.1454, "step": 51895 }, { "epoch": 2.42, - "learning_rate": 1.5227603019080213e-05, - "loss": 0.1003, + "learning_rate": 2.523504906932114e-05, + "loss": 0.201, "step": 51900 }, { "epoch": 2.42, - "learning_rate": 1.5227134217805073e-05, - "loss": 0.2595, + "learning_rate": 2.5234580999485125e-05, + "loss": 0.2355, "step": 51905 }, { "epoch": 2.42, - "learning_rate": 1.5226665416529933e-05, - "loss": 0.2423, + "learning_rate": 2.5234112929649104e-05, + "loss": 0.3176, "step": 51910 }, { "epoch": 2.42, - "learning_rate": 1.5226196615254796e-05, - "loss": 0.3415, + "learning_rate": 2.5233644859813084e-05, + "loss": 0.2759, "step": 51915 }, { "epoch": 2.42, - "learning_rate": 1.5225727813979656e-05, - "loss": 0.0748, + "learning_rate": 2.5233176789977064e-05, + "loss": 0.0285, "step": 51920 }, { "epoch": 2.42, - "learning_rate": 1.5225259012704516e-05, - "loss": 0.0466, + "learning_rate": 2.5232708720141047e-05, + "loss": 0.0275, "step": 51925 }, { "epoch": 2.42, - "learning_rate": 1.5224790211429376e-05, - "loss": 0.1348, + "learning_rate": 2.5232240650305027e-05, + "loss": 0.0391, "step": 51930 }, { "epoch": 2.42, - "learning_rate": 1.5224321410154238e-05, - "loss": 0.0513, + "learning_rate": 2.5231772580469007e-05, + "loss": 0.0544, "step": 51935 }, { "epoch": 2.42, - "learning_rate": 1.5223852608879097e-05, - "loss": 0.0782, + "learning_rate": 2.523130451063299e-05, + "loss": 0.084, "step": 51940 }, { "epoch": 2.42, - "learning_rate": 1.5223383807603957e-05, - "loss": 0.1423, + "learning_rate": 2.523083644079697e-05, + "loss": 0.0853, "step": 51945 }, { "epoch": 2.42, - "learning_rate": 1.5222915006328817e-05, - "loss": 0.0698, + "learning_rate": 2.5230368370960946e-05, + "loss": 0.1162, "step": 51950 }, { "epoch": 2.42, - "learning_rate": 1.5222446205053677e-05, - "loss": 0.1165, + "learning_rate": 2.5229900301124926e-05, + "loss": 0.2122, "step": 51955 }, { "epoch": 2.42, - "learning_rate": 1.522197740377854e-05, - "loss": 0.1488, + "learning_rate": 2.522943223128891e-05, + "loss": 0.26, "step": 51960 }, { "epoch": 2.42, - "learning_rate": 1.52215086025034e-05, - "loss": 0.2605, + "learning_rate": 2.522896416145289e-05, + "loss": 0.2532, "step": 51965 }, { "epoch": 2.42, - "learning_rate": 1.522103980122826e-05, - "loss": 0.0814, + "learning_rate": 2.522849609161687e-05, + "loss": 0.0949, "step": 51970 }, { "epoch": 2.43, - "learning_rate": 1.5220570999953122e-05, - "loss": 0.0322, + "learning_rate": 2.522802802178085e-05, + "loss": 0.0662, "step": 51975 }, { "epoch": 2.43, - "learning_rate": 1.5220102198677982e-05, - "loss": 0.05, + "learning_rate": 2.5227559951944832e-05, + "loss": 0.0292, "step": 51980 }, { "epoch": 2.43, - "learning_rate": 1.5219633397402842e-05, - "loss": 0.1171, + "learning_rate": 2.5227091882108812e-05, + "loss": 0.0273, "step": 51985 }, { "epoch": 2.43, - "learning_rate": 1.5219164596127702e-05, - "loss": 0.0913, + "learning_rate": 2.522662381227279e-05, + "loss": 0.0494, "step": 51990 }, { "epoch": 2.43, - "learning_rate": 1.5218695794852562e-05, - "loss": 0.1017, + "learning_rate": 2.522615574243677e-05, + "loss": 0.0851, "step": 51995 }, { "epoch": 2.43, - "learning_rate": 1.5218226993577423e-05, - "loss": 0.1502, + "learning_rate": 2.5225687672600755e-05, + "loss": 0.1209, "step": 52000 }, { "epoch": 2.43, - "learning_rate": 1.5217758192302283e-05, - "loss": 0.1181, + "learning_rate": 2.5225219602764734e-05, + "loss": 0.2379, "step": 52005 }, { "epoch": 2.43, - "learning_rate": 1.5217289391027145e-05, - "loss": 0.2336, + "learning_rate": 2.5224751532928714e-05, + "loss": 0.1028, "step": 52010 }, { "epoch": 2.43, - "learning_rate": 1.5216820589752007e-05, - "loss": 0.211, + "learning_rate": 2.5224283463092694e-05, + "loss": 0.2222, "step": 52015 }, { "epoch": 2.43, - "learning_rate": 1.5216351788476867e-05, - "loss": 0.0689, + "learning_rate": 2.5223815393256674e-05, + "loss": 0.0303, "step": 52020 }, { "epoch": 2.43, - "learning_rate": 1.5215882987201726e-05, - "loss": 0.0412, + "learning_rate": 2.5223347323420654e-05, + "loss": 0.0438, "step": 52025 }, { "epoch": 2.43, - "learning_rate": 1.5215414185926586e-05, - "loss": 0.0589, + "learning_rate": 2.5222879253584634e-05, + "loss": 0.0611, "step": 52030 }, { "epoch": 2.43, - "learning_rate": 1.5214945384651448e-05, - "loss": 0.0807, + "learning_rate": 2.5222411183748617e-05, + "loss": 0.0848, "step": 52035 }, { "epoch": 2.43, - "learning_rate": 1.5214476583376308e-05, - "loss": 0.1278, + "learning_rate": 2.5221943113912597e-05, + "loss": 0.0753, "step": 52040 }, { "epoch": 2.43, - "learning_rate": 1.5214007782101168e-05, - "loss": 0.0676, + "learning_rate": 2.5221475044076576e-05, + "loss": 0.1874, "step": 52045 }, { "epoch": 2.43, - "learning_rate": 1.5213538980826028e-05, - "loss": 0.0983, + "learning_rate": 2.5221006974240556e-05, + "loss": 0.1245, "step": 52050 }, { "epoch": 2.43, - "learning_rate": 1.5213070179550891e-05, - "loss": 0.1433, + "learning_rate": 2.522053890440454e-05, + "loss": 0.1719, "step": 52055 }, { "epoch": 2.43, - "learning_rate": 1.5212601378275751e-05, - "loss": 0.2007, + "learning_rate": 2.522007083456852e-05, + "loss": 0.3701, "step": 52060 }, { "epoch": 2.43, - "learning_rate": 1.5212132577000611e-05, - "loss": 0.2886, + "learning_rate": 2.52196027647325e-05, + "loss": 0.201, "step": 52065 }, { "epoch": 2.43, - "learning_rate": 1.5211663775725471e-05, - "loss": 0.034, + "learning_rate": 2.5219134694896482e-05, + "loss": 0.0651, "step": 52070 }, { "epoch": 2.43, - "learning_rate": 1.5211194974450333e-05, - "loss": 0.0571, + "learning_rate": 2.521866662506046e-05, + "loss": 0.0635, "step": 52075 }, { "epoch": 2.43, - "learning_rate": 1.5210726173175193e-05, - "loss": 0.0658, + "learning_rate": 2.521819855522444e-05, + "loss": 0.0555, "step": 52080 }, { "epoch": 2.43, - "learning_rate": 1.5210257371900052e-05, - "loss": 0.0228, + "learning_rate": 2.521773048538842e-05, + "loss": 0.0525, "step": 52085 }, { "epoch": 2.43, - "learning_rate": 1.5209788570624912e-05, - "loss": 0.1038, + "learning_rate": 2.52172624155524e-05, + "loss": 0.0648, "step": 52090 }, { "epoch": 2.43, - "learning_rate": 1.5209319769349772e-05, - "loss": 0.0917, + "learning_rate": 2.521679434571638e-05, + "loss": 0.1424, "step": 52095 }, { "epoch": 2.43, - "learning_rate": 1.5208850968074636e-05, - "loss": 0.1264, + "learning_rate": 2.521632627588036e-05, + "loss": 0.1147, "step": 52100 }, { "epoch": 2.43, - "learning_rate": 1.5208382166799496e-05, - "loss": 0.1084, + "learning_rate": 2.521585820604434e-05, + "loss": 0.1584, "step": 52105 }, { "epoch": 2.43, - "learning_rate": 1.5207913365524356e-05, - "loss": 0.204, + "learning_rate": 2.5215390136208324e-05, + "loss": 0.2292, "step": 52110 }, { "epoch": 2.43, - "learning_rate": 1.5207444564249217e-05, - "loss": 0.3396, + "learning_rate": 2.5214922066372304e-05, + "loss": 0.3829, "step": 52115 }, { "epoch": 2.43, - "learning_rate": 1.5206975762974077e-05, - "loss": 0.0746, + "learning_rate": 2.5214453996536284e-05, + "loss": 0.047, "step": 52120 }, { "epoch": 2.43, - "learning_rate": 1.5206506961698937e-05, - "loss": 0.0563, + "learning_rate": 2.5213985926700267e-05, + "loss": 0.0609, "step": 52125 }, { "epoch": 2.43, - "learning_rate": 1.5206038160423797e-05, - "loss": 0.0551, + "learning_rate": 2.5213517856864247e-05, + "loss": 0.066, "step": 52130 }, { "epoch": 2.43, - "learning_rate": 1.5205569359148657e-05, - "loss": 0.0601, + "learning_rate": 2.5213049787028227e-05, + "loss": 0.0748, "step": 52135 }, { "epoch": 2.43, - "learning_rate": 1.5205100557873519e-05, - "loss": 0.1207, + "learning_rate": 2.5212581717192203e-05, + "loss": 0.1025, "step": 52140 }, { "epoch": 2.43, - "learning_rate": 1.5204631756598378e-05, - "loss": 0.1706, + "learning_rate": 2.5212113647356186e-05, + "loss": 0.0543, "step": 52145 }, { "epoch": 2.43, - "learning_rate": 1.520416295532324e-05, - "loss": 0.1324, + "learning_rate": 2.5211645577520166e-05, + "loss": 0.0698, "step": 52150 }, { "epoch": 2.43, - "learning_rate": 1.5203694154048102e-05, - "loss": 0.1642, + "learning_rate": 2.5211177507684146e-05, + "loss": 0.2142, "step": 52155 }, { "epoch": 2.43, - "learning_rate": 1.5203225352772962e-05, - "loss": 0.2112, + "learning_rate": 2.5210709437848126e-05, + "loss": 0.2254, "step": 52160 }, { "epoch": 2.43, - "learning_rate": 1.5202756551497822e-05, - "loss": 0.2276, + "learning_rate": 2.521024136801211e-05, + "loss": 0.4205, "step": 52165 }, { "epoch": 2.43, - "learning_rate": 1.5202287750222682e-05, - "loss": 0.0522, + "learning_rate": 2.520977329817609e-05, + "loss": 0.1196, "step": 52170 }, { "epoch": 2.43, - "learning_rate": 1.5201818948947541e-05, - "loss": 0.0433, + "learning_rate": 2.520930522834007e-05, + "loss": 0.0188, "step": 52175 }, { "epoch": 2.43, - "learning_rate": 1.5201350147672403e-05, - "loss": 0.0256, + "learning_rate": 2.520883715850405e-05, + "loss": 0.0982, "step": 52180 }, { "epoch": 2.44, - "learning_rate": 1.5200881346397263e-05, - "loss": 0.063, + "learning_rate": 2.520836908866803e-05, + "loss": 0.1506, "step": 52185 }, { "epoch": 2.44, - "learning_rate": 1.5200412545122123e-05, - "loss": 0.0976, + "learning_rate": 2.520790101883201e-05, + "loss": 0.0921, "step": 52190 }, { "epoch": 2.44, - "learning_rate": 1.5199943743846986e-05, - "loss": 0.1321, + "learning_rate": 2.520743294899599e-05, + "loss": 0.1336, "step": 52195 }, { "epoch": 2.44, - "learning_rate": 1.5199474942571846e-05, - "loss": 0.1091, + "learning_rate": 2.520696487915997e-05, + "loss": 0.1143, "step": 52200 }, { "epoch": 2.44, - "learning_rate": 1.5199006141296706e-05, - "loss": 0.1427, + "learning_rate": 2.520649680932395e-05, + "loss": 0.214, "step": 52205 }, { "epoch": 2.44, - "learning_rate": 1.5198537340021566e-05, - "loss": 0.2313, + "learning_rate": 2.520602873948793e-05, + "loss": 0.2243, "step": 52210 }, { "epoch": 2.44, - "learning_rate": 1.5198068538746426e-05, - "loss": 0.2734, + "learning_rate": 2.520556066965191e-05, + "loss": 0.4304, "step": 52215 }, { "epoch": 2.44, - "learning_rate": 1.5197599737471288e-05, - "loss": 0.0068, + "learning_rate": 2.5205092599815894e-05, + "loss": 0.0319, "step": 52220 }, { "epoch": 2.44, - "learning_rate": 1.5197130936196148e-05, - "loss": 0.0642, + "learning_rate": 2.5204624529979874e-05, + "loss": 0.0231, "step": 52225 }, { "epoch": 2.44, - "learning_rate": 1.5196662134921007e-05, - "loss": 0.072, + "learning_rate": 2.5204156460143853e-05, + "loss": 0.0737, "step": 52230 }, { "epoch": 2.44, - "learning_rate": 1.5196193333645867e-05, - "loss": 0.0945, + "learning_rate": 2.5203688390307833e-05, + "loss": 0.0324, "step": 52235 }, { "epoch": 2.44, - "learning_rate": 1.519572453237073e-05, - "loss": 0.0911, + "learning_rate": 2.5203220320471816e-05, + "loss": 0.1592, "step": 52240 }, { "epoch": 2.44, - "learning_rate": 1.519525573109559e-05, - "loss": 0.2037, + "learning_rate": 2.5202752250635796e-05, + "loss": 0.0736, "step": 52245 }, { "epoch": 2.44, - "learning_rate": 1.519478692982045e-05, - "loss": 0.1951, + "learning_rate": 2.5202284180799776e-05, + "loss": 0.1332, "step": 52250 }, { "epoch": 2.44, - "learning_rate": 1.519431812854531e-05, - "loss": 0.1587, + "learning_rate": 2.520181611096376e-05, + "loss": 0.1342, "step": 52255 }, { "epoch": 2.44, - "learning_rate": 1.5193849327270172e-05, - "loss": 0.2093, + "learning_rate": 2.520134804112774e-05, + "loss": 0.1728, "step": 52260 }, { "epoch": 2.44, - "learning_rate": 1.5193380525995032e-05, - "loss": 0.2805, + "learning_rate": 2.5200879971291715e-05, + "loss": 0.278, "step": 52265 }, { "epoch": 2.44, - "learning_rate": 1.5192911724719892e-05, - "loss": 0.0562, + "learning_rate": 2.5200411901455695e-05, + "loss": 0.0266, "step": 52270 }, { "epoch": 2.44, - "learning_rate": 1.5192442923444752e-05, - "loss": 0.0569, + "learning_rate": 2.519994383161968e-05, + "loss": 0.046, "step": 52275 }, { "epoch": 2.44, - "learning_rate": 1.5191974122169612e-05, - "loss": 0.1093, + "learning_rate": 2.5199475761783658e-05, + "loss": 0.0686, "step": 52280 }, { "epoch": 2.44, - "learning_rate": 1.5191505320894475e-05, - "loss": 0.0803, + "learning_rate": 2.5199007691947638e-05, + "loss": 0.0621, "step": 52285 }, { "epoch": 2.44, - "learning_rate": 1.5191036519619335e-05, - "loss": 0.0702, + "learning_rate": 2.5198539622111618e-05, + "loss": 0.0907, "step": 52290 }, { "epoch": 2.44, - "learning_rate": 1.5190567718344195e-05, - "loss": 0.1036, + "learning_rate": 2.51980715522756e-05, + "loss": 0.0781, "step": 52295 }, { "epoch": 2.44, - "learning_rate": 1.5190098917069057e-05, - "loss": 0.157, + "learning_rate": 2.519760348243958e-05, + "loss": 0.0904, "step": 52300 }, { "epoch": 2.44, - "learning_rate": 1.5189630115793917e-05, - "loss": 0.171, + "learning_rate": 2.519713541260356e-05, + "loss": 0.1418, "step": 52305 }, { "epoch": 2.44, - "learning_rate": 1.5189161314518777e-05, - "loss": 0.3712, + "learning_rate": 2.5196667342767544e-05, + "loss": 0.1473, "step": 52310 }, { "epoch": 2.44, - "learning_rate": 1.5188692513243637e-05, - "loss": 0.3013, + "learning_rate": 2.5196199272931524e-05, + "loss": 0.2129, "step": 52315 }, { "epoch": 2.44, - "learning_rate": 1.5188223711968496e-05, - "loss": 0.0322, + "learning_rate": 2.5195731203095504e-05, + "loss": 0.0451, "step": 52320 }, { "epoch": 2.44, - "learning_rate": 1.5187754910693358e-05, - "loss": 0.0464, + "learning_rate": 2.5195263133259483e-05, + "loss": 0.0179, "step": 52325 }, { "epoch": 2.44, - "learning_rate": 1.5187286109418218e-05, - "loss": 0.0681, + "learning_rate": 2.5194795063423463e-05, + "loss": 0.0259, "step": 52330 }, { "epoch": 2.44, - "learning_rate": 1.518681730814308e-05, - "loss": 0.0501, + "learning_rate": 2.5194326993587443e-05, + "loss": 0.0715, "step": 52335 }, { "epoch": 2.44, - "learning_rate": 1.5186348506867941e-05, - "loss": 0.1006, + "learning_rate": 2.5193858923751423e-05, + "loss": 0.1341, "step": 52340 }, { "epoch": 2.44, - "learning_rate": 1.5185879705592801e-05, - "loss": 0.1419, + "learning_rate": 2.5193390853915403e-05, + "loss": 0.054, "step": 52345 }, { "epoch": 2.44, - "learning_rate": 1.5185410904317661e-05, - "loss": 0.1124, + "learning_rate": 2.5192922784079386e-05, + "loss": 0.0753, "step": 52350 }, { "epoch": 2.44, - "learning_rate": 1.5184942103042521e-05, - "loss": 0.094, + "learning_rate": 2.5192454714243366e-05, + "loss": 0.1833, "step": 52355 }, { "epoch": 2.44, - "learning_rate": 1.5184473301767381e-05, - "loss": 0.2268, + "learning_rate": 2.5191986644407346e-05, + "loss": 0.6172, "step": 52360 }, { "epoch": 2.44, - "learning_rate": 1.5184004500492243e-05, - "loss": 0.3197, + "learning_rate": 2.519151857457133e-05, + "loss": 0.2876, "step": 52365 }, { "epoch": 2.44, - "learning_rate": 1.5183535699217103e-05, - "loss": 0.0583, + "learning_rate": 2.519105050473531e-05, + "loss": 0.0317, "step": 52370 }, { "epoch": 2.44, - "learning_rate": 1.5183066897941963e-05, - "loss": 0.0275, + "learning_rate": 2.519058243489929e-05, + "loss": 0.0545, "step": 52375 }, { "epoch": 2.44, - "learning_rate": 1.5182598096666826e-05, - "loss": 0.0546, + "learning_rate": 2.5190114365063268e-05, + "loss": 0.0487, "step": 52380 }, { "epoch": 2.44, - "learning_rate": 1.5182129295391686e-05, - "loss": 0.1263, + "learning_rate": 2.518964629522725e-05, + "loss": 0.0414, "step": 52385 }, { "epoch": 2.44, - "learning_rate": 1.5181660494116546e-05, - "loss": 0.0598, + "learning_rate": 2.5189178225391228e-05, + "loss": 0.0762, "step": 52390 }, { "epoch": 2.44, - "learning_rate": 1.5181191692841406e-05, - "loss": 0.0843, + "learning_rate": 2.5188710155555208e-05, + "loss": 0.1468, "step": 52395 }, { "epoch": 2.45, - "learning_rate": 1.5180722891566266e-05, - "loss": 0.1803, + "learning_rate": 2.5188242085719187e-05, + "loss": 0.0978, "step": 52400 }, { "epoch": 2.45, - "learning_rate": 1.5180254090291127e-05, - "loss": 0.2682, + "learning_rate": 2.518777401588317e-05, + "loss": 0.1532, "step": 52405 }, { "epoch": 2.45, - "learning_rate": 1.5179785289015987e-05, - "loss": 0.1826, + "learning_rate": 2.518730594604715e-05, + "loss": 0.3888, "step": 52410 }, { "epoch": 2.45, - "learning_rate": 1.5179316487740847e-05, - "loss": 0.309, + "learning_rate": 2.518683787621113e-05, + "loss": 0.2808, "step": 52415 }, { "epoch": 2.45, - "learning_rate": 1.5178847686465707e-05, - "loss": 0.0619, + "learning_rate": 2.518636980637511e-05, + "loss": 0.043, "step": 52420 }, { "epoch": 2.45, - "learning_rate": 1.517837888519057e-05, - "loss": 0.0193, + "learning_rate": 2.5185901736539093e-05, + "loss": 0.0324, "step": 52425 }, { "epoch": 2.45, - "learning_rate": 1.517791008391543e-05, - "loss": 0.0525, + "learning_rate": 2.5185433666703073e-05, + "loss": 0.0765, "step": 52430 }, { "epoch": 2.45, - "learning_rate": 1.517744128264029e-05, - "loss": 0.0579, + "learning_rate": 2.5184965596867053e-05, + "loss": 0.069, "step": 52435 }, { "epoch": 2.45, - "learning_rate": 1.517697248136515e-05, - "loss": 0.0894, + "learning_rate": 2.5184497527031036e-05, + "loss": 0.0406, "step": 52440 }, { "epoch": 2.45, - "learning_rate": 1.5176503680090012e-05, - "loss": 0.1532, + "learning_rate": 2.5184029457195016e-05, + "loss": 0.0692, "step": 52445 }, { "epoch": 2.45, - "learning_rate": 1.5176034878814872e-05, - "loss": 0.1447, + "learning_rate": 2.5183561387358996e-05, + "loss": 0.2368, "step": 52450 }, { "epoch": 2.45, - "learning_rate": 1.5175566077539732e-05, - "loss": 0.1665, + "learning_rate": 2.5183093317522972e-05, + "loss": 0.131, "step": 52455 }, { "epoch": 2.45, - "learning_rate": 1.5175097276264592e-05, - "loss": 0.1983, + "learning_rate": 2.5182625247686955e-05, + "loss": 0.2531, "step": 52460 }, { "epoch": 2.45, - "learning_rate": 1.5174628474989453e-05, - "loss": 0.2106, + "learning_rate": 2.5182157177850935e-05, + "loss": 0.2602, "step": 52465 }, { "epoch": 2.45, - "learning_rate": 1.5174159673714313e-05, - "loss": 0.0597, + "learning_rate": 2.5181689108014915e-05, + "loss": 0.0813, "step": 52470 }, { "epoch": 2.45, - "learning_rate": 1.5173690872439175e-05, - "loss": 0.0445, + "learning_rate": 2.5181221038178895e-05, + "loss": 0.0428, "step": 52475 }, { "epoch": 2.45, - "learning_rate": 1.5173222071164035e-05, - "loss": 0.0628, + "learning_rate": 2.5180752968342878e-05, + "loss": 0.0207, "step": 52480 }, { "epoch": 2.45, - "learning_rate": 1.5172753269888896e-05, - "loss": 0.0762, + "learning_rate": 2.5180284898506858e-05, + "loss": 0.0731, "step": 52485 }, { "epoch": 2.45, - "learning_rate": 1.5172284468613756e-05, - "loss": 0.0737, + "learning_rate": 2.5179816828670838e-05, + "loss": 0.1143, "step": 52490 }, { "epoch": 2.45, - "learning_rate": 1.5171815667338616e-05, - "loss": 0.0675, + "learning_rate": 2.517934875883482e-05, + "loss": 0.1158, "step": 52495 }, { "epoch": 2.45, - "learning_rate": 1.5171346866063476e-05, - "loss": 0.1033, + "learning_rate": 2.51788806889988e-05, + "loss": 0.1362, "step": 52500 }, { "epoch": 2.45, - "learning_rate": 1.5170878064788338e-05, - "loss": 0.1985, + "learning_rate": 2.517841261916278e-05, + "loss": 0.0893, "step": 52505 }, { "epoch": 2.45, - "learning_rate": 1.5170409263513198e-05, - "loss": 0.2309, + "learning_rate": 2.517794454932676e-05, + "loss": 0.2606, "step": 52510 }, { "epoch": 2.45, - "learning_rate": 1.5169940462238058e-05, - "loss": 0.2345, + "learning_rate": 2.5177476479490744e-05, + "loss": 0.3078, "step": 52515 }, { "epoch": 2.45, - "learning_rate": 1.516947166096292e-05, - "loss": 0.0415, + "learning_rate": 2.517700840965472e-05, + "loss": 0.0162, "step": 52520 }, { "epoch": 2.45, - "learning_rate": 1.5169002859687781e-05, - "loss": 0.0588, + "learning_rate": 2.51765403398187e-05, + "loss": 0.0139, "step": 52525 }, { "epoch": 2.45, - "learning_rate": 1.516853405841264e-05, - "loss": 0.0386, + "learning_rate": 2.517607226998268e-05, + "loss": 0.0568, "step": 52530 }, { "epoch": 2.45, - "learning_rate": 1.51680652571375e-05, - "loss": 0.1693, + "learning_rate": 2.5175604200146663e-05, + "loss": 0.056, "step": 52535 }, { "epoch": 2.45, - "learning_rate": 1.516759645586236e-05, - "loss": 0.1481, + "learning_rate": 2.5175136130310643e-05, + "loss": 0.1181, "step": 52540 }, { "epoch": 2.45, - "learning_rate": 1.5167127654587222e-05, - "loss": 0.0804, + "learning_rate": 2.5174668060474623e-05, + "loss": 0.0799, "step": 52545 }, { "epoch": 2.45, - "learning_rate": 1.5166658853312082e-05, - "loss": 0.1041, + "learning_rate": 2.5174199990638606e-05, + "loss": 0.1234, "step": 52550 }, { "epoch": 2.45, - "learning_rate": 1.5166190052036942e-05, - "loss": 0.1923, + "learning_rate": 2.5173731920802586e-05, + "loss": 0.1662, "step": 52555 }, { "epoch": 2.45, - "learning_rate": 1.5165721250761802e-05, - "loss": 0.3416, + "learning_rate": 2.5173263850966565e-05, + "loss": 0.1641, "step": 52560 }, { "epoch": 2.45, - "learning_rate": 1.5165252449486665e-05, - "loss": 0.4328, + "learning_rate": 2.5172795781130545e-05, + "loss": 0.4078, "step": 52565 }, { "epoch": 2.45, - "learning_rate": 1.5164783648211525e-05, - "loss": 0.0259, + "learning_rate": 2.517232771129453e-05, + "loss": 0.0641, "step": 52570 }, { "epoch": 2.45, - "learning_rate": 1.5164314846936385e-05, - "loss": 0.0365, + "learning_rate": 2.5171859641458508e-05, + "loss": 0.0375, "step": 52575 }, { "epoch": 2.45, - "learning_rate": 1.5163846045661245e-05, - "loss": 0.0338, + "learning_rate": 2.5171391571622485e-05, + "loss": 0.0733, "step": 52580 }, { "epoch": 2.45, - "learning_rate": 1.5163377244386107e-05, - "loss": 0.0438, + "learning_rate": 2.5170923501786464e-05, + "loss": 0.0778, "step": 52585 }, { "epoch": 2.45, - "learning_rate": 1.5162908443110967e-05, - "loss": 0.0815, + "learning_rate": 2.5170455431950448e-05, + "loss": 0.0773, "step": 52590 }, { "epoch": 2.45, - "learning_rate": 1.5162439641835827e-05, - "loss": 0.1101, + "learning_rate": 2.5169987362114427e-05, + "loss": 0.117, "step": 52595 }, { "epoch": 2.45, - "learning_rate": 1.5161970840560687e-05, - "loss": 0.0867, + "learning_rate": 2.5169519292278407e-05, + "loss": 0.226, "step": 52600 }, { "epoch": 2.45, - "learning_rate": 1.5161502039285547e-05, - "loss": 0.2476, + "learning_rate": 2.5169051222442387e-05, + "loss": 0.184, "step": 52605 }, { "epoch": 2.45, - "learning_rate": 1.516103323801041e-05, - "loss": 0.2438, + "learning_rate": 2.516858315260637e-05, + "loss": 0.2767, "step": 52610 }, { "epoch": 2.46, - "learning_rate": 1.516056443673527e-05, - "loss": 0.3517, + "learning_rate": 2.516811508277035e-05, + "loss": 0.3545, "step": 52615 }, { "epoch": 2.46, - "learning_rate": 1.516009563546013e-05, - "loss": 0.075, + "learning_rate": 2.516764701293433e-05, + "loss": 0.046, "step": 52620 }, { "epoch": 2.46, - "learning_rate": 1.5159626834184991e-05, - "loss": 0.0791, + "learning_rate": 2.5167178943098313e-05, + "loss": 0.0502, "step": 52625 }, { "epoch": 2.46, - "learning_rate": 1.5159158032909851e-05, - "loss": 0.0481, + "learning_rate": 2.5166710873262293e-05, + "loss": 0.1021, "step": 52630 }, { "epoch": 2.46, - "learning_rate": 1.5158689231634711e-05, - "loss": 0.129, + "learning_rate": 2.5166242803426273e-05, + "loss": 0.0652, "step": 52635 }, { "epoch": 2.46, - "learning_rate": 1.5158220430359571e-05, - "loss": 0.1212, + "learning_rate": 2.5165774733590253e-05, + "loss": 0.1092, "step": 52640 }, { "epoch": 2.46, - "learning_rate": 1.5157751629084431e-05, - "loss": 0.1333, + "learning_rate": 2.5165306663754232e-05, + "loss": 0.0526, "step": 52645 }, { "epoch": 2.46, - "learning_rate": 1.5157282827809293e-05, - "loss": 0.1213, + "learning_rate": 2.5164838593918212e-05, + "loss": 0.0951, "step": 52650 }, { "epoch": 2.46, - "learning_rate": 1.5156814026534153e-05, - "loss": 0.1455, + "learning_rate": 2.5164370524082192e-05, + "loss": 0.1798, "step": 52655 }, { "epoch": 2.46, - "learning_rate": 1.5156345225259014e-05, - "loss": 0.1973, + "learning_rate": 2.5163902454246172e-05, + "loss": 0.2423, "step": 52660 }, { "epoch": 2.46, - "learning_rate": 1.5155876423983876e-05, - "loss": 0.2, + "learning_rate": 2.5163434384410155e-05, + "loss": 0.2527, "step": 52665 }, { "epoch": 2.46, - "learning_rate": 1.5155407622708736e-05, - "loss": 0.0601, + "learning_rate": 2.5162966314574135e-05, + "loss": 0.0154, "step": 52670 }, { "epoch": 2.46, - "learning_rate": 1.5154938821433596e-05, - "loss": 0.0499, + "learning_rate": 2.5162498244738115e-05, + "loss": 0.0181, "step": 52675 }, { "epoch": 2.46, - "learning_rate": 1.5154470020158456e-05, - "loss": 0.0689, + "learning_rate": 2.5162030174902098e-05, + "loss": 0.0505, "step": 52680 }, { "epoch": 2.46, - "learning_rate": 1.5154001218883316e-05, - "loss": 0.0873, + "learning_rate": 2.5161562105066078e-05, + "loss": 0.0972, "step": 52685 }, { "epoch": 2.46, - "learning_rate": 1.5153532417608177e-05, - "loss": 0.073, + "learning_rate": 2.5161094035230058e-05, + "loss": 0.0295, "step": 52690 }, { "epoch": 2.46, - "learning_rate": 1.5153063616333037e-05, - "loss": 0.1049, + "learning_rate": 2.5160625965394037e-05, + "loss": 0.0877, "step": 52695 }, { "epoch": 2.46, - "learning_rate": 1.5152594815057897e-05, - "loss": 0.128, + "learning_rate": 2.516015789555802e-05, + "loss": 0.1176, "step": 52700 }, { "epoch": 2.46, - "learning_rate": 1.515212601378276e-05, - "loss": 0.1296, + "learning_rate": 2.5159689825722e-05, + "loss": 0.109, "step": 52705 }, { "epoch": 2.46, - "learning_rate": 1.515165721250762e-05, - "loss": 0.2877, + "learning_rate": 2.5159221755885977e-05, + "loss": 0.316, "step": 52710 }, { "epoch": 2.46, - "learning_rate": 1.515118841123248e-05, - "loss": 0.2938, + "learning_rate": 2.5158753686049957e-05, + "loss": 0.3343, "step": 52715 }, { "epoch": 2.46, - "learning_rate": 1.515071960995734e-05, - "loss": 0.0638, + "learning_rate": 2.515828561621394e-05, + "loss": 0.0537, "step": 52720 }, { "epoch": 2.46, - "learning_rate": 1.51502508086822e-05, - "loss": 0.0505, + "learning_rate": 2.515781754637792e-05, + "loss": 0.0435, "step": 52725 }, { "epoch": 2.46, - "learning_rate": 1.5149782007407062e-05, - "loss": 0.057, + "learning_rate": 2.51573494765419e-05, + "loss": 0.0566, "step": 52730 }, { "epoch": 2.46, - "learning_rate": 1.5149313206131922e-05, - "loss": 0.0638, + "learning_rate": 2.5156881406705883e-05, + "loss": 0.0749, "step": 52735 }, { "epoch": 2.46, - "learning_rate": 1.5148844404856782e-05, - "loss": 0.0365, + "learning_rate": 2.5156413336869863e-05, + "loss": 0.0451, "step": 52740 }, { "epoch": 2.46, - "learning_rate": 1.5148375603581642e-05, - "loss": 0.0596, + "learning_rate": 2.5155945267033842e-05, + "loss": 0.1212, "step": 52745 }, { "epoch": 2.46, - "learning_rate": 1.5147906802306505e-05, - "loss": 0.1375, + "learning_rate": 2.5155477197197822e-05, + "loss": 0.1203, "step": 52750 }, { "epoch": 2.46, - "learning_rate": 1.5147438001031365e-05, - "loss": 0.1279, + "learning_rate": 2.5155009127361805e-05, + "loss": 0.2059, "step": 52755 }, { "epoch": 2.46, - "learning_rate": 1.5146969199756225e-05, - "loss": 0.1905, + "learning_rate": 2.5154541057525785e-05, + "loss": 0.2683, "step": 52760 }, { "epoch": 2.46, - "learning_rate": 1.5146500398481085e-05, - "loss": 0.2818, + "learning_rate": 2.5154072987689765e-05, + "loss": 0.2852, "step": 52765 }, { "epoch": 2.46, - "learning_rate": 1.5146031597205946e-05, - "loss": 0.0624, + "learning_rate": 2.515360491785374e-05, + "loss": 0.0461, "step": 52770 }, { "epoch": 2.46, - "learning_rate": 1.5145562795930806e-05, - "loss": 0.0779, + "learning_rate": 2.5153136848017725e-05, + "loss": 0.0449, "step": 52775 }, { "epoch": 2.46, - "learning_rate": 1.5145093994655666e-05, - "loss": 0.0439, + "learning_rate": 2.5152668778181704e-05, + "loss": 0.047, "step": 52780 }, { "epoch": 2.46, - "learning_rate": 1.5144625193380526e-05, - "loss": 0.0472, + "learning_rate": 2.5152200708345684e-05, + "loss": 0.0799, "step": 52785 }, { "epoch": 2.46, - "learning_rate": 1.5144156392105386e-05, - "loss": 0.0525, + "learning_rate": 2.5151732638509664e-05, + "loss": 0.055, "step": 52790 }, { "epoch": 2.46, - "learning_rate": 1.5143687590830248e-05, - "loss": 0.0622, + "learning_rate": 2.5151264568673647e-05, + "loss": 0.1364, "step": 52795 }, { "epoch": 2.46, - "learning_rate": 1.514321878955511e-05, - "loss": 0.1119, + "learning_rate": 2.5150796498837627e-05, + "loss": 0.0631, "step": 52800 }, { "epoch": 2.46, - "learning_rate": 1.514274998827997e-05, - "loss": 0.261, + "learning_rate": 2.5150328429001607e-05, + "loss": 0.1585, "step": 52805 }, { "epoch": 2.46, - "learning_rate": 1.5142281187004831e-05, - "loss": 0.3272, + "learning_rate": 2.514986035916559e-05, + "loss": 0.3361, "step": 52810 }, { "epoch": 2.46, - "learning_rate": 1.5141812385729691e-05, - "loss": 0.3198, + "learning_rate": 2.514939228932957e-05, + "loss": 0.2617, "step": 52815 }, { "epoch": 2.46, - "learning_rate": 1.514134358445455e-05, - "loss": 0.0372, + "learning_rate": 2.514892421949355e-05, + "loss": 0.1086, "step": 52820 }, { "epoch": 2.46, - "learning_rate": 1.514087478317941e-05, - "loss": 0.0189, + "learning_rate": 2.514845614965753e-05, + "loss": 0.0886, "step": 52825 }, { "epoch": 2.47, - "learning_rate": 1.514040598190427e-05, - "loss": 0.0368, + "learning_rate": 2.5147988079821513e-05, + "loss": 0.0402, "step": 52830 }, { "epoch": 2.47, - "learning_rate": 1.5139937180629132e-05, - "loss": 0.1019, + "learning_rate": 2.514752000998549e-05, + "loss": 0.0471, "step": 52835 }, { "epoch": 2.47, - "learning_rate": 1.5139468379353992e-05, - "loss": 0.15, + "learning_rate": 2.514705194014947e-05, + "loss": 0.1097, "step": 52840 }, { "epoch": 2.47, - "learning_rate": 1.5138999578078854e-05, - "loss": 0.1733, + "learning_rate": 2.514658387031345e-05, + "loss": 0.0571, "step": 52845 }, { "epoch": 2.47, - "learning_rate": 1.5138530776803715e-05, - "loss": 0.0982, + "learning_rate": 2.5146115800477432e-05, + "loss": 0.1192, "step": 52850 }, { "epoch": 2.47, - "learning_rate": 1.5138061975528575e-05, - "loss": 0.0813, + "learning_rate": 2.5145647730641412e-05, + "loss": 0.1675, "step": 52855 }, { "epoch": 2.47, - "learning_rate": 1.5137593174253435e-05, - "loss": 0.247, + "learning_rate": 2.514517966080539e-05, + "loss": 0.1556, "step": 52860 }, { "epoch": 2.47, - "learning_rate": 1.5137124372978295e-05, - "loss": 0.1989, + "learning_rate": 2.5144711590969375e-05, + "loss": 0.2968, "step": 52865 }, { "epoch": 2.47, - "learning_rate": 1.5136655571703155e-05, - "loss": 0.0529, + "learning_rate": 2.5144243521133355e-05, + "loss": 0.0888, "step": 52870 }, { "epoch": 2.47, - "learning_rate": 1.5136186770428017e-05, - "loss": 0.0296, + "learning_rate": 2.5143775451297335e-05, + "loss": 0.0484, "step": 52875 }, { "epoch": 2.47, - "learning_rate": 1.5135717969152877e-05, - "loss": 0.0404, + "learning_rate": 2.5143307381461314e-05, + "loss": 0.0456, "step": 52880 }, { "epoch": 2.47, - "learning_rate": 1.5135249167877737e-05, - "loss": 0.0876, + "learning_rate": 2.5142839311625298e-05, + "loss": 0.0619, "step": 52885 }, { "epoch": 2.47, - "learning_rate": 1.51347803666026e-05, - "loss": 0.0909, + "learning_rate": 2.5142371241789277e-05, + "loss": 0.0961, "step": 52890 }, { "epoch": 2.47, - "learning_rate": 1.513431156532746e-05, - "loss": 0.119, + "learning_rate": 2.5141903171953254e-05, + "loss": 0.1127, "step": 52895 }, { "epoch": 2.47, - "learning_rate": 1.513384276405232e-05, - "loss": 0.1705, + "learning_rate": 2.5141435102117234e-05, + "loss": 0.1211, "step": 52900 }, { "epoch": 2.47, - "learning_rate": 1.513337396277718e-05, - "loss": 0.1126, + "learning_rate": 2.5140967032281217e-05, + "loss": 0.2063, "step": 52905 }, { "epoch": 2.47, - "learning_rate": 1.513290516150204e-05, - "loss": 0.1938, + "learning_rate": 2.5140498962445197e-05, + "loss": 0.2051, "step": 52910 }, { "epoch": 2.47, - "learning_rate": 1.5132436360226901e-05, - "loss": 0.3044, + "learning_rate": 2.5140030892609176e-05, + "loss": 0.4252, "step": 52915 }, { "epoch": 2.47, - "learning_rate": 1.5131967558951761e-05, - "loss": 0.0649, + "learning_rate": 2.513956282277316e-05, + "loss": 0.0802, "step": 52920 }, { "epoch": 2.47, - "learning_rate": 1.5131498757676621e-05, - "loss": 0.0713, + "learning_rate": 2.513909475293714e-05, + "loss": 0.0148, "step": 52925 }, { "epoch": 2.47, - "learning_rate": 1.5131029956401481e-05, - "loss": 0.0724, + "learning_rate": 2.513862668310112e-05, + "loss": 0.1018, "step": 52930 }, { "epoch": 2.47, - "learning_rate": 1.5130561155126345e-05, - "loss": 0.0492, + "learning_rate": 2.51381586132651e-05, + "loss": 0.0707, "step": 52935 }, { "epoch": 2.47, - "learning_rate": 1.5130092353851204e-05, - "loss": 0.1065, + "learning_rate": 2.5137690543429082e-05, + "loss": 0.0244, "step": 52940 }, { "epoch": 2.47, - "learning_rate": 1.5129623552576064e-05, - "loss": 0.1069, + "learning_rate": 2.5137222473593062e-05, + "loss": 0.1077, "step": 52945 }, { "epoch": 2.47, - "learning_rate": 1.5129154751300924e-05, - "loss": 0.0492, + "learning_rate": 2.5136754403757042e-05, + "loss": 0.1026, "step": 52950 }, { "epoch": 2.47, - "learning_rate": 1.5128685950025786e-05, - "loss": 0.159, + "learning_rate": 2.5136286333921022e-05, + "loss": 0.1085, "step": 52955 }, { "epoch": 2.47, - "learning_rate": 1.5128217148750646e-05, - "loss": 0.307, + "learning_rate": 2.5135818264085e-05, + "loss": 0.2029, "step": 52960 }, { "epoch": 2.47, - "learning_rate": 1.5127748347475506e-05, - "loss": 0.2151, + "learning_rate": 2.513535019424898e-05, + "loss": 0.3476, "step": 52965 }, { "epoch": 2.47, - "learning_rate": 1.5127279546200366e-05, - "loss": 0.0254, + "learning_rate": 2.513488212441296e-05, + "loss": 0.0672, "step": 52970 }, { "epoch": 2.47, - "learning_rate": 1.5126810744925227e-05, - "loss": 0.067, + "learning_rate": 2.513441405457694e-05, + "loss": 0.038, "step": 52975 }, { "epoch": 2.47, - "learning_rate": 1.5126341943650087e-05, - "loss": 0.0801, + "learning_rate": 2.5133945984740924e-05, + "loss": 0.0318, "step": 52980 }, { "epoch": 2.47, - "learning_rate": 1.5125873142374949e-05, - "loss": 0.0605, + "learning_rate": 2.5133477914904904e-05, + "loss": 0.1066, "step": 52985 }, { "epoch": 2.47, - "learning_rate": 1.5125404341099809e-05, - "loss": 0.0755, + "learning_rate": 2.5133009845068884e-05, + "loss": 0.0741, "step": 52990 }, { "epoch": 2.47, - "learning_rate": 1.512493553982467e-05, - "loss": 0.136, + "learning_rate": 2.5132541775232867e-05, + "loss": 0.2065, "step": 52995 }, { "epoch": 2.47, - "learning_rate": 1.512446673854953e-05, - "loss": 0.1061, + "learning_rate": 2.5132073705396847e-05, + "loss": 0.2066, "step": 53000 }, { "epoch": 2.47, - "learning_rate": 1.512399793727439e-05, - "loss": 0.1312, + "learning_rate": 2.5131605635560827e-05, + "loss": 0.124, "step": 53005 }, { "epoch": 2.47, - "learning_rate": 1.512352913599925e-05, - "loss": 0.3252, + "learning_rate": 2.5131137565724807e-05, + "loss": 0.1881, "step": 53010 }, { "epoch": 2.47, - "learning_rate": 1.5123060334724112e-05, - "loss": 0.3149, + "learning_rate": 2.513066949588879e-05, + "loss": 0.225, "step": 53015 }, { "epoch": 2.47, - "learning_rate": 1.5122591533448972e-05, - "loss": 0.0129, + "learning_rate": 2.513020142605277e-05, + "loss": 0.0445, "step": 53020 }, { "epoch": 2.47, - "learning_rate": 1.5122122732173832e-05, - "loss": 0.0211, + "learning_rate": 2.5129733356216746e-05, + "loss": 0.0175, "step": 53025 }, { "epoch": 2.47, - "learning_rate": 1.5121653930898693e-05, - "loss": 0.041, + "learning_rate": 2.5129265286380726e-05, + "loss": 0.0474, "step": 53030 }, { "epoch": 2.47, - "learning_rate": 1.5121185129623555e-05, - "loss": 0.0847, + "learning_rate": 2.512879721654471e-05, + "loss": 0.0526, "step": 53035 }, { "epoch": 2.47, - "learning_rate": 1.5120716328348415e-05, - "loss": 0.0698, + "learning_rate": 2.512832914670869e-05, + "loss": 0.4421, "step": 53040 }, { "epoch": 2.48, - "learning_rate": 1.5120247527073275e-05, - "loss": 0.0816, + "learning_rate": 2.512786107687267e-05, + "loss": 0.0497, "step": 53045 }, { "epoch": 2.48, - "learning_rate": 1.5119778725798135e-05, - "loss": 0.1465, + "learning_rate": 2.5127393007036652e-05, + "loss": 0.1382, "step": 53050 }, { "epoch": 2.48, - "learning_rate": 1.5119309924522996e-05, - "loss": 0.1993, + "learning_rate": 2.512692493720063e-05, + "loss": 0.1484, "step": 53055 }, { "epoch": 2.48, - "learning_rate": 1.5118841123247856e-05, - "loss": 0.3018, + "learning_rate": 2.512645686736461e-05, + "loss": 0.4425, "step": 53060 }, { "epoch": 2.48, - "learning_rate": 1.5118372321972716e-05, - "loss": 0.3261, + "learning_rate": 2.512598879752859e-05, + "loss": 0.3004, "step": 53065 }, { "epoch": 2.48, - "learning_rate": 1.5117903520697576e-05, - "loss": 0.0392, + "learning_rate": 2.5125520727692575e-05, + "loss": 0.0968, "step": 53070 }, { "epoch": 2.48, - "learning_rate": 1.511743471942244e-05, - "loss": 0.027, + "learning_rate": 2.5125052657856554e-05, + "loss": 0.0778, "step": 53075 }, { "epoch": 2.48, - "learning_rate": 1.51169659181473e-05, - "loss": 0.0483, + "learning_rate": 2.5124584588020534e-05, + "loss": 0.0768, "step": 53080 }, { "epoch": 2.48, - "learning_rate": 1.511649711687216e-05, - "loss": 0.0291, + "learning_rate": 2.512411651818451e-05, + "loss": 0.0855, "step": 53085 }, { "epoch": 2.48, - "learning_rate": 1.511602831559702e-05, - "loss": 0.1415, + "learning_rate": 2.5123648448348494e-05, + "loss": 0.0752, "step": 53090 }, { "epoch": 2.48, - "learning_rate": 1.5115559514321881e-05, - "loss": 0.0677, + "learning_rate": 2.5123180378512474e-05, + "loss": 0.0801, "step": 53095 }, { "epoch": 2.48, - "learning_rate": 1.5115090713046741e-05, - "loss": 0.1427, + "learning_rate": 2.5122712308676453e-05, + "loss": 0.1066, "step": 53100 }, { "epoch": 2.48, - "learning_rate": 1.5114621911771601e-05, - "loss": 0.1626, + "learning_rate": 2.5122244238840437e-05, + "loss": 0.1512, "step": 53105 }, { "epoch": 2.48, - "learning_rate": 1.5114153110496461e-05, - "loss": 0.2132, + "learning_rate": 2.5121776169004416e-05, + "loss": 0.1996, "step": 53110 }, { "epoch": 2.48, - "learning_rate": 1.511368430922132e-05, - "loss": 0.4029, + "learning_rate": 2.5121308099168396e-05, + "loss": 0.3174, "step": 53115 }, { "epoch": 2.48, - "learning_rate": 1.5113215507946182e-05, - "loss": 0.0847, + "learning_rate": 2.5120840029332376e-05, + "loss": 0.0307, "step": 53120 }, { "epoch": 2.48, - "learning_rate": 1.5112746706671044e-05, - "loss": 0.0436, + "learning_rate": 2.512037195949636e-05, + "loss": 0.0242, "step": 53125 }, { "epoch": 2.48, - "learning_rate": 1.5112277905395904e-05, - "loss": 0.0253, + "learning_rate": 2.511990388966034e-05, + "loss": 0.1071, "step": 53130 }, { "epoch": 2.48, - "learning_rate": 1.5111809104120766e-05, - "loss": 0.0722, + "learning_rate": 2.511943581982432e-05, + "loss": 0.0978, "step": 53135 }, { "epoch": 2.48, - "learning_rate": 1.5111340302845626e-05, - "loss": 0.117, + "learning_rate": 2.51189677499883e-05, + "loss": 0.0576, "step": 53140 }, { "epoch": 2.48, - "learning_rate": 1.5110871501570485e-05, - "loss": 0.1382, + "learning_rate": 2.5118499680152282e-05, + "loss": 0.0713, "step": 53145 }, { "epoch": 2.48, - "learning_rate": 1.5110402700295345e-05, - "loss": 0.1069, + "learning_rate": 2.511803161031626e-05, + "loss": 0.038, "step": 53150 }, { "epoch": 2.48, - "learning_rate": 1.5109933899020205e-05, - "loss": 0.1258, + "learning_rate": 2.5117563540480238e-05, + "loss": 0.2546, "step": 53155 }, { "epoch": 2.48, - "learning_rate": 1.5109465097745067e-05, - "loss": 0.4169, + "learning_rate": 2.511709547064422e-05, + "loss": 0.2199, "step": 53160 }, { "epoch": 2.48, - "learning_rate": 1.5108996296469927e-05, - "loss": 0.3047, + "learning_rate": 2.51166274008082e-05, + "loss": 0.3676, "step": 53165 }, { "epoch": 2.48, - "learning_rate": 1.5108527495194789e-05, - "loss": 0.0683, + "learning_rate": 2.511615933097218e-05, + "loss": 0.0242, "step": 53170 }, { "epoch": 2.48, - "learning_rate": 1.510805869391965e-05, - "loss": 0.0628, + "learning_rate": 2.511569126113616e-05, + "loss": 0.0589, "step": 53175 }, { "epoch": 2.48, - "learning_rate": 1.510758989264451e-05, - "loss": 0.0626, + "learning_rate": 2.5115223191300144e-05, + "loss": 0.0429, "step": 53180 }, { "epoch": 2.48, - "learning_rate": 1.510712109136937e-05, - "loss": 0.0793, + "learning_rate": 2.5114755121464124e-05, + "loss": 0.0971, "step": 53185 }, { "epoch": 2.48, - "learning_rate": 1.510665229009423e-05, - "loss": 0.1138, + "learning_rate": 2.5114287051628104e-05, + "loss": 0.0915, "step": 53190 }, { "epoch": 2.48, - "learning_rate": 1.510618348881909e-05, - "loss": 0.0797, + "learning_rate": 2.5113818981792084e-05, + "loss": 0.0838, "step": 53195 }, { "epoch": 2.48, - "learning_rate": 1.5105714687543952e-05, - "loss": 0.1331, + "learning_rate": 2.5113350911956067e-05, + "loss": 0.1195, "step": 53200 }, { "epoch": 2.48, - "learning_rate": 1.5105245886268811e-05, - "loss": 0.1266, + "learning_rate": 2.5112882842120047e-05, + "loss": 0.1195, "step": 53205 }, { "epoch": 2.48, - "learning_rate": 1.5104777084993671e-05, - "loss": 0.1997, + "learning_rate": 2.5112414772284026e-05, + "loss": 0.1937, "step": 53210 }, { "epoch": 2.48, - "learning_rate": 1.5104308283718535e-05, - "loss": 0.2617, + "learning_rate": 2.5111946702448003e-05, + "loss": 0.2583, "step": 53215 }, { "epoch": 2.48, - "learning_rate": 1.5103839482443395e-05, - "loss": 0.0412, + "learning_rate": 2.5111478632611986e-05, + "loss": 0.052, "step": 53220 }, { "epoch": 2.48, - "learning_rate": 1.5103370681168255e-05, - "loss": 0.1357, + "learning_rate": 2.5111010562775966e-05, + "loss": 0.0525, "step": 53225 }, { "epoch": 2.48, - "learning_rate": 1.5102901879893115e-05, - "loss": 0.0339, + "learning_rate": 2.5110542492939946e-05, + "loss": 0.0723, "step": 53230 }, { "epoch": 2.48, - "learning_rate": 1.5102433078617974e-05, - "loss": 0.109, + "learning_rate": 2.511007442310393e-05, + "loss": 0.0695, "step": 53235 }, { "epoch": 2.48, - "learning_rate": 1.5101964277342836e-05, - "loss": 0.1435, + "learning_rate": 2.510960635326791e-05, + "loss": 0.0692, "step": 53240 }, { "epoch": 2.48, - "learning_rate": 1.5101495476067696e-05, - "loss": 0.1191, + "learning_rate": 2.510913828343189e-05, + "loss": 0.0966, "step": 53245 }, { "epoch": 2.48, - "learning_rate": 1.5101026674792556e-05, - "loss": 0.1276, + "learning_rate": 2.5108670213595868e-05, + "loss": 0.2415, "step": 53250 }, { "epoch": 2.48, - "learning_rate": 1.5100557873517416e-05, - "loss": 0.2045, + "learning_rate": 2.510820214375985e-05, + "loss": 0.2034, "step": 53255 }, { "epoch": 2.49, - "learning_rate": 1.510008907224228e-05, - "loss": 0.2849, + "learning_rate": 2.510773407392383e-05, + "loss": 0.222, "step": 53260 }, { "epoch": 2.49, - "learning_rate": 1.5099620270967139e-05, - "loss": 0.3419, + "learning_rate": 2.510726600408781e-05, + "loss": 0.296, "step": 53265 }, { "epoch": 2.49, - "learning_rate": 1.5099151469691999e-05, - "loss": 0.0099, + "learning_rate": 2.510679793425179e-05, + "loss": 0.0594, "step": 53270 }, { "epoch": 2.49, - "learning_rate": 1.5098682668416859e-05, - "loss": 0.0081, + "learning_rate": 2.510632986441577e-05, + "loss": 0.0711, "step": 53275 }, { "epoch": 2.49, - "learning_rate": 1.509821386714172e-05, - "loss": 0.0745, + "learning_rate": 2.510586179457975e-05, + "loss": 0.0367, "step": 53280 }, { "epoch": 2.49, - "learning_rate": 1.509774506586658e-05, - "loss": 0.1331, + "learning_rate": 2.510539372474373e-05, + "loss": 0.0363, "step": 53285 }, { "epoch": 2.49, - "learning_rate": 1.509727626459144e-05, - "loss": 0.147, + "learning_rate": 2.5104925654907714e-05, + "loss": 0.0593, "step": 53290 }, { "epoch": 2.49, - "learning_rate": 1.50968074633163e-05, - "loss": 0.1196, + "learning_rate": 2.5104457585071693e-05, + "loss": 0.1109, "step": 53295 }, { "epoch": 2.49, - "learning_rate": 1.509633866204116e-05, - "loss": 0.1508, + "learning_rate": 2.5103989515235673e-05, + "loss": 0.1238, "step": 53300 }, { "epoch": 2.49, - "learning_rate": 1.5095869860766022e-05, - "loss": 0.1784, + "learning_rate": 2.5103521445399653e-05, + "loss": 0.1999, "step": 53305 }, { "epoch": 2.49, - "learning_rate": 1.5095401059490884e-05, - "loss": 0.1141, + "learning_rate": 2.5103053375563636e-05, + "loss": 0.2999, "step": 53310 }, { "epoch": 2.49, - "learning_rate": 1.5094932258215744e-05, - "loss": 0.1613, + "learning_rate": 2.5102585305727616e-05, + "loss": 0.1659, "step": 53315 }, { "epoch": 2.49, - "learning_rate": 1.5094463456940605e-05, - "loss": 0.0906, + "learning_rate": 2.5102117235891596e-05, + "loss": 0.0613, "step": 53320 }, { "epoch": 2.49, - "learning_rate": 1.5093994655665465e-05, - "loss": 0.057, + "learning_rate": 2.5101649166055576e-05, + "loss": 0.0335, "step": 53325 }, { "epoch": 2.49, - "learning_rate": 1.5093525854390325e-05, - "loss": 0.0731, + "learning_rate": 2.510118109621956e-05, + "loss": 0.0715, "step": 53330 }, { "epoch": 2.49, - "learning_rate": 1.5093057053115185e-05, - "loss": 0.0516, + "learning_rate": 2.510071302638354e-05, + "loss": 0.0905, "step": 53335 }, { "epoch": 2.49, - "learning_rate": 1.5092588251840045e-05, - "loss": 0.0711, + "learning_rate": 2.5100244956547515e-05, + "loss": 0.0503, "step": 53340 }, { "epoch": 2.49, - "learning_rate": 1.5092119450564907e-05, - "loss": 0.0954, + "learning_rate": 2.50997768867115e-05, + "loss": 0.1676, "step": 53345 }, { "epoch": 2.49, - "learning_rate": 1.5091650649289766e-05, - "loss": 0.0862, + "learning_rate": 2.5099308816875478e-05, + "loss": 0.1139, "step": 53350 }, { "epoch": 2.49, - "learning_rate": 1.5091181848014628e-05, - "loss": 0.2134, + "learning_rate": 2.5098840747039458e-05, + "loss": 0.2108, "step": 53355 }, { "epoch": 2.49, - "learning_rate": 1.509071304673949e-05, - "loss": 0.2143, + "learning_rate": 2.5098372677203438e-05, + "loss": 0.2596, "step": 53360 }, { "epoch": 2.49, - "learning_rate": 1.509024424546435e-05, - "loss": 0.2572, + "learning_rate": 2.509790460736742e-05, + "loss": 0.2595, "step": 53365 }, { "epoch": 2.49, - "learning_rate": 1.508977544418921e-05, - "loss": 0.039, + "learning_rate": 2.50974365375314e-05, + "loss": 0.0527, "step": 53370 }, { "epoch": 2.49, - "learning_rate": 1.508930664291407e-05, - "loss": 0.1016, + "learning_rate": 2.509696846769538e-05, + "loss": 0.022, "step": 53375 }, { "epoch": 2.49, - "learning_rate": 1.508883784163893e-05, - "loss": 0.0638, + "learning_rate": 2.509650039785936e-05, + "loss": 0.0557, "step": 53380 }, { "epoch": 2.49, - "learning_rate": 1.5088369040363791e-05, - "loss": 0.0594, + "learning_rate": 2.5096032328023344e-05, + "loss": 0.0506, "step": 53385 }, { "epoch": 2.49, - "learning_rate": 1.5087900239088651e-05, - "loss": 0.0642, + "learning_rate": 2.5095564258187324e-05, + "loss": 0.0867, "step": 53390 }, { "epoch": 2.49, - "learning_rate": 1.5087431437813511e-05, - "loss": 0.079, + "learning_rate": 2.5095096188351303e-05, + "loss": 0.0508, "step": 53395 }, { "epoch": 2.49, - "learning_rate": 1.5086962636538374e-05, - "loss": 0.1898, + "learning_rate": 2.5094628118515283e-05, + "loss": 0.1314, "step": 53400 }, { "epoch": 2.49, - "learning_rate": 1.5086493835263234e-05, - "loss": 0.178, + "learning_rate": 2.5094160048679263e-05, + "loss": 0.2294, "step": 53405 }, { "epoch": 2.49, - "learning_rate": 1.5086025033988094e-05, - "loss": 0.2495, + "learning_rate": 2.5093691978843243e-05, + "loss": 0.2391, "step": 53410 }, { "epoch": 2.49, - "learning_rate": 1.5085556232712954e-05, - "loss": 0.2828, + "learning_rate": 2.5093223909007223e-05, + "loss": 0.2303, "step": 53415 }, { "epoch": 2.49, - "learning_rate": 1.5085087431437814e-05, - "loss": 0.0302, + "learning_rate": 2.5092755839171206e-05, + "loss": 0.0052, "step": 53420 }, { "epoch": 2.49, - "learning_rate": 1.5084618630162676e-05, - "loss": 0.0251, + "learning_rate": 2.5092287769335186e-05, + "loss": 0.0195, "step": 53425 }, { "epoch": 2.49, - "learning_rate": 1.5084149828887536e-05, - "loss": 0.0749, + "learning_rate": 2.5091819699499165e-05, + "loss": 0.0534, "step": 53430 }, { "epoch": 2.49, - "learning_rate": 1.5083681027612395e-05, - "loss": 0.0388, + "learning_rate": 2.5091351629663145e-05, + "loss": 0.0563, "step": 53435 }, { "epoch": 2.49, - "learning_rate": 1.5083212226337255e-05, - "loss": 0.1141, + "learning_rate": 2.509088355982713e-05, + "loss": 0.1052, "step": 53440 }, { "epoch": 2.49, - "learning_rate": 1.5082743425062117e-05, - "loss": 0.0893, + "learning_rate": 2.5090415489991108e-05, + "loss": 0.0889, "step": 53445 }, { "epoch": 2.49, - "learning_rate": 1.5082274623786979e-05, - "loss": 0.1481, + "learning_rate": 2.5089947420155088e-05, + "loss": 0.0898, "step": 53450 }, { "epoch": 2.49, - "learning_rate": 1.5081805822511839e-05, - "loss": 0.2765, + "learning_rate": 2.5089479350319068e-05, + "loss": 0.1483, "step": 53455 }, { "epoch": 2.49, - "learning_rate": 1.50813370212367e-05, - "loss": 0.5149, + "learning_rate": 2.508901128048305e-05, + "loss": 0.2292, "step": 53460 }, { "epoch": 2.49, - "learning_rate": 1.508086821996156e-05, - "loss": 0.324, + "learning_rate": 2.5088543210647028e-05, + "loss": 0.3382, "step": 53465 }, { "epoch": 2.49, - "learning_rate": 1.508039941868642e-05, - "loss": 0.0539, + "learning_rate": 2.5088075140811007e-05, + "loss": 0.0341, "step": 53470 }, { "epoch": 2.5, - "learning_rate": 1.507993061741128e-05, - "loss": 0.066, + "learning_rate": 2.508760707097499e-05, + "loss": 0.0266, "step": 53475 }, { "epoch": 2.5, - "learning_rate": 1.507946181613614e-05, - "loss": 0.0415, + "learning_rate": 2.508713900113897e-05, + "loss": 0.0568, "step": 53480 }, { "epoch": 2.5, - "learning_rate": 1.5078993014861002e-05, - "loss": 0.0461, + "learning_rate": 2.508667093130295e-05, + "loss": 0.0772, "step": 53485 }, { "epoch": 2.5, - "learning_rate": 1.5078524213585862e-05, - "loss": 0.0759, + "learning_rate": 2.508620286146693e-05, + "loss": 0.0588, "step": 53490 }, { "epoch": 2.5, - "learning_rate": 1.5078055412310723e-05, - "loss": 0.1054, + "learning_rate": 2.5085734791630913e-05, + "loss": 0.1131, "step": 53495 }, { "epoch": 2.5, - "learning_rate": 1.5077586611035585e-05, - "loss": 0.0651, + "learning_rate": 2.5085266721794893e-05, + "loss": 0.1028, "step": 53500 }, { "epoch": 2.5, - "learning_rate": 1.5077117809760445e-05, - "loss": 0.2001, + "learning_rate": 2.5084798651958873e-05, + "loss": 0.0818, "step": 53505 }, { "epoch": 2.5, - "learning_rate": 1.5076649008485305e-05, - "loss": 0.1741, + "learning_rate": 2.5084330582122853e-05, + "loss": 0.127, "step": 53510 }, { "epoch": 2.5, - "learning_rate": 1.5076180207210165e-05, - "loss": 0.1449, + "learning_rate": 2.5083862512286836e-05, + "loss": 0.28, "step": 53515 }, { "epoch": 2.5, - "learning_rate": 1.5075711405935025e-05, - "loss": 0.0408, + "learning_rate": 2.5083394442450816e-05, + "loss": 0.0727, "step": 53520 }, { "epoch": 2.5, - "learning_rate": 1.5075242604659886e-05, - "loss": 0.0829, + "learning_rate": 2.5082926372614796e-05, + "loss": 0.0303, "step": 53525 }, { "epoch": 2.5, - "learning_rate": 1.5074773803384746e-05, - "loss": 0.057, + "learning_rate": 2.5082458302778775e-05, + "loss": 0.0236, "step": 53530 }, { "epoch": 2.5, - "learning_rate": 1.5074305002109606e-05, - "loss": 0.0364, + "learning_rate": 2.5081990232942755e-05, + "loss": 0.0735, "step": 53535 }, { "epoch": 2.5, - "learning_rate": 1.507383620083447e-05, - "loss": 0.0862, + "learning_rate": 2.5081522163106735e-05, + "loss": 0.061, "step": 53540 }, { "epoch": 2.5, - "learning_rate": 1.507336739955933e-05, - "loss": 0.1627, + "learning_rate": 2.5081054093270715e-05, + "loss": 0.1206, "step": 53545 }, { "epoch": 2.5, - "learning_rate": 1.507289859828419e-05, - "loss": 0.1607, + "learning_rate": 2.5080586023434698e-05, + "loss": 0.0839, "step": 53550 }, { "epoch": 2.5, - "learning_rate": 1.5072429797009049e-05, - "loss": 0.2309, + "learning_rate": 2.5080117953598678e-05, + "loss": 0.178, "step": 53555 }, { "epoch": 2.5, - "learning_rate": 1.5071960995733909e-05, - "loss": 0.362, + "learning_rate": 2.5079649883762658e-05, + "loss": 0.1644, "step": 53560 }, { "epoch": 2.5, - "learning_rate": 1.507149219445877e-05, - "loss": 0.257, + "learning_rate": 2.5079181813926637e-05, + "loss": 0.2305, "step": 53565 }, { "epoch": 2.5, - "learning_rate": 1.507102339318363e-05, - "loss": 0.0627, + "learning_rate": 2.507871374409062e-05, + "loss": 0.0892, "step": 53570 }, { "epoch": 2.5, - "learning_rate": 1.507055459190849e-05, - "loss": 0.0498, + "learning_rate": 2.50782456742546e-05, + "loss": 0.0783, "step": 53575 }, { "epoch": 2.5, - "learning_rate": 1.507008579063335e-05, - "loss": 0.054, + "learning_rate": 2.507777760441858e-05, + "loss": 0.065, "step": 53580 }, { "epoch": 2.5, - "learning_rate": 1.5069616989358214e-05, - "loss": 0.0815, + "learning_rate": 2.507730953458256e-05, + "loss": 0.0319, "step": 53585 }, { "epoch": 2.5, - "learning_rate": 1.5069148188083074e-05, - "loss": 0.055, + "learning_rate": 2.507684146474654e-05, + "loss": 0.0967, "step": 53590 }, { "epoch": 2.5, - "learning_rate": 1.5068679386807934e-05, - "loss": 0.1355, + "learning_rate": 2.507637339491052e-05, + "loss": 0.0123, "step": 53595 }, { "epoch": 2.5, - "learning_rate": 1.5068210585532794e-05, - "loss": 0.1415, + "learning_rate": 2.50759053250745e-05, + "loss": 0.0947, "step": 53600 }, { "epoch": 2.5, - "learning_rate": 1.5067741784257655e-05, - "loss": 0.1233, + "learning_rate": 2.5075437255238483e-05, + "loss": 0.3027, "step": 53605 }, { "epoch": 2.5, - "learning_rate": 1.5067272982982515e-05, - "loss": 0.216, + "learning_rate": 2.5074969185402463e-05, + "loss": 0.2883, "step": 53610 }, { "epoch": 2.5, - "learning_rate": 1.5066804181707375e-05, - "loss": 0.2266, + "learning_rate": 2.5074501115566442e-05, + "loss": 0.2533, "step": 53615 }, { "epoch": 2.5, - "learning_rate": 1.5066335380432235e-05, - "loss": 0.0833, + "learning_rate": 2.5074033045730422e-05, + "loss": 0.0162, "step": 53620 }, { "epoch": 2.5, - "learning_rate": 1.5065866579157095e-05, - "loss": 0.0612, + "learning_rate": 2.5073564975894405e-05, + "loss": 0.0637, "step": 53625 }, { "epoch": 2.5, - "learning_rate": 1.5065397777881957e-05, - "loss": 0.0735, + "learning_rate": 2.5073096906058385e-05, + "loss": 0.1104, "step": 53630 }, { "epoch": 2.5, - "learning_rate": 1.5064928976606818e-05, - "loss": 0.0573, + "learning_rate": 2.5072628836222365e-05, + "loss": 0.0611, "step": 53635 }, { "epoch": 2.5, - "learning_rate": 1.5064460175331678e-05, - "loss": 0.2157, + "learning_rate": 2.5072160766386345e-05, + "loss": 0.0894, "step": 53640 }, { "epoch": 2.5, - "learning_rate": 1.506399137405654e-05, - "loss": 0.126, + "learning_rate": 2.5071692696550328e-05, + "loss": 0.093, "step": 53645 }, { "epoch": 2.5, - "learning_rate": 1.50635225727814e-05, - "loss": 0.0667, + "learning_rate": 2.5071224626714308e-05, + "loss": 0.2569, "step": 53650 }, { "epoch": 2.5, - "learning_rate": 1.506305377150626e-05, - "loss": 0.1685, + "learning_rate": 2.5070756556878284e-05, + "loss": 0.2252, "step": 53655 }, { "epoch": 2.5, - "learning_rate": 1.506258497023112e-05, - "loss": 0.4227, + "learning_rate": 2.5070288487042268e-05, + "loss": 0.2512, "step": 53660 }, { "epoch": 2.5, - "learning_rate": 1.506211616895598e-05, - "loss": 0.2431, + "learning_rate": 2.5069820417206247e-05, + "loss": 0.2363, "step": 53665 }, { "epoch": 2.5, - "learning_rate": 1.5061647367680841e-05, - "loss": 0.0585, + "learning_rate": 2.5069352347370227e-05, + "loss": 0.1054, "step": 53670 }, { "epoch": 2.5, - "learning_rate": 1.5061178566405701e-05, - "loss": 0.0575, + "learning_rate": 2.5068884277534207e-05, + "loss": 0.0431, "step": 53675 }, { "epoch": 2.5, - "learning_rate": 1.5060709765130563e-05, - "loss": 0.0963, + "learning_rate": 2.506841620769819e-05, + "loss": 0.0788, "step": 53680 }, { "epoch": 2.51, - "learning_rate": 1.5060240963855424e-05, - "loss": 0.0607, + "learning_rate": 2.506794813786217e-05, + "loss": 0.0512, "step": 53685 }, { "epoch": 2.51, - "learning_rate": 1.5059772162580284e-05, - "loss": 0.0998, + "learning_rate": 2.506748006802615e-05, + "loss": 0.0916, "step": 53690 }, { "epoch": 2.51, - "learning_rate": 1.5059303361305144e-05, - "loss": 0.0586, + "learning_rate": 2.506701199819013e-05, + "loss": 0.0755, "step": 53695 }, { "epoch": 2.51, - "learning_rate": 1.5058834560030004e-05, - "loss": 0.1631, + "learning_rate": 2.5066543928354113e-05, + "loss": 0.1341, "step": 53700 }, { "epoch": 2.51, - "learning_rate": 1.5058365758754864e-05, - "loss": 0.167, + "learning_rate": 2.5066075858518093e-05, + "loss": 0.1662, "step": 53705 }, { "epoch": 2.51, - "learning_rate": 1.5057896957479726e-05, - "loss": 0.2672, + "learning_rate": 2.5065607788682073e-05, + "loss": 0.1018, "step": 53710 }, { "epoch": 2.51, - "learning_rate": 1.5057428156204586e-05, - "loss": 0.2048, + "learning_rate": 2.5065139718846056e-05, + "loss": 0.4115, "step": 53715 }, { "epoch": 2.51, - "learning_rate": 1.5056959354929446e-05, - "loss": 0.0415, + "learning_rate": 2.5064671649010032e-05, + "loss": 0.0496, "step": 53720 }, { "epoch": 2.51, - "learning_rate": 1.5056490553654309e-05, - "loss": 0.0442, + "learning_rate": 2.5064203579174012e-05, + "loss": 0.0623, "step": 53725 }, { "epoch": 2.51, - "learning_rate": 1.5056021752379169e-05, - "loss": 0.0594, + "learning_rate": 2.5063735509337992e-05, + "loss": 0.034, "step": 53730 }, { "epoch": 2.51, - "learning_rate": 1.5055552951104029e-05, - "loss": 0.1218, + "learning_rate": 2.5063267439501975e-05, + "loss": 0.0329, "step": 53735 }, { "epoch": 2.51, - "learning_rate": 1.5055084149828889e-05, - "loss": 0.122, + "learning_rate": 2.5062799369665955e-05, + "loss": 0.0754, "step": 53740 }, { "epoch": 2.51, - "learning_rate": 1.5054615348553749e-05, - "loss": 0.085, + "learning_rate": 2.5062331299829935e-05, + "loss": 0.0921, "step": 53745 }, { "epoch": 2.51, - "learning_rate": 1.505414654727861e-05, - "loss": 0.2021, + "learning_rate": 2.5061863229993914e-05, + "loss": 0.1461, "step": 53750 }, { "epoch": 2.51, - "learning_rate": 1.505367774600347e-05, - "loss": 0.1342, + "learning_rate": 2.5061395160157898e-05, + "loss": 0.1212, "step": 53755 }, { "epoch": 2.51, - "learning_rate": 1.505320894472833e-05, - "loss": 0.2367, + "learning_rate": 2.5060927090321877e-05, + "loss": 0.23, "step": 53760 }, { "epoch": 2.51, - "learning_rate": 1.505274014345319e-05, - "loss": 0.3059, + "learning_rate": 2.5060459020485857e-05, + "loss": 0.257, "step": 53765 }, { "epoch": 2.51, - "learning_rate": 1.505227134217805e-05, - "loss": 0.0305, + "learning_rate": 2.505999095064984e-05, + "loss": 0.0439, "step": 53770 }, { "epoch": 2.51, - "learning_rate": 1.5051802540902913e-05, - "loss": 0.0275, + "learning_rate": 2.505952288081382e-05, + "loss": 0.0298, "step": 53775 }, { "epoch": 2.51, - "learning_rate": 1.5051333739627773e-05, - "loss": 0.035, + "learning_rate": 2.5059054810977797e-05, + "loss": 0.0593, "step": 53780 }, { "epoch": 2.51, - "learning_rate": 1.5050864938352633e-05, - "loss": 0.0746, + "learning_rate": 2.5058586741141777e-05, + "loss": 0.0885, "step": 53785 }, { "epoch": 2.51, - "learning_rate": 1.5050396137077495e-05, - "loss": 0.0952, + "learning_rate": 2.505811867130576e-05, + "loss": 0.0743, "step": 53790 }, { "epoch": 2.51, - "learning_rate": 1.5049927335802355e-05, - "loss": 0.1478, + "learning_rate": 2.505765060146974e-05, + "loss": 0.1249, "step": 53795 }, { "epoch": 2.51, - "learning_rate": 1.5049458534527215e-05, - "loss": 0.1021, + "learning_rate": 2.505718253163372e-05, + "loss": 0.0854, "step": 53800 }, { "epoch": 2.51, - "learning_rate": 1.5048989733252075e-05, - "loss": 0.2201, + "learning_rate": 2.50567144617977e-05, + "loss": 0.1943, "step": 53805 }, { "epoch": 2.51, - "learning_rate": 1.5048520931976936e-05, - "loss": 0.2871, + "learning_rate": 2.5056246391961682e-05, + "loss": 0.1837, "step": 53810 }, { "epoch": 2.51, - "learning_rate": 1.5048052130701796e-05, - "loss": 0.2895, + "learning_rate": 2.5055778322125662e-05, + "loss": 0.2483, "step": 53815 }, { "epoch": 2.51, - "learning_rate": 1.5047583329426658e-05, - "loss": 0.0503, + "learning_rate": 2.5055310252289642e-05, + "loss": 0.0288, "step": 53820 }, { "epoch": 2.51, - "learning_rate": 1.5047114528151518e-05, - "loss": 0.0398, + "learning_rate": 2.5054842182453622e-05, + "loss": 0.0789, "step": 53825 }, { "epoch": 2.51, - "learning_rate": 1.504664572687638e-05, - "loss": 0.069, + "learning_rate": 2.5054374112617605e-05, + "loss": 0.079, "step": 53830 }, { "epoch": 2.51, - "learning_rate": 1.504617692560124e-05, - "loss": 0.0884, + "learning_rate": 2.5053906042781585e-05, + "loss": 0.0805, "step": 53835 }, { "epoch": 2.51, - "learning_rate": 1.50457081243261e-05, - "loss": 0.07, + "learning_rate": 2.5053437972945565e-05, + "loss": 0.1404, "step": 53840 }, { "epoch": 2.51, - "learning_rate": 1.504523932305096e-05, - "loss": 0.1193, + "learning_rate": 2.5052969903109545e-05, + "loss": 0.1767, "step": 53845 }, { "epoch": 2.51, - "learning_rate": 1.504477052177582e-05, - "loss": 0.1512, + "learning_rate": 2.5052501833273524e-05, + "loss": 0.0998, "step": 53850 }, { "epoch": 2.51, - "learning_rate": 1.504430172050068e-05, - "loss": 0.1932, + "learning_rate": 2.5052033763437504e-05, + "loss": 0.1828, "step": 53855 }, { "epoch": 2.51, - "learning_rate": 1.504383291922554e-05, - "loss": 0.2692, + "learning_rate": 2.5051565693601484e-05, + "loss": 0.32, "step": 53860 }, { "epoch": 2.51, - "learning_rate": 1.5043364117950402e-05, - "loss": 0.3341, + "learning_rate": 2.5051097623765467e-05, + "loss": 0.3163, "step": 53865 }, { "epoch": 2.51, - "learning_rate": 1.5042895316675264e-05, - "loss": 0.0146, + "learning_rate": 2.5050629553929447e-05, + "loss": 0.1114, "step": 53870 }, { "epoch": 2.51, - "learning_rate": 1.5042426515400124e-05, - "loss": 0.0379, + "learning_rate": 2.5050161484093427e-05, + "loss": 0.0232, "step": 53875 }, { "epoch": 2.51, - "learning_rate": 1.5041957714124984e-05, - "loss": 0.0244, + "learning_rate": 2.5049693414257407e-05, + "loss": 0.0577, "step": 53880 }, { "epoch": 2.51, - "learning_rate": 1.5041488912849844e-05, - "loss": 0.0709, + "learning_rate": 2.504922534442139e-05, + "loss": 0.0678, "step": 53885 }, { "epoch": 2.51, - "learning_rate": 1.5041020111574705e-05, - "loss": 0.1115, + "learning_rate": 2.504875727458537e-05, + "loss": 0.0459, "step": 53890 }, { "epoch": 2.51, - "learning_rate": 1.5040551310299565e-05, - "loss": 0.0665, + "learning_rate": 2.504828920474935e-05, + "loss": 0.1759, "step": 53895 }, { "epoch": 2.52, - "learning_rate": 1.5040082509024425e-05, - "loss": 0.0744, + "learning_rate": 2.5047821134913333e-05, + "loss": 0.1275, "step": 53900 }, { "epoch": 2.52, - "learning_rate": 1.5039613707749285e-05, - "loss": 0.1328, + "learning_rate": 2.5047353065077313e-05, + "loss": 0.1476, "step": 53905 }, { "epoch": 2.52, - "learning_rate": 1.5039144906474148e-05, - "loss": 0.2599, + "learning_rate": 2.504688499524129e-05, + "loss": 0.2772, "step": 53910 }, { "epoch": 2.52, - "learning_rate": 1.5038676105199008e-05, - "loss": 0.2966, + "learning_rate": 2.504641692540527e-05, + "loss": 0.2799, "step": 53915 }, { "epoch": 2.52, - "learning_rate": 1.5038207303923868e-05, - "loss": 0.0359, + "learning_rate": 2.5045948855569252e-05, + "loss": 0.0564, "step": 53920 }, { "epoch": 2.52, - "learning_rate": 1.5037738502648728e-05, - "loss": 0.0412, + "learning_rate": 2.5045480785733232e-05, + "loss": 0.0162, "step": 53925 }, { "epoch": 2.52, - "learning_rate": 1.503726970137359e-05, - "loss": 0.0839, + "learning_rate": 2.504501271589721e-05, + "loss": 0.0571, "step": 53930 }, { "epoch": 2.52, - "learning_rate": 1.503680090009845e-05, - "loss": 0.048, + "learning_rate": 2.504454464606119e-05, + "loss": 0.1392, "step": 53935 }, { "epoch": 2.52, - "learning_rate": 1.503633209882331e-05, - "loss": 0.0786, + "learning_rate": 2.5044076576225175e-05, + "loss": 0.0791, "step": 53940 }, { "epoch": 2.52, - "learning_rate": 1.503586329754817e-05, - "loss": 0.1743, + "learning_rate": 2.5043608506389154e-05, + "loss": 0.0467, "step": 53945 }, { "epoch": 2.52, - "learning_rate": 1.503539449627303e-05, - "loss": 0.0831, + "learning_rate": 2.5043140436553134e-05, + "loss": 0.1503, "step": 53950 }, { "epoch": 2.52, - "learning_rate": 1.5034925694997891e-05, - "loss": 0.2851, + "learning_rate": 2.5042672366717117e-05, + "loss": 0.1876, "step": 53955 }, { "epoch": 2.52, - "learning_rate": 1.5034456893722753e-05, - "loss": 0.3397, + "learning_rate": 2.5042204296881097e-05, + "loss": 0.2658, "step": 53960 }, { "epoch": 2.52, - "learning_rate": 1.5033988092447613e-05, - "loss": 0.3113, + "learning_rate": 2.5041736227045077e-05, + "loss": 0.2373, "step": 53965 }, { "epoch": 2.52, - "learning_rate": 1.5033519291172474e-05, - "loss": 0.0512, + "learning_rate": 2.5041268157209054e-05, + "loss": 0.0701, "step": 53970 }, { "epoch": 2.52, - "learning_rate": 1.5033050489897334e-05, - "loss": 0.0442, + "learning_rate": 2.5040800087373037e-05, + "loss": 0.0436, "step": 53975 }, { "epoch": 2.52, - "learning_rate": 1.5032581688622194e-05, - "loss": 0.0382, + "learning_rate": 2.5040332017537017e-05, + "loss": 0.0613, "step": 53980 }, { "epoch": 2.52, - "learning_rate": 1.5032112887347054e-05, - "loss": 0.0607, + "learning_rate": 2.5039863947700996e-05, + "loss": 0.04, "step": 53985 }, { "epoch": 2.52, - "learning_rate": 1.5031644086071914e-05, - "loss": 0.1256, + "learning_rate": 2.5039395877864976e-05, + "loss": 0.2007, "step": 53990 }, { "epoch": 2.52, - "learning_rate": 1.5031175284796776e-05, - "loss": 0.1335, + "learning_rate": 2.503892780802896e-05, + "loss": 0.0659, "step": 53995 }, { "epoch": 2.52, - "learning_rate": 1.5030706483521636e-05, - "loss": 0.1469, + "learning_rate": 2.503845973819294e-05, + "loss": 0.1564, "step": 54000 }, { "epoch": 2.52, - "learning_rate": 1.5030237682246497e-05, - "loss": 0.1371, + "learning_rate": 2.503799166835692e-05, + "loss": 0.0791, "step": 54005 }, { "epoch": 2.52, - "learning_rate": 1.5029768880971359e-05, - "loss": 0.1676, + "learning_rate": 2.50375235985209e-05, + "loss": 0.1994, "step": 54010 }, { "epoch": 2.52, - "learning_rate": 1.5029300079696219e-05, - "loss": 0.1999, + "learning_rate": 2.5037055528684882e-05, + "loss": 0.255, "step": 54015 }, { "epoch": 2.52, - "learning_rate": 1.5028831278421079e-05, - "loss": 0.0053, + "learning_rate": 2.5036587458848862e-05, + "loss": 0.045, "step": 54020 }, { "epoch": 2.52, - "learning_rate": 1.5028362477145939e-05, - "loss": 0.0534, + "learning_rate": 2.503611938901284e-05, + "loss": 0.0832, "step": 54025 }, { "epoch": 2.52, - "learning_rate": 1.5027893675870799e-05, - "loss": 0.0322, + "learning_rate": 2.5035651319176825e-05, + "loss": 0.0536, "step": 54030 }, { "epoch": 2.52, - "learning_rate": 1.502742487459566e-05, - "loss": 0.1084, + "learning_rate": 2.50351832493408e-05, + "loss": 0.0861, "step": 54035 }, { "epoch": 2.52, - "learning_rate": 1.502695607332052e-05, - "loss": 0.1186, + "learning_rate": 2.503471517950478e-05, + "loss": 0.0867, "step": 54040 }, { "epoch": 2.52, - "learning_rate": 1.502648727204538e-05, - "loss": 0.0961, + "learning_rate": 2.503424710966876e-05, + "loss": 0.0945, "step": 54045 }, { "epoch": 2.52, - "learning_rate": 1.5026018470770244e-05, - "loss": 0.1099, + "learning_rate": 2.5033779039832744e-05, + "loss": 0.0858, "step": 54050 }, { "epoch": 2.52, - "learning_rate": 1.5025549669495104e-05, - "loss": 0.2092, + "learning_rate": 2.5033310969996724e-05, + "loss": 0.1485, "step": 54055 }, { "epoch": 2.52, - "learning_rate": 1.5025080868219963e-05, - "loss": 0.4047, + "learning_rate": 2.5032842900160704e-05, + "loss": 0.1836, "step": 54060 }, { "epoch": 2.52, - "learning_rate": 1.5024612066944823e-05, - "loss": 0.2458, + "learning_rate": 2.5032374830324684e-05, + "loss": 0.2405, "step": 54065 }, { "epoch": 2.52, - "learning_rate": 1.5024143265669683e-05, - "loss": 0.0388, + "learning_rate": 2.5031906760488667e-05, + "loss": 0.0285, "step": 54070 }, { "epoch": 2.52, - "learning_rate": 1.5023674464394545e-05, - "loss": 0.0411, + "learning_rate": 2.5031438690652647e-05, + "loss": 0.0372, "step": 54075 }, { "epoch": 2.52, - "learning_rate": 1.5023205663119405e-05, - "loss": 0.051, + "learning_rate": 2.5030970620816626e-05, + "loss": 0.0403, "step": 54080 }, { "epoch": 2.52, - "learning_rate": 1.5022736861844265e-05, - "loss": 0.0488, + "learning_rate": 2.503050255098061e-05, + "loss": 0.0616, "step": 54085 }, { "epoch": 2.52, - "learning_rate": 1.5022268060569125e-05, - "loss": 0.0987, + "learning_rate": 2.503003448114459e-05, + "loss": 0.101, "step": 54090 }, { "epoch": 2.52, - "learning_rate": 1.5021799259293985e-05, - "loss": 0.0812, + "learning_rate": 2.502956641130857e-05, + "loss": 0.0591, "step": 54095 }, { "epoch": 2.52, - "learning_rate": 1.5021330458018848e-05, - "loss": 0.142, + "learning_rate": 2.5029098341472546e-05, + "loss": 0.0808, "step": 54100 }, { "epoch": 2.52, - "learning_rate": 1.5020861656743708e-05, - "loss": 0.1601, + "learning_rate": 2.502863027163653e-05, + "loss": 0.1996, "step": 54105 }, { "epoch": 2.52, - "learning_rate": 1.5020392855468568e-05, - "loss": 0.2332, + "learning_rate": 2.502816220180051e-05, + "loss": 0.1673, "step": 54110 }, { "epoch": 2.53, - "learning_rate": 1.501992405419343e-05, - "loss": 0.2225, + "learning_rate": 2.502769413196449e-05, + "loss": 0.2462, "step": 54115 }, { "epoch": 2.53, - "learning_rate": 1.501945525291829e-05, - "loss": 0.0234, + "learning_rate": 2.502722606212847e-05, + "loss": 0.0194, "step": 54120 }, { "epoch": 2.53, - "learning_rate": 1.501898645164315e-05, - "loss": 0.0471, + "learning_rate": 2.502675799229245e-05, + "loss": 0.0507, "step": 54125 }, { "epoch": 2.53, - "learning_rate": 1.501851765036801e-05, - "loss": 0.0303, + "learning_rate": 2.502628992245643e-05, + "loss": 0.1076, "step": 54130 }, { "epoch": 2.53, - "learning_rate": 1.501804884909287e-05, - "loss": 0.0284, + "learning_rate": 2.502582185262041e-05, + "loss": 0.0711, "step": 54135 }, { "epoch": 2.53, - "learning_rate": 1.501758004781773e-05, - "loss": 0.0837, + "learning_rate": 2.5025353782784394e-05, + "loss": 0.0609, "step": 54140 }, { "epoch": 2.53, - "learning_rate": 1.5017111246542592e-05, - "loss": 0.1419, + "learning_rate": 2.5024885712948374e-05, + "loss": 0.1024, "step": 54145 }, { "epoch": 2.53, - "learning_rate": 1.5016642445267452e-05, - "loss": 0.1755, + "learning_rate": 2.5024417643112354e-05, + "loss": 0.0734, "step": 54150 }, { "epoch": 2.53, - "learning_rate": 1.5016173643992314e-05, - "loss": 0.1494, + "learning_rate": 2.5023949573276334e-05, + "loss": 0.1106, "step": 54155 }, { "epoch": 2.53, - "learning_rate": 1.5015704842717174e-05, - "loss": 0.1468, + "learning_rate": 2.5023481503440314e-05, + "loss": 0.1954, "step": 54160 }, { "epoch": 2.53, - "learning_rate": 1.5015236041442034e-05, - "loss": 0.2734, + "learning_rate": 2.5023013433604294e-05, + "loss": 0.3085, "step": 54165 }, { "epoch": 2.53, - "learning_rate": 1.5014767240166894e-05, - "loss": 0.0356, + "learning_rate": 2.5022545363768273e-05, + "loss": 0.0695, "step": 54170 }, { "epoch": 2.53, - "learning_rate": 1.5014298438891754e-05, - "loss": 0.0384, + "learning_rate": 2.5022077293932253e-05, + "loss": 0.0673, "step": 54175 }, { "epoch": 2.53, - "learning_rate": 1.5013829637616615e-05, - "loss": 0.0356, + "learning_rate": 2.5021609224096236e-05, + "loss": 0.0393, "step": 54180 }, { "epoch": 2.53, - "learning_rate": 1.5013360836341475e-05, - "loss": 0.0438, + "learning_rate": 2.5021141154260216e-05, + "loss": 0.0529, "step": 54185 }, { "epoch": 2.53, - "learning_rate": 1.5012892035066337e-05, - "loss": 0.1356, + "learning_rate": 2.5020673084424196e-05, + "loss": 0.1031, "step": 54190 }, { "epoch": 2.53, - "learning_rate": 1.5012423233791199e-05, - "loss": 0.0935, + "learning_rate": 2.5020205014588176e-05, + "loss": 0.0893, "step": 54195 }, { "epoch": 2.53, - "learning_rate": 1.5011954432516059e-05, - "loss": 0.1811, + "learning_rate": 2.501973694475216e-05, + "loss": 0.246, "step": 54200 }, { "epoch": 2.53, - "learning_rate": 1.5011485631240918e-05, - "loss": 0.1958, + "learning_rate": 2.501926887491614e-05, + "loss": 0.2143, "step": 54205 }, { "epoch": 2.53, - "learning_rate": 1.5011016829965778e-05, - "loss": 0.1376, + "learning_rate": 2.501880080508012e-05, + "loss": 0.2443, "step": 54210 }, { "epoch": 2.53, - "learning_rate": 1.5010548028690638e-05, - "loss": 0.229, + "learning_rate": 2.5018332735244102e-05, + "loss": 0.4272, "step": 54215 }, { "epoch": 2.53, - "learning_rate": 1.50100792274155e-05, - "loss": 0.0747, + "learning_rate": 2.501786466540808e-05, + "loss": 0.0986, "step": 54220 }, { "epoch": 2.53, - "learning_rate": 1.500961042614036e-05, - "loss": 0.0206, + "learning_rate": 2.5017396595572058e-05, + "loss": 0.0217, "step": 54225 }, { "epoch": 2.53, - "learning_rate": 1.500914162486522e-05, - "loss": 0.0575, + "learning_rate": 2.5016928525736038e-05, + "loss": 0.0281, "step": 54230 }, { "epoch": 2.53, - "learning_rate": 1.5008672823590083e-05, - "loss": 0.063, + "learning_rate": 2.501646045590002e-05, + "loss": 0.0641, "step": 54235 }, { "epoch": 2.53, - "learning_rate": 1.5008204022314943e-05, - "loss": 0.0903, + "learning_rate": 2.5015992386064e-05, + "loss": 0.0824, "step": 54240 }, { "epoch": 2.53, - "learning_rate": 1.5007735221039803e-05, - "loss": 0.2531, + "learning_rate": 2.501552431622798e-05, + "loss": 0.0702, "step": 54245 }, { "epoch": 2.53, - "learning_rate": 1.5007266419764663e-05, - "loss": 0.1318, + "learning_rate": 2.501505624639196e-05, + "loss": 0.107, "step": 54250 }, { "epoch": 2.53, - "learning_rate": 1.5006797618489523e-05, - "loss": 0.2507, + "learning_rate": 2.5014588176555944e-05, + "loss": 0.0983, "step": 54255 }, { "epoch": 2.53, - "learning_rate": 1.5006328817214384e-05, - "loss": 0.2718, + "learning_rate": 2.5014120106719924e-05, + "loss": 0.1499, "step": 54260 }, { "epoch": 2.53, - "learning_rate": 1.5005860015939244e-05, - "loss": 0.24, + "learning_rate": 2.5013652036883903e-05, + "loss": 0.2461, "step": 54265 }, { "epoch": 2.53, - "learning_rate": 1.5005391214664104e-05, - "loss": 0.0388, + "learning_rate": 2.5013183967047887e-05, + "loss": 0.0382, "step": 54270 }, { "epoch": 2.53, - "learning_rate": 1.5004922413388964e-05, - "loss": 0.0633, + "learning_rate": 2.5012715897211866e-05, + "loss": 0.0754, "step": 54275 }, { "epoch": 2.53, - "learning_rate": 1.5004453612113826e-05, - "loss": 0.0199, + "learning_rate": 2.5012247827375846e-05, + "loss": 0.0482, "step": 54280 }, { "epoch": 2.53, - "learning_rate": 1.5003984810838688e-05, - "loss": 0.0784, + "learning_rate": 2.5011779757539823e-05, + "loss": 0.0496, "step": 54285 }, { "epoch": 2.53, - "learning_rate": 1.5003516009563547e-05, - "loss": 0.0515, + "learning_rate": 2.5011311687703806e-05, + "loss": 0.0858, "step": 54290 }, { "epoch": 2.53, - "learning_rate": 1.5003047208288407e-05, - "loss": 0.197, + "learning_rate": 2.5010843617867786e-05, + "loss": 0.0985, "step": 54295 }, { "epoch": 2.53, - "learning_rate": 1.5002578407013269e-05, - "loss": 0.146, + "learning_rate": 2.5010375548031766e-05, + "loss": 0.1118, "step": 54300 }, { "epoch": 2.53, - "learning_rate": 1.5002109605738129e-05, - "loss": 0.1319, + "learning_rate": 2.5009907478195745e-05, + "loss": 0.1143, "step": 54305 }, { "epoch": 2.53, - "learning_rate": 1.5001640804462989e-05, - "loss": 0.3043, + "learning_rate": 2.500943940835973e-05, + "loss": 0.1826, "step": 54310 }, { "epoch": 2.53, - "learning_rate": 1.5001172003187849e-05, - "loss": 0.3923, + "learning_rate": 2.500897133852371e-05, + "loss": 0.2129, "step": 54315 }, { "epoch": 2.53, - "learning_rate": 1.500070320191271e-05, - "loss": 0.053, + "learning_rate": 2.5008503268687688e-05, + "loss": 0.0589, "step": 54320 }, { "epoch": 2.53, - "learning_rate": 1.500023440063757e-05, - "loss": 0.0313, + "learning_rate": 2.500803519885167e-05, + "loss": 0.041, "step": 54325 }, { "epoch": 2.54, - "learning_rate": 1.4999765599362432e-05, - "loss": 0.066, + "learning_rate": 2.500756712901565e-05, + "loss": 0.0422, "step": 54330 }, { "epoch": 2.54, - "learning_rate": 1.4999296798087292e-05, - "loss": 0.0837, + "learning_rate": 2.500709905917963e-05, + "loss": 0.0874, "step": 54335 }, { "epoch": 2.54, - "learning_rate": 1.4998827996812154e-05, - "loss": 0.06, + "learning_rate": 2.500663098934361e-05, + "loss": 0.1185, "step": 54340 }, { "epoch": 2.54, - "learning_rate": 1.4998359195537014e-05, - "loss": 0.1305, + "learning_rate": 2.5006162919507594e-05, + "loss": 0.0856, "step": 54345 }, { "epoch": 2.54, - "learning_rate": 1.4997890394261873e-05, - "loss": 0.1331, + "learning_rate": 2.500569484967157e-05, + "loss": 0.0997, "step": 54350 }, { "epoch": 2.54, - "learning_rate": 1.4997421592986733e-05, - "loss": 0.1286, + "learning_rate": 2.500522677983555e-05, + "loss": 0.1972, "step": 54355 }, { "epoch": 2.54, - "learning_rate": 1.4996952791711595e-05, - "loss": 0.2388, + "learning_rate": 2.500475870999953e-05, + "loss": 0.1929, "step": 54360 }, { "epoch": 2.54, - "learning_rate": 1.4996483990436455e-05, - "loss": 0.419, + "learning_rate": 2.5004290640163513e-05, + "loss": 0.3079, "step": 54365 }, { "epoch": 2.54, - "learning_rate": 1.4996015189161315e-05, - "loss": 0.0119, + "learning_rate": 2.5003822570327493e-05, + "loss": 0.0272, "step": 54370 }, { "epoch": 2.54, - "learning_rate": 1.4995546387886177e-05, - "loss": 0.0559, + "learning_rate": 2.5003354500491473e-05, + "loss": 0.0555, "step": 54375 }, { "epoch": 2.54, - "learning_rate": 1.4995077586611038e-05, - "loss": 0.0893, + "learning_rate": 2.5002886430655453e-05, + "loss": 0.071, "step": 54380 }, { "epoch": 2.54, - "learning_rate": 1.4994608785335898e-05, - "loss": 0.0824, + "learning_rate": 2.5002418360819436e-05, + "loss": 0.0784, "step": 54385 }, { "epoch": 2.54, - "learning_rate": 1.4994139984060758e-05, - "loss": 0.0721, + "learning_rate": 2.5001950290983416e-05, + "loss": 0.091, "step": 54390 }, { "epoch": 2.54, - "learning_rate": 1.4993671182785618e-05, - "loss": 0.1021, + "learning_rate": 2.5001482221147396e-05, + "loss": 0.0573, "step": 54395 }, { "epoch": 2.54, - "learning_rate": 1.499320238151048e-05, - "loss": 0.1121, + "learning_rate": 2.500101415131138e-05, + "loss": 0.1737, "step": 54400 }, { "epoch": 2.54, - "learning_rate": 1.499273358023534e-05, - "loss": 0.1667, + "learning_rate": 2.500054608147536e-05, + "loss": 0.148, "step": 54405 }, { "epoch": 2.54, - "learning_rate": 1.49922647789602e-05, - "loss": 0.2541, + "learning_rate": 2.500007801163934e-05, + "loss": 0.2922, "step": 54410 }, { "epoch": 2.54, - "learning_rate": 1.499179597768506e-05, - "loss": 0.3177, + "learning_rate": 2.4999609941803315e-05, + "loss": 0.2347, "step": 54415 }, { "epoch": 2.54, - "learning_rate": 1.499132717640992e-05, - "loss": 0.0115, + "learning_rate": 2.4999141871967298e-05, + "loss": 0.044, "step": 54420 }, { "epoch": 2.54, - "learning_rate": 1.4990858375134783e-05, - "loss": 0.0391, + "learning_rate": 2.4998673802131278e-05, + "loss": 0.0217, "step": 54425 }, { "epoch": 2.54, - "learning_rate": 1.4990389573859643e-05, - "loss": 0.0732, + "learning_rate": 2.4998205732295258e-05, + "loss": 0.0272, "step": 54430 }, { "epoch": 2.54, - "learning_rate": 1.4989920772584503e-05, - "loss": 0.1121, + "learning_rate": 2.4997737662459238e-05, + "loss": 0.0377, "step": 54435 }, { "epoch": 2.54, - "learning_rate": 1.4989451971309364e-05, - "loss": 0.1034, + "learning_rate": 2.499726959262322e-05, + "loss": 0.0569, "step": 54440 }, { "epoch": 2.54, - "learning_rate": 1.4988983170034224e-05, - "loss": 0.1104, + "learning_rate": 2.49968015227872e-05, + "loss": 0.1419, "step": 54445 }, { "epoch": 2.54, - "learning_rate": 1.4988514368759084e-05, - "loss": 0.0782, + "learning_rate": 2.499633345295118e-05, + "loss": 0.1619, "step": 54450 }, { "epoch": 2.54, - "learning_rate": 1.4988045567483944e-05, - "loss": 0.125, + "learning_rate": 2.4995865383115164e-05, + "loss": 0.1279, "step": 54455 }, { "epoch": 2.54, - "learning_rate": 1.4987576766208804e-05, - "loss": 0.251, + "learning_rate": 2.4995397313279143e-05, + "loss": 0.1513, "step": 54460 }, { "epoch": 2.54, - "learning_rate": 1.4987107964933665e-05, - "loss": 0.2208, + "learning_rate": 2.4994929243443123e-05, + "loss": 0.3358, "step": 54465 }, { "epoch": 2.54, - "learning_rate": 1.4986639163658527e-05, - "loss": 0.0434, + "learning_rate": 2.4994461173607103e-05, + "loss": 0.0211, "step": 54470 }, { "epoch": 2.54, - "learning_rate": 1.4986170362383387e-05, - "loss": 0.047, + "learning_rate": 2.4993993103771083e-05, + "loss": 0.0362, "step": 54475 }, { "epoch": 2.54, - "learning_rate": 1.4985701561108249e-05, - "loss": 0.0701, + "learning_rate": 2.4993525033935063e-05, + "loss": 0.0855, "step": 54480 }, { "epoch": 2.54, - "learning_rate": 1.4985232759833109e-05, - "loss": 0.0824, + "learning_rate": 2.4993056964099043e-05, + "loss": 0.0433, "step": 54485 }, { "epoch": 2.54, - "learning_rate": 1.4984763958557969e-05, - "loss": 0.062, + "learning_rate": 2.4992588894263022e-05, + "loss": 0.0331, "step": 54490 }, { "epoch": 2.54, - "learning_rate": 1.4984295157282828e-05, - "loss": 0.1074, + "learning_rate": 2.4992120824427006e-05, + "loss": 0.1079, "step": 54495 }, { "epoch": 2.54, - "learning_rate": 1.4983826356007688e-05, - "loss": 0.2118, + "learning_rate": 2.4991652754590985e-05, + "loss": 0.2036, "step": 54500 }, { "epoch": 2.54, - "learning_rate": 1.498335755473255e-05, - "loss": 0.1498, + "learning_rate": 2.4991184684754965e-05, + "loss": 0.1188, "step": 54505 }, { "epoch": 2.54, - "learning_rate": 1.498288875345741e-05, - "loss": 0.1589, + "learning_rate": 2.499071661491895e-05, + "loss": 0.2893, "step": 54510 }, { "epoch": 2.54, - "learning_rate": 1.4982419952182272e-05, - "loss": 0.265, + "learning_rate": 2.4990248545082928e-05, + "loss": 0.3575, "step": 54515 }, { "epoch": 2.54, - "learning_rate": 1.4981951150907133e-05, - "loss": 0.0316, + "learning_rate": 2.4989780475246908e-05, + "loss": 0.0312, "step": 54520 }, { "epoch": 2.54, - "learning_rate": 1.4981482349631993e-05, - "loss": 0.0771, + "learning_rate": 2.4989312405410888e-05, + "loss": 0.0433, "step": 54525 }, { "epoch": 2.54, - "learning_rate": 1.4981013548356853e-05, - "loss": 0.0136, + "learning_rate": 2.498884433557487e-05, + "loss": 0.0632, "step": 54530 }, { "epoch": 2.54, - "learning_rate": 1.4980544747081713e-05, - "loss": 0.0744, + "learning_rate": 2.498837626573885e-05, + "loss": 0.1006, "step": 54535 }, { "epoch": 2.54, - "learning_rate": 1.4980075945806573e-05, - "loss": 0.1031, + "learning_rate": 2.4987908195902827e-05, + "loss": 0.0779, "step": 54540 }, { "epoch": 2.55, - "learning_rate": 1.4979607144531435e-05, - "loss": 0.0737, + "learning_rate": 2.4987440126066807e-05, + "loss": 0.1326, "step": 54545 }, { "epoch": 2.55, - "learning_rate": 1.4979138343256295e-05, - "loss": 0.0639, + "learning_rate": 2.498697205623079e-05, + "loss": 0.1437, "step": 54550 }, { "epoch": 2.55, - "learning_rate": 1.4978669541981154e-05, - "loss": 0.143, + "learning_rate": 2.498650398639477e-05, + "loss": 0.1954, "step": 54555 }, { "epoch": 2.55, - "learning_rate": 1.4978200740706018e-05, - "loss": 0.2828, + "learning_rate": 2.498603591655875e-05, + "loss": 0.3787, "step": 54560 }, { "epoch": 2.55, - "learning_rate": 1.4977731939430878e-05, - "loss": 0.3214, + "learning_rate": 2.4985567846722733e-05, + "loss": 0.1898, "step": 54565 }, { "epoch": 2.55, - "learning_rate": 1.4977263138155738e-05, - "loss": 0.0628, + "learning_rate": 2.4985099776886713e-05, + "loss": 0.0355, "step": 54570 }, { "epoch": 2.55, - "learning_rate": 1.4976794336880598e-05, - "loss": 0.028, + "learning_rate": 2.4984631707050693e-05, + "loss": 0.0389, "step": 54575 }, { "epoch": 2.55, - "learning_rate": 1.4976325535605458e-05, - "loss": 0.0796, + "learning_rate": 2.4984163637214673e-05, + "loss": 0.1168, "step": 54580 }, { "epoch": 2.55, - "learning_rate": 1.4975856734330319e-05, - "loss": 0.086, + "learning_rate": 2.4983695567378656e-05, + "loss": 0.0968, "step": 54585 }, { "epoch": 2.55, - "learning_rate": 1.4975387933055179e-05, - "loss": 0.0964, + "learning_rate": 2.4983227497542636e-05, + "loss": 0.0676, "step": 54590 }, { "epoch": 2.55, - "learning_rate": 1.4974919131780039e-05, - "loss": 0.0622, + "learning_rate": 2.4982759427706615e-05, + "loss": 0.0811, "step": 54595 }, { "epoch": 2.55, - "learning_rate": 1.4974450330504899e-05, - "loss": 0.1113, + "learning_rate": 2.4982291357870595e-05, + "loss": 0.1514, "step": 54600 }, { "epoch": 2.55, - "learning_rate": 1.4973981529229759e-05, - "loss": 0.1587, + "learning_rate": 2.4981823288034575e-05, + "loss": 0.1824, "step": 54605 }, { "epoch": 2.55, - "learning_rate": 1.4973512727954622e-05, - "loss": 0.215, + "learning_rate": 2.4981355218198555e-05, + "loss": 0.2406, "step": 54610 }, { "epoch": 2.55, - "learning_rate": 1.4973043926679482e-05, - "loss": 0.2566, + "learning_rate": 2.4980887148362535e-05, + "loss": 0.3005, "step": 54615 }, { "epoch": 2.55, - "learning_rate": 1.4972575125404342e-05, - "loss": 0.0598, + "learning_rate": 2.4980419078526515e-05, + "loss": 0.0663, "step": 54620 }, { "epoch": 2.55, - "learning_rate": 1.4972106324129204e-05, - "loss": 0.0519, + "learning_rate": 2.4979951008690498e-05, + "loss": 0.036, "step": 54625 }, { "epoch": 2.55, - "learning_rate": 1.4971637522854064e-05, - "loss": 0.0432, + "learning_rate": 2.4979482938854478e-05, + "loss": 0.0223, "step": 54630 }, { "epoch": 2.55, - "learning_rate": 1.4971168721578924e-05, - "loss": 0.0895, + "learning_rate": 2.4979014869018457e-05, + "loss": 0.0473, "step": 54635 }, { "epoch": 2.55, - "learning_rate": 1.4970699920303784e-05, - "loss": 0.0863, + "learning_rate": 2.497854679918244e-05, + "loss": 0.0789, "step": 54640 }, { "epoch": 2.55, - "learning_rate": 1.4970231119028643e-05, - "loss": 0.2322, + "learning_rate": 2.497807872934642e-05, + "loss": 0.1912, "step": 54645 }, { "epoch": 2.55, - "learning_rate": 1.4969762317753505e-05, - "loss": 0.079, + "learning_rate": 2.49776106595104e-05, + "loss": 0.1911, "step": 54650 }, { "epoch": 2.55, - "learning_rate": 1.4969293516478367e-05, - "loss": 0.1671, + "learning_rate": 2.497714258967438e-05, + "loss": 0.185, "step": 54655 }, { "epoch": 2.55, - "learning_rate": 1.4968824715203227e-05, - "loss": 0.1948, + "learning_rate": 2.4976674519838363e-05, + "loss": 0.2641, "step": 54660 }, { "epoch": 2.55, - "learning_rate": 1.4968355913928088e-05, - "loss": 0.2194, + "learning_rate": 2.497620645000234e-05, + "loss": 0.1693, "step": 54665 }, { "epoch": 2.55, - "learning_rate": 1.4967887112652948e-05, - "loss": 0.0337, + "learning_rate": 2.497573838016632e-05, + "loss": 0.034, "step": 54670 }, { "epoch": 2.55, - "learning_rate": 1.4967418311377808e-05, - "loss": 0.0521, + "learning_rate": 2.49752703103303e-05, + "loss": 0.0635, "step": 54675 }, { "epoch": 2.55, - "learning_rate": 1.4966949510102668e-05, - "loss": 0.0716, + "learning_rate": 2.4974802240494282e-05, + "loss": 0.0585, "step": 54680 }, { "epoch": 2.55, - "learning_rate": 1.4966480708827528e-05, - "loss": 0.074, + "learning_rate": 2.4974334170658262e-05, + "loss": 0.0314, "step": 54685 }, { "epoch": 2.55, - "learning_rate": 1.496601190755239e-05, - "loss": 0.0648, + "learning_rate": 2.4973866100822242e-05, + "loss": 0.0966, "step": 54690 }, { "epoch": 2.55, - "learning_rate": 1.496554310627725e-05, - "loss": 0.0953, + "learning_rate": 2.4973398030986225e-05, + "loss": 0.0917, "step": 54695 }, { "epoch": 2.55, - "learning_rate": 1.4965074305002111e-05, - "loss": 0.1457, + "learning_rate": 2.4972929961150205e-05, + "loss": 0.1054, "step": 54700 }, { "epoch": 2.55, - "learning_rate": 1.4964605503726973e-05, - "loss": 0.0831, + "learning_rate": 2.4972461891314185e-05, + "loss": 0.166, "step": 54705 }, { "epoch": 2.55, - "learning_rate": 1.4964136702451833e-05, - "loss": 0.222, + "learning_rate": 2.4971993821478165e-05, + "loss": 0.1697, "step": 54710 }, { "epoch": 2.55, - "learning_rate": 1.4963667901176693e-05, - "loss": 0.2152, + "learning_rate": 2.4971525751642148e-05, + "loss": 0.2646, "step": 54715 }, { "epoch": 2.55, - "learning_rate": 1.4963199099901553e-05, - "loss": 0.0534, + "learning_rate": 2.4971057681806128e-05, + "loss": 0.074, "step": 54720 }, { "epoch": 2.55, - "learning_rate": 1.4962730298626413e-05, - "loss": 0.0514, + "learning_rate": 2.4970589611970108e-05, + "loss": 0.0266, "step": 54725 }, { "epoch": 2.55, - "learning_rate": 1.4962261497351274e-05, - "loss": 0.0556, + "learning_rate": 2.4970121542134084e-05, + "loss": 0.0337, "step": 54730 }, { "epoch": 2.55, - "learning_rate": 1.4961792696076134e-05, - "loss": 0.1183, + "learning_rate": 2.4969653472298067e-05, + "loss": 0.0613, "step": 54735 }, { "epoch": 2.55, - "learning_rate": 1.4961323894800994e-05, - "loss": 0.0392, + "learning_rate": 2.4969185402462047e-05, + "loss": 0.1644, "step": 54740 }, { "epoch": 2.55, - "learning_rate": 1.4960855093525854e-05, - "loss": 0.1177, + "learning_rate": 2.4968717332626027e-05, + "loss": 0.075, "step": 54745 }, { "epoch": 2.55, - "learning_rate": 1.4960386292250717e-05, - "loss": 0.2041, + "learning_rate": 2.496824926279001e-05, + "loss": 0.0901, "step": 54750 }, { "epoch": 2.55, - "learning_rate": 1.4959917490975577e-05, - "loss": 0.116, + "learning_rate": 2.496778119295399e-05, + "loss": 0.145, "step": 54755 }, { "epoch": 2.56, - "learning_rate": 1.4959448689700437e-05, - "loss": 0.225, + "learning_rate": 2.496731312311797e-05, + "loss": 0.1835, "step": 54760 }, { "epoch": 2.56, - "learning_rate": 1.4958979888425297e-05, - "loss": 0.3145, + "learning_rate": 2.496684505328195e-05, + "loss": 0.3753, "step": 54765 }, { "epoch": 2.56, - "learning_rate": 1.4958511087150159e-05, - "loss": 0.1032, + "learning_rate": 2.4966376983445933e-05, + "loss": 0.0463, "step": 54770 }, { "epoch": 2.56, - "learning_rate": 1.4958042285875019e-05, - "loss": 0.0883, + "learning_rate": 2.4965908913609913e-05, + "loss": 0.022, "step": 54775 }, { "epoch": 2.56, - "learning_rate": 1.4957573484599879e-05, - "loss": 0.0607, + "learning_rate": 2.4965440843773892e-05, + "loss": 0.0587, "step": 54780 }, { "epoch": 2.56, - "learning_rate": 1.4957104683324739e-05, - "loss": 0.1201, + "learning_rate": 2.4964972773937872e-05, + "loss": 0.0567, "step": 54785 }, { "epoch": 2.56, - "learning_rate": 1.49566358820496e-05, - "loss": 0.064, + "learning_rate": 2.4964504704101852e-05, + "loss": 0.1246, "step": 54790 }, { "epoch": 2.56, - "learning_rate": 1.4956167080774462e-05, - "loss": 0.2079, + "learning_rate": 2.4964036634265832e-05, + "loss": 0.1212, "step": 54795 }, { "epoch": 2.56, - "learning_rate": 1.4955698279499322e-05, - "loss": 0.143, + "learning_rate": 2.496356856442981e-05, + "loss": 0.2095, "step": 54800 }, { "epoch": 2.56, - "learning_rate": 1.4955229478224182e-05, - "loss": 0.1583, + "learning_rate": 2.496310049459379e-05, + "loss": 0.1394, "step": 54805 }, { "epoch": 2.56, - "learning_rate": 1.4954760676949043e-05, - "loss": 0.277, + "learning_rate": 2.4962632424757775e-05, + "loss": 0.2123, "step": 54810 }, { "epoch": 2.56, - "learning_rate": 1.4954291875673903e-05, - "loss": 0.2445, + "learning_rate": 2.4962164354921755e-05, + "loss": 0.1837, "step": 54815 }, { "epoch": 2.56, - "learning_rate": 1.4953823074398763e-05, - "loss": 0.068, + "learning_rate": 2.4961696285085734e-05, + "loss": 0.0484, "step": 54820 }, { "epoch": 2.56, - "learning_rate": 1.4953354273123623e-05, - "loss": 0.0589, + "learning_rate": 2.4961228215249718e-05, + "loss": 0.0366, "step": 54825 }, { "epoch": 2.56, - "learning_rate": 1.4952885471848485e-05, - "loss": 0.038, + "learning_rate": 2.4960760145413697e-05, + "loss": 0.0379, "step": 54830 }, { "epoch": 2.56, - "learning_rate": 1.4952416670573345e-05, - "loss": 0.0265, + "learning_rate": 2.4960292075577677e-05, + "loss": 0.0851, "step": 54835 }, { "epoch": 2.56, - "learning_rate": 1.4951947869298206e-05, - "loss": 0.1087, + "learning_rate": 2.4959824005741657e-05, + "loss": 0.0592, "step": 54840 }, { "epoch": 2.56, - "learning_rate": 1.4951479068023066e-05, - "loss": 0.1027, + "learning_rate": 2.495935593590564e-05, + "loss": 0.1129, "step": 54845 }, { "epoch": 2.56, - "learning_rate": 1.4951010266747928e-05, - "loss": 0.1725, + "learning_rate": 2.495888786606962e-05, + "loss": 0.1563, "step": 54850 }, { "epoch": 2.56, - "learning_rate": 1.4950541465472788e-05, - "loss": 0.1339, + "learning_rate": 2.4958419796233596e-05, + "loss": 0.1945, "step": 54855 }, { "epoch": 2.56, - "learning_rate": 1.4950072664197648e-05, - "loss": 0.33, + "learning_rate": 2.4957951726397576e-05, + "loss": 0.1793, "step": 54860 }, { "epoch": 2.56, - "learning_rate": 1.4949603862922508e-05, - "loss": 0.2939, + "learning_rate": 2.495748365656156e-05, + "loss": 0.2664, "step": 54865 }, { "epoch": 2.56, - "learning_rate": 1.494913506164737e-05, - "loss": 0.0765, + "learning_rate": 2.495701558672554e-05, + "loss": 0.039, "step": 54870 }, { "epoch": 2.56, - "learning_rate": 1.494866626037223e-05, - "loss": 0.0295, + "learning_rate": 2.495654751688952e-05, + "loss": 0.0367, "step": 54875 }, { "epoch": 2.56, - "learning_rate": 1.4948197459097089e-05, - "loss": 0.0551, + "learning_rate": 2.4956079447053502e-05, + "loss": 0.0369, "step": 54880 }, { "epoch": 2.56, - "learning_rate": 1.4947728657821952e-05, - "loss": 0.0445, + "learning_rate": 2.4955611377217482e-05, + "loss": 0.0672, "step": 54885 }, { "epoch": 2.56, - "learning_rate": 1.4947259856546812e-05, - "loss": 0.0828, + "learning_rate": 2.4955143307381462e-05, + "loss": 0.0556, "step": 54890 }, { "epoch": 2.56, - "learning_rate": 1.4946791055271672e-05, - "loss": 0.0708, + "learning_rate": 2.4954675237545442e-05, + "loss": 0.1584, "step": 54895 }, { "epoch": 2.56, - "learning_rate": 1.4946322253996532e-05, - "loss": 0.1411, + "learning_rate": 2.4954207167709425e-05, + "loss": 0.0898, "step": 54900 }, { "epoch": 2.56, - "learning_rate": 1.4945853452721392e-05, - "loss": 0.0814, + "learning_rate": 2.4953739097873405e-05, + "loss": 0.1182, "step": 54905 }, { "epoch": 2.56, - "learning_rate": 1.4945384651446254e-05, - "loss": 0.3506, + "learning_rate": 2.4953271028037385e-05, + "loss": 0.1661, "step": 54910 }, { "epoch": 2.56, - "learning_rate": 1.4944915850171114e-05, - "loss": 0.265, + "learning_rate": 2.4952802958201364e-05, + "loss": 0.1882, "step": 54915 }, { "epoch": 2.56, - "learning_rate": 1.4944447048895974e-05, - "loss": 0.0974, + "learning_rate": 2.4952334888365344e-05, + "loss": 0.0429, "step": 54920 }, { "epoch": 2.56, - "learning_rate": 1.4943978247620834e-05, - "loss": 0.0545, + "learning_rate": 2.4951866818529324e-05, + "loss": 0.0296, "step": 54925 }, { "epoch": 2.56, - "learning_rate": 1.4943509446345694e-05, - "loss": 0.0914, + "learning_rate": 2.4951398748693304e-05, + "loss": 0.0698, "step": 54930 }, { "epoch": 2.56, - "learning_rate": 1.4943040645070557e-05, - "loss": 0.0748, + "learning_rate": 2.4950930678857287e-05, + "loss": 0.0282, "step": 54935 }, { "epoch": 2.56, - "learning_rate": 1.4942571843795417e-05, - "loss": 0.0855, + "learning_rate": 2.4950462609021267e-05, + "loss": 0.0385, "step": 54940 }, { "epoch": 2.56, - "learning_rate": 1.4942103042520277e-05, - "loss": 0.1053, + "learning_rate": 2.4949994539185247e-05, + "loss": 0.0697, "step": 54945 }, { "epoch": 2.56, - "learning_rate": 1.4941634241245138e-05, - "loss": 0.0821, + "learning_rate": 2.4949526469349227e-05, + "loss": 0.0864, "step": 54950 }, { "epoch": 2.56, - "learning_rate": 1.4941165439969998e-05, - "loss": 0.2162, + "learning_rate": 2.494905839951321e-05, + "loss": 0.2991, "step": 54955 }, { "epoch": 2.56, - "learning_rate": 1.4940696638694858e-05, - "loss": 0.1372, + "learning_rate": 2.494859032967719e-05, + "loss": 0.3339, "step": 54960 }, { "epoch": 2.56, - "learning_rate": 1.4940227837419718e-05, - "loss": 0.333, + "learning_rate": 2.494812225984117e-05, + "loss": 0.3374, "step": 54965 }, { "epoch": 2.56, - "learning_rate": 1.4939759036144578e-05, - "loss": 0.0484, + "learning_rate": 2.494765419000515e-05, + "loss": 0.0754, "step": 54970 }, { "epoch": 2.57, - "learning_rate": 1.493929023486944e-05, - "loss": 0.0849, + "learning_rate": 2.4947186120169132e-05, + "loss": 0.0798, "step": 54975 }, { "epoch": 2.57, - "learning_rate": 1.4938821433594301e-05, - "loss": 0.0428, + "learning_rate": 2.494671805033311e-05, + "loss": 0.0414, "step": 54980 }, { "epoch": 2.57, - "learning_rate": 1.4938352632319161e-05, - "loss": 0.0314, + "learning_rate": 2.494624998049709e-05, + "loss": 0.0265, "step": 54985 }, { "epoch": 2.57, - "learning_rate": 1.4937883831044023e-05, - "loss": 0.0623, + "learning_rate": 2.494578191066107e-05, + "loss": 0.0548, "step": 54990 }, { "epoch": 2.57, - "learning_rate": 1.4937415029768883e-05, - "loss": 0.2235, + "learning_rate": 2.494531384082505e-05, + "loss": 0.0758, "step": 54995 }, { "epoch": 2.57, - "learning_rate": 1.4936946228493743e-05, - "loss": 0.1594, + "learning_rate": 2.494484577098903e-05, + "loss": 0.0983, "step": 55000 }, { "epoch": 2.57, - "learning_rate": 1.4936477427218603e-05, - "loss": 0.1248, + "learning_rate": 2.494437770115301e-05, + "loss": 0.1379, "step": 55005 }, { "epoch": 2.57, - "learning_rate": 1.4936008625943463e-05, - "loss": 0.2323, + "learning_rate": 2.4943909631316995e-05, + "loss": 0.2312, "step": 55010 }, { "epoch": 2.57, - "learning_rate": 1.4935539824668324e-05, - "loss": 0.1702, + "learning_rate": 2.4943441561480974e-05, + "loss": 0.2212, "step": 55015 }, { "epoch": 2.57, - "learning_rate": 1.4935071023393184e-05, - "loss": 0.0247, + "learning_rate": 2.4942973491644954e-05, + "loss": 0.0245, "step": 55020 }, { "epoch": 2.57, - "learning_rate": 1.4934602222118046e-05, - "loss": 0.0557, + "learning_rate": 2.4942505421808934e-05, + "loss": 0.0547, "step": 55025 }, { "epoch": 2.57, - "learning_rate": 1.4934133420842907e-05, - "loss": 0.0506, + "learning_rate": 2.4942037351972917e-05, + "loss": 0.0427, "step": 55030 }, { "epoch": 2.57, - "learning_rate": 1.4933664619567767e-05, - "loss": 0.1097, + "learning_rate": 2.4941569282136897e-05, + "loss": 0.0377, "step": 55035 }, { "epoch": 2.57, - "learning_rate": 1.4933195818292627e-05, - "loss": 0.0586, + "learning_rate": 2.4941101212300877e-05, + "loss": 0.1138, "step": 55040 }, { "epoch": 2.57, - "learning_rate": 1.4932727017017487e-05, - "loss": 0.1202, + "learning_rate": 2.4940633142464853e-05, + "loss": 0.154, "step": 55045 }, { "epoch": 2.57, - "learning_rate": 1.4932258215742347e-05, - "loss": 0.141, + "learning_rate": 2.4940165072628836e-05, + "loss": 0.1163, "step": 55050 }, { "epoch": 2.57, - "learning_rate": 1.4931789414467209e-05, - "loss": 0.1917, + "learning_rate": 2.4939697002792816e-05, + "loss": 0.2235, "step": 55055 }, { "epoch": 2.57, - "learning_rate": 1.4931320613192069e-05, - "loss": 0.2892, + "learning_rate": 2.4939228932956796e-05, + "loss": 0.2344, "step": 55060 }, { "epoch": 2.57, - "learning_rate": 1.4930851811916929e-05, - "loss": 0.1868, + "learning_rate": 2.493876086312078e-05, + "loss": 0.3719, "step": 55065 }, { "epoch": 2.57, - "learning_rate": 1.4930383010641789e-05, - "loss": 0.0403, + "learning_rate": 2.493829279328476e-05, + "loss": 0.0359, "step": 55070 }, { "epoch": 2.57, - "learning_rate": 1.4929914209366652e-05, - "loss": 0.051, + "learning_rate": 2.493782472344874e-05, + "loss": 0.0035, "step": 55075 }, { "epoch": 2.57, - "learning_rate": 1.4929445408091512e-05, - "loss": 0.0712, + "learning_rate": 2.493735665361272e-05, + "loss": 0.0642, "step": 55080 }, { "epoch": 2.57, - "learning_rate": 1.4928976606816372e-05, - "loss": 0.1299, + "learning_rate": 2.4936888583776702e-05, + "loss": 0.0889, "step": 55085 }, { "epoch": 2.57, - "learning_rate": 1.4928507805541232e-05, - "loss": 0.1003, + "learning_rate": 2.4936420513940682e-05, + "loss": 0.1642, "step": 55090 }, { "epoch": 2.57, - "learning_rate": 1.4928039004266093e-05, - "loss": 0.0991, + "learning_rate": 2.493595244410466e-05, + "loss": 0.124, "step": 55095 }, { "epoch": 2.57, - "learning_rate": 1.4927570202990953e-05, - "loss": 0.0788, + "learning_rate": 2.493548437426864e-05, + "loss": 0.0738, "step": 55100 }, { "epoch": 2.57, - "learning_rate": 1.4927101401715813e-05, - "loss": 0.1653, + "learning_rate": 2.4935016304432625e-05, + "loss": 0.1238, "step": 55105 }, { "epoch": 2.57, - "learning_rate": 1.4926632600440673e-05, - "loss": 0.2166, + "learning_rate": 2.49345482345966e-05, + "loss": 0.3144, "step": 55110 }, { "epoch": 2.57, - "learning_rate": 1.4926163799165533e-05, - "loss": 0.3222, + "learning_rate": 2.493408016476058e-05, + "loss": 0.2044, "step": 55115 }, { "epoch": 2.57, - "learning_rate": 1.4925694997890396e-05, - "loss": 0.0577, + "learning_rate": 2.4933612094924564e-05, + "loss": 0.0599, "step": 55120 }, { "epoch": 2.57, - "learning_rate": 1.4925226196615256e-05, - "loss": 0.0373, + "learning_rate": 2.4933144025088544e-05, + "loss": 0.0402, "step": 55125 }, { "epoch": 2.57, - "learning_rate": 1.4924757395340116e-05, - "loss": 0.0392, + "learning_rate": 2.4932675955252524e-05, + "loss": 0.0926, "step": 55130 }, { "epoch": 2.57, - "learning_rate": 1.4924288594064978e-05, - "loss": 0.0796, + "learning_rate": 2.4932207885416504e-05, + "loss": 0.0623, "step": 55135 }, { "epoch": 2.57, - "learning_rate": 1.4923819792789838e-05, - "loss": 0.0793, + "learning_rate": 2.4931739815580487e-05, + "loss": 0.0852, "step": 55140 }, { "epoch": 2.57, - "learning_rate": 1.4923350991514698e-05, - "loss": 0.1843, + "learning_rate": 2.4931271745744467e-05, + "loss": 0.1275, "step": 55145 }, { "epoch": 2.57, - "learning_rate": 1.4922882190239558e-05, - "loss": 0.1432, + "learning_rate": 2.4930803675908446e-05, + "loss": 0.2069, "step": 55150 }, { "epoch": 2.57, - "learning_rate": 1.4922413388964418e-05, - "loss": 0.146, + "learning_rate": 2.4930335606072426e-05, + "loss": 0.0938, "step": 55155 }, { "epoch": 2.57, - "learning_rate": 1.492194458768928e-05, - "loss": 0.2226, + "learning_rate": 2.492986753623641e-05, + "loss": 0.2646, "step": 55160 }, { "epoch": 2.57, - "learning_rate": 1.4921475786414141e-05, - "loss": 0.3435, + "learning_rate": 2.492939946640039e-05, + "loss": 0.4099, "step": 55165 }, { "epoch": 2.57, - "learning_rate": 1.4921006985139e-05, - "loss": 0.0359, + "learning_rate": 2.4928931396564366e-05, + "loss": 0.0682, "step": 55170 }, { "epoch": 2.57, - "learning_rate": 1.4920538183863862e-05, - "loss": 0.0193, + "learning_rate": 2.492846332672835e-05, + "loss": 0.017, "step": 55175 }, { "epoch": 2.57, - "learning_rate": 1.4920069382588722e-05, - "loss": 0.0916, + "learning_rate": 2.492799525689233e-05, + "loss": 0.0336, "step": 55180 }, { "epoch": 2.58, - "learning_rate": 1.4919600581313582e-05, - "loss": 0.0995, + "learning_rate": 2.492752718705631e-05, + "loss": 0.0702, "step": 55185 }, { "epoch": 2.58, - "learning_rate": 1.4919131780038442e-05, - "loss": 0.1484, + "learning_rate": 2.4927059117220288e-05, + "loss": 0.0996, "step": 55190 }, { "epoch": 2.58, - "learning_rate": 1.4918662978763302e-05, - "loss": 0.0871, + "learning_rate": 2.492659104738427e-05, + "loss": 0.0921, "step": 55195 }, { "epoch": 2.58, - "learning_rate": 1.4918194177488164e-05, - "loss": 0.14, + "learning_rate": 2.492612297754825e-05, + "loss": 0.1747, "step": 55200 }, { "epoch": 2.58, - "learning_rate": 1.4917725376213024e-05, - "loss": 0.1125, + "learning_rate": 2.492565490771223e-05, + "loss": 0.0909, "step": 55205 }, { "epoch": 2.58, - "learning_rate": 1.4917256574937885e-05, - "loss": 0.2089, + "learning_rate": 2.492518683787621e-05, + "loss": 0.5925, "step": 55210 }, { "epoch": 2.58, - "learning_rate": 1.4916787773662747e-05, - "loss": 0.2097, + "learning_rate": 2.4924718768040194e-05, + "loss": 0.267, "step": 55215 }, { "epoch": 2.58, - "learning_rate": 1.4916318972387607e-05, - "loss": 0.0522, + "learning_rate": 2.4924250698204174e-05, + "loss": 0.0249, "step": 55220 }, { "epoch": 2.58, - "learning_rate": 1.4915850171112467e-05, - "loss": 0.0457, + "learning_rate": 2.4923782628368154e-05, + "loss": 0.0301, "step": 55225 }, { "epoch": 2.58, - "learning_rate": 1.4915381369837327e-05, - "loss": 0.0382, + "learning_rate": 2.4923314558532134e-05, + "loss": 0.0575, "step": 55230 }, { "epoch": 2.58, - "learning_rate": 1.4914912568562188e-05, - "loss": 0.0416, + "learning_rate": 2.4922846488696113e-05, + "loss": 0.0627, "step": 55235 }, { "epoch": 2.58, - "learning_rate": 1.4914443767287048e-05, - "loss": 0.1091, + "learning_rate": 2.4922378418860093e-05, + "loss": 0.0823, "step": 55240 }, { "epoch": 2.58, - "learning_rate": 1.4913974966011908e-05, - "loss": 0.1649, + "learning_rate": 2.4921910349024073e-05, + "loss": 0.0871, "step": 55245 }, { "epoch": 2.58, - "learning_rate": 1.4913506164736768e-05, - "loss": 0.1362, + "learning_rate": 2.4921442279188056e-05, + "loss": 0.1885, "step": 55250 }, { "epoch": 2.58, - "learning_rate": 1.4913037363461628e-05, - "loss": 0.1366, + "learning_rate": 2.4920974209352036e-05, + "loss": 0.1823, "step": 55255 }, { "epoch": 2.58, - "learning_rate": 1.4912568562186492e-05, - "loss": 0.2839, + "learning_rate": 2.4920506139516016e-05, + "loss": 0.1295, "step": 55260 }, { "epoch": 2.58, - "learning_rate": 1.4912099760911351e-05, - "loss": 0.2657, + "learning_rate": 2.4920038069679996e-05, + "loss": 0.2162, "step": 55265 }, { "epoch": 2.58, - "learning_rate": 1.4911630959636211e-05, - "loss": 0.0139, + "learning_rate": 2.491956999984398e-05, + "loss": 0.014, "step": 55270 }, { "epoch": 2.58, - "learning_rate": 1.4911162158361073e-05, - "loss": 0.057, + "learning_rate": 2.491910193000796e-05, + "loss": 0.0814, "step": 55275 }, { "epoch": 2.58, - "learning_rate": 1.4910693357085933e-05, - "loss": 0.0532, + "learning_rate": 2.491863386017194e-05, + "loss": 0.0591, "step": 55280 }, { "epoch": 2.58, - "learning_rate": 1.4910224555810793e-05, - "loss": 0.0633, + "learning_rate": 2.491816579033592e-05, + "loss": 0.0482, "step": 55285 }, { "epoch": 2.58, - "learning_rate": 1.4909755754535653e-05, - "loss": 0.0604, + "learning_rate": 2.49176977204999e-05, + "loss": 0.0677, "step": 55290 }, { "epoch": 2.58, - "learning_rate": 1.4909286953260513e-05, - "loss": 0.1417, + "learning_rate": 2.491722965066388e-05, + "loss": 0.044, "step": 55295 }, { "epoch": 2.58, - "learning_rate": 1.4908818151985374e-05, - "loss": 0.1108, + "learning_rate": 2.4916761580827858e-05, + "loss": 0.0898, "step": 55300 }, { "epoch": 2.58, - "learning_rate": 1.4908349350710236e-05, - "loss": 0.158, + "learning_rate": 2.491629351099184e-05, + "loss": 0.2449, "step": 55305 }, { "epoch": 2.58, - "learning_rate": 1.4907880549435096e-05, - "loss": 0.2174, + "learning_rate": 2.491582544115582e-05, + "loss": 0.3145, "step": 55310 }, { "epoch": 2.58, - "learning_rate": 1.4907411748159958e-05, - "loss": 0.2398, + "learning_rate": 2.49153573713198e-05, + "loss": 0.2186, "step": 55315 }, { "epoch": 2.58, - "learning_rate": 1.4906942946884817e-05, - "loss": 0.0446, + "learning_rate": 2.491488930148378e-05, + "loss": 0.0839, "step": 55320 }, { "epoch": 2.58, - "learning_rate": 1.4906474145609677e-05, - "loss": 0.0344, + "learning_rate": 2.4914421231647764e-05, + "loss": 0.0311, "step": 55325 }, { "epoch": 2.58, - "learning_rate": 1.4906005344334537e-05, - "loss": 0.0328, + "learning_rate": 2.4913953161811743e-05, + "loss": 0.0593, "step": 55330 }, { "epoch": 2.58, - "learning_rate": 1.4905536543059397e-05, - "loss": 0.1214, + "learning_rate": 2.4913485091975723e-05, + "loss": 0.044, "step": 55335 }, { "epoch": 2.58, - "learning_rate": 1.4905067741784259e-05, - "loss": 0.0773, + "learning_rate": 2.4913017022139703e-05, + "loss": 0.0976, "step": 55340 }, { "epoch": 2.58, - "learning_rate": 1.4904598940509119e-05, - "loss": 0.0828, + "learning_rate": 2.4912548952303686e-05, + "loss": 0.114, "step": 55345 }, { "epoch": 2.58, - "learning_rate": 1.490413013923398e-05, - "loss": 0.1193, + "learning_rate": 2.4912080882467666e-05, + "loss": 0.1655, "step": 55350 }, { "epoch": 2.58, - "learning_rate": 1.4903661337958842e-05, - "loss": 0.1879, + "learning_rate": 2.4911612812631646e-05, + "loss": 0.2045, "step": 55355 }, { "epoch": 2.58, - "learning_rate": 1.4903192536683702e-05, - "loss": 0.2597, + "learning_rate": 2.4911144742795626e-05, + "loss": 0.1919, "step": 55360 }, { "epoch": 2.58, - "learning_rate": 1.4902723735408562e-05, - "loss": 0.243, + "learning_rate": 2.4910676672959606e-05, + "loss": 0.24, "step": 55365 }, { "epoch": 2.58, - "learning_rate": 1.4902254934133422e-05, - "loss": 0.1397, + "learning_rate": 2.4910208603123585e-05, + "loss": 0.0574, "step": 55370 }, { "epoch": 2.58, - "learning_rate": 1.4901786132858282e-05, - "loss": 0.0523, + "learning_rate": 2.4909740533287565e-05, + "loss": 0.0477, "step": 55375 }, { "epoch": 2.58, - "learning_rate": 1.4901317331583143e-05, - "loss": 0.0604, + "learning_rate": 2.490927246345155e-05, + "loss": 0.0812, "step": 55380 }, { "epoch": 2.58, - "learning_rate": 1.4900848530308003e-05, - "loss": 0.0544, + "learning_rate": 2.4908804393615528e-05, + "loss": 0.0386, "step": 55385 }, { "epoch": 2.58, - "learning_rate": 1.4900379729032863e-05, - "loss": 0.067, + "learning_rate": 2.4908336323779508e-05, + "loss": 0.1101, "step": 55390 }, { "epoch": 2.58, - "learning_rate": 1.4899910927757723e-05, - "loss": 0.1031, + "learning_rate": 2.4907868253943488e-05, + "loss": 0.096, "step": 55395 }, { "epoch": 2.59, - "learning_rate": 1.4899442126482587e-05, - "loss": 0.1228, + "learning_rate": 2.490740018410747e-05, + "loss": 0.0696, "step": 55400 }, { "epoch": 2.59, - "learning_rate": 1.4898973325207447e-05, - "loss": 0.1742, + "learning_rate": 2.490693211427145e-05, + "loss": 0.1879, "step": 55405 }, { "epoch": 2.59, - "learning_rate": 1.4898504523932306e-05, - "loss": 0.1916, + "learning_rate": 2.490646404443543e-05, + "loss": 0.4245, "step": 55410 }, { "epoch": 2.59, - "learning_rate": 1.4898035722657166e-05, - "loss": 0.2808, + "learning_rate": 2.490599597459941e-05, + "loss": 0.3266, "step": 55415 }, { "epoch": 2.59, - "learning_rate": 1.4897566921382028e-05, - "loss": 0.0512, + "learning_rate": 2.4905527904763394e-05, + "loss": 0.0176, "step": 55420 }, { "epoch": 2.59, - "learning_rate": 1.4897098120106888e-05, - "loss": 0.0845, + "learning_rate": 2.490505983492737e-05, + "loss": 0.0238, "step": 55425 }, { "epoch": 2.59, - "learning_rate": 1.4896629318831748e-05, - "loss": 0.0709, + "learning_rate": 2.490459176509135e-05, + "loss": 0.0475, "step": 55430 }, { "epoch": 2.59, - "learning_rate": 1.4896160517556608e-05, - "loss": 0.0582, + "learning_rate": 2.4904123695255333e-05, + "loss": 0.098, "step": 55435 }, { "epoch": 2.59, - "learning_rate": 1.4895691716281468e-05, - "loss": 0.0797, + "learning_rate": 2.4903655625419313e-05, + "loss": 0.1087, "step": 55440 }, { "epoch": 2.59, - "learning_rate": 1.4895222915006331e-05, - "loss": 0.0405, + "learning_rate": 2.4903187555583293e-05, + "loss": 0.0753, "step": 55445 }, { "epoch": 2.59, - "learning_rate": 1.4894754113731191e-05, - "loss": 0.139, + "learning_rate": 2.4902719485747273e-05, + "loss": 0.1167, "step": 55450 }, { "epoch": 2.59, - "learning_rate": 1.4894285312456051e-05, - "loss": 0.1857, + "learning_rate": 2.4902251415911256e-05, + "loss": 0.0595, "step": 55455 }, { "epoch": 2.59, - "learning_rate": 1.4893816511180913e-05, - "loss": 0.2593, + "learning_rate": 2.4901783346075236e-05, + "loss": 0.1833, "step": 55460 }, { "epoch": 2.59, - "learning_rate": 1.4893347709905773e-05, - "loss": 0.354, + "learning_rate": 2.4901315276239216e-05, + "loss": 0.3303, "step": 55465 }, { "epoch": 2.59, - "learning_rate": 1.4892878908630632e-05, - "loss": 0.0158, + "learning_rate": 2.4900847206403195e-05, + "loss": 0.0577, "step": 55470 }, { "epoch": 2.59, - "learning_rate": 1.4892410107355492e-05, - "loss": 0.0485, + "learning_rate": 2.490037913656718e-05, + "loss": 0.0685, "step": 55475 }, { "epoch": 2.59, - "learning_rate": 1.4891941306080352e-05, - "loss": 0.0488, + "learning_rate": 2.489991106673116e-05, + "loss": 0.0419, "step": 55480 }, { "epoch": 2.59, - "learning_rate": 1.4891472504805214e-05, - "loss": 0.0494, + "learning_rate": 2.4899442996895138e-05, + "loss": 0.0418, "step": 55485 }, { "epoch": 2.59, - "learning_rate": 1.4891003703530076e-05, - "loss": 0.141, + "learning_rate": 2.4898974927059118e-05, + "loss": 0.1089, "step": 55490 }, { "epoch": 2.59, - "learning_rate": 1.4890534902254935e-05, - "loss": 0.0894, + "learning_rate": 2.4898506857223098e-05, + "loss": 0.1051, "step": 55495 }, { "epoch": 2.59, - "learning_rate": 1.4890066100979797e-05, - "loss": 0.1028, + "learning_rate": 2.4898038787387078e-05, + "loss": 0.1464, "step": 55500 }, { "epoch": 2.59, - "learning_rate": 1.4889597299704657e-05, - "loss": 0.135, + "learning_rate": 2.4897570717551057e-05, + "loss": 0.1362, "step": 55505 }, { "epoch": 2.59, - "learning_rate": 1.4889128498429517e-05, - "loss": 0.1806, + "learning_rate": 2.489710264771504e-05, + "loss": 0.1921, "step": 55510 }, { "epoch": 2.59, - "learning_rate": 1.4888659697154377e-05, - "loss": 0.2292, + "learning_rate": 2.489663457787902e-05, + "loss": 0.1861, "step": 55515 }, { "epoch": 2.59, - "learning_rate": 1.4888190895879237e-05, - "loss": 0.0453, + "learning_rate": 2.4896166508043e-05, + "loss": 0.0843, "step": 55520 }, { "epoch": 2.59, - "learning_rate": 1.4887722094604098e-05, - "loss": 0.0331, + "learning_rate": 2.489569843820698e-05, + "loss": 0.0699, "step": 55525 }, { "epoch": 2.59, - "learning_rate": 1.4887253293328958e-05, - "loss": 0.0538, + "learning_rate": 2.4895230368370963e-05, + "loss": 0.0606, "step": 55530 }, { "epoch": 2.59, - "learning_rate": 1.488678449205382e-05, - "loss": 0.0925, + "learning_rate": 2.4894762298534943e-05, + "loss": 0.0675, "step": 55535 }, { "epoch": 2.59, - "learning_rate": 1.4886315690778682e-05, - "loss": 0.0954, + "learning_rate": 2.4894294228698923e-05, + "loss": 0.1192, "step": 55540 }, { "epoch": 2.59, - "learning_rate": 1.4885846889503542e-05, - "loss": 0.0866, + "learning_rate": 2.4893826158862906e-05, + "loss": 0.0594, "step": 55545 }, { "epoch": 2.59, - "learning_rate": 1.4885378088228402e-05, - "loss": 0.1247, + "learning_rate": 2.4893358089026883e-05, + "loss": 0.1487, "step": 55550 }, { "epoch": 2.59, - "learning_rate": 1.4884909286953261e-05, - "loss": 0.2373, + "learning_rate": 2.4892890019190862e-05, + "loss": 0.1876, "step": 55555 }, { "epoch": 2.59, - "learning_rate": 1.4884440485678121e-05, - "loss": 0.2644, + "learning_rate": 2.4892421949354842e-05, + "loss": 0.2431, "step": 55560 }, { "epoch": 2.59, - "learning_rate": 1.4883971684402983e-05, - "loss": 0.2973, + "learning_rate": 2.4891953879518825e-05, + "loss": 0.1864, "step": 55565 }, { "epoch": 2.59, - "learning_rate": 1.4883502883127843e-05, - "loss": 0.0809, + "learning_rate": 2.4891485809682805e-05, + "loss": 0.0444, "step": 55570 }, { "epoch": 2.59, - "learning_rate": 1.4883034081852703e-05, - "loss": 0.0438, + "learning_rate": 2.4891017739846785e-05, + "loss": 0.0379, "step": 55575 }, { "epoch": 2.59, - "learning_rate": 1.4882565280577563e-05, - "loss": 0.0554, + "learning_rate": 2.4890549670010765e-05, + "loss": 0.0577, "step": 55580 }, { "epoch": 2.59, - "learning_rate": 1.4882096479302426e-05, - "loss": 0.0847, + "learning_rate": 2.4890081600174748e-05, + "loss": 0.0597, "step": 55585 }, { "epoch": 2.59, - "learning_rate": 1.4881627678027286e-05, - "loss": 0.1215, + "learning_rate": 2.4889613530338728e-05, + "loss": 0.0855, "step": 55590 }, { "epoch": 2.59, - "learning_rate": 1.4881158876752146e-05, - "loss": 0.0465, + "learning_rate": 2.4889145460502708e-05, + "loss": 0.0489, "step": 55595 }, { "epoch": 2.59, - "learning_rate": 1.4880690075477006e-05, - "loss": 0.1264, + "learning_rate": 2.4888677390666688e-05, + "loss": 0.0751, "step": 55600 }, { "epoch": 2.59, - "learning_rate": 1.4880221274201868e-05, - "loss": 0.1865, + "learning_rate": 2.488820932083067e-05, + "loss": 0.2856, "step": 55605 }, { "epoch": 2.59, - "learning_rate": 1.4879752472926728e-05, - "loss": 0.2029, + "learning_rate": 2.488774125099465e-05, + "loss": 0.236, "step": 55610 }, { "epoch": 2.6, - "learning_rate": 1.4879283671651587e-05, - "loss": 0.2115, + "learning_rate": 2.4887273181158627e-05, + "loss": 0.2413, "step": 55615 }, { "epoch": 2.6, - "learning_rate": 1.4878814870376447e-05, - "loss": 0.0313, + "learning_rate": 2.488680511132261e-05, + "loss": 0.0216, "step": 55620 }, { "epoch": 2.6, - "learning_rate": 1.4878346069101309e-05, - "loss": 0.0566, + "learning_rate": 2.488633704148659e-05, + "loss": 0.0681, "step": 55625 }, { "epoch": 2.6, - "learning_rate": 1.487787726782617e-05, - "loss": 0.0854, + "learning_rate": 2.488586897165057e-05, + "loss": 0.0589, "step": 55630 }, { "epoch": 2.6, - "learning_rate": 1.487740846655103e-05, - "loss": 0.0547, + "learning_rate": 2.488540090181455e-05, + "loss": 0.0702, "step": 55635 }, { "epoch": 2.6, - "learning_rate": 1.487693966527589e-05, - "loss": 0.0987, + "learning_rate": 2.4884932831978533e-05, + "loss": 0.0506, "step": 55640 }, { "epoch": 2.6, - "learning_rate": 1.4876470864000752e-05, - "loss": 0.0807, + "learning_rate": 2.4884464762142513e-05, + "loss": 0.0524, "step": 55645 }, { "epoch": 2.6, - "learning_rate": 1.4876002062725612e-05, - "loss": 0.1574, + "learning_rate": 2.4883996692306492e-05, + "loss": 0.2176, "step": 55650 }, { "epoch": 2.6, - "learning_rate": 1.4875533261450472e-05, - "loss": 0.1102, + "learning_rate": 2.4883528622470472e-05, + "loss": 0.1403, "step": 55655 }, { "epoch": 2.6, - "learning_rate": 1.4875064460175332e-05, - "loss": 0.3842, + "learning_rate": 2.4883060552634456e-05, + "loss": 0.2125, "step": 55660 }, { "epoch": 2.6, - "learning_rate": 1.4874595658900194e-05, - "loss": 0.2638, + "learning_rate": 2.4882592482798435e-05, + "loss": 0.3621, "step": 55665 }, { "epoch": 2.6, - "learning_rate": 1.4874126857625053e-05, - "loss": 0.0447, + "learning_rate": 2.4882124412962415e-05, + "loss": 0.0241, "step": 55670 }, { "epoch": 2.6, - "learning_rate": 1.4873658056349915e-05, - "loss": 0.0429, + "learning_rate": 2.4881656343126395e-05, + "loss": 0.0337, "step": 55675 }, { "epoch": 2.6, - "learning_rate": 1.4873189255074775e-05, - "loss": 0.056, + "learning_rate": 2.4881188273290375e-05, + "loss": 0.0556, "step": 55680 }, { "epoch": 2.6, - "learning_rate": 1.4872720453799637e-05, - "loss": 0.0471, + "learning_rate": 2.4880720203454355e-05, + "loss": 0.0622, "step": 55685 }, { "epoch": 2.6, - "learning_rate": 1.4872251652524497e-05, - "loss": 0.1042, + "learning_rate": 2.4880252133618334e-05, + "loss": 0.1056, "step": 55690 }, { "epoch": 2.6, - "learning_rate": 1.4871782851249357e-05, - "loss": 0.1077, + "learning_rate": 2.4879784063782318e-05, + "loss": 0.1868, "step": 55695 }, { "epoch": 2.6, - "learning_rate": 1.4871314049974216e-05, - "loss": 0.117, + "learning_rate": 2.4879315993946297e-05, + "loss": 0.1624, "step": 55700 }, { "epoch": 2.6, - "learning_rate": 1.4870845248699078e-05, - "loss": 0.1148, + "learning_rate": 2.4878847924110277e-05, + "loss": 0.1596, "step": 55705 }, { "epoch": 2.6, - "learning_rate": 1.4870376447423938e-05, - "loss": 0.2782, + "learning_rate": 2.4878379854274257e-05, + "loss": 0.1789, "step": 55710 }, { "epoch": 2.6, - "learning_rate": 1.4869907646148798e-05, - "loss": 0.2159, + "learning_rate": 2.487791178443824e-05, + "loss": 0.2479, "step": 55715 }, { "epoch": 2.6, - "learning_rate": 1.4869438844873658e-05, - "loss": 0.0824, + "learning_rate": 2.487744371460222e-05, + "loss": 0.0446, "step": 55720 }, { "epoch": 2.6, - "learning_rate": 1.4868970043598521e-05, - "loss": 0.0114, + "learning_rate": 2.48769756447662e-05, + "loss": 0.0764, "step": 55725 }, { "epoch": 2.6, - "learning_rate": 1.4868501242323381e-05, - "loss": 0.0292, + "learning_rate": 2.4876507574930183e-05, + "loss": 0.0528, "step": 55730 }, { "epoch": 2.6, - "learning_rate": 1.4868032441048241e-05, - "loss": 0.0636, + "learning_rate": 2.4876039505094163e-05, + "loss": 0.0915, "step": 55735 }, { "epoch": 2.6, - "learning_rate": 1.4867563639773101e-05, - "loss": 0.0672, + "learning_rate": 2.487557143525814e-05, + "loss": 0.1073, "step": 55740 }, { "epoch": 2.6, - "learning_rate": 1.4867094838497963e-05, - "loss": 0.1228, + "learning_rate": 2.487510336542212e-05, + "loss": 0.0716, "step": 55745 }, { "epoch": 2.6, - "learning_rate": 1.4866626037222823e-05, - "loss": 0.1111, + "learning_rate": 2.4874635295586102e-05, + "loss": 0.1146, "step": 55750 }, { "epoch": 2.6, - "learning_rate": 1.4866157235947683e-05, - "loss": 0.1702, + "learning_rate": 2.4874167225750082e-05, + "loss": 0.1948, "step": 55755 }, { "epoch": 2.6, - "learning_rate": 1.4865688434672542e-05, - "loss": 0.1814, + "learning_rate": 2.4873699155914062e-05, + "loss": 0.2386, "step": 55760 }, { "epoch": 2.6, - "learning_rate": 1.4865219633397402e-05, - "loss": 0.2145, + "learning_rate": 2.4873231086078042e-05, + "loss": 0.2818, "step": 55765 }, { "epoch": 2.6, - "learning_rate": 1.4864750832122266e-05, - "loss": 0.0433, + "learning_rate": 2.4872763016242025e-05, + "loss": 0.0279, "step": 55770 }, { "epoch": 2.6, - "learning_rate": 1.4864282030847126e-05, - "loss": 0.0851, + "learning_rate": 2.4872294946406005e-05, + "loss": 0.039, "step": 55775 }, { "epoch": 2.6, - "learning_rate": 1.4863813229571986e-05, - "loss": 0.0285, + "learning_rate": 2.4871826876569985e-05, + "loss": 0.0803, "step": 55780 }, { "epoch": 2.6, - "learning_rate": 1.4863344428296847e-05, - "loss": 0.084, + "learning_rate": 2.4871358806733965e-05, + "loss": 0.0976, "step": 55785 }, { "epoch": 2.6, - "learning_rate": 1.4862875627021707e-05, - "loss": 0.0675, + "learning_rate": 2.4870890736897948e-05, + "loss": 0.0641, "step": 55790 }, { "epoch": 2.6, - "learning_rate": 1.4862406825746567e-05, - "loss": 0.0788, + "learning_rate": 2.4870422667061928e-05, + "loss": 0.1088, "step": 55795 }, { "epoch": 2.6, - "learning_rate": 1.4861938024471427e-05, - "loss": 0.1191, + "learning_rate": 2.4869954597225907e-05, + "loss": 0.2088, "step": 55800 }, { "epoch": 2.6, - "learning_rate": 1.4861469223196287e-05, - "loss": 0.2021, + "learning_rate": 2.4869486527389887e-05, + "loss": 0.1582, "step": 55805 }, { "epoch": 2.6, - "learning_rate": 1.4861000421921149e-05, - "loss": 0.343, + "learning_rate": 2.4869018457553867e-05, + "loss": 0.2437, "step": 55810 }, { "epoch": 2.6, - "learning_rate": 1.486053162064601e-05, - "loss": 0.2186, + "learning_rate": 2.4868550387717847e-05, + "loss": 0.2285, "step": 55815 }, { "epoch": 2.6, - "learning_rate": 1.486006281937087e-05, - "loss": 0.0562, + "learning_rate": 2.4868082317881827e-05, + "loss": 0.0515, "step": 55820 }, { "epoch": 2.6, - "learning_rate": 1.4859594018095732e-05, - "loss": 0.0369, + "learning_rate": 2.486761424804581e-05, + "loss": 0.0516, "step": 55825 }, { "epoch": 2.61, - "learning_rate": 1.4859125216820592e-05, - "loss": 0.0856, + "learning_rate": 2.486714617820979e-05, + "loss": 0.0385, "step": 55830 }, { "epoch": 2.61, - "learning_rate": 1.4858656415545452e-05, - "loss": 0.1225, + "learning_rate": 2.486667810837377e-05, + "loss": 0.0661, "step": 55835 }, { "epoch": 2.61, - "learning_rate": 1.4858187614270312e-05, - "loss": 0.124, + "learning_rate": 2.486621003853775e-05, + "loss": 0.1022, "step": 55840 }, { "epoch": 2.61, - "learning_rate": 1.4857718812995172e-05, - "loss": 0.1764, + "learning_rate": 2.4865741968701732e-05, + "loss": 0.102, "step": 55845 }, { "epoch": 2.61, - "learning_rate": 1.4857250011720033e-05, - "loss": 0.15, + "learning_rate": 2.4865273898865712e-05, + "loss": 0.1489, "step": 55850 }, { "epoch": 2.61, - "learning_rate": 1.4856781210444893e-05, - "loss": 0.1934, + "learning_rate": 2.4864805829029692e-05, + "loss": 0.2035, "step": 55855 }, { "epoch": 2.61, - "learning_rate": 1.4856312409169755e-05, - "loss": 0.3649, + "learning_rate": 2.4864337759193675e-05, + "loss": 0.2289, "step": 55860 }, { "epoch": 2.61, - "learning_rate": 1.4855843607894616e-05, - "loss": 0.3696, + "learning_rate": 2.4863869689357652e-05, + "loss": 0.2436, "step": 55865 }, { "epoch": 2.61, - "learning_rate": 1.4855374806619476e-05, - "loss": 0.0612, + "learning_rate": 2.486340161952163e-05, + "loss": 0.0206, "step": 55870 }, { "epoch": 2.61, - "learning_rate": 1.4854906005344336e-05, - "loss": 0.0421, + "learning_rate": 2.486293354968561e-05, + "loss": 0.0459, "step": 55875 }, { "epoch": 2.61, - "learning_rate": 1.4854437204069196e-05, - "loss": 0.0431, + "learning_rate": 2.4862465479849595e-05, + "loss": 0.1318, "step": 55880 }, { "epoch": 2.61, - "learning_rate": 1.4853968402794056e-05, - "loss": 0.0768, + "learning_rate": 2.4861997410013574e-05, + "loss": 0.0235, "step": 55885 }, { "epoch": 2.61, - "learning_rate": 1.4853499601518918e-05, - "loss": 0.0669, + "learning_rate": 2.4861529340177554e-05, + "loss": 0.0769, "step": 55890 }, { "epoch": 2.61, - "learning_rate": 1.4853030800243778e-05, - "loss": 0.1025, + "learning_rate": 2.4861061270341534e-05, + "loss": 0.1074, "step": 55895 }, { "epoch": 2.61, - "learning_rate": 1.4852561998968638e-05, - "loss": 0.1342, + "learning_rate": 2.4860593200505517e-05, + "loss": 0.1184, "step": 55900 }, { "epoch": 2.61, - "learning_rate": 1.4852093197693497e-05, - "loss": 0.1633, + "learning_rate": 2.4860125130669497e-05, + "loss": 0.1981, "step": 55905 }, { "epoch": 2.61, - "learning_rate": 1.485162439641836e-05, - "loss": 0.2885, + "learning_rate": 2.4859657060833477e-05, + "loss": 0.2717, "step": 55910 }, { "epoch": 2.61, - "learning_rate": 1.485115559514322e-05, - "loss": 0.2572, + "learning_rate": 2.485918899099746e-05, + "loss": 0.3014, "step": 55915 }, { "epoch": 2.61, - "learning_rate": 1.485068679386808e-05, - "loss": 0.0396, + "learning_rate": 2.485872092116144e-05, + "loss": 0.0629, "step": 55920 }, { "epoch": 2.61, - "learning_rate": 1.485021799259294e-05, - "loss": 0.1123, + "learning_rate": 2.485825285132542e-05, + "loss": 0.0372, "step": 55925 }, { "epoch": 2.61, - "learning_rate": 1.4849749191317802e-05, - "loss": 0.0515, + "learning_rate": 2.4857784781489396e-05, + "loss": 0.0103, "step": 55930 }, { "epoch": 2.61, - "learning_rate": 1.4849280390042662e-05, - "loss": 0.0417, + "learning_rate": 2.485731671165338e-05, + "loss": 0.0399, "step": 55935 }, { "epoch": 2.61, - "learning_rate": 1.4848811588767522e-05, - "loss": 0.0986, + "learning_rate": 2.485684864181736e-05, + "loss": 0.0618, "step": 55940 }, { "epoch": 2.61, - "learning_rate": 1.4848342787492382e-05, - "loss": 0.0709, + "learning_rate": 2.485638057198134e-05, + "loss": 0.0404, "step": 55945 }, { "epoch": 2.61, - "learning_rate": 1.4847873986217242e-05, - "loss": 0.1535, + "learning_rate": 2.485591250214532e-05, + "loss": 0.1291, "step": 55950 }, { "epoch": 2.61, - "learning_rate": 1.4847405184942105e-05, - "loss": 0.1495, + "learning_rate": 2.4855444432309302e-05, + "loss": 0.1403, "step": 55955 }, { "epoch": 2.61, - "learning_rate": 1.4846936383666965e-05, - "loss": 0.2546, + "learning_rate": 2.4854976362473282e-05, + "loss": 0.2289, "step": 55960 }, { "epoch": 2.61, - "learning_rate": 1.4846467582391825e-05, - "loss": 0.3005, + "learning_rate": 2.485450829263726e-05, + "loss": 0.3351, "step": 55965 }, { "epoch": 2.61, - "learning_rate": 1.4845998781116687e-05, - "loss": 0.0819, + "learning_rate": 2.4854040222801245e-05, + "loss": 0.0568, "step": 55970 }, { "epoch": 2.61, - "learning_rate": 1.4845529979841547e-05, - "loss": 0.06, + "learning_rate": 2.4853572152965225e-05, + "loss": 0.0629, "step": 55975 }, { "epoch": 2.61, - "learning_rate": 1.4845061178566407e-05, - "loss": 0.0428, + "learning_rate": 2.4853104083129204e-05, + "loss": 0.0856, "step": 55980 }, { "epoch": 2.61, - "learning_rate": 1.4844592377291267e-05, - "loss": 0.099, + "learning_rate": 2.4852636013293184e-05, + "loss": 0.1445, "step": 55985 }, { "epoch": 2.61, - "learning_rate": 1.4844123576016127e-05, - "loss": 0.0624, + "learning_rate": 2.4852167943457164e-05, + "loss": 0.132, "step": 55990 }, { "epoch": 2.61, - "learning_rate": 1.4843654774740988e-05, - "loss": 0.0641, + "learning_rate": 2.4851699873621144e-05, + "loss": 0.0855, "step": 55995 }, { "epoch": 2.61, - "learning_rate": 1.484318597346585e-05, - "loss": 0.1507, + "learning_rate": 2.4851231803785124e-05, + "loss": 0.2332, "step": 56000 }, { "epoch": 2.61, - "learning_rate": 1.484271717219071e-05, - "loss": 0.128, + "learning_rate": 2.4850763733949104e-05, + "loss": 0.1916, "step": 56005 }, { "epoch": 2.61, - "learning_rate": 1.4842248370915571e-05, - "loss": 0.2025, + "learning_rate": 2.4850295664113087e-05, + "loss": 0.2721, "step": 56010 }, { "epoch": 2.61, - "learning_rate": 1.4841779569640431e-05, - "loss": 0.2703, + "learning_rate": 2.4849827594277067e-05, + "loss": 0.258, "step": 56015 }, { "epoch": 2.61, - "learning_rate": 1.4841310768365291e-05, - "loss": 0.0541, + "learning_rate": 2.4849359524441046e-05, + "loss": 0.0678, "step": 56020 }, { "epoch": 2.61, - "learning_rate": 1.4840841967090151e-05, - "loss": 0.0346, + "learning_rate": 2.4848891454605026e-05, + "loss": 0.0619, "step": 56025 }, { "epoch": 2.61, - "learning_rate": 1.4840373165815011e-05, - "loss": 0.0098, + "learning_rate": 2.484842338476901e-05, + "loss": 0.0764, "step": 56030 }, { "epoch": 2.61, - "learning_rate": 1.4839904364539873e-05, - "loss": 0.064, + "learning_rate": 2.484795531493299e-05, + "loss": 0.0429, "step": 56035 }, { "epoch": 2.61, - "learning_rate": 1.4839435563264733e-05, - "loss": 0.1539, + "learning_rate": 2.484748724509697e-05, + "loss": 0.078, "step": 56040 }, { "epoch": 2.62, - "learning_rate": 1.4838966761989593e-05, - "loss": 0.141, + "learning_rate": 2.4847019175260952e-05, + "loss": 0.0844, "step": 56045 }, { "epoch": 2.62, - "learning_rate": 1.4838497960714456e-05, - "loss": 0.1373, + "learning_rate": 2.4846551105424932e-05, + "loss": 0.0517, "step": 56050 }, { "epoch": 2.62, - "learning_rate": 1.4838029159439316e-05, - "loss": 0.2292, + "learning_rate": 2.484608303558891e-05, + "loss": 0.2129, "step": 56055 }, { "epoch": 2.62, - "learning_rate": 1.4837560358164176e-05, - "loss": 0.2279, + "learning_rate": 2.484561496575289e-05, + "loss": 0.2468, "step": 56060 }, { "epoch": 2.62, - "learning_rate": 1.4837091556889036e-05, - "loss": 0.381, + "learning_rate": 2.484514689591687e-05, + "loss": 0.3149, "step": 56065 }, { "epoch": 2.62, - "learning_rate": 1.4836622755613896e-05, - "loss": 0.063, + "learning_rate": 2.484467882608085e-05, + "loss": 0.061, "step": 56070 }, { "epoch": 2.62, - "learning_rate": 1.4836153954338757e-05, - "loss": 0.0523, + "learning_rate": 2.484421075624483e-05, + "loss": 0.0645, "step": 56075 }, { "epoch": 2.62, - "learning_rate": 1.4835685153063617e-05, - "loss": 0.1318, + "learning_rate": 2.484374268640881e-05, + "loss": 0.0696, "step": 56080 }, { "epoch": 2.62, - "learning_rate": 1.4835216351788477e-05, - "loss": 0.0529, + "learning_rate": 2.4843274616572794e-05, + "loss": 0.0416, "step": 56085 }, { "epoch": 2.62, - "learning_rate": 1.4834747550513337e-05, - "loss": 0.0833, + "learning_rate": 2.4842806546736774e-05, + "loss": 0.0714, "step": 56090 }, { "epoch": 2.62, - "learning_rate": 1.48342787492382e-05, - "loss": 0.2288, + "learning_rate": 2.4842338476900754e-05, + "loss": 0.0705, "step": 56095 }, { "epoch": 2.62, - "learning_rate": 1.483380994796306e-05, - "loss": 0.137, + "learning_rate": 2.4841870407064737e-05, + "loss": 0.1065, "step": 56100 }, { "epoch": 2.62, - "learning_rate": 1.483334114668792e-05, - "loss": 0.1722, + "learning_rate": 2.4841402337228717e-05, + "loss": 0.1444, "step": 56105 }, { "epoch": 2.62, - "learning_rate": 1.483287234541278e-05, - "loss": 0.3163, + "learning_rate": 2.4840934267392697e-05, + "loss": 0.2391, "step": 56110 }, { "epoch": 2.62, - "learning_rate": 1.4832403544137642e-05, - "loss": 0.4034, + "learning_rate": 2.4840466197556677e-05, + "loss": 0.2695, "step": 56115 }, { "epoch": 2.62, - "learning_rate": 1.4831934742862502e-05, - "loss": 0.0912, + "learning_rate": 2.4839998127720656e-05, + "loss": 0.0378, "step": 56120 }, { "epoch": 2.62, - "learning_rate": 1.4831465941587362e-05, - "loss": 0.0487, + "learning_rate": 2.4839530057884636e-05, + "loss": 0.0255, "step": 56125 }, { "epoch": 2.62, - "learning_rate": 1.4830997140312222e-05, - "loss": 0.0635, + "learning_rate": 2.4839061988048616e-05, + "loss": 0.0216, "step": 56130 }, { "epoch": 2.62, - "learning_rate": 1.4830528339037083e-05, - "loss": 0.065, + "learning_rate": 2.4838593918212596e-05, + "loss": 0.0649, "step": 56135 }, { "epoch": 2.62, - "learning_rate": 1.4830059537761945e-05, - "loss": 0.0522, + "learning_rate": 2.483812584837658e-05, + "loss": 0.0932, "step": 56140 }, { "epoch": 2.62, - "learning_rate": 1.4829590736486805e-05, - "loss": 0.1437, + "learning_rate": 2.483765777854056e-05, + "loss": 0.0792, "step": 56145 }, { "epoch": 2.62, - "learning_rate": 1.4829121935211665e-05, - "loss": 0.1129, + "learning_rate": 2.483718970870454e-05, + "loss": 0.2263, "step": 56150 }, { "epoch": 2.62, - "learning_rate": 1.4828653133936526e-05, - "loss": 0.1928, + "learning_rate": 2.4836721638868522e-05, + "loss": 0.2608, "step": 56155 }, { "epoch": 2.62, - "learning_rate": 1.4828184332661386e-05, - "loss": 0.2616, + "learning_rate": 2.48362535690325e-05, + "loss": 0.171, "step": 56160 }, { "epoch": 2.62, - "learning_rate": 1.4827715531386246e-05, - "loss": 0.2916, + "learning_rate": 2.483578549919648e-05, + "loss": 0.3316, "step": 56165 }, { "epoch": 2.62, - "learning_rate": 1.4827246730111106e-05, - "loss": 0.0677, + "learning_rate": 2.483531742936046e-05, + "loss": 0.0179, "step": 56170 }, { "epoch": 2.62, - "learning_rate": 1.4826777928835968e-05, - "loss": 0.0552, + "learning_rate": 2.4834849359524444e-05, + "loss": 0.0258, "step": 56175 }, { "epoch": 2.62, - "learning_rate": 1.4826309127560828e-05, - "loss": 0.0957, + "learning_rate": 2.483438128968842e-05, + "loss": 0.0419, "step": 56180 }, { "epoch": 2.62, - "learning_rate": 1.482584032628569e-05, - "loss": 0.0683, + "learning_rate": 2.48339132198524e-05, + "loss": 0.0238, "step": 56185 }, { "epoch": 2.62, - "learning_rate": 1.482537152501055e-05, - "loss": 0.1212, + "learning_rate": 2.483344515001638e-05, + "loss": 0.0839, "step": 56190 }, { "epoch": 2.62, - "learning_rate": 1.4824902723735411e-05, - "loss": 0.0701, + "learning_rate": 2.4832977080180364e-05, + "loss": 0.1399, "step": 56195 }, { "epoch": 2.62, - "learning_rate": 1.482443392246027e-05, - "loss": 0.1366, + "learning_rate": 2.4832509010344344e-05, + "loss": 0.1505, "step": 56200 }, { "epoch": 2.62, - "learning_rate": 1.482396512118513e-05, - "loss": 0.184, + "learning_rate": 2.4832040940508323e-05, + "loss": 0.248, "step": 56205 }, { "epoch": 2.62, - "learning_rate": 1.482349631990999e-05, - "loss": 0.1445, + "learning_rate": 2.4831572870672303e-05, + "loss": 0.2085, "step": 56210 }, { "epoch": 2.62, - "learning_rate": 1.4823027518634852e-05, - "loss": 0.2239, + "learning_rate": 2.4831104800836286e-05, + "loss": 0.2723, "step": 56215 }, { "epoch": 2.62, - "learning_rate": 1.4822558717359712e-05, - "loss": 0.0696, + "learning_rate": 2.4830636731000266e-05, + "loss": 0.0246, "step": 56220 }, { "epoch": 2.62, - "learning_rate": 1.4822089916084572e-05, - "loss": 0.0632, + "learning_rate": 2.4830168661164246e-05, + "loss": 0.0453, "step": 56225 }, { "epoch": 2.62, - "learning_rate": 1.4821621114809432e-05, - "loss": 0.0973, + "learning_rate": 2.482970059132823e-05, + "loss": 0.0159, "step": 56230 }, { "epoch": 2.62, - "learning_rate": 1.4821152313534295e-05, - "loss": 0.0835, + "learning_rate": 2.482923252149221e-05, + "loss": 0.0821, "step": 56235 }, { "epoch": 2.62, - "learning_rate": 1.4820683512259155e-05, - "loss": 0.0855, + "learning_rate": 2.482876445165619e-05, + "loss": 0.0611, "step": 56240 }, { "epoch": 2.62, - "learning_rate": 1.4820214710984015e-05, - "loss": 0.065, + "learning_rate": 2.4828296381820165e-05, + "loss": 0.0454, "step": 56245 }, { "epoch": 2.62, - "learning_rate": 1.4819745909708875e-05, - "loss": 0.1209, + "learning_rate": 2.482782831198415e-05, + "loss": 0.1062, "step": 56250 }, { "epoch": 2.62, - "learning_rate": 1.4819277108433737e-05, - "loss": 0.1289, + "learning_rate": 2.482736024214813e-05, + "loss": 0.1669, "step": 56255 }, { "epoch": 2.63, - "learning_rate": 1.4818808307158597e-05, - "loss": 0.1962, + "learning_rate": 2.4826892172312108e-05, + "loss": 0.1415, "step": 56260 }, { "epoch": 2.63, - "learning_rate": 1.4818339505883457e-05, - "loss": 0.2531, + "learning_rate": 2.4826424102476088e-05, + "loss": 0.2721, "step": 56265 }, { "epoch": 2.63, - "learning_rate": 1.4817870704608317e-05, - "loss": 0.0775, + "learning_rate": 2.482595603264007e-05, + "loss": 0.0676, "step": 56270 }, { "epoch": 2.63, - "learning_rate": 1.4817401903333177e-05, - "loss": 0.0633, + "learning_rate": 2.482548796280405e-05, + "loss": 0.1631, "step": 56275 }, { "epoch": 2.63, - "learning_rate": 1.481693310205804e-05, - "loss": 0.0361, + "learning_rate": 2.482501989296803e-05, + "loss": 0.0285, "step": 56280 }, { "epoch": 2.63, - "learning_rate": 1.48164643007829e-05, - "loss": 0.0948, + "learning_rate": 2.4824551823132014e-05, + "loss": 0.0791, "step": 56285 }, { "epoch": 2.63, - "learning_rate": 1.481599549950776e-05, - "loss": 0.402, + "learning_rate": 2.4824083753295994e-05, + "loss": 0.1029, "step": 56290 }, { "epoch": 2.63, - "learning_rate": 1.4815526698232621e-05, - "loss": 0.0857, + "learning_rate": 2.4823615683459974e-05, + "loss": 0.1792, "step": 56295 }, { "epoch": 2.63, - "learning_rate": 1.4815057896957481e-05, - "loss": 0.0479, + "learning_rate": 2.4823147613623953e-05, + "loss": 0.1128, "step": 56300 }, { "epoch": 2.63, - "learning_rate": 1.4814589095682341e-05, - "loss": 0.1922, + "learning_rate": 2.4822679543787937e-05, + "loss": 0.1171, "step": 56305 }, { "epoch": 2.63, - "learning_rate": 1.4814120294407201e-05, - "loss": 0.2842, + "learning_rate": 2.4822211473951913e-05, + "loss": 0.293, "step": 56310 }, { "epoch": 2.63, - "learning_rate": 1.4813651493132061e-05, - "loss": 0.2176, + "learning_rate": 2.4821743404115893e-05, + "loss": 0.3136, "step": 56315 }, { "epoch": 2.63, - "learning_rate": 1.4813182691856923e-05, - "loss": 0.0288, + "learning_rate": 2.4821275334279873e-05, + "loss": 0.0717, "step": 56320 }, { "epoch": 2.63, - "learning_rate": 1.4812713890581784e-05, - "loss": 0.053, + "learning_rate": 2.4820807264443856e-05, + "loss": 0.0332, "step": 56325 }, { "epoch": 2.63, - "learning_rate": 1.4812245089306644e-05, - "loss": 0.0439, + "learning_rate": 2.4820339194607836e-05, + "loss": 0.0609, "step": 56330 }, { "epoch": 2.63, - "learning_rate": 1.4811776288031506e-05, - "loss": 0.0837, + "learning_rate": 2.4819871124771816e-05, + "loss": 0.0853, "step": 56335 }, { "epoch": 2.63, - "learning_rate": 1.4811307486756366e-05, - "loss": 0.0933, + "learning_rate": 2.48194030549358e-05, + "loss": 0.0824, "step": 56340 }, { "epoch": 2.63, - "learning_rate": 1.4810838685481226e-05, - "loss": 0.0983, + "learning_rate": 2.481893498509978e-05, + "loss": 0.1004, "step": 56345 }, { "epoch": 2.63, - "learning_rate": 1.4810369884206086e-05, - "loss": 0.1216, + "learning_rate": 2.481846691526376e-05, + "loss": 0.1309, "step": 56350 }, { "epoch": 2.63, - "learning_rate": 1.4809901082930946e-05, - "loss": 0.2602, + "learning_rate": 2.4817998845427738e-05, + "loss": 0.1891, "step": 56355 }, { "epoch": 2.63, - "learning_rate": 1.4809432281655807e-05, - "loss": 0.2467, + "learning_rate": 2.481753077559172e-05, + "loss": 0.2519, "step": 56360 }, { "epoch": 2.63, - "learning_rate": 1.4808963480380667e-05, - "loss": 0.2412, + "learning_rate": 2.48170627057557e-05, + "loss": 0.1681, "step": 56365 }, { "epoch": 2.63, - "learning_rate": 1.4808494679105527e-05, - "loss": 0.1279, + "learning_rate": 2.4816594635919678e-05, + "loss": 0.0233, "step": 56370 }, { "epoch": 2.63, - "learning_rate": 1.480802587783039e-05, - "loss": 0.0306, + "learning_rate": 2.4816126566083658e-05, + "loss": 0.0524, "step": 56375 }, { "epoch": 2.63, - "learning_rate": 1.480755707655525e-05, - "loss": 0.0568, + "learning_rate": 2.481565849624764e-05, + "loss": 0.0128, "step": 56380 }, { "epoch": 2.63, - "learning_rate": 1.480708827528011e-05, - "loss": 0.1136, + "learning_rate": 2.481519042641162e-05, + "loss": 0.0438, "step": 56385 }, { "epoch": 2.63, - "learning_rate": 1.480661947400497e-05, - "loss": 0.0458, + "learning_rate": 2.48147223565756e-05, + "loss": 0.0893, "step": 56390 }, { "epoch": 2.63, - "learning_rate": 1.480615067272983e-05, - "loss": 0.1037, + "learning_rate": 2.481425428673958e-05, + "loss": 0.1187, "step": 56395 }, { "epoch": 2.63, - "learning_rate": 1.4805681871454692e-05, - "loss": 0.1402, + "learning_rate": 2.4813786216903563e-05, + "loss": 0.1523, "step": 56400 }, { "epoch": 2.63, - "learning_rate": 1.4805213070179552e-05, - "loss": 0.1404, + "learning_rate": 2.4813318147067543e-05, + "loss": 0.1913, "step": 56405 }, { "epoch": 2.63, - "learning_rate": 1.4804744268904412e-05, - "loss": 0.3928, + "learning_rate": 2.4812850077231523e-05, + "loss": 0.2801, "step": 56410 }, { "epoch": 2.63, - "learning_rate": 1.4804275467629272e-05, - "loss": 0.2752, + "learning_rate": 2.4812382007395506e-05, + "loss": 0.4468, "step": 56415 }, { "epoch": 2.63, - "learning_rate": 1.4803806666354135e-05, - "loss": 0.0587, + "learning_rate": 2.4811913937559486e-05, + "loss": 0.0819, "step": 56420 }, { "epoch": 2.63, - "learning_rate": 1.4803337865078995e-05, - "loss": 0.0282, + "learning_rate": 2.4811445867723466e-05, + "loss": 0.0279, "step": 56425 }, { "epoch": 2.63, - "learning_rate": 1.4802869063803855e-05, - "loss": 0.0494, + "learning_rate": 2.4810977797887446e-05, + "loss": 0.1139, "step": 56430 }, { "epoch": 2.63, - "learning_rate": 1.4802400262528715e-05, - "loss": 0.1203, + "learning_rate": 2.4810509728051426e-05, + "loss": 0.0545, "step": 56435 }, { "epoch": 2.63, - "learning_rate": 1.4801931461253576e-05, - "loss": 0.0802, + "learning_rate": 2.4810041658215405e-05, + "loss": 0.0967, "step": 56440 }, { "epoch": 2.63, - "learning_rate": 1.4801462659978436e-05, - "loss": 0.1469, + "learning_rate": 2.4809573588379385e-05, + "loss": 0.1217, "step": 56445 }, { "epoch": 2.63, - "learning_rate": 1.4800993858703296e-05, - "loss": 0.1011, + "learning_rate": 2.4809105518543365e-05, + "loss": 0.1794, "step": 56450 }, { "epoch": 2.63, - "learning_rate": 1.4800525057428156e-05, - "loss": 0.1856, + "learning_rate": 2.4808637448707348e-05, + "loss": 0.1976, "step": 56455 }, { "epoch": 2.63, - "learning_rate": 1.4800056256153016e-05, - "loss": 0.2412, + "learning_rate": 2.4808169378871328e-05, + "loss": 0.3034, "step": 56460 }, { "epoch": 2.63, - "learning_rate": 1.479958745487788e-05, - "loss": 0.2946, + "learning_rate": 2.4807701309035308e-05, + "loss": 0.205, "step": 56465 }, { "epoch": 2.63, - "learning_rate": 1.479911865360274e-05, - "loss": 0.0999, + "learning_rate": 2.480723323919929e-05, + "loss": 0.069, "step": 56470 }, { "epoch": 2.64, - "learning_rate": 1.47986498523276e-05, - "loss": 0.0356, + "learning_rate": 2.480676516936327e-05, + "loss": 0.0386, "step": 56475 }, { "epoch": 2.64, - "learning_rate": 1.4798181051052461e-05, - "loss": 0.0473, + "learning_rate": 2.480629709952725e-05, + "loss": 0.0713, "step": 56480 }, { "epoch": 2.64, - "learning_rate": 1.4797712249777321e-05, - "loss": 0.0465, + "learning_rate": 2.480582902969123e-05, + "loss": 0.0785, "step": 56485 }, { "epoch": 2.64, - "learning_rate": 1.4797243448502181e-05, - "loss": 0.1163, + "learning_rate": 2.4805360959855214e-05, + "loss": 0.0872, "step": 56490 }, { "epoch": 2.64, - "learning_rate": 1.479677464722704e-05, - "loss": 0.084, + "learning_rate": 2.4804892890019193e-05, + "loss": 0.1564, "step": 56495 }, { "epoch": 2.64, - "learning_rate": 1.47963058459519e-05, - "loss": 0.123, + "learning_rate": 2.480442482018317e-05, + "loss": 0.2005, "step": 56500 }, { "epoch": 2.64, - "learning_rate": 1.4795837044676762e-05, - "loss": 0.2126, + "learning_rate": 2.480395675034715e-05, + "loss": 0.0865, "step": 56505 }, { "epoch": 2.64, - "learning_rate": 1.4795368243401624e-05, - "loss": 0.2022, + "learning_rate": 2.4803488680511133e-05, + "loss": 0.213, "step": 56510 }, { "epoch": 2.64, - "learning_rate": 1.4794899442126484e-05, - "loss": 0.2718, + "learning_rate": 2.4803020610675113e-05, + "loss": 0.4198, "step": 56515 }, { "epoch": 2.64, - "learning_rate": 1.4794430640851346e-05, - "loss": 0.0692, + "learning_rate": 2.4802552540839093e-05, + "loss": 0.0398, "step": 56520 }, { "epoch": 2.64, - "learning_rate": 1.4793961839576205e-05, - "loss": 0.0684, + "learning_rate": 2.4802084471003076e-05, + "loss": 0.0896, "step": 56525 }, { "epoch": 2.64, - "learning_rate": 1.4793493038301065e-05, - "loss": 0.0628, + "learning_rate": 2.4801616401167056e-05, + "loss": 0.0395, "step": 56530 }, { "epoch": 2.64, - "learning_rate": 1.4793024237025925e-05, - "loss": 0.0465, + "learning_rate": 2.4801148331331035e-05, + "loss": 0.0644, "step": 56535 }, { "epoch": 2.64, - "learning_rate": 1.4792555435750785e-05, - "loss": 0.1655, + "learning_rate": 2.4800680261495015e-05, + "loss": 0.0664, "step": 56540 }, { "epoch": 2.64, - "learning_rate": 1.4792086634475647e-05, - "loss": 0.1165, + "learning_rate": 2.4800212191659e-05, + "loss": 0.1634, "step": 56545 }, { "epoch": 2.64, - "learning_rate": 1.4791617833200507e-05, - "loss": 0.1698, + "learning_rate": 2.4799744121822978e-05, + "loss": 0.0776, "step": 56550 }, { "epoch": 2.64, - "learning_rate": 1.4791149031925367e-05, - "loss": 0.0829, + "learning_rate": 2.4799276051986958e-05, + "loss": 0.1236, "step": 56555 }, { "epoch": 2.64, - "learning_rate": 1.479068023065023e-05, - "loss": 0.1631, + "learning_rate": 2.4798807982150934e-05, + "loss": 0.2574, "step": 56560 }, { "epoch": 2.64, - "learning_rate": 1.479021142937509e-05, - "loss": 0.2569, + "learning_rate": 2.4798339912314918e-05, + "loss": 0.3454, "step": 56565 }, { "epoch": 2.64, - "learning_rate": 1.478974262809995e-05, - "loss": 0.0661, + "learning_rate": 2.4797871842478898e-05, + "loss": 0.0262, "step": 56570 }, { "epoch": 2.64, - "learning_rate": 1.478927382682481e-05, - "loss": 0.0431, + "learning_rate": 2.4797403772642877e-05, + "loss": 0.1027, "step": 56575 }, { "epoch": 2.64, - "learning_rate": 1.478880502554967e-05, - "loss": 0.0998, + "learning_rate": 2.479693570280686e-05, + "loss": 0.0402, "step": 56580 }, { "epoch": 2.64, - "learning_rate": 1.4788336224274531e-05, - "loss": 0.067, + "learning_rate": 2.479646763297084e-05, + "loss": 0.0715, "step": 56585 }, { "epoch": 2.64, - "learning_rate": 1.4787867422999391e-05, - "loss": 0.1911, + "learning_rate": 2.479599956313482e-05, + "loss": 0.0632, "step": 56590 }, { "epoch": 2.64, - "learning_rate": 1.4787398621724251e-05, - "loss": 0.076, + "learning_rate": 2.47955314932988e-05, + "loss": 0.1507, "step": 56595 }, { "epoch": 2.64, - "learning_rate": 1.4786929820449111e-05, - "loss": 0.095, + "learning_rate": 2.4795063423462783e-05, + "loss": 0.1241, "step": 56600 }, { "epoch": 2.64, - "learning_rate": 1.4786461019173975e-05, - "loss": 0.1959, + "learning_rate": 2.4794595353626763e-05, + "loss": 0.1424, "step": 56605 }, { "epoch": 2.64, - "learning_rate": 1.4785992217898835e-05, - "loss": 0.1887, + "learning_rate": 2.4794127283790743e-05, + "loss": 0.2284, "step": 56610 }, { "epoch": 2.64, - "learning_rate": 1.4785523416623694e-05, - "loss": 0.3195, + "learning_rate": 2.4793659213954723e-05, + "loss": 0.2301, "step": 56615 }, { "epoch": 2.64, - "learning_rate": 1.4785054615348554e-05, - "loss": 0.0381, + "learning_rate": 2.4793191144118706e-05, + "loss": 0.0888, "step": 56620 }, { "epoch": 2.64, - "learning_rate": 1.4784585814073416e-05, - "loss": 0.0168, + "learning_rate": 2.4792723074282682e-05, + "loss": 0.0232, "step": 56625 }, { "epoch": 2.64, - "learning_rate": 1.4784117012798276e-05, - "loss": 0.0497, + "learning_rate": 2.4792255004446662e-05, + "loss": 0.0165, "step": 56630 }, { "epoch": 2.64, - "learning_rate": 1.4783648211523136e-05, - "loss": 0.0663, + "learning_rate": 2.4791786934610642e-05, + "loss": 0.0989, "step": 56635 }, { "epoch": 2.64, - "learning_rate": 1.4783179410247996e-05, - "loss": 0.153, + "learning_rate": 2.4791318864774625e-05, + "loss": 0.0603, "step": 56640 }, { "epoch": 2.64, - "learning_rate": 1.4782710608972857e-05, - "loss": 0.0553, + "learning_rate": 2.4790850794938605e-05, + "loss": 0.09, "step": 56645 }, { "epoch": 2.64, - "learning_rate": 1.4782241807697719e-05, - "loss": 0.0652, + "learning_rate": 2.4790382725102585e-05, + "loss": 0.1327, "step": 56650 }, { "epoch": 2.64, - "learning_rate": 1.4781773006422579e-05, - "loss": 0.1328, + "learning_rate": 2.4789914655266568e-05, + "loss": 0.1227, "step": 56655 }, { "epoch": 2.64, - "learning_rate": 1.478130420514744e-05, - "loss": 0.3094, + "learning_rate": 2.4789446585430548e-05, + "loss": 0.2768, "step": 56660 }, { "epoch": 2.64, - "learning_rate": 1.47808354038723e-05, - "loss": 0.3157, + "learning_rate": 2.4788978515594528e-05, + "loss": 0.2845, "step": 56665 }, { "epoch": 2.64, - "learning_rate": 1.478036660259716e-05, - "loss": 0.0119, + "learning_rate": 2.4788510445758507e-05, + "loss": 0.0072, "step": 56670 }, { "epoch": 2.64, - "learning_rate": 1.477989780132202e-05, - "loss": 0.05, + "learning_rate": 2.478804237592249e-05, + "loss": 0.0421, "step": 56675 }, { "epoch": 2.64, - "learning_rate": 1.477942900004688e-05, - "loss": 0.0535, + "learning_rate": 2.478757430608647e-05, + "loss": 0.0871, "step": 56680 }, { "epoch": 2.65, - "learning_rate": 1.4778960198771742e-05, - "loss": 0.0421, + "learning_rate": 2.478710623625045e-05, + "loss": 0.134, "step": 56685 }, { "epoch": 2.65, - "learning_rate": 1.4778491397496602e-05, - "loss": 0.0757, + "learning_rate": 2.4786638166414427e-05, + "loss": 0.108, "step": 56690 }, { "epoch": 2.65, - "learning_rate": 1.4778022596221462e-05, - "loss": 0.0902, + "learning_rate": 2.478617009657841e-05, + "loss": 0.0634, "step": 56695 }, { "epoch": 2.65, - "learning_rate": 1.4777553794946325e-05, - "loss": 0.1036, + "learning_rate": 2.478570202674239e-05, + "loss": 0.0972, "step": 56700 }, { "epoch": 2.65, - "learning_rate": 1.4777084993671185e-05, - "loss": 0.0464, + "learning_rate": 2.478523395690637e-05, + "loss": 0.151, "step": 56705 }, { "epoch": 2.65, - "learning_rate": 1.4776616192396045e-05, - "loss": 0.3221, + "learning_rate": 2.4784765887070353e-05, + "loss": 0.1934, "step": 56710 }, { "epoch": 2.65, - "learning_rate": 1.4776147391120905e-05, - "loss": 0.2555, + "learning_rate": 2.4784297817234333e-05, + "loss": 0.2165, "step": 56715 }, { "epoch": 2.65, - "learning_rate": 1.4775678589845765e-05, - "loss": 0.0529, + "learning_rate": 2.4783829747398312e-05, + "loss": 0.0419, "step": 56720 }, { "epoch": 2.65, - "learning_rate": 1.4775209788570627e-05, - "loss": 0.0143, + "learning_rate": 2.4783361677562292e-05, + "loss": 0.0295, "step": 56725 }, { "epoch": 2.65, - "learning_rate": 1.4774740987295486e-05, - "loss": 0.0219, + "learning_rate": 2.4782893607726275e-05, + "loss": 0.0331, "step": 56730 }, { "epoch": 2.65, - "learning_rate": 1.4774272186020346e-05, - "loss": 0.0693, + "learning_rate": 2.4782425537890255e-05, + "loss": 0.0786, "step": 56735 }, { "epoch": 2.65, - "learning_rate": 1.4773803384745206e-05, - "loss": 0.0585, + "learning_rate": 2.4781957468054235e-05, + "loss": 0.0559, "step": 56740 }, { "epoch": 2.65, - "learning_rate": 1.477333458347007e-05, - "loss": 0.1523, + "learning_rate": 2.4781489398218215e-05, + "loss": 0.0944, "step": 56745 }, { "epoch": 2.65, - "learning_rate": 1.477286578219493e-05, - "loss": 0.1027, + "learning_rate": 2.4781021328382195e-05, + "loss": 0.153, "step": 56750 }, { "epoch": 2.65, - "learning_rate": 1.477239698091979e-05, - "loss": 0.1413, + "learning_rate": 2.4780553258546174e-05, + "loss": 0.1432, "step": 56755 }, { "epoch": 2.65, - "learning_rate": 1.477192817964465e-05, - "loss": 0.3343, + "learning_rate": 2.4780085188710154e-05, + "loss": 0.1883, "step": 56760 }, { "epoch": 2.65, - "learning_rate": 1.4771459378369511e-05, - "loss": 0.2061, + "learning_rate": 2.4779617118874138e-05, + "loss": 0.3125, "step": 56765 }, { "epoch": 2.65, - "learning_rate": 1.4770990577094371e-05, - "loss": 0.0353, + "learning_rate": 2.4779149049038117e-05, + "loss": 0.0633, "step": 56770 }, { "epoch": 2.65, - "learning_rate": 1.4770521775819231e-05, - "loss": 0.0424, + "learning_rate": 2.4778680979202097e-05, + "loss": 0.0644, "step": 56775 }, { "epoch": 2.65, - "learning_rate": 1.4770052974544091e-05, - "loss": 0.0619, + "learning_rate": 2.4778212909366077e-05, + "loss": 0.1129, "step": 56780 }, { "epoch": 2.65, - "learning_rate": 1.476958417326895e-05, - "loss": 0.0789, + "learning_rate": 2.477774483953006e-05, + "loss": 0.0388, "step": 56785 }, { "epoch": 2.65, - "learning_rate": 1.4769115371993814e-05, - "loss": 0.1295, + "learning_rate": 2.477727676969404e-05, + "loss": 0.0727, "step": 56790 }, { "epoch": 2.65, - "learning_rate": 1.4768646570718674e-05, - "loss": 0.0659, + "learning_rate": 2.477680869985802e-05, + "loss": 0.1007, "step": 56795 }, { "epoch": 2.65, - "learning_rate": 1.4768177769443534e-05, - "loss": 0.133, + "learning_rate": 2.4776340630022e-05, + "loss": 0.1425, "step": 56800 }, { "epoch": 2.65, - "learning_rate": 1.4767708968168396e-05, - "loss": 0.2318, + "learning_rate": 2.4775872560185983e-05, + "loss": 0.2667, "step": 56805 }, { "epoch": 2.65, - "learning_rate": 1.4767240166893256e-05, - "loss": 0.2537, + "learning_rate": 2.4775404490349963e-05, + "loss": 0.2314, "step": 56810 }, { "epoch": 2.65, - "learning_rate": 1.4766771365618116e-05, - "loss": 0.306, + "learning_rate": 2.477493642051394e-05, + "loss": 0.2242, "step": 56815 }, { "epoch": 2.65, - "learning_rate": 1.4766302564342975e-05, - "loss": 0.0372, + "learning_rate": 2.477446835067792e-05, + "loss": 0.0418, "step": 56820 }, { "epoch": 2.65, - "learning_rate": 1.4765833763067835e-05, - "loss": 0.0766, + "learning_rate": 2.4774000280841902e-05, + "loss": 0.0443, "step": 56825 }, { "epoch": 2.65, - "learning_rate": 1.4765364961792697e-05, - "loss": 0.0259, + "learning_rate": 2.4773532211005882e-05, + "loss": 0.0666, "step": 56830 }, { "epoch": 2.65, - "learning_rate": 1.4764896160517559e-05, - "loss": 0.0714, + "learning_rate": 2.4773064141169862e-05, + "loss": 0.0267, "step": 56835 }, { "epoch": 2.65, - "learning_rate": 1.4764427359242419e-05, - "loss": 0.0551, + "learning_rate": 2.4772596071333845e-05, + "loss": 0.065, "step": 56840 }, { "epoch": 2.65, - "learning_rate": 1.476395855796728e-05, - "loss": 0.158, + "learning_rate": 2.4772128001497825e-05, + "loss": 0.0928, "step": 56845 }, { "epoch": 2.65, - "learning_rate": 1.476348975669214e-05, - "loss": 0.1384, + "learning_rate": 2.4771659931661805e-05, + "loss": 0.1144, "step": 56850 }, { "epoch": 2.65, - "learning_rate": 1.4763020955417e-05, - "loss": 0.2766, + "learning_rate": 2.4771191861825784e-05, + "loss": 0.1679, "step": 56855 }, { "epoch": 2.65, - "learning_rate": 1.476255215414186e-05, - "loss": 0.3387, + "learning_rate": 2.4770723791989768e-05, + "loss": 0.2068, "step": 56860 }, { "epoch": 2.65, - "learning_rate": 1.476208335286672e-05, - "loss": 0.2891, + "learning_rate": 2.4770255722153747e-05, + "loss": 0.1983, "step": 56865 }, { "epoch": 2.65, - "learning_rate": 1.4761614551591582e-05, - "loss": 0.0331, + "learning_rate": 2.4769787652317727e-05, + "loss": 0.0492, "step": 56870 }, { "epoch": 2.65, - "learning_rate": 1.4761145750316442e-05, - "loss": 0.0907, + "learning_rate": 2.4769319582481704e-05, + "loss": 0.0197, "step": 56875 }, { "epoch": 2.65, - "learning_rate": 1.4760676949041301e-05, - "loss": 0.0488, + "learning_rate": 2.4768851512645687e-05, + "loss": 0.0459, "step": 56880 }, { "epoch": 2.65, - "learning_rate": 1.4760208147766165e-05, - "loss": 0.062, + "learning_rate": 2.4768383442809667e-05, + "loss": 0.0812, "step": 56885 }, { "epoch": 2.65, - "learning_rate": 1.4759739346491025e-05, - "loss": 0.0947, + "learning_rate": 2.4767915372973647e-05, + "loss": 0.0608, "step": 56890 }, { "epoch": 2.65, - "learning_rate": 1.4759270545215885e-05, - "loss": 0.0958, + "learning_rate": 2.476744730313763e-05, + "loss": 0.1539, "step": 56895 }, { "epoch": 2.66, - "learning_rate": 1.4758801743940745e-05, - "loss": 0.1018, + "learning_rate": 2.476697923330161e-05, + "loss": 0.1474, "step": 56900 }, { "epoch": 2.66, - "learning_rate": 1.4758332942665604e-05, - "loss": 0.1602, + "learning_rate": 2.476651116346559e-05, + "loss": 0.1491, "step": 56905 }, { "epoch": 2.66, - "learning_rate": 1.4757864141390466e-05, - "loss": 0.2821, + "learning_rate": 2.476604309362957e-05, + "loss": 0.1857, "step": 56910 }, { "epoch": 2.66, - "learning_rate": 1.4757395340115326e-05, - "loss": 0.2309, + "learning_rate": 2.4765575023793552e-05, + "loss": 0.2067, "step": 56915 }, { "epoch": 2.66, - "learning_rate": 1.4756926538840186e-05, - "loss": 0.0593, + "learning_rate": 2.4765106953957532e-05, + "loss": 0.0749, "step": 56920 }, { "epoch": 2.66, - "learning_rate": 1.4756457737565046e-05, - "loss": 0.0374, + "learning_rate": 2.4764638884121512e-05, + "loss": 0.0356, "step": 56925 }, { "epoch": 2.66, - "learning_rate": 1.475598893628991e-05, - "loss": 0.0436, + "learning_rate": 2.4764170814285492e-05, + "loss": 0.0496, "step": 56930 }, { "epoch": 2.66, - "learning_rate": 1.475552013501477e-05, - "loss": 0.0978, + "learning_rate": 2.4763702744449475e-05, + "loss": 0.0854, "step": 56935 }, { "epoch": 2.66, - "learning_rate": 1.4755051333739629e-05, - "loss": 0.0841, + "learning_rate": 2.476323467461345e-05, + "loss": 0.1459, "step": 56940 }, { "epoch": 2.66, - "learning_rate": 1.4754582532464489e-05, - "loss": 0.124, + "learning_rate": 2.476276660477743e-05, + "loss": 0.0947, "step": 56945 }, { "epoch": 2.66, - "learning_rate": 1.475411373118935e-05, - "loss": 0.1214, + "learning_rate": 2.4762298534941414e-05, + "loss": 0.1419, "step": 56950 }, { "epoch": 2.66, - "learning_rate": 1.475364492991421e-05, - "loss": 0.11, + "learning_rate": 2.4761830465105394e-05, + "loss": 0.088, "step": 56955 }, { "epoch": 2.66, - "learning_rate": 1.475317612863907e-05, - "loss": 0.2251, + "learning_rate": 2.4761362395269374e-05, + "loss": 0.2764, "step": 56960 }, { "epoch": 2.66, - "learning_rate": 1.475270732736393e-05, - "loss": 0.3817, + "learning_rate": 2.4760894325433354e-05, + "loss": 0.2743, "step": 56965 }, { "epoch": 2.66, - "learning_rate": 1.4752238526088792e-05, - "loss": 0.0172, + "learning_rate": 2.4760426255597337e-05, + "loss": 0.0423, "step": 56970 }, { "epoch": 2.66, - "learning_rate": 1.4751769724813654e-05, - "loss": 0.0308, + "learning_rate": 2.4759958185761317e-05, + "loss": 0.038, "step": 56975 }, { "epoch": 2.66, - "learning_rate": 1.4751300923538514e-05, - "loss": 0.0851, + "learning_rate": 2.4759490115925297e-05, + "loss": 0.0391, "step": 56980 }, { "epoch": 2.66, - "learning_rate": 1.4750832122263374e-05, - "loss": 0.0944, + "learning_rate": 2.4759022046089277e-05, + "loss": 0.0262, "step": 56985 }, { "epoch": 2.66, - "learning_rate": 1.4750363320988235e-05, - "loss": 0.0882, + "learning_rate": 2.475855397625326e-05, + "loss": 0.0708, "step": 56990 }, { "epoch": 2.66, - "learning_rate": 1.4749894519713095e-05, - "loss": 0.1397, + "learning_rate": 2.475808590641724e-05, + "loss": 0.1333, "step": 56995 }, { "epoch": 2.66, - "learning_rate": 1.4749425718437955e-05, - "loss": 0.1041, + "learning_rate": 2.475761783658122e-05, + "loss": 0.0835, "step": 57000 }, { "epoch": 2.66, - "learning_rate": 1.4748956917162815e-05, - "loss": 0.1524, + "learning_rate": 2.4757149766745196e-05, + "loss": 0.1772, "step": 57005 }, { "epoch": 2.66, - "learning_rate": 1.4748488115887677e-05, - "loss": 0.2294, + "learning_rate": 2.475668169690918e-05, + "loss": 0.206, "step": 57010 }, { "epoch": 2.66, - "learning_rate": 1.4748019314612537e-05, - "loss": 0.3339, + "learning_rate": 2.475621362707316e-05, + "loss": 0.2662, "step": 57015 }, { "epoch": 2.66, - "learning_rate": 1.4747550513337397e-05, - "loss": 0.0251, + "learning_rate": 2.475574555723714e-05, + "loss": 0.0727, "step": 57020 }, { "epoch": 2.66, - "learning_rate": 1.4747081712062258e-05, - "loss": 0.0389, + "learning_rate": 2.4755277487401122e-05, + "loss": 0.0496, "step": 57025 }, { "epoch": 2.66, - "learning_rate": 1.474661291078712e-05, - "loss": 0.0649, + "learning_rate": 2.4754809417565102e-05, + "loss": 0.0757, "step": 57030 }, { "epoch": 2.66, - "learning_rate": 1.474614410951198e-05, - "loss": 0.1636, + "learning_rate": 2.475434134772908e-05, + "loss": 0.0172, "step": 57035 }, { "epoch": 2.66, - "learning_rate": 1.474567530823684e-05, - "loss": 0.0941, + "learning_rate": 2.475387327789306e-05, + "loss": 0.0645, "step": 57040 }, { "epoch": 2.66, - "learning_rate": 1.47452065069617e-05, - "loss": 0.1066, + "learning_rate": 2.4753405208057045e-05, + "loss": 0.0947, "step": 57045 }, { "epoch": 2.66, - "learning_rate": 1.4744737705686561e-05, - "loss": 0.155, + "learning_rate": 2.4752937138221024e-05, + "loss": 0.1176, "step": 57050 }, { "epoch": 2.66, - "learning_rate": 1.4744268904411421e-05, - "loss": 0.196, + "learning_rate": 2.4752469068385004e-05, + "loss": 0.1293, "step": 57055 }, { "epoch": 2.66, - "learning_rate": 1.4743800103136281e-05, - "loss": 0.2468, + "learning_rate": 2.4752000998548984e-05, + "loss": 0.3489, "step": 57060 }, { "epoch": 2.66, - "learning_rate": 1.4743331301861141e-05, - "loss": 0.2952, + "learning_rate": 2.4751532928712964e-05, + "loss": 0.2541, "step": 57065 }, { "epoch": 2.66, - "learning_rate": 1.4742862500586004e-05, - "loss": 0.0832, + "learning_rate": 2.4751064858876944e-05, + "loss": 0.0181, "step": 57070 }, { "epoch": 2.66, - "learning_rate": 1.4742393699310864e-05, - "loss": 0.0328, + "learning_rate": 2.4750596789040923e-05, + "loss": 0.0612, "step": 57075 }, { "epoch": 2.66, - "learning_rate": 1.4741924898035724e-05, - "loss": 0.075, + "learning_rate": 2.4750128719204907e-05, + "loss": 0.0179, "step": 57080 }, { "epoch": 2.66, - "learning_rate": 1.4741456096760584e-05, - "loss": 0.0648, + "learning_rate": 2.4749660649368887e-05, + "loss": 0.0416, "step": 57085 }, { "epoch": 2.66, - "learning_rate": 1.4740987295485446e-05, - "loss": 0.0571, + "learning_rate": 2.4749192579532866e-05, + "loss": 0.0869, "step": 57090 }, { "epoch": 2.66, - "learning_rate": 1.4740518494210306e-05, - "loss": 0.0371, + "learning_rate": 2.4748724509696846e-05, + "loss": 0.1073, "step": 57095 }, { "epoch": 2.66, - "learning_rate": 1.4740049692935166e-05, - "loss": 0.0917, + "learning_rate": 2.474825643986083e-05, + "loss": 0.1542, "step": 57100 }, { "epoch": 2.66, - "learning_rate": 1.4739580891660026e-05, - "loss": 0.1858, + "learning_rate": 2.474778837002481e-05, + "loss": 0.0929, "step": 57105 }, { "epoch": 2.66, - "learning_rate": 1.4739112090384885e-05, - "loss": 0.2585, + "learning_rate": 2.474732030018879e-05, + "loss": 0.2521, "step": 57110 }, { "epoch": 2.67, - "learning_rate": 1.4738643289109749e-05, - "loss": 0.2586, + "learning_rate": 2.474685223035277e-05, + "loss": 0.2317, "step": 57115 }, { "epoch": 2.67, - "learning_rate": 1.4738174487834609e-05, - "loss": 0.0449, + "learning_rate": 2.4746384160516752e-05, + "loss": 0.0269, "step": 57120 }, { "epoch": 2.67, - "learning_rate": 1.4737705686559469e-05, - "loss": 0.0409, + "learning_rate": 2.4745916090680732e-05, + "loss": 0.0535, "step": 57125 }, { "epoch": 2.67, - "learning_rate": 1.473723688528433e-05, - "loss": 0.0844, + "learning_rate": 2.4745448020844708e-05, + "loss": 0.044, "step": 57130 }, { "epoch": 2.67, - "learning_rate": 1.473676808400919e-05, - "loss": 0.1158, + "learning_rate": 2.474497995100869e-05, + "loss": 0.0247, "step": 57135 }, { "epoch": 2.67, - "learning_rate": 1.473629928273405e-05, - "loss": 0.0533, + "learning_rate": 2.474451188117267e-05, + "loss": 0.1443, "step": 57140 }, { "epoch": 2.67, - "learning_rate": 1.473583048145891e-05, - "loss": 0.0643, + "learning_rate": 2.474404381133665e-05, + "loss": 0.121, "step": 57145 }, { "epoch": 2.67, - "learning_rate": 1.473536168018377e-05, - "loss": 0.1587, + "learning_rate": 2.474357574150063e-05, + "loss": 0.1178, "step": 57150 }, { "epoch": 2.67, - "learning_rate": 1.4734892878908632e-05, - "loss": 0.1664, + "learning_rate": 2.4743107671664614e-05, + "loss": 0.0933, "step": 57155 }, { "epoch": 2.67, - "learning_rate": 1.4734424077633493e-05, - "loss": 0.3299, + "learning_rate": 2.4742639601828594e-05, + "loss": 0.2813, "step": 57160 }, { "epoch": 2.67, - "learning_rate": 1.4733955276358353e-05, - "loss": 0.2293, + "learning_rate": 2.4742171531992574e-05, + "loss": 0.301, "step": 57165 }, { "epoch": 2.67, - "learning_rate": 1.4733486475083215e-05, - "loss": 0.057, + "learning_rate": 2.4741703462156554e-05, + "loss": 0.1123, "step": 57170 }, { "epoch": 2.67, - "learning_rate": 1.4733017673808075e-05, - "loss": 0.0548, + "learning_rate": 2.4741235392320537e-05, + "loss": 0.0386, "step": 57175 }, { "epoch": 2.67, - "learning_rate": 1.4732548872532935e-05, - "loss": 0.0323, + "learning_rate": 2.4740767322484517e-05, + "loss": 0.0567, "step": 57180 }, { "epoch": 2.67, - "learning_rate": 1.4732080071257795e-05, - "loss": 0.0656, + "learning_rate": 2.4740299252648496e-05, + "loss": 0.0879, "step": 57185 }, { "epoch": 2.67, - "learning_rate": 1.4731611269982655e-05, - "loss": 0.0821, + "learning_rate": 2.4739831182812476e-05, + "loss": 0.0381, "step": 57190 }, { "epoch": 2.67, - "learning_rate": 1.4731142468707516e-05, - "loss": 0.1533, + "learning_rate": 2.4739363112976456e-05, + "loss": 0.1663, "step": 57195 }, { "epoch": 2.67, - "learning_rate": 1.4730673667432376e-05, - "loss": 0.1175, + "learning_rate": 2.4738895043140436e-05, + "loss": 0.1015, "step": 57200 }, { "epoch": 2.67, - "learning_rate": 1.4730204866157236e-05, - "loss": 0.1819, + "learning_rate": 2.4738426973304416e-05, + "loss": 0.0837, "step": 57205 }, { "epoch": 2.67, - "learning_rate": 1.47297360648821e-05, - "loss": 0.2317, + "learning_rate": 2.47379589034684e-05, + "loss": 0.2533, "step": 57210 }, { "epoch": 2.67, - "learning_rate": 1.472926726360696e-05, - "loss": 0.194, + "learning_rate": 2.473749083363238e-05, + "loss": 0.3941, "step": 57215 }, { "epoch": 2.67, - "learning_rate": 1.472879846233182e-05, - "loss": 0.0501, + "learning_rate": 2.473702276379636e-05, + "loss": 0.0044, "step": 57220 }, { "epoch": 2.67, - "learning_rate": 1.472832966105668e-05, - "loss": 0.061, + "learning_rate": 2.473655469396034e-05, + "loss": 0.0511, "step": 57225 }, { "epoch": 2.67, - "learning_rate": 1.4727860859781539e-05, - "loss": 0.0412, + "learning_rate": 2.473608662412432e-05, + "loss": 0.0827, "step": 57230 }, { "epoch": 2.67, - "learning_rate": 1.47273920585064e-05, - "loss": 0.027, + "learning_rate": 2.47356185542883e-05, + "loss": 0.0745, "step": 57235 }, { "epoch": 2.67, - "learning_rate": 1.472692325723126e-05, - "loss": 0.1291, + "learning_rate": 2.473515048445228e-05, + "loss": 0.0707, "step": 57240 }, { "epoch": 2.67, - "learning_rate": 1.472645445595612e-05, - "loss": 0.0757, + "learning_rate": 2.473468241461626e-05, + "loss": 0.1048, "step": 57245 }, { "epoch": 2.67, - "learning_rate": 1.472598565468098e-05, - "loss": 0.1663, + "learning_rate": 2.4734214344780244e-05, + "loss": 0.1516, "step": 57250 }, { "epoch": 2.67, - "learning_rate": 1.4725516853405844e-05, - "loss": 0.2098, + "learning_rate": 2.473374627494422e-05, + "loss": 0.1173, "step": 57255 }, { "epoch": 2.67, - "learning_rate": 1.4725048052130704e-05, - "loss": 0.2877, + "learning_rate": 2.47332782051082e-05, + "loss": 0.1629, "step": 57260 }, { "epoch": 2.67, - "learning_rate": 1.4724579250855564e-05, - "loss": 0.3111, + "learning_rate": 2.4732810135272184e-05, + "loss": 0.2528, "step": 57265 }, { "epoch": 2.67, - "learning_rate": 1.4724110449580424e-05, - "loss": 0.1081, + "learning_rate": 2.4732342065436163e-05, + "loss": 0.0543, "step": 57270 }, { "epoch": 2.67, - "learning_rate": 1.4723641648305285e-05, - "loss": 0.04, + "learning_rate": 2.4731873995600143e-05, + "loss": 0.0509, "step": 57275 }, { "epoch": 2.67, - "learning_rate": 1.4723172847030145e-05, - "loss": 0.0321, + "learning_rate": 2.4731405925764123e-05, + "loss": 0.1243, "step": 57280 }, { "epoch": 2.67, - "learning_rate": 1.4722704045755005e-05, - "loss": 0.0997, + "learning_rate": 2.4730937855928106e-05, + "loss": 0.0362, "step": 57285 }, { "epoch": 2.67, - "learning_rate": 1.4722235244479865e-05, - "loss": 0.0482, + "learning_rate": 2.4730469786092086e-05, + "loss": 0.0898, "step": 57290 }, { "epoch": 2.67, - "learning_rate": 1.4721766443204725e-05, - "loss": 0.122, + "learning_rate": 2.4730001716256066e-05, + "loss": 0.0491, "step": 57295 }, { "epoch": 2.67, - "learning_rate": 1.4721297641929588e-05, - "loss": 0.0914, + "learning_rate": 2.4729533646420046e-05, + "loss": 0.075, "step": 57300 }, { "epoch": 2.67, - "learning_rate": 1.4720828840654448e-05, - "loss": 0.1392, + "learning_rate": 2.472906557658403e-05, + "loss": 0.1388, "step": 57305 }, { "epoch": 2.67, - "learning_rate": 1.4720360039379308e-05, - "loss": 0.2636, + "learning_rate": 2.472859750674801e-05, + "loss": 0.2626, "step": 57310 }, { "epoch": 2.67, - "learning_rate": 1.471989123810417e-05, - "loss": 0.401, + "learning_rate": 2.472812943691199e-05, + "loss": 0.3333, "step": 57315 }, { "epoch": 2.67, - "learning_rate": 1.471942243682903e-05, - "loss": 0.0342, + "learning_rate": 2.472766136707597e-05, + "loss": 0.0611, "step": 57320 }, { "epoch": 2.67, - "learning_rate": 1.471895363555389e-05, - "loss": 0.0521, + "learning_rate": 2.4727193297239948e-05, + "loss": 0.0921, "step": 57325 }, { "epoch": 2.68, - "learning_rate": 1.471848483427875e-05, - "loss": 0.0773, + "learning_rate": 2.4726725227403928e-05, + "loss": 0.0896, "step": 57330 }, { "epoch": 2.68, - "learning_rate": 1.471801603300361e-05, - "loss": 0.0623, + "learning_rate": 2.4726257157567908e-05, + "loss": 0.0765, "step": 57335 }, { "epoch": 2.68, - "learning_rate": 1.4717547231728471e-05, - "loss": 0.0676, + "learning_rate": 2.472578908773189e-05, + "loss": 0.0917, "step": 57340 }, { "epoch": 2.68, - "learning_rate": 1.4717078430453331e-05, - "loss": 0.1184, + "learning_rate": 2.472532101789587e-05, + "loss": 0.1287, "step": 57345 }, { "epoch": 2.68, - "learning_rate": 1.4716609629178193e-05, - "loss": 0.1047, + "learning_rate": 2.472485294805985e-05, + "loss": 0.0531, "step": 57350 }, { "epoch": 2.68, - "learning_rate": 1.4716140827903054e-05, - "loss": 0.1898, + "learning_rate": 2.472438487822383e-05, + "loss": 0.1417, "step": 57355 }, { "epoch": 2.68, - "learning_rate": 1.4715672026627914e-05, - "loss": 0.3378, + "learning_rate": 2.4723916808387814e-05, + "loss": 0.2587, "step": 57360 }, { "epoch": 2.68, - "learning_rate": 1.4715203225352774e-05, - "loss": 0.1942, + "learning_rate": 2.4723448738551794e-05, + "loss": 0.2192, "step": 57365 }, { "epoch": 2.68, - "learning_rate": 1.4714734424077634e-05, - "loss": 0.0085, + "learning_rate": 2.4722980668715773e-05, + "loss": 0.0221, "step": 57370 }, { "epoch": 2.68, - "learning_rate": 1.4714265622802494e-05, - "loss": 0.0643, + "learning_rate": 2.4722512598879757e-05, + "loss": 0.0919, "step": 57375 }, { "epoch": 2.68, - "learning_rate": 1.4713796821527356e-05, - "loss": 0.0491, + "learning_rate": 2.4722044529043733e-05, + "loss": 0.0742, "step": 57380 }, { "epoch": 2.68, - "learning_rate": 1.4713328020252216e-05, - "loss": 0.1337, + "learning_rate": 2.4721576459207713e-05, + "loss": 0.0954, "step": 57385 }, { "epoch": 2.68, - "learning_rate": 1.4712859218977076e-05, - "loss": 0.099, + "learning_rate": 2.4721108389371693e-05, + "loss": 0.0399, "step": 57390 }, { "epoch": 2.68, - "learning_rate": 1.4712390417701939e-05, - "loss": 0.1491, + "learning_rate": 2.4720640319535676e-05, + "loss": 0.0851, "step": 57395 }, { "epoch": 2.68, - "learning_rate": 1.4711921616426799e-05, - "loss": 0.1278, + "learning_rate": 2.4720172249699656e-05, + "loss": 0.1236, "step": 57400 }, { "epoch": 2.68, - "learning_rate": 1.4711452815151659e-05, - "loss": 0.2245, + "learning_rate": 2.4719704179863635e-05, + "loss": 0.1798, "step": 57405 }, { "epoch": 2.68, - "learning_rate": 1.4710984013876519e-05, - "loss": 0.2875, + "learning_rate": 2.4719236110027615e-05, + "loss": 0.2454, "step": 57410 }, { "epoch": 2.68, - "learning_rate": 1.4710515212601379e-05, - "loss": 0.2976, + "learning_rate": 2.47187680401916e-05, + "loss": 0.3206, "step": 57415 }, { "epoch": 2.68, - "learning_rate": 1.471004641132624e-05, - "loss": 0.0304, + "learning_rate": 2.471829997035558e-05, + "loss": 0.0538, "step": 57420 }, { "epoch": 2.68, - "learning_rate": 1.47095776100511e-05, - "loss": 0.0545, + "learning_rate": 2.4717831900519558e-05, + "loss": 0.0657, "step": 57425 }, { "epoch": 2.68, - "learning_rate": 1.470910880877596e-05, - "loss": 0.061, + "learning_rate": 2.4717363830683538e-05, + "loss": 0.0539, "step": 57430 }, { "epoch": 2.68, - "learning_rate": 1.470864000750082e-05, - "loss": 0.0573, + "learning_rate": 2.471689576084752e-05, + "loss": 0.0371, "step": 57435 }, { "epoch": 2.68, - "learning_rate": 1.4708171206225683e-05, - "loss": 0.1313, + "learning_rate": 2.47164276910115e-05, + "loss": 0.107, "step": 57440 }, { "epoch": 2.68, - "learning_rate": 1.4707702404950543e-05, - "loss": 0.1579, + "learning_rate": 2.4715959621175477e-05, + "loss": 0.2194, "step": 57445 }, { "epoch": 2.68, - "learning_rate": 1.4707233603675403e-05, - "loss": 0.067, + "learning_rate": 2.471549155133946e-05, + "loss": 0.119, "step": 57450 }, { "epoch": 2.68, - "learning_rate": 1.4706764802400263e-05, - "loss": 0.1581, + "learning_rate": 2.471502348150344e-05, + "loss": 0.2139, "step": 57455 }, { "epoch": 2.68, - "learning_rate": 1.4706296001125125e-05, - "loss": 0.2488, + "learning_rate": 2.471455541166742e-05, + "loss": 0.1608, "step": 57460 }, { "epoch": 2.68, - "learning_rate": 1.4705827199849985e-05, - "loss": 0.2114, + "learning_rate": 2.47140873418314e-05, + "loss": 0.3079, "step": 57465 }, { "epoch": 2.68, - "learning_rate": 1.4705358398574845e-05, - "loss": 0.0693, + "learning_rate": 2.4713619271995383e-05, + "loss": 0.0415, "step": 57470 }, { "epoch": 2.68, - "learning_rate": 1.4704889597299705e-05, - "loss": 0.031, + "learning_rate": 2.4713151202159363e-05, + "loss": 0.0355, "step": 57475 }, { "epoch": 2.68, - "learning_rate": 1.4704420796024566e-05, - "loss": 0.0395, + "learning_rate": 2.4712683132323343e-05, + "loss": 0.0756, "step": 57480 }, { "epoch": 2.68, - "learning_rate": 1.4703951994749428e-05, - "loss": 0.0701, + "learning_rate": 2.4712215062487323e-05, + "loss": 0.043, "step": 57485 }, { "epoch": 2.68, - "learning_rate": 1.4703483193474288e-05, - "loss": 0.0973, + "learning_rate": 2.4711746992651306e-05, + "loss": 0.0609, "step": 57490 }, { "epoch": 2.68, - "learning_rate": 1.4703014392199148e-05, - "loss": 0.0761, + "learning_rate": 2.4711278922815286e-05, + "loss": 0.0613, "step": 57495 }, { "epoch": 2.68, - "learning_rate": 1.470254559092401e-05, - "loss": 0.0594, + "learning_rate": 2.4710810852979266e-05, + "loss": 0.1123, "step": 57500 }, { "epoch": 2.68, - "learning_rate": 1.470207678964887e-05, - "loss": 0.156, + "learning_rate": 2.471034278314325e-05, + "loss": 0.0651, "step": 57505 }, { "epoch": 2.68, - "learning_rate": 1.470160798837373e-05, - "loss": 0.2419, + "learning_rate": 2.4709874713307225e-05, + "loss": 0.1312, "step": 57510 }, { "epoch": 2.68, - "learning_rate": 1.470113918709859e-05, - "loss": 0.2983, + "learning_rate": 2.4709406643471205e-05, + "loss": 0.1825, "step": 57515 }, { "epoch": 2.68, - "learning_rate": 1.4700670385823451e-05, - "loss": 0.0448, + "learning_rate": 2.4708938573635185e-05, + "loss": 0.0397, "step": 57520 }, { "epoch": 2.68, - "learning_rate": 1.470020158454831e-05, - "loss": 0.0478, + "learning_rate": 2.4708470503799168e-05, + "loss": 0.0311, "step": 57525 }, { "epoch": 2.68, - "learning_rate": 1.469973278327317e-05, - "loss": 0.1021, + "learning_rate": 2.4708002433963148e-05, + "loss": 0.0295, "step": 57530 }, { "epoch": 2.68, - "learning_rate": 1.4699263981998032e-05, - "loss": 0.0631, + "learning_rate": 2.4707534364127128e-05, + "loss": 0.038, "step": 57535 }, { "epoch": 2.68, - "learning_rate": 1.4698795180722894e-05, - "loss": 0.0271, + "learning_rate": 2.4707066294291108e-05, + "loss": 0.0646, "step": 57540 }, { "epoch": 2.69, - "learning_rate": 1.4698326379447754e-05, - "loss": 0.1192, + "learning_rate": 2.470659822445509e-05, + "loss": 0.0442, "step": 57545 }, { "epoch": 2.69, - "learning_rate": 1.4697857578172614e-05, - "loss": 0.0927, + "learning_rate": 2.470613015461907e-05, + "loss": 0.1179, "step": 57550 }, { "epoch": 2.69, - "learning_rate": 1.4697388776897474e-05, - "loss": 0.2041, + "learning_rate": 2.470566208478305e-05, + "loss": 0.1921, "step": 57555 }, { "epoch": 2.69, - "learning_rate": 1.4696919975622335e-05, - "loss": 0.2522, + "learning_rate": 2.4705194014947034e-05, + "loss": 0.2513, "step": 57560 }, { "epoch": 2.69, - "learning_rate": 1.4696451174347195e-05, - "loss": 0.2796, + "learning_rate": 2.4704725945111013e-05, + "loss": 0.4523, "step": 57565 }, { "epoch": 2.69, - "learning_rate": 1.4695982373072055e-05, - "loss": 0.0444, + "learning_rate": 2.470425787527499e-05, + "loss": 0.0439, "step": 57570 }, { "epoch": 2.69, - "learning_rate": 1.4695513571796915e-05, - "loss": 0.0531, + "learning_rate": 2.470378980543897e-05, + "loss": 0.0282, "step": 57575 }, { "epoch": 2.69, - "learning_rate": 1.4695044770521779e-05, - "loss": 0.1158, + "learning_rate": 2.4703321735602953e-05, + "loss": 0.0716, "step": 57580 }, { "epoch": 2.69, - "learning_rate": 1.4694575969246638e-05, - "loss": 0.0572, + "learning_rate": 2.4702853665766933e-05, + "loss": 0.0731, "step": 57585 }, { "epoch": 2.69, - "learning_rate": 1.4694107167971498e-05, - "loss": 0.1109, + "learning_rate": 2.4702385595930912e-05, + "loss": 0.1536, "step": 57590 }, { "epoch": 2.69, - "learning_rate": 1.4693638366696358e-05, - "loss": 0.0798, + "learning_rate": 2.4701917526094892e-05, + "loss": 0.1643, "step": 57595 }, { "epoch": 2.69, - "learning_rate": 1.469316956542122e-05, - "loss": 0.1928, + "learning_rate": 2.4701449456258875e-05, + "loss": 0.0649, "step": 57600 }, { "epoch": 2.69, - "learning_rate": 1.469270076414608e-05, - "loss": 0.2423, + "learning_rate": 2.4700981386422855e-05, + "loss": 0.2279, "step": 57605 }, { "epoch": 2.69, - "learning_rate": 1.469223196287094e-05, - "loss": 0.2319, + "learning_rate": 2.4700513316586835e-05, + "loss": 0.185, "step": 57610 }, { "epoch": 2.69, - "learning_rate": 1.46917631615958e-05, - "loss": 0.2285, + "learning_rate": 2.4700045246750815e-05, + "loss": 0.1194, "step": 57615 }, { "epoch": 2.69, - "learning_rate": 1.469129436032066e-05, - "loss": 0.0387, + "learning_rate": 2.4699577176914798e-05, + "loss": 0.044, "step": 57620 }, { "epoch": 2.69, - "learning_rate": 1.4690825559045523e-05, - "loss": 0.0568, + "learning_rate": 2.4699109107078778e-05, + "loss": 0.0205, "step": 57625 }, { "epoch": 2.69, - "learning_rate": 1.4690356757770383e-05, - "loss": 0.0665, + "learning_rate": 2.4698641037242758e-05, + "loss": 0.058, "step": 57630 }, { "epoch": 2.69, - "learning_rate": 1.4689887956495243e-05, - "loss": 0.1037, + "learning_rate": 2.4698172967406738e-05, + "loss": 0.0455, "step": 57635 }, { "epoch": 2.69, - "learning_rate": 1.4689419155220105e-05, - "loss": 0.0905, + "learning_rate": 2.4697704897570717e-05, + "loss": 0.0448, "step": 57640 }, { "epoch": 2.69, - "learning_rate": 1.4688950353944964e-05, - "loss": 0.0775, + "learning_rate": 2.4697236827734697e-05, + "loss": 0.1466, "step": 57645 }, { "epoch": 2.69, - "learning_rate": 1.4688481552669824e-05, - "loss": 0.1319, + "learning_rate": 2.4696768757898677e-05, + "loss": 0.1105, "step": 57650 }, { "epoch": 2.69, - "learning_rate": 1.4688012751394684e-05, - "loss": 0.1594, + "learning_rate": 2.469630068806266e-05, + "loss": 0.4656, "step": 57655 }, { "epoch": 2.69, - "learning_rate": 1.4687543950119544e-05, - "loss": 0.3679, + "learning_rate": 2.469583261822664e-05, + "loss": 0.24, "step": 57660 }, { "epoch": 2.69, - "learning_rate": 1.4687075148844406e-05, - "loss": 0.6374, + "learning_rate": 2.469536454839062e-05, + "loss": 0.2053, "step": 57665 }, { "epoch": 2.69, - "learning_rate": 1.4686606347569266e-05, - "loss": 0.0202, + "learning_rate": 2.46948964785546e-05, + "loss": 0.0614, "step": 57670 }, { "epoch": 2.69, - "learning_rate": 1.4686137546294127e-05, - "loss": 0.0683, + "learning_rate": 2.4694428408718583e-05, + "loss": 0.0198, "step": 57675 }, { "epoch": 2.69, - "learning_rate": 1.4685668745018989e-05, - "loss": 0.0405, + "learning_rate": 2.4693960338882563e-05, + "loss": 0.0258, "step": 57680 }, { "epoch": 2.69, - "learning_rate": 1.4685199943743849e-05, - "loss": 0.0402, + "learning_rate": 2.4693492269046543e-05, + "loss": 0.0339, "step": 57685 }, { "epoch": 2.69, - "learning_rate": 1.4684731142468709e-05, - "loss": 0.1176, + "learning_rate": 2.4693024199210526e-05, + "loss": 0.0708, "step": 57690 }, { "epoch": 2.69, - "learning_rate": 1.4684262341193569e-05, - "loss": 0.0638, + "learning_rate": 2.4692556129374506e-05, + "loss": 0.1115, "step": 57695 }, { "epoch": 2.69, - "learning_rate": 1.4683793539918429e-05, - "loss": 0.1016, + "learning_rate": 2.4692088059538482e-05, + "loss": 0.0644, "step": 57700 }, { "epoch": 2.69, - "learning_rate": 1.468332473864329e-05, - "loss": 0.0693, + "learning_rate": 2.4691619989702462e-05, + "loss": 0.1303, "step": 57705 }, { "epoch": 2.69, - "learning_rate": 1.468285593736815e-05, - "loss": 0.1265, + "learning_rate": 2.4691151919866445e-05, + "loss": 0.2525, "step": 57710 }, { "epoch": 2.69, - "learning_rate": 1.468238713609301e-05, - "loss": 0.2031, + "learning_rate": 2.4690683850030425e-05, + "loss": 0.1985, "step": 57715 }, { "epoch": 2.69, - "learning_rate": 1.4681918334817874e-05, - "loss": 0.0728, + "learning_rate": 2.4690215780194405e-05, + "loss": 0.0156, "step": 57720 }, { "epoch": 2.69, - "learning_rate": 1.4681449533542734e-05, - "loss": 0.0588, + "learning_rate": 2.4689747710358384e-05, + "loss": 0.0468, "step": 57725 }, { "epoch": 2.69, - "learning_rate": 1.4680980732267593e-05, - "loss": 0.0832, + "learning_rate": 2.4689279640522368e-05, + "loss": 0.0986, "step": 57730 }, { "epoch": 2.69, - "learning_rate": 1.4680511930992453e-05, - "loss": 0.177, + "learning_rate": 2.4688811570686348e-05, + "loss": 0.092, "step": 57735 }, { "epoch": 2.69, - "learning_rate": 1.4680043129717313e-05, - "loss": 0.1171, + "learning_rate": 2.4688343500850327e-05, + "loss": 0.1119, "step": 57740 }, { "epoch": 2.69, - "learning_rate": 1.4679574328442175e-05, - "loss": 0.1549, + "learning_rate": 2.468787543101431e-05, + "loss": 0.0664, "step": 57745 }, { "epoch": 2.69, - "learning_rate": 1.4679105527167035e-05, - "loss": 0.1312, + "learning_rate": 2.468740736117829e-05, + "loss": 0.14, "step": 57750 }, { "epoch": 2.69, - "learning_rate": 1.4678636725891895e-05, - "loss": 0.2211, + "learning_rate": 2.468693929134227e-05, + "loss": 0.1064, "step": 57755 }, { "epoch": 2.7, - "learning_rate": 1.4678167924616755e-05, - "loss": 0.3696, + "learning_rate": 2.4686471221506247e-05, + "loss": 0.2536, "step": 57760 }, { "epoch": 2.7, - "learning_rate": 1.4677699123341618e-05, - "loss": 0.2266, + "learning_rate": 2.468600315167023e-05, + "loss": 0.1373, "step": 57765 }, { "epoch": 2.7, - "learning_rate": 1.4677230322066478e-05, - "loss": 0.0231, + "learning_rate": 2.468553508183421e-05, + "loss": 0.0581, "step": 57770 }, { "epoch": 2.7, - "learning_rate": 1.4676761520791338e-05, - "loss": 0.029, + "learning_rate": 2.468506701199819e-05, + "loss": 0.07, "step": 57775 }, { "epoch": 2.7, - "learning_rate": 1.4676292719516198e-05, - "loss": 0.0498, + "learning_rate": 2.468459894216217e-05, + "loss": 0.0596, "step": 57780 }, { "epoch": 2.7, - "learning_rate": 1.467582391824106e-05, - "loss": 0.0852, + "learning_rate": 2.4684130872326152e-05, + "loss": 0.056, "step": 57785 }, { "epoch": 2.7, - "learning_rate": 1.467535511696592e-05, - "loss": 0.0683, + "learning_rate": 2.4683662802490132e-05, + "loss": 0.1122, "step": 57790 }, { "epoch": 2.7, - "learning_rate": 1.467488631569078e-05, - "loss": 0.0996, + "learning_rate": 2.4683194732654112e-05, + "loss": 0.0833, "step": 57795 }, { "epoch": 2.7, - "learning_rate": 1.467441751441564e-05, - "loss": 0.1235, + "learning_rate": 2.4682726662818092e-05, + "loss": 0.0884, "step": 57800 }, { "epoch": 2.7, - "learning_rate": 1.46739487131405e-05, - "loss": 0.1227, + "learning_rate": 2.4682258592982075e-05, + "loss": 0.1954, "step": 57805 }, { "epoch": 2.7, - "learning_rate": 1.4673479911865363e-05, - "loss": 0.1879, + "learning_rate": 2.4681790523146055e-05, + "loss": 0.2491, "step": 57810 }, { "epoch": 2.7, - "learning_rate": 1.4673011110590223e-05, - "loss": 0.2279, + "learning_rate": 2.4681322453310035e-05, + "loss": 0.2213, "step": 57815 }, { "epoch": 2.7, - "learning_rate": 1.4672542309315082e-05, - "loss": 0.0929, + "learning_rate": 2.4680854383474018e-05, + "loss": 0.0237, "step": 57820 }, { "epoch": 2.7, - "learning_rate": 1.4672073508039944e-05, - "loss": 0.0051, + "learning_rate": 2.4680386313637994e-05, + "loss": 0.0484, "step": 57825 }, { "epoch": 2.7, - "learning_rate": 1.4671604706764804e-05, - "loss": 0.035, + "learning_rate": 2.4679918243801974e-05, + "loss": 0.0398, "step": 57830 }, { "epoch": 2.7, - "learning_rate": 1.4671135905489664e-05, - "loss": 0.0929, + "learning_rate": 2.4679450173965954e-05, + "loss": 0.0586, "step": 57835 }, { "epoch": 2.7, - "learning_rate": 1.4670667104214524e-05, - "loss": 0.0664, + "learning_rate": 2.4678982104129937e-05, + "loss": 0.0567, "step": 57840 }, { "epoch": 2.7, - "learning_rate": 1.4670198302939384e-05, - "loss": 0.0527, + "learning_rate": 2.4678514034293917e-05, + "loss": 0.0963, "step": 57845 }, { "epoch": 2.7, - "learning_rate": 1.4669729501664245e-05, - "loss": 0.1453, + "learning_rate": 2.4678045964457897e-05, + "loss": 0.0822, "step": 57850 }, { "epoch": 2.7, - "learning_rate": 1.4669260700389105e-05, - "loss": 0.1784, + "learning_rate": 2.4677577894621877e-05, + "loss": 0.1504, "step": 57855 }, { "epoch": 2.7, - "learning_rate": 1.4668791899113967e-05, - "loss": 0.1884, + "learning_rate": 2.467710982478586e-05, + "loss": 0.2156, "step": 57860 }, { "epoch": 2.7, - "learning_rate": 1.4668323097838829e-05, - "loss": 0.3005, + "learning_rate": 2.467664175494984e-05, + "loss": 0.2169, "step": 57865 }, { "epoch": 2.7, - "learning_rate": 1.4667854296563689e-05, - "loss": 0.0338, + "learning_rate": 2.467617368511382e-05, + "loss": 0.011, "step": 57870 }, { "epoch": 2.7, - "learning_rate": 1.4667385495288549e-05, - "loss": 0.0088, + "learning_rate": 2.4675705615277803e-05, + "loss": 0.0415, "step": 57875 }, { "epoch": 2.7, - "learning_rate": 1.4666916694013408e-05, - "loss": 0.0496, + "learning_rate": 2.4675237545441783e-05, + "loss": 0.0223, "step": 57880 }, { "epoch": 2.7, - "learning_rate": 1.4666447892738268e-05, - "loss": 0.0948, + "learning_rate": 2.4674769475605762e-05, + "loss": 0.083, "step": 57885 }, { "epoch": 2.7, - "learning_rate": 1.466597909146313e-05, - "loss": 0.1013, + "learning_rate": 2.467430140576974e-05, + "loss": 0.0573, "step": 57890 }, { "epoch": 2.7, - "learning_rate": 1.466551029018799e-05, - "loss": 0.0931, + "learning_rate": 2.4673833335933722e-05, + "loss": 0.0796, "step": 57895 }, { "epoch": 2.7, - "learning_rate": 1.466504148891285e-05, - "loss": 0.0853, + "learning_rate": 2.4673365266097702e-05, + "loss": 0.1186, "step": 57900 }, { "epoch": 2.7, - "learning_rate": 1.4664572687637713e-05, - "loss": 0.1581, + "learning_rate": 2.467289719626168e-05, + "loss": 0.1458, "step": 57905 }, { "epoch": 2.7, - "learning_rate": 1.4664103886362573e-05, - "loss": 0.1908, + "learning_rate": 2.467242912642566e-05, + "loss": 0.1805, "step": 57910 }, { "epoch": 2.7, - "learning_rate": 1.4663635085087433e-05, - "loss": 0.203, + "learning_rate": 2.4671961056589645e-05, + "loss": 0.4035, "step": 57915 }, { "epoch": 2.7, - "learning_rate": 1.4663166283812293e-05, - "loss": 0.0477, + "learning_rate": 2.4671492986753624e-05, + "loss": 0.0672, "step": 57920 }, { "epoch": 2.7, - "learning_rate": 1.4662697482537153e-05, - "loss": 0.0474, + "learning_rate": 2.4671024916917604e-05, + "loss": 0.058, "step": 57925 }, { "epoch": 2.7, - "learning_rate": 1.4662228681262015e-05, - "loss": 0.0508, + "learning_rate": 2.4670556847081587e-05, + "loss": 0.0408, "step": 57930 }, { "epoch": 2.7, - "learning_rate": 1.4661759879986874e-05, - "loss": 0.063, + "learning_rate": 2.4670088777245567e-05, + "loss": 0.0546, "step": 57935 }, { "epoch": 2.7, - "learning_rate": 1.4661291078711734e-05, - "loss": 0.0944, + "learning_rate": 2.4669620707409547e-05, + "loss": 0.1206, "step": 57940 }, { "epoch": 2.7, - "learning_rate": 1.4660822277436594e-05, - "loss": 0.1145, + "learning_rate": 2.4669152637573527e-05, + "loss": 0.0743, "step": 57945 }, { "epoch": 2.7, - "learning_rate": 1.4660353476161458e-05, - "loss": 0.0749, + "learning_rate": 2.4668684567737507e-05, + "loss": 0.1345, "step": 57950 }, { "epoch": 2.7, - "learning_rate": 1.4659884674886318e-05, - "loss": 0.1389, + "learning_rate": 2.4668216497901487e-05, + "loss": 0.1159, "step": 57955 }, { "epoch": 2.7, - "learning_rate": 1.4659415873611178e-05, - "loss": 0.938, + "learning_rate": 2.4667748428065466e-05, + "loss": 0.3535, "step": 57960 }, { "epoch": 2.7, - "learning_rate": 1.4658947072336037e-05, - "loss": 0.3698, + "learning_rate": 2.4667280358229446e-05, + "loss": 0.3006, "step": 57965 }, { "epoch": 2.7, - "learning_rate": 1.4658478271060899e-05, - "loss": 0.0518, + "learning_rate": 2.466681228839343e-05, + "loss": 0.0226, "step": 57970 }, { "epoch": 2.71, - "learning_rate": 1.4658009469785759e-05, - "loss": 0.0445, + "learning_rate": 2.466634421855741e-05, + "loss": 0.0352, "step": 57975 }, { "epoch": 2.71, - "learning_rate": 1.4657540668510619e-05, - "loss": 0.041, + "learning_rate": 2.466587614872139e-05, + "loss": 0.0564, "step": 57980 }, { "epoch": 2.71, - "learning_rate": 1.4657071867235479e-05, - "loss": 0.1005, + "learning_rate": 2.4665408078885372e-05, + "loss": 0.0693, "step": 57985 }, { "epoch": 2.71, - "learning_rate": 1.465660306596034e-05, - "loss": 0.1254, + "learning_rate": 2.4664940009049352e-05, + "loss": 0.0672, "step": 57990 }, { "epoch": 2.71, - "learning_rate": 1.46561342646852e-05, - "loss": 0.076, + "learning_rate": 2.4664471939213332e-05, + "loss": 0.1035, "step": 57995 }, { "epoch": 2.71, - "learning_rate": 1.4655665463410062e-05, - "loss": 0.0907, + "learning_rate": 2.4664003869377312e-05, + "loss": 0.0663, "step": 58000 }, { "epoch": 2.71, - "learning_rate": 1.4655196662134922e-05, - "loss": 0.1793, + "learning_rate": 2.4663535799541295e-05, + "loss": 0.2174, "step": 58005 }, { "epoch": 2.71, - "learning_rate": 1.4654727860859784e-05, - "loss": 0.2108, + "learning_rate": 2.4663067729705275e-05, + "loss": 0.2538, "step": 58010 }, { "epoch": 2.71, - "learning_rate": 1.4654259059584644e-05, - "loss": 0.2919, + "learning_rate": 2.466259965986925e-05, + "loss": 0.189, "step": 58015 }, { "epoch": 2.71, - "learning_rate": 1.4653790258309504e-05, - "loss": 0.0323, + "learning_rate": 2.466213159003323e-05, + "loss": 0.0508, "step": 58020 }, { "epoch": 2.71, - "learning_rate": 1.4653321457034363e-05, - "loss": 0.0621, + "learning_rate": 2.4661663520197214e-05, + "loss": 0.0506, "step": 58025 }, { "epoch": 2.71, - "learning_rate": 1.4652852655759225e-05, - "loss": 0.0744, + "learning_rate": 2.4661195450361194e-05, + "loss": 0.0819, "step": 58030 }, { "epoch": 2.71, - "learning_rate": 1.4652383854484085e-05, - "loss": 0.0495, + "learning_rate": 2.4660727380525174e-05, + "loss": 0.1452, "step": 58035 }, { "epoch": 2.71, - "learning_rate": 1.4651915053208945e-05, - "loss": 0.0437, + "learning_rate": 2.4660259310689154e-05, + "loss": 0.0802, "step": 58040 }, { "epoch": 2.71, - "learning_rate": 1.4651446251933807e-05, - "loss": 0.1159, + "learning_rate": 2.4659791240853137e-05, + "loss": 0.1489, "step": 58045 }, { "epoch": 2.71, - "learning_rate": 1.4650977450658668e-05, - "loss": 0.1174, + "learning_rate": 2.4659323171017117e-05, + "loss": 0.1678, "step": 58050 }, { "epoch": 2.71, - "learning_rate": 1.4650508649383528e-05, - "loss": 0.1162, + "learning_rate": 2.4658855101181096e-05, + "loss": 0.1887, "step": 58055 }, { "epoch": 2.71, - "learning_rate": 1.4650039848108388e-05, - "loss": 0.3202, + "learning_rate": 2.465838703134508e-05, + "loss": 0.266, "step": 58060 }, { "epoch": 2.71, - "learning_rate": 1.4649571046833248e-05, - "loss": 0.2968, + "learning_rate": 2.465791896150906e-05, + "loss": 0.2013, "step": 58065 }, { "epoch": 2.71, - "learning_rate": 1.464910224555811e-05, - "loss": 0.0346, + "learning_rate": 2.465745089167304e-05, + "loss": 0.0661, "step": 58070 }, { "epoch": 2.71, - "learning_rate": 1.464863344428297e-05, - "loss": 0.0612, + "learning_rate": 2.465698282183702e-05, + "loss": 0.0563, "step": 58075 }, { "epoch": 2.71, - "learning_rate": 1.464816464300783e-05, - "loss": 0.0309, + "learning_rate": 2.4656514752001e-05, + "loss": 0.0246, "step": 58080 }, { "epoch": 2.71, - "learning_rate": 1.464769584173269e-05, - "loss": 0.0749, + "learning_rate": 2.465604668216498e-05, + "loss": 0.0701, "step": 58085 }, { "epoch": 2.71, - "learning_rate": 1.4647227040457553e-05, - "loss": 0.0842, + "learning_rate": 2.465557861232896e-05, + "loss": 0.1198, "step": 58090 }, { "epoch": 2.71, - "learning_rate": 1.4646758239182413e-05, - "loss": 0.0822, + "learning_rate": 2.465511054249294e-05, + "loss": 0.0721, "step": 58095 }, { "epoch": 2.71, - "learning_rate": 1.4646289437907273e-05, - "loss": 0.1735, + "learning_rate": 2.465464247265692e-05, + "loss": 0.1164, "step": 58100 }, { "epoch": 2.71, - "learning_rate": 1.4645820636632133e-05, - "loss": 0.1367, + "learning_rate": 2.46541744028209e-05, + "loss": 0.1727, "step": 58105 }, { "epoch": 2.71, - "learning_rate": 1.4645351835356994e-05, - "loss": 0.2274, + "learning_rate": 2.465370633298488e-05, + "loss": 0.3224, "step": 58110 }, { "epoch": 2.71, - "learning_rate": 1.4644883034081854e-05, - "loss": 0.2721, + "learning_rate": 2.4653238263148864e-05, + "loss": 0.2044, "step": 58115 }, { "epoch": 2.71, - "learning_rate": 1.4644414232806714e-05, - "loss": 0.0713, + "learning_rate": 2.4652770193312844e-05, + "loss": 0.0322, "step": 58120 }, { "epoch": 2.71, - "learning_rate": 1.4643945431531574e-05, - "loss": 0.0516, + "learning_rate": 2.4652302123476824e-05, + "loss": 0.0549, "step": 58125 }, { "epoch": 2.71, - "learning_rate": 1.4643476630256434e-05, - "loss": 0.0327, + "learning_rate": 2.4651834053640804e-05, + "loss": 0.067, "step": 58130 }, { "epoch": 2.71, - "learning_rate": 1.4643007828981297e-05, - "loss": 0.0882, + "learning_rate": 2.4651365983804787e-05, + "loss": 0.017, "step": 58135 }, { "epoch": 2.71, - "learning_rate": 1.4642539027706157e-05, - "loss": 0.1025, + "learning_rate": 2.4650897913968764e-05, + "loss": 0.1286, "step": 58140 }, { "epoch": 2.71, - "learning_rate": 1.4642070226431017e-05, - "loss": 0.0793, + "learning_rate": 2.4650429844132743e-05, + "loss": 0.1283, "step": 58145 }, { "epoch": 2.71, - "learning_rate": 1.4641601425155879e-05, - "loss": 0.2249, + "learning_rate": 2.4649961774296723e-05, + "loss": 0.137, "step": 58150 }, { "epoch": 2.71, - "learning_rate": 1.4641132623880739e-05, - "loss": 0.187, + "learning_rate": 2.4649493704460706e-05, + "loss": 0.1427, "step": 58155 }, { "epoch": 2.71, - "learning_rate": 1.4640663822605599e-05, - "loss": 0.2013, + "learning_rate": 2.4649025634624686e-05, + "loss": 0.1582, "step": 58160 }, { "epoch": 2.71, - "learning_rate": 1.4640195021330459e-05, - "loss": 0.3344, + "learning_rate": 2.4648557564788666e-05, + "loss": 0.29, "step": 58165 }, { "epoch": 2.71, - "learning_rate": 1.4639726220055318e-05, - "loss": 0.0645, + "learning_rate": 2.464808949495265e-05, + "loss": 0.0159, "step": 58170 }, { "epoch": 2.71, - "learning_rate": 1.463925741878018e-05, - "loss": 0.0234, + "learning_rate": 2.464762142511663e-05, + "loss": 0.0291, "step": 58175 }, { "epoch": 2.71, - "learning_rate": 1.463878861750504e-05, - "loss": 0.0384, + "learning_rate": 2.464715335528061e-05, + "loss": 0.1181, "step": 58180 }, { "epoch": 2.71, - "learning_rate": 1.4638319816229902e-05, - "loss": 0.0991, + "learning_rate": 2.464668528544459e-05, + "loss": 0.0576, "step": 58185 }, { "epoch": 2.72, - "learning_rate": 1.4637851014954763e-05, - "loss": 0.064, + "learning_rate": 2.4646217215608572e-05, + "loss": 0.1022, "step": 58190 }, { "epoch": 2.72, - "learning_rate": 1.4637382213679623e-05, - "loss": 0.1129, + "learning_rate": 2.4645749145772552e-05, + "loss": 0.0577, "step": 58195 }, { "epoch": 2.72, - "learning_rate": 1.4636913412404483e-05, - "loss": 0.1446, + "learning_rate": 2.464528107593653e-05, + "loss": 0.0849, "step": 58200 }, { "epoch": 2.72, - "learning_rate": 1.4636444611129343e-05, - "loss": 0.1168, + "learning_rate": 2.4644813006100508e-05, + "loss": 0.0814, "step": 58205 }, { "epoch": 2.72, - "learning_rate": 1.4635975809854203e-05, - "loss": 0.2535, + "learning_rate": 2.464434493626449e-05, + "loss": 0.178, "step": 58210 }, { "epoch": 2.72, - "learning_rate": 1.4635507008579065e-05, - "loss": 0.2867, + "learning_rate": 2.464387686642847e-05, + "loss": 0.2908, "step": 58215 }, { "epoch": 2.72, - "learning_rate": 1.4635038207303925e-05, - "loss": 0.0554, + "learning_rate": 2.464340879659245e-05, + "loss": 0.0314, "step": 58220 }, { "epoch": 2.72, - "learning_rate": 1.4634569406028785e-05, - "loss": 0.0347, + "learning_rate": 2.464294072675643e-05, + "loss": 0.0343, "step": 58225 }, { "epoch": 2.72, - "learning_rate": 1.4634100604753648e-05, - "loss": 0.0444, + "learning_rate": 2.4642472656920414e-05, + "loss": 0.0463, "step": 58230 }, { "epoch": 2.72, - "learning_rate": 1.4633631803478508e-05, - "loss": 0.0649, + "learning_rate": 2.4642004587084394e-05, + "loss": 0.0404, "step": 58235 }, { "epoch": 2.72, - "learning_rate": 1.4633163002203368e-05, - "loss": 0.0463, + "learning_rate": 2.4641536517248373e-05, + "loss": 0.0787, "step": 58240 }, { "epoch": 2.72, - "learning_rate": 1.4632694200928228e-05, - "loss": 0.0279, + "learning_rate": 2.4641068447412357e-05, + "loss": 0.1323, "step": 58245 }, { "epoch": 2.72, - "learning_rate": 1.4632225399653088e-05, - "loss": 0.0854, + "learning_rate": 2.4640600377576336e-05, + "loss": 0.1375, "step": 58250 }, { "epoch": 2.72, - "learning_rate": 1.463175659837795e-05, - "loss": 0.2262, + "learning_rate": 2.4640132307740316e-05, + "loss": 0.1497, "step": 58255 }, { "epoch": 2.72, - "learning_rate": 1.4631287797102809e-05, - "loss": 0.2178, + "learning_rate": 2.4639664237904296e-05, + "loss": 0.3273, "step": 58260 }, { "epoch": 2.72, - "learning_rate": 1.4630818995827669e-05, - "loss": 0.4489, + "learning_rate": 2.4639196168068276e-05, + "loss": 0.3801, "step": 58265 }, { "epoch": 2.72, - "learning_rate": 1.4630350194552529e-05, - "loss": 0.0468, + "learning_rate": 2.4638728098232256e-05, + "loss": 0.0657, "step": 58270 }, { "epoch": 2.72, - "learning_rate": 1.4629881393277392e-05, - "loss": 0.0298, + "learning_rate": 2.4638260028396236e-05, + "loss": 0.0782, "step": 58275 }, { "epoch": 2.72, - "learning_rate": 1.4629412592002252e-05, - "loss": 0.0473, + "learning_rate": 2.4637791958560215e-05, + "loss": 0.0649, "step": 58280 }, { "epoch": 2.72, - "learning_rate": 1.4628943790727112e-05, - "loss": 0.0977, + "learning_rate": 2.46373238887242e-05, + "loss": 0.0892, "step": 58285 }, { "epoch": 2.72, - "learning_rate": 1.4628474989451972e-05, - "loss": 0.0873, + "learning_rate": 2.463685581888818e-05, + "loss": 0.0592, "step": 58290 }, { "epoch": 2.72, - "learning_rate": 1.4628006188176834e-05, - "loss": 0.155, + "learning_rate": 2.4636387749052158e-05, + "loss": 0.0859, "step": 58295 }, { "epoch": 2.72, - "learning_rate": 1.4627537386901694e-05, - "loss": 0.1499, + "learning_rate": 2.463591967921614e-05, + "loss": 0.0975, "step": 58300 }, { "epoch": 2.72, - "learning_rate": 1.4627068585626554e-05, - "loss": 0.1295, + "learning_rate": 2.463545160938012e-05, + "loss": 0.1094, "step": 58305 }, { "epoch": 2.72, - "learning_rate": 1.4626599784351414e-05, - "loss": 0.2148, + "learning_rate": 2.46349835395441e-05, + "loss": 0.1775, "step": 58310 }, { "epoch": 2.72, - "learning_rate": 1.4626130983076273e-05, - "loss": 0.2469, + "learning_rate": 2.463451546970808e-05, + "loss": 0.3567, "step": 58315 }, { "epoch": 2.72, - "learning_rate": 1.4625662181801135e-05, - "loss": 0.0731, + "learning_rate": 2.4634047399872064e-05, + "loss": 0.0263, "step": 58320 }, { "epoch": 2.72, - "learning_rate": 1.4625193380525997e-05, - "loss": 0.051, + "learning_rate": 2.4633579330036044e-05, + "loss": 0.0487, "step": 58325 }, { "epoch": 2.72, - "learning_rate": 1.4624724579250857e-05, - "loss": 0.037, + "learning_rate": 2.463311126020002e-05, + "loss": 0.0568, "step": 58330 }, { "epoch": 2.72, - "learning_rate": 1.4624255777975718e-05, - "loss": 0.081, + "learning_rate": 2.4632643190364e-05, + "loss": 0.0598, "step": 58335 }, { "epoch": 2.72, - "learning_rate": 1.4623786976700578e-05, - "loss": 0.1175, + "learning_rate": 2.4632175120527983e-05, + "loss": 0.0464, "step": 58340 }, { "epoch": 2.72, - "learning_rate": 1.4623318175425438e-05, - "loss": 0.1785, + "learning_rate": 2.4631707050691963e-05, + "loss": 0.1119, "step": 58345 }, { "epoch": 2.72, - "learning_rate": 1.4622849374150298e-05, - "loss": 0.1697, + "learning_rate": 2.4631238980855943e-05, + "loss": 0.0869, "step": 58350 }, { "epoch": 2.72, - "learning_rate": 1.4622380572875158e-05, - "loss": 0.2097, + "learning_rate": 2.4630770911019926e-05, + "loss": 0.1877, "step": 58355 }, { "epoch": 2.72, - "learning_rate": 1.462191177160002e-05, - "loss": 0.1784, + "learning_rate": 2.4630302841183906e-05, + "loss": 0.2785, "step": 58360 }, { "epoch": 2.72, - "learning_rate": 1.462144297032488e-05, - "loss": 0.2476, + "learning_rate": 2.4629834771347886e-05, + "loss": 0.2328, "step": 58365 }, { "epoch": 2.72, - "learning_rate": 1.4620974169049741e-05, - "loss": 0.0426, + "learning_rate": 2.4629366701511866e-05, + "loss": 0.0259, "step": 58370 }, { "epoch": 2.72, - "learning_rate": 1.4620505367774603e-05, - "loss": 0.0245, + "learning_rate": 2.462889863167585e-05, + "loss": 0.0814, "step": 58375 }, { "epoch": 2.72, - "learning_rate": 1.4620036566499463e-05, - "loss": 0.0399, + "learning_rate": 2.462843056183983e-05, + "loss": 0.04, "step": 58380 }, { "epoch": 2.72, - "learning_rate": 1.4619567765224323e-05, - "loss": 0.0598, + "learning_rate": 2.462796249200381e-05, + "loss": 0.0433, "step": 58385 }, { "epoch": 2.72, - "learning_rate": 1.4619098963949183e-05, - "loss": 0.0551, + "learning_rate": 2.462749442216779e-05, + "loss": 0.0543, "step": 58390 }, { "epoch": 2.72, - "learning_rate": 1.4618630162674044e-05, - "loss": 0.0788, + "learning_rate": 2.4627026352331768e-05, + "loss": 0.129, "step": 58395 }, { "epoch": 2.73, - "learning_rate": 1.4618161361398904e-05, - "loss": 0.1213, + "learning_rate": 2.4626558282495748e-05, + "loss": 0.1634, "step": 58400 }, { "epoch": 2.73, - "learning_rate": 1.4617692560123764e-05, - "loss": 0.1959, + "learning_rate": 2.4626090212659728e-05, + "loss": 0.1017, "step": 58405 }, { "epoch": 2.73, - "learning_rate": 1.4617223758848624e-05, - "loss": 0.279, + "learning_rate": 2.4625622142823708e-05, + "loss": 0.3119, "step": 58410 }, { "epoch": 2.73, - "learning_rate": 1.4616754957573487e-05, - "loss": 0.449, + "learning_rate": 2.462515407298769e-05, + "loss": 0.2509, "step": 58415 }, { "epoch": 2.73, - "learning_rate": 1.4616286156298347e-05, - "loss": 0.0205, + "learning_rate": 2.462468600315167e-05, + "loss": 0.0798, "step": 58420 }, { "epoch": 2.73, - "learning_rate": 1.4615817355023207e-05, - "loss": 0.0736, + "learning_rate": 2.462421793331565e-05, + "loss": 0.0665, "step": 58425 }, { "epoch": 2.73, - "learning_rate": 1.4615348553748067e-05, - "loss": 0.059, + "learning_rate": 2.4623749863479634e-05, + "loss": 0.031, "step": 58430 }, { "epoch": 2.73, - "learning_rate": 1.4614879752472929e-05, - "loss": 0.0961, + "learning_rate": 2.4623281793643613e-05, + "loss": 0.0664, "step": 58435 }, { "epoch": 2.73, - "learning_rate": 1.4614410951197789e-05, - "loss": 0.0493, + "learning_rate": 2.4622813723807593e-05, + "loss": 0.0758, "step": 58440 }, { "epoch": 2.73, - "learning_rate": 1.4613942149922649e-05, - "loss": 0.1004, + "learning_rate": 2.4622345653971573e-05, + "loss": 0.1216, "step": 58445 }, { "epoch": 2.73, - "learning_rate": 1.4613473348647509e-05, - "loss": 0.1301, + "learning_rate": 2.4621877584135556e-05, + "loss": 0.1148, "step": 58450 }, { "epoch": 2.73, - "learning_rate": 1.4613004547372369e-05, - "loss": 0.1581, + "learning_rate": 2.4621409514299533e-05, + "loss": 0.2464, "step": 58455 }, { "epoch": 2.73, - "learning_rate": 1.4612535746097232e-05, - "loss": 0.2438, + "learning_rate": 2.4620941444463513e-05, + "loss": 0.2974, "step": 58460 }, { "epoch": 2.73, - "learning_rate": 1.4612066944822092e-05, - "loss": 0.3437, + "learning_rate": 2.4620473374627492e-05, + "loss": 0.3838, "step": 58465 }, { "epoch": 2.73, - "learning_rate": 1.4611598143546952e-05, - "loss": 0.0362, + "learning_rate": 2.4620005304791476e-05, + "loss": 0.0361, "step": 58470 }, { "epoch": 2.73, - "learning_rate": 1.4611129342271813e-05, - "loss": 0.0567, + "learning_rate": 2.4619537234955455e-05, + "loss": 0.0284, "step": 58475 }, { "epoch": 2.73, - "learning_rate": 1.4610660540996673e-05, - "loss": 0.025, + "learning_rate": 2.4619069165119435e-05, + "loss": 0.0614, "step": 58480 }, { "epoch": 2.73, - "learning_rate": 1.4610191739721533e-05, - "loss": 0.0793, + "learning_rate": 2.461860109528342e-05, + "loss": 0.0618, "step": 58485 }, { "epoch": 2.73, - "learning_rate": 1.4609722938446393e-05, - "loss": 0.197, + "learning_rate": 2.4618133025447398e-05, + "loss": 0.0829, "step": 58490 }, { "epoch": 2.73, - "learning_rate": 1.4609254137171253e-05, - "loss": 0.0953, + "learning_rate": 2.4617664955611378e-05, + "loss": 0.0779, "step": 58495 }, { "epoch": 2.73, - "learning_rate": 1.4608785335896115e-05, - "loss": 0.1387, + "learning_rate": 2.4617196885775358e-05, + "loss": 0.111, "step": 58500 }, { "epoch": 2.73, - "learning_rate": 1.4608316534620975e-05, - "loss": 0.1528, + "learning_rate": 2.461672881593934e-05, + "loss": 0.0683, "step": 58505 }, { "epoch": 2.73, - "learning_rate": 1.4607847733345836e-05, - "loss": 0.2204, + "learning_rate": 2.461626074610332e-05, + "loss": 0.2439, "step": 58510 }, { "epoch": 2.73, - "learning_rate": 1.4607378932070698e-05, - "loss": 0.2327, + "learning_rate": 2.46157926762673e-05, + "loss": 0.2115, "step": 58515 }, { "epoch": 2.73, - "learning_rate": 1.4606910130795558e-05, - "loss": 0.0646, + "learning_rate": 2.4615324606431277e-05, + "loss": 0.0217, "step": 58520 }, { "epoch": 2.73, - "learning_rate": 1.4606441329520418e-05, - "loss": 0.0443, + "learning_rate": 2.461485653659526e-05, + "loss": 0.0406, "step": 58525 }, { "epoch": 2.73, - "learning_rate": 1.4605972528245278e-05, - "loss": 0.0464, + "learning_rate": 2.461438846675924e-05, + "loss": 0.0669, "step": 58530 }, { "epoch": 2.73, - "learning_rate": 1.4605503726970138e-05, - "loss": 0.0746, + "learning_rate": 2.461392039692322e-05, + "loss": 0.0582, "step": 58535 }, { "epoch": 2.73, - "learning_rate": 1.4605034925695e-05, - "loss": 0.0805, + "learning_rate": 2.4613452327087203e-05, + "loss": 0.1054, "step": 58540 }, { "epoch": 2.73, - "learning_rate": 1.460456612441986e-05, - "loss": 0.0536, + "learning_rate": 2.4612984257251183e-05, + "loss": 0.1727, "step": 58545 }, { "epoch": 2.73, - "learning_rate": 1.460409732314472e-05, - "loss": 0.2158, + "learning_rate": 2.4612516187415163e-05, + "loss": 0.1682, "step": 58550 }, { "epoch": 2.73, - "learning_rate": 1.4603628521869582e-05, - "loss": 0.1918, + "learning_rate": 2.4612048117579143e-05, + "loss": 0.1315, "step": 58555 }, { "epoch": 2.73, - "learning_rate": 1.4603159720594442e-05, - "loss": 0.1836, + "learning_rate": 2.4611580047743126e-05, + "loss": 0.2927, "step": 58560 }, { "epoch": 2.73, - "learning_rate": 1.4602690919319302e-05, - "loss": 0.3381, + "learning_rate": 2.4611111977907106e-05, + "loss": 0.371, "step": 58565 }, { "epoch": 2.73, - "learning_rate": 1.4602222118044162e-05, - "loss": 0.1033, + "learning_rate": 2.4610643908071085e-05, + "loss": 0.0317, "step": 58570 }, { "epoch": 2.73, - "learning_rate": 1.4601753316769022e-05, - "loss": 0.0329, + "learning_rate": 2.4610175838235065e-05, + "loss": 0.0541, "step": 58575 }, { "epoch": 2.73, - "learning_rate": 1.4601284515493884e-05, - "loss": 0.0704, + "learning_rate": 2.4609707768399045e-05, + "loss": 0.0844, "step": 58580 }, { "epoch": 2.73, - "learning_rate": 1.4600815714218744e-05, - "loss": 0.0311, + "learning_rate": 2.4609239698563025e-05, + "loss": 0.1184, "step": 58585 }, { "epoch": 2.73, - "learning_rate": 1.4600346912943604e-05, - "loss": 0.0826, + "learning_rate": 2.4608771628727005e-05, + "loss": 0.0605, "step": 58590 }, { "epoch": 2.73, - "learning_rate": 1.4599878111668464e-05, - "loss": 0.1267, + "learning_rate": 2.4608303558890988e-05, + "loss": 0.0964, "step": 58595 }, { "epoch": 2.73, - "learning_rate": 1.4599409310393327e-05, - "loss": 0.1818, + "learning_rate": 2.4607835489054968e-05, + "loss": 0.1134, "step": 58600 }, { "epoch": 2.73, - "learning_rate": 1.4598940509118187e-05, - "loss": 0.1993, + "learning_rate": 2.4607367419218948e-05, + "loss": 0.0912, "step": 58605 }, { "epoch": 2.73, - "learning_rate": 1.4598471707843047e-05, - "loss": 0.2257, + "learning_rate": 2.4606899349382927e-05, + "loss": 0.2443, "step": 58610 }, { "epoch": 2.74, - "learning_rate": 1.4598002906567907e-05, - "loss": 0.2809, + "learning_rate": 2.460643127954691e-05, + "loss": 0.3277, "step": 58615 }, { "epoch": 2.74, - "learning_rate": 1.4597534105292768e-05, - "loss": 0.0743, + "learning_rate": 2.460596320971089e-05, + "loss": 0.0408, "step": 58620 }, { "epoch": 2.74, - "learning_rate": 1.4597065304017628e-05, - "loss": 0.0514, + "learning_rate": 2.460549513987487e-05, + "loss": 0.0378, "step": 58625 }, { "epoch": 2.74, - "learning_rate": 1.4596596502742488e-05, - "loss": 0.087, + "learning_rate": 2.460502707003885e-05, + "loss": 0.0622, "step": 58630 }, { "epoch": 2.74, - "learning_rate": 1.4596127701467348e-05, - "loss": 0.0579, + "learning_rate": 2.4604559000202833e-05, + "loss": 0.0432, "step": 58635 }, { "epoch": 2.74, - "learning_rate": 1.4595658900192208e-05, - "loss": 0.1159, + "learning_rate": 2.4604090930366813e-05, + "loss": 0.0413, "step": 58640 }, { "epoch": 2.74, - "learning_rate": 1.459519009891707e-05, - "loss": 0.0654, + "learning_rate": 2.460362286053079e-05, + "loss": 0.1034, "step": 58645 }, { "epoch": 2.74, - "learning_rate": 1.4594721297641931e-05, - "loss": 0.0656, + "learning_rate": 2.460315479069477e-05, + "loss": 0.0883, "step": 58650 }, { "epoch": 2.74, - "learning_rate": 1.4594252496366791e-05, - "loss": 0.0926, + "learning_rate": 2.4602686720858753e-05, + "loss": 0.1961, "step": 58655 }, { "epoch": 2.74, - "learning_rate": 1.4593783695091653e-05, - "loss": 0.1706, + "learning_rate": 2.4602218651022732e-05, + "loss": 0.2413, "step": 58660 }, { "epoch": 2.74, - "learning_rate": 1.4593314893816513e-05, - "loss": 0.1598, + "learning_rate": 2.4601750581186712e-05, + "loss": 0.3507, "step": 58665 }, { "epoch": 2.74, - "learning_rate": 1.4592846092541373e-05, - "loss": 0.027, + "learning_rate": 2.4601282511350695e-05, + "loss": 0.0091, "step": 58670 }, { "epoch": 2.74, - "learning_rate": 1.4592377291266233e-05, - "loss": 0.0427, + "learning_rate": 2.4600814441514675e-05, + "loss": 0.0602, "step": 58675 }, { "epoch": 2.74, - "learning_rate": 1.4591908489991093e-05, - "loss": 0.0539, + "learning_rate": 2.4600346371678655e-05, + "loss": 0.0563, "step": 58680 }, { "epoch": 2.74, - "learning_rate": 1.4591439688715954e-05, - "loss": 0.084, + "learning_rate": 2.4599878301842635e-05, + "loss": 0.0797, "step": 58685 }, { "epoch": 2.74, - "learning_rate": 1.4590970887440814e-05, - "loss": 0.0856, + "learning_rate": 2.4599410232006618e-05, + "loss": 0.0701, "step": 58690 }, { "epoch": 2.74, - "learning_rate": 1.4590502086165676e-05, - "loss": 0.0986, + "learning_rate": 2.4598942162170598e-05, + "loss": 0.1964, "step": 58695 }, { "epoch": 2.74, - "learning_rate": 1.4590033284890538e-05, - "loss": 0.1526, + "learning_rate": 2.4598474092334578e-05, + "loss": 0.1213, "step": 58700 }, { "epoch": 2.74, - "learning_rate": 1.4589564483615397e-05, - "loss": 0.1722, + "learning_rate": 2.4598006022498557e-05, + "loss": 0.0986, "step": 58705 }, { "epoch": 2.74, - "learning_rate": 1.4589095682340257e-05, - "loss": 0.2371, + "learning_rate": 2.4597537952662537e-05, + "loss": 0.208, "step": 58710 }, { "epoch": 2.74, - "learning_rate": 1.4588626881065117e-05, - "loss": 0.2128, + "learning_rate": 2.4597069882826517e-05, + "loss": 0.3041, "step": 58715 }, { "epoch": 2.74, - "learning_rate": 1.4588158079789977e-05, - "loss": 0.0469, + "learning_rate": 2.4596601812990497e-05, + "loss": 0.0336, "step": 58720 }, { "epoch": 2.74, - "learning_rate": 1.4587689278514839e-05, - "loss": 0.0508, + "learning_rate": 2.459613374315448e-05, + "loss": 0.0485, "step": 58725 }, { "epoch": 2.74, - "learning_rate": 1.4587220477239699e-05, - "loss": 0.0676, + "learning_rate": 2.459566567331846e-05, + "loss": 0.0615, "step": 58730 }, { "epoch": 2.74, - "learning_rate": 1.4586751675964559e-05, - "loss": 0.0685, + "learning_rate": 2.459519760348244e-05, + "loss": 0.0847, "step": 58735 }, { "epoch": 2.74, - "learning_rate": 1.4586282874689422e-05, - "loss": 0.0737, + "learning_rate": 2.459472953364642e-05, + "loss": 0.0681, "step": 58740 }, { "epoch": 2.74, - "learning_rate": 1.4585814073414282e-05, - "loss": 0.062, + "learning_rate": 2.4594261463810403e-05, + "loss": 0.0926, "step": 58745 }, { "epoch": 2.74, - "learning_rate": 1.4585345272139142e-05, - "loss": 0.0714, + "learning_rate": 2.4593793393974383e-05, + "loss": 0.1255, "step": 58750 }, { "epoch": 2.74, - "learning_rate": 1.4584876470864002e-05, - "loss": 0.1646, + "learning_rate": 2.4593325324138362e-05, + "loss": 0.1154, "step": 58755 }, { "epoch": 2.74, - "learning_rate": 1.4584407669588862e-05, - "loss": 0.2577, + "learning_rate": 2.4592857254302342e-05, + "loss": 0.322, "step": 58760 }, { "epoch": 2.74, - "learning_rate": 1.4583938868313723e-05, - "loss": 0.2857, + "learning_rate": 2.4592389184466325e-05, + "loss": 0.2027, "step": 58765 }, { "epoch": 2.74, - "learning_rate": 1.4583470067038583e-05, - "loss": 0.0321, + "learning_rate": 2.4591921114630302e-05, + "loss": 0.0245, "step": 58770 }, { "epoch": 2.74, - "learning_rate": 1.4583001265763443e-05, - "loss": 0.0556, + "learning_rate": 2.4591453044794282e-05, + "loss": 0.0577, "step": 58775 }, { "epoch": 2.74, - "learning_rate": 1.4582532464488303e-05, - "loss": 0.1188, + "learning_rate": 2.4590984974958265e-05, + "loss": 0.0626, "step": 58780 }, { "epoch": 2.74, - "learning_rate": 1.4582063663213165e-05, - "loss": 0.0495, + "learning_rate": 2.4590516905122245e-05, + "loss": 0.1288, "step": 58785 }, { "epoch": 2.74, - "learning_rate": 1.4581594861938026e-05, - "loss": 0.1317, + "learning_rate": 2.4590048835286225e-05, + "loss": 0.1014, "step": 58790 }, { "epoch": 2.74, - "learning_rate": 1.4581126060662886e-05, - "loss": 0.1131, + "learning_rate": 2.4589580765450204e-05, + "loss": 0.186, "step": 58795 }, { "epoch": 2.74, - "learning_rate": 1.4580657259387746e-05, - "loss": 0.1266, + "learning_rate": 2.4589112695614188e-05, + "loss": 0.1054, "step": 58800 }, { "epoch": 2.74, - "learning_rate": 1.4580188458112608e-05, - "loss": 0.1261, + "learning_rate": 2.4588644625778167e-05, + "loss": 0.0748, "step": 58805 }, { "epoch": 2.74, - "learning_rate": 1.4579719656837468e-05, - "loss": 0.17, + "learning_rate": 2.4588176555942147e-05, + "loss": 0.258, "step": 58810 }, { "epoch": 2.74, - "learning_rate": 1.4579250855562328e-05, - "loss": 0.2425, + "learning_rate": 2.4587708486106127e-05, + "loss": 0.231, "step": 58815 }, { "epoch": 2.74, - "learning_rate": 1.4578782054287188e-05, - "loss": 0.0795, + "learning_rate": 2.458724041627011e-05, + "loss": 0.0669, "step": 58820 }, { "epoch": 2.74, - "learning_rate": 1.457831325301205e-05, - "loss": 0.0429, + "learning_rate": 2.458677234643409e-05, + "loss": 0.0449, "step": 58825 }, { "epoch": 2.75, - "learning_rate": 1.457784445173691e-05, - "loss": 0.0437, + "learning_rate": 2.458630427659807e-05, + "loss": 0.0623, "step": 58830 }, { "epoch": 2.75, - "learning_rate": 1.4577375650461771e-05, - "loss": 0.0756, + "learning_rate": 2.4585836206762046e-05, + "loss": 0.0469, "step": 58835 }, { "epoch": 2.75, - "learning_rate": 1.4576906849186631e-05, - "loss": 0.0911, + "learning_rate": 2.458536813692603e-05, + "loss": 0.104, "step": 58840 }, { "epoch": 2.75, - "learning_rate": 1.4576438047911493e-05, - "loss": 0.1158, + "learning_rate": 2.458490006709001e-05, + "loss": 0.0931, "step": 58845 }, { "epoch": 2.75, - "learning_rate": 1.4575969246636352e-05, - "loss": 0.1984, + "learning_rate": 2.458443199725399e-05, + "loss": 0.1118, "step": 58850 }, { "epoch": 2.75, - "learning_rate": 1.4575500445361212e-05, - "loss": 0.1924, + "learning_rate": 2.4583963927417972e-05, + "loss": 0.2072, "step": 58855 }, { "epoch": 2.75, - "learning_rate": 1.4575031644086072e-05, - "loss": 0.2444, + "learning_rate": 2.4583495857581952e-05, + "loss": 0.1836, "step": 58860 }, { "epoch": 2.75, - "learning_rate": 1.4574562842810934e-05, - "loss": 0.2247, + "learning_rate": 2.4583027787745932e-05, + "loss": 0.3149, "step": 58865 }, { "epoch": 2.75, - "learning_rate": 1.4574094041535794e-05, - "loss": 0.0268, + "learning_rate": 2.4582559717909912e-05, + "loss": 0.0274, "step": 58870 }, { "epoch": 2.75, - "learning_rate": 1.4573625240260654e-05, - "loss": 0.033, + "learning_rate": 2.4582091648073895e-05, + "loss": 0.0136, "step": 58875 }, { "epoch": 2.75, - "learning_rate": 1.4573156438985515e-05, - "loss": 0.0568, + "learning_rate": 2.4581623578237875e-05, + "loss": 0.1211, "step": 58880 }, { "epoch": 2.75, - "learning_rate": 1.4572687637710377e-05, - "loss": 0.0308, + "learning_rate": 2.4581155508401855e-05, + "loss": 0.0906, "step": 58885 }, { "epoch": 2.75, - "learning_rate": 1.4572218836435237e-05, - "loss": 0.065, + "learning_rate": 2.4580687438565834e-05, + "loss": 0.0337, "step": 58890 }, { "epoch": 2.75, - "learning_rate": 1.4571750035160097e-05, - "loss": 0.1286, + "learning_rate": 2.4580219368729818e-05, + "loss": 0.0511, "step": 58895 }, { "epoch": 2.75, - "learning_rate": 1.4571281233884957e-05, - "loss": 0.1551, + "learning_rate": 2.4579751298893794e-05, + "loss": 0.1626, "step": 58900 }, { "epoch": 2.75, - "learning_rate": 1.4570812432609819e-05, - "loss": 0.3151, + "learning_rate": 2.4579283229057774e-05, + "loss": 0.0778, "step": 58905 }, { "epoch": 2.75, - "learning_rate": 1.4570343631334678e-05, - "loss": 0.2196, + "learning_rate": 2.4578815159221757e-05, + "loss": 0.1925, "step": 58910 }, { "epoch": 2.75, - "learning_rate": 1.4569874830059538e-05, - "loss": 0.3267, + "learning_rate": 2.4578347089385737e-05, + "loss": 0.3855, "step": 58915 }, { "epoch": 2.75, - "learning_rate": 1.4569406028784398e-05, - "loss": 0.0439, + "learning_rate": 2.4577879019549717e-05, + "loss": 0.0631, "step": 58920 }, { "epoch": 2.75, - "learning_rate": 1.4568937227509262e-05, - "loss": 0.0192, + "learning_rate": 2.4577410949713697e-05, + "loss": 0.0465, "step": 58925 }, { "epoch": 2.75, - "learning_rate": 1.4568468426234122e-05, - "loss": 0.058, + "learning_rate": 2.457694287987768e-05, + "loss": 0.0557, "step": 58930 }, { "epoch": 2.75, - "learning_rate": 1.4567999624958981e-05, - "loss": 0.096, + "learning_rate": 2.457647481004166e-05, + "loss": 0.0435, "step": 58935 }, { "epoch": 2.75, - "learning_rate": 1.4567530823683841e-05, - "loss": 0.1104, + "learning_rate": 2.457600674020564e-05, + "loss": 0.103, "step": 58940 }, { "epoch": 2.75, - "learning_rate": 1.4567062022408703e-05, - "loss": 0.1205, + "learning_rate": 2.457553867036962e-05, + "loss": 0.0684, "step": 58945 }, { "epoch": 2.75, - "learning_rate": 1.4566593221133563e-05, - "loss": 0.0622, + "learning_rate": 2.4575070600533602e-05, + "loss": 0.1101, "step": 58950 }, { "epoch": 2.75, - "learning_rate": 1.4566124419858423e-05, - "loss": 0.2004, + "learning_rate": 2.4574602530697582e-05, + "loss": 0.1637, "step": 58955 }, { "epoch": 2.75, - "learning_rate": 1.4565655618583283e-05, - "loss": 0.2747, + "learning_rate": 2.457413446086156e-05, + "loss": 0.1489, "step": 58960 }, { "epoch": 2.75, - "learning_rate": 1.4565186817308143e-05, - "loss": 0.2782, + "learning_rate": 2.4573666391025542e-05, + "loss": 0.2171, "step": 58965 }, { "epoch": 2.75, - "learning_rate": 1.4564718016033004e-05, - "loss": 0.0895, + "learning_rate": 2.4573198321189522e-05, + "loss": 0.0703, "step": 58970 }, { "epoch": 2.75, - "learning_rate": 1.4564249214757866e-05, - "loss": 0.1072, + "learning_rate": 2.45727302513535e-05, + "loss": 0.0522, "step": 58975 }, { "epoch": 2.75, - "learning_rate": 1.4563780413482726e-05, - "loss": 0.0672, + "learning_rate": 2.457226218151748e-05, + "loss": 0.0429, "step": 58980 }, { "epoch": 2.75, - "learning_rate": 1.4563311612207588e-05, - "loss": 0.0578, + "learning_rate": 2.4571794111681465e-05, + "loss": 0.1265, "step": 58985 }, { "epoch": 2.75, - "learning_rate": 1.4562842810932448e-05, - "loss": 0.0943, + "learning_rate": 2.4571326041845444e-05, + "loss": 0.1143, "step": 58990 }, { "epoch": 2.75, - "learning_rate": 1.4562374009657307e-05, - "loss": 0.1205, + "learning_rate": 2.4570857972009424e-05, + "loss": 0.0375, "step": 58995 }, { "epoch": 2.75, - "learning_rate": 1.4561905208382167e-05, - "loss": 0.2066, + "learning_rate": 2.4570389902173404e-05, + "loss": 0.0493, "step": 59000 }, { "epoch": 2.75, - "learning_rate": 1.4561436407107027e-05, - "loss": 0.2212, + "learning_rate": 2.4569921832337387e-05, + "loss": 0.1706, "step": 59005 }, { "epoch": 2.75, - "learning_rate": 1.4560967605831889e-05, - "loss": 0.3231, + "learning_rate": 2.4569453762501367e-05, + "loss": 0.2414, "step": 59010 }, { "epoch": 2.75, - "learning_rate": 1.4560498804556749e-05, - "loss": 0.18, + "learning_rate": 2.4568985692665347e-05, + "loss": 0.2894, "step": 59015 }, { "epoch": 2.75, - "learning_rate": 1.456003000328161e-05, - "loss": 0.025, + "learning_rate": 2.4568517622829327e-05, + "loss": 0.0499, "step": 59020 }, { "epoch": 2.75, - "learning_rate": 1.4559561202006472e-05, - "loss": 0.0556, + "learning_rate": 2.4568049552993306e-05, + "loss": 0.0648, "step": 59025 }, { "epoch": 2.75, - "learning_rate": 1.4559092400731332e-05, - "loss": 0.0676, + "learning_rate": 2.4567581483157286e-05, + "loss": 0.0364, "step": 59030 }, { "epoch": 2.75, - "learning_rate": 1.4558623599456192e-05, - "loss": 0.0609, + "learning_rate": 2.4567113413321266e-05, + "loss": 0.078, "step": 59035 }, { "epoch": 2.75, - "learning_rate": 1.4558154798181052e-05, - "loss": 0.0417, + "learning_rate": 2.456664534348525e-05, + "loss": 0.0685, "step": 59040 }, { "epoch": 2.76, - "learning_rate": 1.4557685996905912e-05, - "loss": 0.0902, + "learning_rate": 2.456617727364923e-05, + "loss": 0.0632, "step": 59045 }, { "epoch": 2.76, - "learning_rate": 1.4557217195630774e-05, - "loss": 0.0511, + "learning_rate": 2.456570920381321e-05, + "loss": 0.0546, "step": 59050 }, { "epoch": 2.76, - "learning_rate": 1.4556748394355633e-05, - "loss": 0.1862, + "learning_rate": 2.456524113397719e-05, + "loss": 0.2355, "step": 59055 }, { "epoch": 2.76, - "learning_rate": 1.4556279593080493e-05, - "loss": 0.2465, + "learning_rate": 2.4564773064141172e-05, + "loss": 0.2846, "step": 59060 }, { "epoch": 2.76, - "learning_rate": 1.4555810791805357e-05, - "loss": 0.4655, + "learning_rate": 2.4564304994305152e-05, + "loss": 0.2363, "step": 59065 }, { "epoch": 2.76, - "learning_rate": 1.4555341990530217e-05, - "loss": 0.0271, + "learning_rate": 2.456383692446913e-05, + "loss": 0.055, "step": 59070 }, { "epoch": 2.76, - "learning_rate": 1.4554873189255077e-05, - "loss": 0.0468, + "learning_rate": 2.456336885463311e-05, + "loss": 0.0605, "step": 59075 }, { "epoch": 2.76, - "learning_rate": 1.4554404387979937e-05, - "loss": 0.0496, + "learning_rate": 2.4562900784797095e-05, + "loss": 0.0555, "step": 59080 }, { "epoch": 2.76, - "learning_rate": 1.4553935586704796e-05, - "loss": 0.0626, + "learning_rate": 2.4562432714961074e-05, + "loss": 0.0546, "step": 59085 }, { "epoch": 2.76, - "learning_rate": 1.4553466785429658e-05, - "loss": 0.0464, + "learning_rate": 2.456196464512505e-05, + "loss": 0.1494, "step": 59090 }, { "epoch": 2.76, - "learning_rate": 1.4552997984154518e-05, - "loss": 0.1007, + "learning_rate": 2.4561496575289034e-05, + "loss": 0.1066, "step": 59095 }, { "epoch": 2.76, - "learning_rate": 1.4552529182879378e-05, - "loss": 0.0917, + "learning_rate": 2.4561028505453014e-05, + "loss": 0.0528, "step": 59100 }, { "epoch": 2.76, - "learning_rate": 1.4552060381604238e-05, - "loss": 0.1745, + "learning_rate": 2.4560560435616994e-05, + "loss": 0.1629, "step": 59105 }, { "epoch": 2.76, - "learning_rate": 1.4551591580329098e-05, - "loss": 0.3145, + "learning_rate": 2.4560092365780974e-05, + "loss": 0.3085, "step": 59110 }, { "epoch": 2.76, - "learning_rate": 1.4551122779053961e-05, - "loss": 0.2732, + "learning_rate": 2.4559624295944957e-05, + "loss": 0.3952, "step": 59115 }, { "epoch": 2.76, - "learning_rate": 1.4550653977778821e-05, - "loss": 0.0603, + "learning_rate": 2.4559156226108937e-05, + "loss": 0.0439, "step": 59120 }, { "epoch": 2.76, - "learning_rate": 1.4550185176503681e-05, - "loss": 0.0799, + "learning_rate": 2.4558688156272916e-05, + "loss": 0.0105, "step": 59125 }, { "epoch": 2.76, - "learning_rate": 1.4549716375228543e-05, - "loss": 0.0561, + "learning_rate": 2.4558220086436896e-05, + "loss": 0.0264, "step": 59130 }, { "epoch": 2.76, - "learning_rate": 1.4549247573953403e-05, - "loss": 0.1065, + "learning_rate": 2.455775201660088e-05, + "loss": 0.0578, "step": 59135 }, { "epoch": 2.76, - "learning_rate": 1.4548778772678262e-05, - "loss": 0.0601, + "learning_rate": 2.455728394676486e-05, + "loss": 0.0735, "step": 59140 }, { "epoch": 2.76, - "learning_rate": 1.4548309971403122e-05, - "loss": 0.0949, + "learning_rate": 2.455681587692884e-05, + "loss": 0.0657, "step": 59145 }, { "epoch": 2.76, - "learning_rate": 1.4547841170127982e-05, - "loss": 0.1082, + "learning_rate": 2.455634780709282e-05, + "loss": 0.1978, "step": 59150 }, { "epoch": 2.76, - "learning_rate": 1.4547372368852844e-05, - "loss": 0.1364, + "learning_rate": 2.45558797372568e-05, + "loss": 0.0991, "step": 59155 }, { "epoch": 2.76, - "learning_rate": 1.4546903567577706e-05, - "loss": 0.1918, + "learning_rate": 2.455541166742078e-05, + "loss": 0.2722, "step": 59160 }, { "epoch": 2.76, - "learning_rate": 1.4546434766302566e-05, - "loss": 0.1907, + "learning_rate": 2.455494359758476e-05, + "loss": 0.2074, "step": 59165 }, { "epoch": 2.76, - "learning_rate": 1.4545965965027427e-05, - "loss": 0.0355, + "learning_rate": 2.455447552774874e-05, + "loss": 0.0808, "step": 59170 }, { "epoch": 2.76, - "learning_rate": 1.4545497163752287e-05, - "loss": 0.0816, + "learning_rate": 2.455400745791272e-05, + "loss": 0.0249, "step": 59175 }, { "epoch": 2.76, - "learning_rate": 1.4545028362477147e-05, - "loss": 0.0342, + "learning_rate": 2.45535393880767e-05, + "loss": 0.0193, "step": 59180 }, { "epoch": 2.76, - "learning_rate": 1.4544559561202007e-05, - "loss": 0.04, + "learning_rate": 2.455307131824068e-05, + "loss": 0.1137, "step": 59185 }, { "epoch": 2.76, - "learning_rate": 1.4544090759926867e-05, - "loss": 0.0471, + "learning_rate": 2.4552603248404664e-05, + "loss": 0.062, "step": 59190 }, { "epoch": 2.76, - "learning_rate": 1.4543621958651729e-05, - "loss": 0.1639, + "learning_rate": 2.4552135178568644e-05, + "loss": 0.0972, "step": 59195 }, { "epoch": 2.76, - "learning_rate": 1.4543153157376588e-05, - "loss": 0.0879, + "learning_rate": 2.4551667108732624e-05, + "loss": 0.1854, "step": 59200 }, { "epoch": 2.76, - "learning_rate": 1.454268435610145e-05, - "loss": 0.1821, + "learning_rate": 2.4551199038896604e-05, + "loss": 0.1432, "step": 59205 }, { "epoch": 2.76, - "learning_rate": 1.4542215554826312e-05, - "loss": 0.188, + "learning_rate": 2.4550730969060587e-05, + "loss": 0.2352, "step": 59210 }, { "epoch": 2.76, - "learning_rate": 1.4541746753551172e-05, - "loss": 0.2354, + "learning_rate": 2.4550262899224563e-05, + "loss": 0.2308, "step": 59215 }, { "epoch": 2.76, - "learning_rate": 1.4541277952276032e-05, - "loss": 0.0471, + "learning_rate": 2.4549794829388543e-05, + "loss": 0.0405, "step": 59220 }, { "epoch": 2.76, - "learning_rate": 1.4540809151000892e-05, - "loss": 0.0739, + "learning_rate": 2.4549326759552526e-05, + "loss": 0.059, "step": 59225 }, { "epoch": 2.76, - "learning_rate": 1.4540340349725751e-05, - "loss": 0.0486, + "learning_rate": 2.4548858689716506e-05, + "loss": 0.0529, "step": 59230 }, { "epoch": 2.76, - "learning_rate": 1.4539871548450613e-05, - "loss": 0.0612, + "learning_rate": 2.4548390619880486e-05, + "loss": 0.0721, "step": 59235 }, { "epoch": 2.76, - "learning_rate": 1.4539402747175473e-05, - "loss": 0.0693, + "learning_rate": 2.4547922550044466e-05, + "loss": 0.1207, "step": 59240 }, { "epoch": 2.76, - "learning_rate": 1.4538933945900333e-05, - "loss": 0.0583, + "learning_rate": 2.454745448020845e-05, + "loss": 0.0641, "step": 59245 }, { "epoch": 2.76, - "learning_rate": 1.4538465144625196e-05, - "loss": 0.1747, + "learning_rate": 2.454698641037243e-05, + "loss": 0.0872, "step": 59250 }, { "epoch": 2.76, - "learning_rate": 1.4537996343350056e-05, - "loss": 0.2391, + "learning_rate": 2.454651834053641e-05, + "loss": 0.1695, "step": 59255 }, { "epoch": 2.77, - "learning_rate": 1.4537527542074916e-05, - "loss": 0.2129, + "learning_rate": 2.454605027070039e-05, + "loss": 0.2749, "step": 59260 }, { "epoch": 2.77, - "learning_rate": 1.4537058740799776e-05, - "loss": 0.3122, + "learning_rate": 2.454558220086437e-05, + "loss": 0.3331, "step": 59265 }, { "epoch": 2.77, - "learning_rate": 1.4536589939524636e-05, - "loss": 0.039, + "learning_rate": 2.454511413102835e-05, + "loss": 0.0622, "step": 59270 }, { "epoch": 2.77, - "learning_rate": 1.4536121138249498e-05, - "loss": 0.0314, + "learning_rate": 2.454464606119233e-05, + "loss": 0.0229, "step": 59275 }, { "epoch": 2.77, - "learning_rate": 1.4535652336974358e-05, - "loss": 0.0421, + "learning_rate": 2.454417799135631e-05, + "loss": 0.0383, "step": 59280 }, { "epoch": 2.77, - "learning_rate": 1.4535183535699218e-05, - "loss": 0.0855, + "learning_rate": 2.454370992152029e-05, + "loss": 0.0859, "step": 59285 }, { "epoch": 2.77, - "learning_rate": 1.4534714734424077e-05, - "loss": 0.0655, + "learning_rate": 2.454324185168427e-05, + "loss": 0.0216, "step": 59290 }, { "epoch": 2.77, - "learning_rate": 1.4534245933148939e-05, - "loss": 0.0452, + "learning_rate": 2.454277378184825e-05, + "loss": 0.077, "step": 59295 }, { "epoch": 2.77, - "learning_rate": 1.45337771318738e-05, - "loss": 0.1428, + "learning_rate": 2.4542305712012234e-05, + "loss": 0.1276, "step": 59300 }, { "epoch": 2.77, - "learning_rate": 1.453330833059866e-05, - "loss": 0.1613, + "learning_rate": 2.4541837642176214e-05, + "loss": 0.1631, "step": 59305 }, { "epoch": 2.77, - "learning_rate": 1.453283952932352e-05, - "loss": 0.2401, + "learning_rate": 2.4541369572340193e-05, + "loss": 0.2293, "step": 59310 }, { "epoch": 2.77, - "learning_rate": 1.4532370728048382e-05, - "loss": 0.26, + "learning_rate": 2.4540901502504173e-05, + "loss": 0.2981, "step": 59315 }, { "epoch": 2.77, - "learning_rate": 1.4531901926773242e-05, - "loss": 0.0539, + "learning_rate": 2.4540433432668156e-05, + "loss": 0.0204, "step": 59320 }, { "epoch": 2.77, - "learning_rate": 1.4531433125498102e-05, - "loss": 0.0868, + "learning_rate": 2.4539965362832136e-05, + "loss": 0.0446, "step": 59325 }, { "epoch": 2.77, - "learning_rate": 1.4530964324222962e-05, - "loss": 0.052, + "learning_rate": 2.4539497292996116e-05, + "loss": 0.1101, "step": 59330 }, { "epoch": 2.77, - "learning_rate": 1.4530495522947824e-05, - "loss": 0.1254, + "learning_rate": 2.45390292231601e-05, + "loss": 0.0811, "step": 59335 }, { "epoch": 2.77, - "learning_rate": 1.4530026721672684e-05, - "loss": 0.0738, + "learning_rate": 2.4538561153324076e-05, + "loss": 0.0517, "step": 59340 }, { "epoch": 2.77, - "learning_rate": 1.4529557920397545e-05, - "loss": 0.0963, + "learning_rate": 2.4538093083488055e-05, + "loss": 0.0766, "step": 59345 }, { "epoch": 2.77, - "learning_rate": 1.4529089119122405e-05, - "loss": 0.1563, + "learning_rate": 2.4537625013652035e-05, + "loss": 0.1947, "step": 59350 }, { "epoch": 2.77, - "learning_rate": 1.4528620317847267e-05, - "loss": 0.1559, + "learning_rate": 2.453715694381602e-05, + "loss": 0.1056, "step": 59355 }, { "epoch": 2.77, - "learning_rate": 1.4528151516572127e-05, - "loss": 0.2358, + "learning_rate": 2.453668887398e-05, + "loss": 0.23, "step": 59360 }, { "epoch": 2.77, - "learning_rate": 1.4527682715296987e-05, - "loss": 0.2168, + "learning_rate": 2.4536220804143978e-05, + "loss": 0.3537, "step": 59365 }, { "epoch": 2.77, - "learning_rate": 1.4527213914021847e-05, - "loss": 0.0453, + "learning_rate": 2.4535752734307958e-05, + "loss": 0.0398, "step": 59370 }, { "epoch": 2.77, - "learning_rate": 1.4526745112746708e-05, - "loss": 0.0242, + "learning_rate": 2.453528466447194e-05, + "loss": 0.0274, "step": 59375 }, { "epoch": 2.77, - "learning_rate": 1.4526276311471568e-05, - "loss": 0.0305, + "learning_rate": 2.453481659463592e-05, + "loss": 0.0728, "step": 59380 }, { "epoch": 2.77, - "learning_rate": 1.4525807510196428e-05, - "loss": 0.0608, + "learning_rate": 2.45343485247999e-05, + "loss": 0.0965, "step": 59385 }, { "epoch": 2.77, - "learning_rate": 1.452533870892129e-05, - "loss": 0.095, + "learning_rate": 2.4533880454963884e-05, + "loss": 0.0684, "step": 59390 }, { "epoch": 2.77, - "learning_rate": 1.4524869907646151e-05, - "loss": 0.1336, + "learning_rate": 2.4533412385127864e-05, + "loss": 0.0728, "step": 59395 }, { "epoch": 2.77, - "learning_rate": 1.4524401106371011e-05, - "loss": 0.0679, + "learning_rate": 2.4532944315291844e-05, + "loss": 0.1069, "step": 59400 }, { "epoch": 2.77, - "learning_rate": 1.4523932305095871e-05, - "loss": 0.1726, + "learning_rate": 2.453247624545582e-05, + "loss": 0.2129, "step": 59405 }, { "epoch": 2.77, - "learning_rate": 1.4523463503820731e-05, - "loss": 0.1142, + "learning_rate": 2.4532008175619803e-05, + "loss": 0.2177, "step": 59410 }, { "epoch": 2.77, - "learning_rate": 1.4522994702545593e-05, - "loss": 0.2268, + "learning_rate": 2.4531540105783783e-05, + "loss": 0.2914, "step": 59415 }, { "epoch": 2.77, - "learning_rate": 1.4522525901270453e-05, - "loss": 0.0513, + "learning_rate": 2.4531072035947763e-05, + "loss": 0.1037, "step": 59420 }, { "epoch": 2.77, - "learning_rate": 1.4522057099995313e-05, - "loss": 0.0354, + "learning_rate": 2.4530603966111743e-05, + "loss": 0.0971, "step": 59425 }, { "epoch": 2.77, - "learning_rate": 1.4521588298720173e-05, - "loss": 0.1059, + "learning_rate": 2.4530135896275726e-05, + "loss": 0.0731, "step": 59430 }, { "epoch": 2.77, - "learning_rate": 1.4521119497445032e-05, - "loss": 0.1179, + "learning_rate": 2.4529667826439706e-05, + "loss": 0.081, "step": 59435 }, { "epoch": 2.77, - "learning_rate": 1.4520650696169896e-05, - "loss": 0.0819, + "learning_rate": 2.4529199756603686e-05, + "loss": 0.1812, "step": 59440 }, { "epoch": 2.77, - "learning_rate": 1.4520181894894756e-05, - "loss": 0.1253, + "learning_rate": 2.4528731686767665e-05, + "loss": 0.0985, "step": 59445 }, { "epoch": 2.77, - "learning_rate": 1.4519713093619616e-05, - "loss": 0.1265, + "learning_rate": 2.452826361693165e-05, + "loss": 0.1389, "step": 59450 }, { "epoch": 2.77, - "learning_rate": 1.4519244292344477e-05, - "loss": 0.1525, + "learning_rate": 2.452779554709563e-05, + "loss": 0.2728, "step": 59455 }, { "epoch": 2.77, - "learning_rate": 1.4518775491069337e-05, - "loss": 0.1325, + "learning_rate": 2.4527327477259608e-05, + "loss": 0.1614, "step": 59460 }, { "epoch": 2.77, - "learning_rate": 1.4518306689794197e-05, - "loss": 0.2705, + "learning_rate": 2.4526859407423588e-05, + "loss": 0.3465, "step": 59465 }, { "epoch": 2.77, - "learning_rate": 1.4517837888519057e-05, - "loss": 0.0597, + "learning_rate": 2.4526391337587568e-05, + "loss": 0.0404, "step": 59470 }, { "epoch": 2.78, - "learning_rate": 1.4517369087243917e-05, - "loss": 0.0441, + "learning_rate": 2.4525923267751548e-05, + "loss": 0.0417, "step": 59475 }, { "epoch": 2.78, - "learning_rate": 1.4516900285968779e-05, - "loss": 0.0538, + "learning_rate": 2.4525455197915527e-05, + "loss": 0.1063, "step": 59480 }, { "epoch": 2.78, - "learning_rate": 1.451643148469364e-05, - "loss": 0.0849, + "learning_rate": 2.452498712807951e-05, + "loss": 0.0506, "step": 59485 }, { "epoch": 2.78, - "learning_rate": 1.45159626834185e-05, - "loss": 0.0892, + "learning_rate": 2.452451905824349e-05, + "loss": 0.0576, "step": 59490 }, { "epoch": 2.78, - "learning_rate": 1.4515493882143362e-05, - "loss": 0.0865, + "learning_rate": 2.452405098840747e-05, + "loss": 0.1029, "step": 59495 }, { "epoch": 2.78, - "learning_rate": 1.4515025080868222e-05, - "loss": 0.0963, + "learning_rate": 2.452358291857145e-05, + "loss": 0.1083, "step": 59500 }, { "epoch": 2.78, - "learning_rate": 1.4514556279593082e-05, - "loss": 0.1407, + "learning_rate": 2.4523114848735433e-05, + "loss": 0.1709, "step": 59505 }, { "epoch": 2.78, - "learning_rate": 1.4514087478317942e-05, - "loss": 0.3032, + "learning_rate": 2.4522646778899413e-05, + "loss": 0.3075, "step": 59510 }, { "epoch": 2.78, - "learning_rate": 1.4513618677042802e-05, - "loss": 0.3026, + "learning_rate": 2.4522178709063393e-05, + "loss": 0.2071, "step": 59515 }, { "epoch": 2.78, - "learning_rate": 1.4513149875767663e-05, - "loss": 0.0481, + "learning_rate": 2.4521710639227376e-05, + "loss": 0.0512, "step": 59520 }, { "epoch": 2.78, - "learning_rate": 1.4512681074492523e-05, - "loss": 0.0437, + "learning_rate": 2.4521242569391356e-05, + "loss": 0.0446, "step": 59525 }, { "epoch": 2.78, - "learning_rate": 1.4512212273217385e-05, - "loss": 0.0677, + "learning_rate": 2.4520774499555332e-05, + "loss": 0.0948, "step": 59530 }, { "epoch": 2.78, - "learning_rate": 1.4511743471942246e-05, - "loss": 0.0582, + "learning_rate": 2.4520306429719312e-05, + "loss": 0.0577, "step": 59535 }, { "epoch": 2.78, - "learning_rate": 1.4511274670667106e-05, - "loss": 0.0841, + "learning_rate": 2.4519838359883295e-05, + "loss": 0.0826, "step": 59540 }, { "epoch": 2.78, - "learning_rate": 1.4510805869391966e-05, - "loss": 0.0991, + "learning_rate": 2.4519370290047275e-05, + "loss": 0.0682, "step": 59545 }, { "epoch": 2.78, - "learning_rate": 1.4510337068116826e-05, - "loss": 0.0962, + "learning_rate": 2.4518902220211255e-05, + "loss": 0.1066, "step": 59550 }, { "epoch": 2.78, - "learning_rate": 1.4509868266841686e-05, - "loss": 0.1709, + "learning_rate": 2.4518434150375235e-05, + "loss": 0.1609, "step": 59555 }, { "epoch": 2.78, - "learning_rate": 1.4509399465566548e-05, - "loss": 0.2061, + "learning_rate": 2.4517966080539218e-05, + "loss": 0.1646, "step": 59560 }, { "epoch": 2.78, - "learning_rate": 1.4508930664291408e-05, - "loss": 0.2598, + "learning_rate": 2.4517498010703198e-05, + "loss": 0.2672, "step": 59565 }, { "epoch": 2.78, - "learning_rate": 1.4508461863016268e-05, - "loss": 0.0427, + "learning_rate": 2.4517029940867178e-05, + "loss": 0.1135, "step": 59570 }, { "epoch": 2.78, - "learning_rate": 1.4507993061741131e-05, - "loss": 0.0679, + "learning_rate": 2.451656187103116e-05, + "loss": 0.071, "step": 59575 }, { "epoch": 2.78, - "learning_rate": 1.4507524260465991e-05, - "loss": 0.0677, + "learning_rate": 2.451609380119514e-05, + "loss": 0.022, "step": 59580 }, { "epoch": 2.78, - "learning_rate": 1.450705545919085e-05, + "learning_rate": 2.451562573135912e-05, "loss": 0.0553, "step": 59585 }, { "epoch": 2.78, - "learning_rate": 1.450658665791571e-05, - "loss": 0.0714, + "learning_rate": 2.45151576615231e-05, + "loss": 0.1212, "step": 59590 }, { "epoch": 2.78, - "learning_rate": 1.450611785664057e-05, - "loss": 0.0563, + "learning_rate": 2.451468959168708e-05, + "loss": 0.0703, "step": 59595 }, { "epoch": 2.78, - "learning_rate": 1.4505649055365432e-05, - "loss": 0.0597, + "learning_rate": 2.451422152185106e-05, + "loss": 0.0869, "step": 59600 }, { "epoch": 2.78, - "learning_rate": 1.4505180254090292e-05, - "loss": 0.1147, + "learning_rate": 2.451375345201504e-05, + "loss": 0.1151, "step": 59605 }, { "epoch": 2.78, - "learning_rate": 1.4504711452815152e-05, - "loss": 0.1732, + "learning_rate": 2.451328538217902e-05, + "loss": 0.2272, "step": 59610 }, { "epoch": 2.78, - "learning_rate": 1.4504242651540012e-05, - "loss": 0.2322, + "learning_rate": 2.4512817312343003e-05, + "loss": 0.3476, "step": 59615 }, { "epoch": 2.78, - "learning_rate": 1.4503773850264872e-05, - "loss": 0.0126, + "learning_rate": 2.4512349242506983e-05, + "loss": 0.0297, "step": 59620 }, { "epoch": 2.78, - "learning_rate": 1.4503305048989735e-05, - "loss": 0.0667, + "learning_rate": 2.4511881172670963e-05, + "loss": 0.0401, "step": 59625 }, { "epoch": 2.78, - "learning_rate": 1.4502836247714595e-05, - "loss": 0.0183, + "learning_rate": 2.4511413102834942e-05, + "loss": 0.0499, "step": 59630 }, { "epoch": 2.78, - "learning_rate": 1.4502367446439455e-05, - "loss": 0.1245, + "learning_rate": 2.4510945032998926e-05, + "loss": 0.0771, "step": 59635 }, { "epoch": 2.78, - "learning_rate": 1.4501898645164317e-05, - "loss": 0.211, + "learning_rate": 2.4510476963162905e-05, + "loss": 0.0387, "step": 59640 }, { "epoch": 2.78, - "learning_rate": 1.4501429843889177e-05, - "loss": 0.0844, + "learning_rate": 2.4510008893326885e-05, + "loss": 0.1, "step": 59645 }, { "epoch": 2.78, - "learning_rate": 1.4500961042614037e-05, - "loss": 0.1949, + "learning_rate": 2.450954082349087e-05, + "loss": 0.115, "step": 59650 }, { "epoch": 2.78, - "learning_rate": 1.4500492241338897e-05, - "loss": 0.1545, + "learning_rate": 2.4509072753654845e-05, + "loss": 0.152, "step": 59655 }, { "epoch": 2.78, - "learning_rate": 1.4500023440063757e-05, - "loss": 0.3161, + "learning_rate": 2.4508604683818825e-05, + "loss": 0.2709, "step": 59660 }, { "epoch": 2.78, - "learning_rate": 1.4499554638788618e-05, - "loss": 0.2175, + "learning_rate": 2.4508136613982804e-05, + "loss": 0.2453, "step": 59665 }, { "epoch": 2.78, - "learning_rate": 1.449908583751348e-05, - "loss": 0.0381, + "learning_rate": 2.4507668544146788e-05, + "loss": 0.0476, "step": 59670 }, { "epoch": 2.78, - "learning_rate": 1.449861703623834e-05, - "loss": 0.0196, + "learning_rate": 2.4507200474310767e-05, + "loss": 0.0311, "step": 59675 }, { "epoch": 2.78, - "learning_rate": 1.4498148234963201e-05, - "loss": 0.1143, + "learning_rate": 2.4506732404474747e-05, + "loss": 0.0805, "step": 59680 }, { "epoch": 2.78, - "learning_rate": 1.4497679433688061e-05, - "loss": 0.0837, + "learning_rate": 2.4506264334638727e-05, + "loss": 0.1354, "step": 59685 }, { "epoch": 2.79, - "learning_rate": 1.4497210632412921e-05, - "loss": 0.129, + "learning_rate": 2.450579626480271e-05, + "loss": 0.1196, "step": 59690 }, { "epoch": 2.79, - "learning_rate": 1.4496741831137781e-05, - "loss": 0.0908, + "learning_rate": 2.450532819496669e-05, + "loss": 0.0571, "step": 59695 }, { "epoch": 2.79, - "learning_rate": 1.4496273029862641e-05, - "loss": 0.1649, + "learning_rate": 2.450486012513067e-05, + "loss": 0.0936, "step": 59700 }, { "epoch": 2.79, - "learning_rate": 1.4495804228587503e-05, - "loss": 0.1539, + "learning_rate": 2.4504392055294653e-05, + "loss": 0.2136, "step": 59705 }, { "epoch": 2.79, - "learning_rate": 1.4495335427312363e-05, - "loss": 0.3572, + "learning_rate": 2.4503923985458633e-05, + "loss": 0.2878, "step": 59710 }, { "epoch": 2.79, - "learning_rate": 1.4494866626037224e-05, - "loss": 0.3036, + "learning_rate": 2.4503455915622613e-05, + "loss": 0.2839, "step": 59715 }, { "epoch": 2.79, - "learning_rate": 1.4494397824762086e-05, - "loss": 0.0154, + "learning_rate": 2.450298784578659e-05, + "loss": 0.026, "step": 59720 }, { "epoch": 2.79, - "learning_rate": 1.4493929023486946e-05, - "loss": 0.0211, + "learning_rate": 2.4502519775950572e-05, + "loss": 0.0408, "step": 59725 }, { "epoch": 2.79, - "learning_rate": 1.4493460222211806e-05, - "loss": 0.0294, + "learning_rate": 2.4502051706114552e-05, + "loss": 0.1123, "step": 59730 }, { "epoch": 2.79, - "learning_rate": 1.4492991420936666e-05, - "loss": 0.0781, + "learning_rate": 2.4501583636278532e-05, + "loss": 0.0279, "step": 59735 }, { "epoch": 2.79, - "learning_rate": 1.4492522619661526e-05, - "loss": 0.0934, + "learning_rate": 2.4501115566442512e-05, + "loss": 0.1024, "step": 59740 }, { "epoch": 2.79, - "learning_rate": 1.4492053818386387e-05, - "loss": 0.0954, + "learning_rate": 2.4500647496606495e-05, + "loss": 0.1186, "step": 59745 }, { "epoch": 2.79, - "learning_rate": 1.4491585017111247e-05, - "loss": 0.1425, + "learning_rate": 2.4500179426770475e-05, + "loss": 0.0859, "step": 59750 }, { "epoch": 2.79, - "learning_rate": 1.4491116215836107e-05, - "loss": 0.1863, + "learning_rate": 2.4499711356934455e-05, + "loss": 0.1142, "step": 59755 }, { "epoch": 2.79, - "learning_rate": 1.4490647414560967e-05, - "loss": 0.1952, + "learning_rate": 2.4499243287098438e-05, + "loss": 0.2424, "step": 59760 }, { "epoch": 2.79, - "learning_rate": 1.449017861328583e-05, - "loss": 0.1966, + "learning_rate": 2.4498775217262418e-05, + "loss": 0.3073, "step": 59765 }, { "epoch": 2.79, - "learning_rate": 1.448970981201069e-05, - "loss": 0.0379, + "learning_rate": 2.4498307147426398e-05, + "loss": 0.0441, "step": 59770 }, { "epoch": 2.79, - "learning_rate": 1.448924101073555e-05, - "loss": 0.0309, + "learning_rate": 2.4497839077590377e-05, + "loss": 0.0324, "step": 59775 }, { "epoch": 2.79, - "learning_rate": 1.448877220946041e-05, - "loss": 0.0834, + "learning_rate": 2.4497371007754357e-05, + "loss": 0.0323, "step": 59780 }, { "epoch": 2.79, - "learning_rate": 1.4488303408185272e-05, - "loss": 0.064, + "learning_rate": 2.4496902937918337e-05, + "loss": 0.0365, "step": 59785 }, { "epoch": 2.79, - "learning_rate": 1.4487834606910132e-05, - "loss": 0.0708, + "learning_rate": 2.4496434868082317e-05, + "loss": 0.0499, "step": 59790 }, { "epoch": 2.79, - "learning_rate": 1.4487365805634992e-05, - "loss": 0.1656, + "learning_rate": 2.4495966798246297e-05, + "loss": 0.0849, "step": 59795 }, { "epoch": 2.79, - "learning_rate": 1.4486897004359852e-05, - "loss": 0.133, + "learning_rate": 2.449549872841028e-05, + "loss": 0.1446, "step": 59800 }, { "epoch": 2.79, - "learning_rate": 1.4486428203084713e-05, - "loss": 0.1545, + "learning_rate": 2.449503065857426e-05, + "loss": 0.1163, "step": 59805 }, { "epoch": 2.79, - "learning_rate": 1.4485959401809575e-05, - "loss": 0.1484, + "learning_rate": 2.449456258873824e-05, + "loss": 0.1863, "step": 59810 }, { "epoch": 2.79, - "learning_rate": 1.4485490600534435e-05, - "loss": 0.2154, + "learning_rate": 2.449409451890222e-05, + "loss": 0.2137, "step": 59815 }, { "epoch": 2.79, - "learning_rate": 1.4485021799259296e-05, - "loss": 0.0304, + "learning_rate": 2.4493626449066203e-05, + "loss": 0.0261, "step": 59820 }, { "epoch": 2.79, - "learning_rate": 1.4484552997984156e-05, - "loss": 0.2941, + "learning_rate": 2.4493158379230182e-05, + "loss": 0.0784, "step": 59825 }, { "epoch": 2.79, - "learning_rate": 1.4484084196709016e-05, - "loss": 0.099, + "learning_rate": 2.4492690309394162e-05, + "loss": 0.1117, "step": 59830 }, { "epoch": 2.79, - "learning_rate": 1.4483615395433876e-05, - "loss": 0.0803, + "learning_rate": 2.4492222239558145e-05, + "loss": 0.079, "step": 59835 }, { "epoch": 2.79, - "learning_rate": 1.4483146594158736e-05, - "loss": 0.0795, + "learning_rate": 2.4491754169722125e-05, + "loss": 0.071, "step": 59840 }, { "epoch": 2.79, - "learning_rate": 1.4482677792883598e-05, - "loss": 0.112, + "learning_rate": 2.44912860998861e-05, + "loss": 0.1125, "step": 59845 }, { "epoch": 2.79, - "learning_rate": 1.4482208991608458e-05, - "loss": 0.1533, + "learning_rate": 2.449081803005008e-05, + "loss": 0.0955, "step": 59850 }, { "epoch": 2.79, - "learning_rate": 1.448174019033332e-05, - "loss": 0.1552, + "learning_rate": 2.4490349960214065e-05, + "loss": 0.1655, "step": 59855 }, { "epoch": 2.79, - "learning_rate": 1.4481271389058181e-05, - "loss": 0.2634, + "learning_rate": 2.4489881890378044e-05, + "loss": 0.1266, "step": 59860 }, { "epoch": 2.79, - "learning_rate": 1.4480802587783041e-05, - "loss": 0.2807, + "learning_rate": 2.4489413820542024e-05, + "loss": 0.3275, "step": 59865 }, { "epoch": 2.79, - "learning_rate": 1.4480333786507901e-05, - "loss": 0.0613, + "learning_rate": 2.4488945750706004e-05, + "loss": 0.0658, "step": 59870 }, { "epoch": 2.79, - "learning_rate": 1.447986498523276e-05, - "loss": 0.0644, + "learning_rate": 2.4488477680869987e-05, + "loss": 0.0367, "step": 59875 }, { "epoch": 2.79, - "learning_rate": 1.447939618395762e-05, - "loss": 0.0322, + "learning_rate": 2.4488009611033967e-05, + "loss": 0.0492, "step": 59880 }, { "epoch": 2.79, - "learning_rate": 1.4478927382682482e-05, - "loss": 0.0303, + "learning_rate": 2.4487541541197947e-05, + "loss": 0.0986, "step": 59885 }, { "epoch": 2.79, - "learning_rate": 1.4478458581407342e-05, - "loss": 0.0804, + "learning_rate": 2.448707347136193e-05, + "loss": 0.0614, "step": 59890 }, { "epoch": 2.79, - "learning_rate": 1.4477989780132202e-05, - "loss": 0.136, + "learning_rate": 2.448660540152591e-05, + "loss": 0.0977, "step": 59895 }, { "epoch": 2.8, - "learning_rate": 1.4477520978857066e-05, - "loss": 0.1515, + "learning_rate": 2.448613733168989e-05, + "loss": 0.1351, "step": 59900 }, { "epoch": 2.8, - "learning_rate": 1.4477052177581926e-05, - "loss": 0.2001, + "learning_rate": 2.448566926185387e-05, + "loss": 0.1106, "step": 59905 }, { "epoch": 2.8, - "learning_rate": 1.4476583376306785e-05, - "loss": 0.2547, + "learning_rate": 2.448520119201785e-05, + "loss": 0.248, "step": 59910 }, { "epoch": 2.8, - "learning_rate": 1.4476114575031645e-05, - "loss": 0.315, + "learning_rate": 2.448473312218183e-05, + "loss": 0.269, "step": 59915 }, { "epoch": 2.8, - "learning_rate": 1.4475645773756505e-05, - "loss": 0.082, + "learning_rate": 2.448426505234581e-05, + "loss": 0.0223, "step": 59920 }, { "epoch": 2.8, - "learning_rate": 1.4475176972481367e-05, - "loss": 0.0578, + "learning_rate": 2.448379698250979e-05, + "loss": 0.0459, "step": 59925 }, { "epoch": 2.8, - "learning_rate": 1.4474708171206227e-05, - "loss": 0.0752, + "learning_rate": 2.4483328912673772e-05, + "loss": 0.0505, "step": 59930 }, { "epoch": 2.8, - "learning_rate": 1.4474239369931087e-05, - "loss": 0.0437, + "learning_rate": 2.4482860842837752e-05, + "loss": 0.0455, "step": 59935 }, { "epoch": 2.8, - "learning_rate": 1.4473770568655947e-05, - "loss": 0.1486, + "learning_rate": 2.4482392773001732e-05, + "loss": 0.0739, "step": 59940 }, { "epoch": 2.8, - "learning_rate": 1.4473301767380807e-05, - "loss": 0.1381, + "learning_rate": 2.4481924703165715e-05, + "loss": 0.1352, "step": 59945 }, { "epoch": 2.8, - "learning_rate": 1.447283296610567e-05, - "loss": 0.0976, + "learning_rate": 2.4481456633329695e-05, + "loss": 0.1433, "step": 59950 }, { "epoch": 2.8, - "learning_rate": 1.447236416483053e-05, - "loss": 0.0948, + "learning_rate": 2.4480988563493675e-05, + "loss": 0.1034, "step": 59955 }, { "epoch": 2.8, - "learning_rate": 1.447189536355539e-05, - "loss": 0.1661, + "learning_rate": 2.4480520493657654e-05, + "loss": 0.1249, "step": 59960 }, { "epoch": 2.8, - "learning_rate": 1.4471426562280251e-05, - "loss": 0.1997, + "learning_rate": 2.4480052423821638e-05, + "loss": 0.2903, "step": 59965 }, { "epoch": 2.8, - "learning_rate": 1.4470957761005111e-05, - "loss": 0.0974, + "learning_rate": 2.4479584353985614e-05, + "loss": 0.0758, "step": 59970 }, { "epoch": 2.8, - "learning_rate": 1.4470488959729971e-05, - "loss": 0.0479, + "learning_rate": 2.4479116284149594e-05, + "loss": 0.0193, "step": 59975 }, { "epoch": 2.8, - "learning_rate": 1.4470020158454831e-05, - "loss": 0.0315, + "learning_rate": 2.4478648214313574e-05, + "loss": 0.0632, "step": 59980 }, { "epoch": 2.8, - "learning_rate": 1.4469551357179691e-05, - "loss": 0.0788, + "learning_rate": 2.4478180144477557e-05, + "loss": 0.0705, "step": 59985 }, { "epoch": 2.8, - "learning_rate": 1.4469082555904553e-05, - "loss": 0.0727, + "learning_rate": 2.4477712074641537e-05, + "loss": 0.1063, "step": 59990 }, { "epoch": 2.8, - "learning_rate": 1.4468613754629414e-05, - "loss": 0.1111, + "learning_rate": 2.4477244004805516e-05, + "loss": 0.0351, "step": 59995 }, { "epoch": 2.8, - "learning_rate": 1.4468144953354274e-05, - "loss": 0.0669, + "learning_rate": 2.44767759349695e-05, + "loss": 0.1615, "step": 60000 }, { "epoch": 2.8, - "learning_rate": 1.4467676152079136e-05, - "loss": 0.1314, + "learning_rate": 2.447630786513348e-05, + "loss": 0.1302, "step": 60005 }, { "epoch": 2.8, - "learning_rate": 1.4467207350803996e-05, - "loss": 0.2948, + "learning_rate": 2.447583979529746e-05, + "loss": 0.2119, "step": 60010 }, { "epoch": 2.8, - "learning_rate": 1.4466738549528856e-05, - "loss": 0.2877, + "learning_rate": 2.447537172546144e-05, + "loss": 0.2506, "step": 60015 }, { "epoch": 2.8, - "learning_rate": 1.4466269748253716e-05, - "loss": 0.0344, + "learning_rate": 2.4474903655625422e-05, + "loss": 0.0472, "step": 60020 }, { "epoch": 2.8, - "learning_rate": 1.4465800946978576e-05, - "loss": 0.0666, + "learning_rate": 2.4474435585789402e-05, + "loss": 0.0203, "step": 60025 }, { "epoch": 2.8, - "learning_rate": 1.4465332145703437e-05, - "loss": 0.053, + "learning_rate": 2.4473967515953382e-05, + "loss": 0.0179, "step": 60030 }, { "epoch": 2.8, - "learning_rate": 1.4464863344428297e-05, - "loss": 0.0348, + "learning_rate": 2.447349944611736e-05, + "loss": 0.0817, "step": 60035 }, { "epoch": 2.8, - "learning_rate": 1.4464394543153159e-05, - "loss": 0.0455, + "learning_rate": 2.447303137628134e-05, + "loss": 0.0724, "step": 60040 }, { "epoch": 2.8, - "learning_rate": 1.446392574187802e-05, - "loss": 0.0948, + "learning_rate": 2.447256330644532e-05, + "loss": 0.0559, "step": 60045 }, { "epoch": 2.8, - "learning_rate": 1.446345694060288e-05, - "loss": 0.1343, + "learning_rate": 2.44720952366093e-05, + "loss": 0.0986, "step": 60050 }, { "epoch": 2.8, - "learning_rate": 1.446298813932774e-05, - "loss": 0.1746, + "learning_rate": 2.447162716677328e-05, + "loss": 0.0917, "step": 60055 }, { "epoch": 2.8, - "learning_rate": 1.44625193380526e-05, - "loss": 0.2422, + "learning_rate": 2.4471159096937264e-05, + "loss": 0.3108, "step": 60060 }, { "epoch": 2.8, - "learning_rate": 1.446205053677746e-05, - "loss": 0.4788, + "learning_rate": 2.4470691027101244e-05, + "loss": 0.2526, "step": 60065 }, { "epoch": 2.8, - "learning_rate": 1.4461581735502322e-05, - "loss": 0.0414, + "learning_rate": 2.4470222957265224e-05, + "loss": 0.0525, "step": 60070 }, { "epoch": 2.8, - "learning_rate": 1.4461112934227182e-05, - "loss": 0.0488, + "learning_rate": 2.4469754887429207e-05, + "loss": 0.0104, "step": 60075 }, { "epoch": 2.8, - "learning_rate": 1.4460644132952042e-05, - "loss": 0.057, + "learning_rate": 2.4469286817593187e-05, + "loss": 0.0332, "step": 60080 }, { "epoch": 2.8, - "learning_rate": 1.4460175331676902e-05, - "loss": 0.0404, + "learning_rate": 2.4468818747757167e-05, + "loss": 0.0493, "step": 60085 }, { "epoch": 2.8, - "learning_rate": 1.4459706530401765e-05, - "loss": 0.1053, + "learning_rate": 2.4468350677921147e-05, + "loss": 0.0624, "step": 60090 }, { "epoch": 2.8, - "learning_rate": 1.4459237729126625e-05, - "loss": 0.1003, + "learning_rate": 2.446788260808513e-05, + "loss": 0.0975, "step": 60095 }, { "epoch": 2.8, - "learning_rate": 1.4458768927851485e-05, - "loss": 0.042, + "learning_rate": 2.4467414538249106e-05, + "loss": 0.0622, "step": 60100 }, { "epoch": 2.8, - "learning_rate": 1.4458300126576345e-05, - "loss": 0.1399, + "learning_rate": 2.4466946468413086e-05, + "loss": 0.1787, "step": 60105 }, { "epoch": 2.8, - "learning_rate": 1.4457831325301207e-05, - "loss": 0.1524, + "learning_rate": 2.4466478398577066e-05, + "loss": 0.255, "step": 60110 }, { "epoch": 2.81, - "learning_rate": 1.4457362524026066e-05, - "loss": 0.2414, + "learning_rate": 2.446601032874105e-05, + "loss": 0.2408, "step": 60115 }, { "epoch": 2.81, - "learning_rate": 1.4456893722750926e-05, - "loss": 0.0433, + "learning_rate": 2.446554225890503e-05, + "loss": 0.0576, "step": 60120 }, { "epoch": 2.81, - "learning_rate": 1.4456424921475786e-05, - "loss": 0.0522, + "learning_rate": 2.446507418906901e-05, + "loss": 0.0079, "step": 60125 }, { "epoch": 2.81, - "learning_rate": 1.4455956120200646e-05, - "loss": 0.0466, + "learning_rate": 2.4464606119232992e-05, + "loss": 0.0536, "step": 60130 }, { "epoch": 2.81, - "learning_rate": 1.445548731892551e-05, - "loss": 0.1194, + "learning_rate": 2.446413804939697e-05, + "loss": 0.0669, "step": 60135 }, { "epoch": 2.81, - "learning_rate": 1.445501851765037e-05, - "loss": 0.0274, + "learning_rate": 2.446366997956095e-05, + "loss": 0.08, "step": 60140 }, { "epoch": 2.81, - "learning_rate": 1.445454971637523e-05, - "loss": 0.0872, + "learning_rate": 2.446320190972493e-05, + "loss": 0.1084, "step": 60145 }, { "epoch": 2.81, - "learning_rate": 1.4454080915100091e-05, - "loss": 0.3045, + "learning_rate": 2.4462733839888915e-05, + "loss": 0.0946, "step": 60150 }, { "epoch": 2.81, - "learning_rate": 1.4453612113824951e-05, - "loss": 0.1004, + "learning_rate": 2.4462265770052894e-05, + "loss": 0.2113, "step": 60155 }, { "epoch": 2.81, - "learning_rate": 1.4453143312549811e-05, - "loss": 0.2428, + "learning_rate": 2.446179770021687e-05, + "loss": 0.1546, "step": 60160 }, { "epoch": 2.81, - "learning_rate": 1.4452674511274671e-05, - "loss": 0.3085, + "learning_rate": 2.446132963038085e-05, + "loss": 0.2933, "step": 60165 }, { "epoch": 2.81, - "learning_rate": 1.4452205709999532e-05, - "loss": 0.0418, + "learning_rate": 2.4460861560544834e-05, + "loss": 0.039, "step": 60170 }, { "epoch": 2.81, - "learning_rate": 1.4451736908724392e-05, - "loss": 0.0232, + "learning_rate": 2.4460393490708814e-05, + "loss": 0.0461, "step": 60175 }, { "epoch": 2.81, - "learning_rate": 1.4451268107449254e-05, - "loss": 0.054, + "learning_rate": 2.4459925420872793e-05, + "loss": 0.0186, "step": 60180 }, { "epoch": 2.81, - "learning_rate": 1.4450799306174114e-05, - "loss": 0.0278, + "learning_rate": 2.4459457351036777e-05, + "loss": 0.0763, "step": 60185 }, { "epoch": 2.81, - "learning_rate": 1.4450330504898976e-05, - "loss": 0.0537, + "learning_rate": 2.4458989281200756e-05, + "loss": 0.0467, "step": 60190 }, { "epoch": 2.81, - "learning_rate": 1.4449861703623836e-05, - "loss": 0.0563, + "learning_rate": 2.4458521211364736e-05, + "loss": 0.0992, "step": 60195 }, { "epoch": 2.81, - "learning_rate": 1.4449392902348695e-05, - "loss": 0.215, + "learning_rate": 2.4458053141528716e-05, + "loss": 0.0936, "step": 60200 }, { "epoch": 2.81, - "learning_rate": 1.4448924101073555e-05, - "loss": 0.2211, + "learning_rate": 2.44575850716927e-05, + "loss": 0.1019, "step": 60205 }, { "epoch": 2.81, - "learning_rate": 1.4448455299798417e-05, - "loss": 0.2244, + "learning_rate": 2.445711700185668e-05, + "loss": 0.1582, "step": 60210 }, { "epoch": 2.81, - "learning_rate": 1.4447986498523277e-05, - "loss": 0.304, + "learning_rate": 2.445664893202066e-05, + "loss": 0.2476, "step": 60215 }, { "epoch": 2.81, - "learning_rate": 1.4447517697248137e-05, - "loss": 0.0084, + "learning_rate": 2.445618086218464e-05, + "loss": 0.0351, "step": 60220 }, { "epoch": 2.81, - "learning_rate": 1.4447048895972999e-05, - "loss": 0.0444, + "learning_rate": 2.445571279234862e-05, + "loss": 0.0333, "step": 60225 }, { "epoch": 2.81, - "learning_rate": 1.444658009469786e-05, - "loss": 0.0909, + "learning_rate": 2.44552447225126e-05, + "loss": 0.0444, "step": 60230 }, { "epoch": 2.81, - "learning_rate": 1.444611129342272e-05, - "loss": 0.0759, + "learning_rate": 2.4454776652676578e-05, + "loss": 0.0282, "step": 60235 }, { "epoch": 2.81, - "learning_rate": 1.444564249214758e-05, - "loss": 0.1385, + "learning_rate": 2.4454308582840558e-05, + "loss": 0.0617, "step": 60240 }, { "epoch": 2.81, - "learning_rate": 1.444517369087244e-05, - "loss": 0.1458, + "learning_rate": 2.445384051300454e-05, + "loss": 0.1827, "step": 60245 }, { "epoch": 2.81, - "learning_rate": 1.4444704889597302e-05, - "loss": 0.0788, + "learning_rate": 2.445337244316852e-05, + "loss": 0.2022, "step": 60250 }, { "epoch": 2.81, - "learning_rate": 1.4444236088322162e-05, - "loss": 0.1923, + "learning_rate": 2.44529043733325e-05, + "loss": 0.1955, "step": 60255 }, { "epoch": 2.81, - "learning_rate": 1.4443767287047021e-05, - "loss": 0.3621, + "learning_rate": 2.4452436303496484e-05, + "loss": 0.213, "step": 60260 }, { "epoch": 2.81, - "learning_rate": 1.4443298485771881e-05, - "loss": 0.2793, + "learning_rate": 2.4451968233660464e-05, + "loss": 0.2272, "step": 60265 }, { "epoch": 2.81, - "learning_rate": 1.4442829684496741e-05, - "loss": 0.0878, + "learning_rate": 2.4451500163824444e-05, + "loss": 0.0147, "step": 60270 }, { "epoch": 2.81, - "learning_rate": 1.4442360883221605e-05, - "loss": 0.0328, + "learning_rate": 2.4451032093988424e-05, + "loss": 0.0804, "step": 60275 }, { "epoch": 2.81, - "learning_rate": 1.4441892081946465e-05, - "loss": 0.0712, + "learning_rate": 2.4450564024152407e-05, + "loss": 0.0417, "step": 60280 }, { "epoch": 2.81, - "learning_rate": 1.4441423280671325e-05, - "loss": 0.0215, + "learning_rate": 2.4450095954316387e-05, + "loss": 0.0388, "step": 60285 }, { "epoch": 2.81, - "learning_rate": 1.4440954479396186e-05, - "loss": 0.0537, + "learning_rate": 2.4449627884480363e-05, + "loss": 0.1295, "step": 60290 }, { "epoch": 2.81, - "learning_rate": 1.4440485678121046e-05, - "loss": 0.0864, + "learning_rate": 2.4449159814644343e-05, + "loss": 0.173, "step": 60295 }, { "epoch": 2.81, - "learning_rate": 1.4440016876845906e-05, - "loss": 0.1054, + "learning_rate": 2.4448691744808326e-05, + "loss": 0.0887, "step": 60300 }, { "epoch": 2.81, - "learning_rate": 1.4439548075570766e-05, - "loss": 0.1377, + "learning_rate": 2.4448223674972306e-05, + "loss": 0.176, "step": 60305 }, { "epoch": 2.81, - "learning_rate": 1.4439079274295626e-05, - "loss": 0.2142, + "learning_rate": 2.4447755605136286e-05, + "loss": 0.1139, "step": 60310 }, { "epoch": 2.81, - "learning_rate": 1.4438610473020488e-05, - "loss": 0.2317, + "learning_rate": 2.444728753530027e-05, + "loss": 0.2864, "step": 60315 }, { "epoch": 2.81, - "learning_rate": 1.4438141671745349e-05, - "loss": 0.0255, + "learning_rate": 2.444681946546425e-05, + "loss": 0.0525, "step": 60320 }, { "epoch": 2.81, - "learning_rate": 1.4437672870470209e-05, - "loss": 0.0471, + "learning_rate": 2.444635139562823e-05, + "loss": 0.0649, "step": 60325 }, { "epoch": 2.82, - "learning_rate": 1.443720406919507e-05, - "loss": 0.056, + "learning_rate": 2.4445883325792208e-05, + "loss": 0.063, "step": 60330 }, { "epoch": 2.82, - "learning_rate": 1.443673526791993e-05, - "loss": 0.1309, + "learning_rate": 2.444541525595619e-05, + "loss": 0.0791, "step": 60335 }, { "epoch": 2.82, - "learning_rate": 1.443626646664479e-05, - "loss": 0.0999, + "learning_rate": 2.444494718612017e-05, + "loss": 0.1119, "step": 60340 }, { "epoch": 2.82, - "learning_rate": 1.443579766536965e-05, - "loss": 0.1236, + "learning_rate": 2.444447911628415e-05, + "loss": 0.1171, "step": 60345 }, { "epoch": 2.82, - "learning_rate": 1.443532886409451e-05, - "loss": 0.1663, + "learning_rate": 2.4444011046448128e-05, + "loss": 0.2339, "step": 60350 }, { "epoch": 2.82, - "learning_rate": 1.4434860062819372e-05, - "loss": 0.2222, + "learning_rate": 2.444354297661211e-05, + "loss": 0.2005, "step": 60355 }, { "epoch": 2.82, - "learning_rate": 1.4434391261544232e-05, - "loss": 0.2062, + "learning_rate": 2.444307490677609e-05, + "loss": 0.3308, "step": 60360 }, { "epoch": 2.82, - "learning_rate": 1.4433922460269094e-05, - "loss": 0.1832, + "learning_rate": 2.444260683694007e-05, + "loss": 0.3391, "step": 60365 }, { "epoch": 2.82, - "learning_rate": 1.4433453658993955e-05, - "loss": 0.0224, + "learning_rate": 2.4442138767104054e-05, + "loss": 0.031, "step": 60370 }, { "epoch": 2.82, - "learning_rate": 1.4432984857718815e-05, - "loss": 0.0739, + "learning_rate": 2.4441670697268033e-05, + "loss": 0.0791, "step": 60375 }, { "epoch": 2.82, - "learning_rate": 1.4432516056443675e-05, - "loss": 0.045, + "learning_rate": 2.4441202627432013e-05, + "loss": 0.0219, "step": 60380 }, { "epoch": 2.82, - "learning_rate": 1.4432047255168535e-05, - "loss": 0.1194, + "learning_rate": 2.4440734557595993e-05, + "loss": 0.0218, "step": 60385 }, { "epoch": 2.82, - "learning_rate": 1.4431578453893395e-05, - "loss": 0.1009, + "learning_rate": 2.4440266487759976e-05, + "loss": 0.0515, "step": 60390 }, { "epoch": 2.82, - "learning_rate": 1.4431109652618257e-05, - "loss": 0.0623, + "learning_rate": 2.4439798417923956e-05, + "loss": 0.1438, "step": 60395 }, { "epoch": 2.82, - "learning_rate": 1.4430640851343117e-05, - "loss": 0.095, + "learning_rate": 2.4439330348087936e-05, + "loss": 0.159, "step": 60400 }, { "epoch": 2.82, - "learning_rate": 1.4430172050067976e-05, - "loss": 0.2098, + "learning_rate": 2.4438862278251916e-05, + "loss": 0.1257, "step": 60405 }, { "epoch": 2.82, - "learning_rate": 1.4429703248792836e-05, - "loss": 0.2175, + "learning_rate": 2.44383942084159e-05, + "loss": 0.1489, "step": 60410 }, { "epoch": 2.82, - "learning_rate": 1.44292344475177e-05, - "loss": 0.2349, + "learning_rate": 2.4437926138579875e-05, + "loss": 0.2007, "step": 60415 }, { "epoch": 2.82, - "learning_rate": 1.442876564624256e-05, - "loss": 0.0606, + "learning_rate": 2.4437458068743855e-05, + "loss": 0.033, "step": 60420 }, { "epoch": 2.82, - "learning_rate": 1.442829684496742e-05, - "loss": 0.0261, + "learning_rate": 2.4436989998907835e-05, + "loss": 0.1097, "step": 60425 }, { "epoch": 2.82, - "learning_rate": 1.442782804369228e-05, - "loss": 0.0415, + "learning_rate": 2.4436521929071818e-05, + "loss": 0.1007, "step": 60430 }, { "epoch": 2.82, - "learning_rate": 1.4427359242417141e-05, - "loss": 0.0852, + "learning_rate": 2.4436053859235798e-05, + "loss": 0.0648, "step": 60435 }, { "epoch": 2.82, - "learning_rate": 1.4426890441142001e-05, - "loss": 0.0841, + "learning_rate": 2.4435585789399778e-05, + "loss": 0.1449, "step": 60440 }, { "epoch": 2.82, - "learning_rate": 1.4426421639866861e-05, - "loss": 0.0815, + "learning_rate": 2.443511771956376e-05, + "loss": 0.1316, "step": 60445 }, { "epoch": 2.82, - "learning_rate": 1.4425952838591721e-05, - "loss": 0.0924, + "learning_rate": 2.443464964972774e-05, + "loss": 0.0919, "step": 60450 }, { "epoch": 2.82, - "learning_rate": 1.4425484037316581e-05, - "loss": 0.1154, + "learning_rate": 2.443418157989172e-05, + "loss": 0.1348, "step": 60455 }, { "epoch": 2.82, - "learning_rate": 1.4425015236041444e-05, - "loss": 0.2005, + "learning_rate": 2.44337135100557e-05, + "loss": 0.2028, "step": 60460 }, { "epoch": 2.82, - "learning_rate": 1.4424546434766304e-05, - "loss": 0.2531, + "learning_rate": 2.4433245440219684e-05, + "loss": 0.3667, "step": 60465 }, { "epoch": 2.82, - "learning_rate": 1.4424077633491164e-05, - "loss": 0.0243, + "learning_rate": 2.4432777370383664e-05, + "loss": 0.0199, "step": 60470 }, { "epoch": 2.82, - "learning_rate": 1.4423608832216026e-05, - "loss": 0.0358, + "learning_rate": 2.4432309300547643e-05, + "loss": 0.0429, "step": 60475 }, { "epoch": 2.82, - "learning_rate": 1.4423140030940886e-05, - "loss": 0.0395, + "learning_rate": 2.443184123071162e-05, + "loss": 0.0243, "step": 60480 }, { "epoch": 2.82, - "learning_rate": 1.4422671229665746e-05, - "loss": 0.0441, + "learning_rate": 2.4431373160875603e-05, + "loss": 0.0567, "step": 60485 }, { "epoch": 2.82, - "learning_rate": 1.4422202428390606e-05, - "loss": 0.0726, + "learning_rate": 2.4430905091039583e-05, + "loss": 0.0627, "step": 60490 }, { "epoch": 2.82, - "learning_rate": 1.4421733627115465e-05, - "loss": 0.1384, + "learning_rate": 2.4430437021203563e-05, + "loss": 0.0771, "step": 60495 }, { "epoch": 2.82, - "learning_rate": 1.4421264825840327e-05, - "loss": 0.1046, + "learning_rate": 2.4429968951367546e-05, + "loss": 0.1636, "step": 60500 }, { "epoch": 2.82, - "learning_rate": 1.4420796024565189e-05, - "loss": 0.1901, + "learning_rate": 2.4429500881531526e-05, + "loss": 0.1397, "step": 60505 }, { "epoch": 2.82, - "learning_rate": 1.4420327223290049e-05, - "loss": 0.3416, + "learning_rate": 2.4429032811695505e-05, + "loss": 0.2548, "step": 60510 }, { "epoch": 2.82, - "learning_rate": 1.441985842201491e-05, - "loss": 0.2348, + "learning_rate": 2.4428564741859485e-05, + "loss": 0.3335, "step": 60515 }, { "epoch": 2.82, - "learning_rate": 1.441938962073977e-05, - "loss": 0.0289, + "learning_rate": 2.442809667202347e-05, + "loss": 0.0661, "step": 60520 }, { "epoch": 2.82, - "learning_rate": 1.441892081946463e-05, - "loss": 0.0462, + "learning_rate": 2.4427628602187448e-05, + "loss": 0.0513, "step": 60525 }, { "epoch": 2.82, - "learning_rate": 1.441845201818949e-05, - "loss": 0.0298, + "learning_rate": 2.4427160532351428e-05, + "loss": 0.0348, "step": 60530 }, { "epoch": 2.82, - "learning_rate": 1.441798321691435e-05, - "loss": 0.0507, + "learning_rate": 2.4426692462515408e-05, + "loss": 0.1036, "step": 60535 }, { "epoch": 2.82, - "learning_rate": 1.4417514415639212e-05, - "loss": 0.0689, + "learning_rate": 2.4426224392679388e-05, + "loss": 0.0371, "step": 60540 }, { "epoch": 2.83, - "learning_rate": 1.4417045614364072e-05, - "loss": 0.0854, + "learning_rate": 2.4425756322843368e-05, + "loss": 0.0545, "step": 60545 }, { "epoch": 2.83, - "learning_rate": 1.4416576813088933e-05, - "loss": 0.1096, + "learning_rate": 2.4425288253007347e-05, + "loss": 0.0814, "step": 60550 }, { "epoch": 2.83, - "learning_rate": 1.4416108011813795e-05, - "loss": 0.1565, + "learning_rate": 2.442482018317133e-05, + "loss": 0.1206, "step": 60555 }, { "epoch": 2.83, - "learning_rate": 1.4415639210538655e-05, - "loss": 0.2587, + "learning_rate": 2.442435211333531e-05, + "loss": 0.2098, "step": 60560 }, { "epoch": 2.83, - "learning_rate": 1.4415170409263515e-05, - "loss": 0.3275, + "learning_rate": 2.442388404349929e-05, + "loss": 0.2092, "step": 60565 }, { "epoch": 2.83, - "learning_rate": 1.4414701607988375e-05, - "loss": 0.0329, + "learning_rate": 2.442341597366327e-05, + "loss": 0.0672, "step": 60570 }, { "epoch": 2.83, - "learning_rate": 1.4414232806713235e-05, - "loss": 0.0129, + "learning_rate": 2.4422947903827253e-05, + "loss": 0.0361, "step": 60575 }, { "epoch": 2.83, - "learning_rate": 1.4413764005438096e-05, - "loss": 0.0224, + "learning_rate": 2.4422479833991233e-05, + "loss": 0.0672, "step": 60580 }, { "epoch": 2.83, - "learning_rate": 1.4413295204162956e-05, - "loss": 0.0823, + "learning_rate": 2.4422011764155213e-05, + "loss": 0.0813, "step": 60585 }, { "epoch": 2.83, - "learning_rate": 1.4412826402887816e-05, - "loss": 0.1211, + "learning_rate": 2.4421543694319193e-05, + "loss": 0.1038, "step": 60590 }, { "epoch": 2.83, - "learning_rate": 1.4412357601612676e-05, - "loss": 0.0709, + "learning_rate": 2.4421075624483176e-05, + "loss": 0.085, "step": 60595 }, { "epoch": 2.83, - "learning_rate": 1.441188880033754e-05, - "loss": 0.1255, + "learning_rate": 2.4420607554647156e-05, + "loss": 0.1322, "step": 60600 }, { "epoch": 2.83, - "learning_rate": 1.44114199990624e-05, - "loss": 0.1404, + "learning_rate": 2.4420139484811132e-05, + "loss": 0.1983, "step": 60605 }, { "epoch": 2.83, - "learning_rate": 1.441095119778726e-05, - "loss": 0.1606, + "learning_rate": 2.4419671414975112e-05, + "loss": 0.1474, "step": 60610 }, { "epoch": 2.83, - "learning_rate": 1.4410482396512119e-05, - "loss": 0.1291, + "learning_rate": 2.4419203345139095e-05, + "loss": 0.3845, "step": 60615 }, { "epoch": 2.83, - "learning_rate": 1.441001359523698e-05, - "loss": 0.0979, + "learning_rate": 2.4418735275303075e-05, + "loss": 0.055, "step": 60620 }, { "epoch": 2.83, - "learning_rate": 1.440954479396184e-05, - "loss": 0.0407, + "learning_rate": 2.4418267205467055e-05, + "loss": 0.0347, "step": 60625 }, { "epoch": 2.83, - "learning_rate": 1.44090759926867e-05, - "loss": 0.025, + "learning_rate": 2.4417799135631038e-05, + "loss": 0.0667, "step": 60630 }, { "epoch": 2.83, - "learning_rate": 1.440860719141156e-05, - "loss": 0.0598, + "learning_rate": 2.4417331065795018e-05, + "loss": 0.0311, "step": 60635 }, { "epoch": 2.83, - "learning_rate": 1.4408138390136422e-05, - "loss": 0.0943, + "learning_rate": 2.4416862995958998e-05, + "loss": 0.0487, "step": 60640 }, { "epoch": 2.83, - "learning_rate": 1.4407669588861284e-05, - "loss": 0.1123, + "learning_rate": 2.4416394926122977e-05, + "loss": 0.0644, "step": 60645 }, { "epoch": 2.83, - "learning_rate": 1.4407200787586144e-05, - "loss": 0.133, + "learning_rate": 2.441592685628696e-05, + "loss": 0.1066, "step": 60650 }, { "epoch": 2.83, - "learning_rate": 1.4406731986311004e-05, - "loss": 0.2083, + "learning_rate": 2.441545878645094e-05, + "loss": 0.1963, "step": 60655 }, { "epoch": 2.83, - "learning_rate": 1.4406263185035865e-05, - "loss": 0.2574, + "learning_rate": 2.441499071661492e-05, + "loss": 0.3125, "step": 60660 }, { "epoch": 2.83, - "learning_rate": 1.4405794383760725e-05, - "loss": 0.241, + "learning_rate": 2.44145226467789e-05, + "loss": 0.2088, "step": 60665 }, { "epoch": 2.83, - "learning_rate": 1.4405325582485585e-05, - "loss": 0.0453, + "learning_rate": 2.441405457694288e-05, + "loss": 0.0164, "step": 60670 }, { "epoch": 2.83, - "learning_rate": 1.4404856781210445e-05, - "loss": 0.0483, + "learning_rate": 2.441358650710686e-05, + "loss": 0.0451, "step": 60675 }, { "epoch": 2.83, - "learning_rate": 1.4404387979935307e-05, - "loss": 0.0555, + "learning_rate": 2.441311843727084e-05, + "loss": 0.0368, "step": 60680 }, { "epoch": 2.83, - "learning_rate": 1.4403919178660167e-05, - "loss": 0.0621, + "learning_rate": 2.4412650367434823e-05, + "loss": 0.0716, "step": 60685 }, { "epoch": 2.83, - "learning_rate": 1.4403450377385028e-05, - "loss": 0.0828, + "learning_rate": 2.4412182297598803e-05, + "loss": 0.0277, "step": 60690 }, { "epoch": 2.83, - "learning_rate": 1.4402981576109888e-05, - "loss": 0.0364, + "learning_rate": 2.4411714227762782e-05, + "loss": 0.0362, "step": 60695 }, { "epoch": 2.83, - "learning_rate": 1.440251277483475e-05, - "loss": 0.0896, + "learning_rate": 2.4411246157926762e-05, + "loss": 0.1361, "step": 60700 }, { "epoch": 2.83, - "learning_rate": 1.440204397355961e-05, - "loss": 0.2197, + "learning_rate": 2.4410778088090745e-05, + "loss": 0.1438, "step": 60705 }, { "epoch": 2.83, - "learning_rate": 1.440157517228447e-05, - "loss": 0.2638, + "learning_rate": 2.4410310018254725e-05, + "loss": 0.1406, "step": 60710 }, { "epoch": 2.83, - "learning_rate": 1.440110637100933e-05, - "loss": 0.1872, + "learning_rate": 2.4409841948418705e-05, + "loss": 0.2761, "step": 60715 }, { "epoch": 2.83, - "learning_rate": 1.4400637569734191e-05, - "loss": 0.0346, + "learning_rate": 2.4409373878582685e-05, + "loss": 0.0729, "step": 60720 }, { "epoch": 2.83, - "learning_rate": 1.4400168768459051e-05, - "loss": 0.0323, + "learning_rate": 2.4408905808746668e-05, + "loss": 0.063, "step": 60725 }, { "epoch": 2.83, - "learning_rate": 1.4399699967183911e-05, - "loss": 0.0482, + "learning_rate": 2.4408437738910645e-05, + "loss": 0.0291, "step": 60730 }, { "epoch": 2.83, - "learning_rate": 1.4399231165908771e-05, - "loss": 0.0482, + "learning_rate": 2.4407969669074624e-05, + "loss": 0.0664, "step": 60735 }, { "epoch": 2.83, - "learning_rate": 1.4398762364633634e-05, - "loss": 0.0727, + "learning_rate": 2.4407501599238608e-05, + "loss": 0.0994, "step": 60740 }, { "epoch": 2.83, - "learning_rate": 1.4398293563358494e-05, - "loss": 0.0952, + "learning_rate": 2.4407033529402587e-05, + "loss": 0.0764, "step": 60745 }, { "epoch": 2.83, - "learning_rate": 1.4397824762083354e-05, - "loss": 0.1124, + "learning_rate": 2.4406565459566567e-05, + "loss": 0.094, "step": 60750 }, { "epoch": 2.83, - "learning_rate": 1.4397355960808214e-05, - "loss": 0.0838, + "learning_rate": 2.4406097389730547e-05, + "loss": 0.1506, "step": 60755 }, { "epoch": 2.84, - "learning_rate": 1.4396887159533076e-05, - "loss": 0.1974, + "learning_rate": 2.440562931989453e-05, + "loss": 0.149, "step": 60760 }, { "epoch": 2.84, - "learning_rate": 1.4396418358257936e-05, - "loss": 0.2214, + "learning_rate": 2.440516125005851e-05, + "loss": 0.2185, "step": 60765 }, { "epoch": 2.84, - "learning_rate": 1.4395949556982796e-05, - "loss": 0.0473, + "learning_rate": 2.440469318022249e-05, + "loss": 0.0627, "step": 60770 }, { "epoch": 2.84, - "learning_rate": 1.4395480755707656e-05, - "loss": 0.0588, + "learning_rate": 2.440422511038647e-05, + "loss": 0.0275, "step": 60775 }, { "epoch": 2.84, - "learning_rate": 1.4395011954432516e-05, - "loss": 0.1081, + "learning_rate": 2.4403757040550453e-05, + "loss": 0.0384, "step": 60780 }, { "epoch": 2.84, - "learning_rate": 1.4394543153157379e-05, - "loss": 0.0593, + "learning_rate": 2.4403288970714433e-05, + "loss": 0.096, "step": 60785 }, { "epoch": 2.84, - "learning_rate": 1.4394074351882239e-05, - "loss": 0.0878, + "learning_rate": 2.4402820900878413e-05, + "loss": 0.1149, "step": 60790 }, { "epoch": 2.84, - "learning_rate": 1.4393605550607099e-05, - "loss": 0.135, + "learning_rate": 2.4402352831042392e-05, + "loss": 0.1999, "step": 60795 }, { "epoch": 2.84, - "learning_rate": 1.439313674933196e-05, - "loss": 0.0865, + "learning_rate": 2.4401884761206372e-05, + "loss": 0.0822, "step": 60800 }, { "epoch": 2.84, - "learning_rate": 1.439266794805682e-05, - "loss": 0.1214, + "learning_rate": 2.4401416691370352e-05, + "loss": 0.1295, "step": 60805 }, { "epoch": 2.84, - "learning_rate": 1.439219914678168e-05, - "loss": 0.2504, + "learning_rate": 2.4400948621534332e-05, + "loss": 0.2997, "step": 60810 }, { "epoch": 2.84, - "learning_rate": 1.439173034550654e-05, - "loss": 0.4163, + "learning_rate": 2.4400480551698315e-05, + "loss": 0.2911, "step": 60815 }, { "epoch": 2.84, - "learning_rate": 1.43912615442314e-05, - "loss": 0.0759, + "learning_rate": 2.4400012481862295e-05, + "loss": 0.0212, "step": 60820 }, { "epoch": 2.84, - "learning_rate": 1.4390792742956262e-05, - "loss": 0.0468, + "learning_rate": 2.4399544412026275e-05, + "loss": 0.0702, "step": 60825 }, { "epoch": 2.84, - "learning_rate": 1.4390323941681123e-05, - "loss": 0.0518, + "learning_rate": 2.4399076342190254e-05, + "loss": 0.0685, "step": 60830 }, { "epoch": 2.84, - "learning_rate": 1.4389855140405983e-05, - "loss": 0.053, + "learning_rate": 2.4398608272354238e-05, + "loss": 0.0828, "step": 60835 }, { "epoch": 2.84, - "learning_rate": 1.4389386339130845e-05, - "loss": 0.0998, + "learning_rate": 2.4398140202518217e-05, + "loss": 0.0848, "step": 60840 }, { "epoch": 2.84, - "learning_rate": 1.4388917537855705e-05, - "loss": 0.111, + "learning_rate": 2.4397672132682197e-05, + "loss": 0.0776, "step": 60845 }, { "epoch": 2.84, - "learning_rate": 1.4388448736580565e-05, - "loss": 0.1585, + "learning_rate": 2.4397204062846177e-05, + "loss": 0.141, "step": 60850 }, { "epoch": 2.84, - "learning_rate": 1.4387979935305425e-05, - "loss": 0.1454, + "learning_rate": 2.4396735993010157e-05, + "loss": 0.0924, "step": 60855 }, { "epoch": 2.84, - "learning_rate": 1.4387511134030285e-05, - "loss": 0.2594, + "learning_rate": 2.4396267923174137e-05, + "loss": 0.2505, "step": 60860 }, { "epoch": 2.84, - "learning_rate": 1.4387042332755146e-05, - "loss": 0.1849, + "learning_rate": 2.4395799853338117e-05, + "loss": 0.3442, "step": 60865 }, { "epoch": 2.84, - "learning_rate": 1.4386573531480006e-05, - "loss": 0.019, + "learning_rate": 2.43953317835021e-05, + "loss": 0.024, "step": 60870 }, { "epoch": 2.84, - "learning_rate": 1.4386104730204868e-05, - "loss": 0.0433, + "learning_rate": 2.439486371366608e-05, + "loss": 0.0099, "step": 60875 }, { "epoch": 2.84, - "learning_rate": 1.438563592892973e-05, - "loss": 0.0268, + "learning_rate": 2.439439564383006e-05, + "loss": 0.0559, "step": 60880 }, { "epoch": 2.84, - "learning_rate": 1.438516712765459e-05, - "loss": 0.0626, + "learning_rate": 2.439392757399404e-05, + "loss": 0.1163, "step": 60885 }, { "epoch": 2.84, - "learning_rate": 1.438469832637945e-05, - "loss": 0.0737, + "learning_rate": 2.4393459504158022e-05, + "loss": 0.0899, "step": 60890 }, { "epoch": 2.84, - "learning_rate": 1.438422952510431e-05, - "loss": 0.076, + "learning_rate": 2.4392991434322002e-05, + "loss": 0.0595, "step": 60895 }, { "epoch": 2.84, - "learning_rate": 1.438376072382917e-05, - "loss": 0.2067, + "learning_rate": 2.4392523364485982e-05, + "loss": 0.0824, "step": 60900 }, { "epoch": 2.84, - "learning_rate": 1.438329192255403e-05, - "loss": 0.1042, + "learning_rate": 2.4392055294649962e-05, + "loss": 0.0924, "step": 60905 }, { "epoch": 2.84, - "learning_rate": 1.438282312127889e-05, - "loss": 0.2718, + "learning_rate": 2.4391587224813945e-05, + "loss": 0.2858, "step": 60910 }, { "epoch": 2.84, - "learning_rate": 1.438235432000375e-05, - "loss": 0.3147, + "learning_rate": 2.4391119154977925e-05, + "loss": 0.1593, "step": 60915 }, { "epoch": 2.84, - "learning_rate": 1.438188551872861e-05, - "loss": 0.0184, + "learning_rate": 2.43906510851419e-05, + "loss": 0.0711, "step": 60920 }, { "epoch": 2.84, - "learning_rate": 1.4381416717453474e-05, - "loss": 0.0486, + "learning_rate": 2.4390183015305885e-05, + "loss": 0.0241, "step": 60925 }, { "epoch": 2.84, - "learning_rate": 1.4380947916178334e-05, - "loss": 0.0481, + "learning_rate": 2.4389714945469864e-05, + "loss": 0.058, "step": 60930 }, { "epoch": 2.84, - "learning_rate": 1.4380479114903194e-05, - "loss": 0.0807, + "learning_rate": 2.4389246875633844e-05, + "loss": 0.0993, "step": 60935 }, { "epoch": 2.84, - "learning_rate": 1.4380010313628054e-05, - "loss": 0.0744, + "learning_rate": 2.4388778805797824e-05, + "loss": 0.0619, "step": 60940 }, { "epoch": 2.84, - "learning_rate": 1.4379541512352915e-05, - "loss": 0.0737, + "learning_rate": 2.4388310735961807e-05, + "loss": 0.0851, "step": 60945 }, { "epoch": 2.84, - "learning_rate": 1.4379072711077775e-05, - "loss": 0.1207, + "learning_rate": 2.4387842666125787e-05, + "loss": 0.0801, "step": 60950 }, { "epoch": 2.84, - "learning_rate": 1.4378603909802635e-05, - "loss": 0.1458, + "learning_rate": 2.4387374596289767e-05, + "loss": 0.1628, "step": 60955 }, { "epoch": 2.84, - "learning_rate": 1.4378135108527495e-05, - "loss": 0.2767, + "learning_rate": 2.4386906526453747e-05, + "loss": 0.2158, "step": 60960 }, { "epoch": 2.84, - "learning_rate": 1.4377666307252355e-05, - "loss": 0.2507, + "learning_rate": 2.438643845661773e-05, + "loss": 0.2365, "step": 60965 }, { "epoch": 2.84, - "learning_rate": 1.4377197505977218e-05, - "loss": 0.064, + "learning_rate": 2.438597038678171e-05, + "loss": 0.074, "step": 60970 }, { "epoch": 2.85, - "learning_rate": 1.4376728704702078e-05, - "loss": 0.0188, + "learning_rate": 2.438550231694569e-05, + "loss": 0.0071, "step": 60975 }, { "epoch": 2.85, - "learning_rate": 1.4376259903426938e-05, - "loss": 0.0876, + "learning_rate": 2.4385034247109673e-05, + "loss": 0.0436, "step": 60980 }, { "epoch": 2.85, - "learning_rate": 1.43757911021518e-05, - "loss": 0.0304, + "learning_rate": 2.438456617727365e-05, + "loss": 0.0791, "step": 60985 }, { "epoch": 2.85, - "learning_rate": 1.437532230087666e-05, - "loss": 0.1134, + "learning_rate": 2.438409810743763e-05, + "loss": 0.0707, "step": 60990 }, { "epoch": 2.85, - "learning_rate": 1.437485349960152e-05, - "loss": 0.1214, + "learning_rate": 2.438363003760161e-05, + "loss": 0.0873, "step": 60995 }, { "epoch": 2.85, - "learning_rate": 1.437438469832638e-05, - "loss": 0.1877, + "learning_rate": 2.4383161967765592e-05, + "loss": 0.089, "step": 61000 }, { "epoch": 2.85, - "learning_rate": 1.437391589705124e-05, - "loss": 0.1703, + "learning_rate": 2.4382693897929572e-05, + "loss": 0.1736, "step": 61005 }, { "epoch": 2.85, - "learning_rate": 1.4373447095776101e-05, - "loss": 0.2195, + "learning_rate": 2.438222582809355e-05, + "loss": 0.1735, "step": 61010 }, { "epoch": 2.85, - "learning_rate": 1.4372978294500963e-05, - "loss": 0.354, + "learning_rate": 2.438175775825753e-05, + "loss": 0.2373, "step": 61015 }, { "epoch": 2.85, - "learning_rate": 1.4372509493225823e-05, - "loss": 0.0335, + "learning_rate": 2.4381289688421515e-05, + "loss": 0.0902, "step": 61020 }, { "epoch": 2.85, - "learning_rate": 1.4372040691950684e-05, - "loss": 0.0675, + "learning_rate": 2.4380821618585494e-05, + "loss": 0.0455, "step": 61025 }, { "epoch": 2.85, - "learning_rate": 1.4371571890675544e-05, - "loss": 0.0821, + "learning_rate": 2.4380353548749474e-05, + "loss": 0.0767, "step": 61030 }, { "epoch": 2.85, - "learning_rate": 1.4371103089400404e-05, - "loss": 0.0535, + "learning_rate": 2.4379885478913454e-05, + "loss": 0.0361, "step": 61035 }, { "epoch": 2.85, - "learning_rate": 1.4370634288125264e-05, - "loss": 0.1184, + "learning_rate": 2.4379417409077437e-05, + "loss": 0.0537, "step": 61040 }, { "epoch": 2.85, - "learning_rate": 1.4370165486850124e-05, - "loss": 0.0816, + "learning_rate": 2.4378949339241414e-05, + "loss": 0.0887, "step": 61045 }, { "epoch": 2.85, - "learning_rate": 1.4369696685574986e-05, - "loss": 0.0924, + "learning_rate": 2.4378481269405394e-05, + "loss": 0.0741, "step": 61050 }, { "epoch": 2.85, - "learning_rate": 1.4369227884299846e-05, - "loss": 0.1121, + "learning_rate": 2.4378013199569377e-05, + "loss": 0.1016, "step": 61055 }, { "epoch": 2.85, - "learning_rate": 1.4368759083024706e-05, - "loss": 0.2305, + "learning_rate": 2.4377545129733357e-05, + "loss": 0.2068, "step": 61060 }, { "epoch": 2.85, - "learning_rate": 1.4368290281749569e-05, - "loss": 0.413, + "learning_rate": 2.4377077059897336e-05, + "loss": 0.1699, "step": 61065 }, { "epoch": 2.85, - "learning_rate": 1.4367821480474429e-05, - "loss": 0.0227, + "learning_rate": 2.4376608990061316e-05, + "loss": 0.0394, "step": 61070 }, { "epoch": 2.85, - "learning_rate": 1.4367352679199289e-05, - "loss": 0.0205, + "learning_rate": 2.43761409202253e-05, + "loss": 0.0519, "step": 61075 }, { "epoch": 2.85, - "learning_rate": 1.4366883877924149e-05, - "loss": 0.0373, + "learning_rate": 2.437567285038928e-05, + "loss": 0.0488, "step": 61080 }, { "epoch": 2.85, - "learning_rate": 1.4366415076649009e-05, - "loss": 0.1037, + "learning_rate": 2.437520478055326e-05, + "loss": 0.0327, "step": 61085 }, { "epoch": 2.85, - "learning_rate": 1.436594627537387e-05, - "loss": 0.0472, + "learning_rate": 2.437473671071724e-05, + "loss": 0.1175, "step": 61090 }, { "epoch": 2.85, - "learning_rate": 1.436547747409873e-05, - "loss": 0.1188, + "learning_rate": 2.4374268640881222e-05, + "loss": 0.0967, "step": 61095 }, { "epoch": 2.85, - "learning_rate": 1.436500867282359e-05, - "loss": 0.0784, + "learning_rate": 2.4373800571045202e-05, + "loss": 0.1732, "step": 61100 }, { "epoch": 2.85, - "learning_rate": 1.436453987154845e-05, - "loss": 0.158, + "learning_rate": 2.437333250120918e-05, + "loss": 0.0984, "step": 61105 }, { "epoch": 2.85, - "learning_rate": 1.4364071070273314e-05, - "loss": 0.192, + "learning_rate": 2.437286443137316e-05, + "loss": 0.2185, "step": 61110 }, { "epoch": 2.85, - "learning_rate": 1.4363602268998173e-05, - "loss": 0.2495, + "learning_rate": 2.437239636153714e-05, + "loss": 0.2051, "step": 61115 }, { "epoch": 2.85, - "learning_rate": 1.4363133467723033e-05, - "loss": 0.077, + "learning_rate": 2.437192829170112e-05, + "loss": 0.0535, "step": 61120 }, { "epoch": 2.85, - "learning_rate": 1.4362664666447893e-05, - "loss": 0.063, + "learning_rate": 2.43714602218651e-05, + "loss": 0.0825, "step": 61125 }, { "epoch": 2.85, - "learning_rate": 1.4362195865172755e-05, - "loss": 0.037, + "learning_rate": 2.4370992152029084e-05, + "loss": 0.0412, "step": 61130 }, { "epoch": 2.85, - "learning_rate": 1.4361727063897615e-05, - "loss": 0.0352, + "learning_rate": 2.4370524082193064e-05, + "loss": 0.0505, "step": 61135 }, { "epoch": 2.85, - "learning_rate": 1.4361258262622475e-05, - "loss": 0.0903, + "learning_rate": 2.4370056012357044e-05, + "loss": 0.0527, "step": 61140 }, { "epoch": 2.85, - "learning_rate": 1.4360789461347335e-05, - "loss": 0.0915, + "learning_rate": 2.4369587942521024e-05, + "loss": 0.2246, "step": 61145 }, { "epoch": 2.85, - "learning_rate": 1.4360320660072196e-05, - "loss": 0.1707, + "learning_rate": 2.4369119872685007e-05, + "loss": 0.1156, "step": 61150 }, { "epoch": 2.85, - "learning_rate": 1.4359851858797058e-05, - "loss": 0.1028, + "learning_rate": 2.4368651802848987e-05, + "loss": 0.1205, "step": 61155 }, { "epoch": 2.85, - "learning_rate": 1.4359383057521918e-05, - "loss": 0.2532, + "learning_rate": 2.4368183733012966e-05, + "loss": 0.2444, "step": 61160 }, { "epoch": 2.85, - "learning_rate": 1.4358914256246778e-05, - "loss": 0.307, + "learning_rate": 2.436771566317695e-05, + "loss": 0.2647, "step": 61165 }, { "epoch": 2.85, - "learning_rate": 1.435844545497164e-05, - "loss": 0.0508, + "learning_rate": 2.4367247593340926e-05, + "loss": 0.078, "step": 61170 }, { "epoch": 2.85, - "learning_rate": 1.43579766536965e-05, - "loss": 0.0748, + "learning_rate": 2.4366779523504906e-05, + "loss": 0.0541, "step": 61175 }, { "epoch": 2.85, - "learning_rate": 1.435750785242136e-05, - "loss": 0.0633, + "learning_rate": 2.4366311453668886e-05, + "loss": 0.0712, "step": 61180 }, { "epoch": 2.85, - "learning_rate": 1.435703905114622e-05, - "loss": 0.0856, + "learning_rate": 2.436584338383287e-05, + "loss": 0.0953, "step": 61185 }, { "epoch": 2.86, - "learning_rate": 1.4356570249871081e-05, - "loss": 0.0617, + "learning_rate": 2.436537531399685e-05, + "loss": 0.0654, "step": 61190 }, { "epoch": 2.86, - "learning_rate": 1.4356101448595941e-05, - "loss": 0.1655, + "learning_rate": 2.436490724416083e-05, + "loss": 0.0437, "step": 61195 }, { "epoch": 2.86, - "learning_rate": 1.4355632647320802e-05, - "loss": 0.2037, + "learning_rate": 2.436443917432481e-05, + "loss": 0.1437, "step": 61200 }, { "epoch": 2.86, - "learning_rate": 1.4355163846045662e-05, - "loss": 0.2147, + "learning_rate": 2.436397110448879e-05, + "loss": 0.1068, "step": 61205 }, { "epoch": 2.86, - "learning_rate": 1.4354695044770524e-05, - "loss": 0.2051, + "learning_rate": 2.436350303465277e-05, + "loss": 0.1847, "step": 61210 }, { "epoch": 2.86, - "learning_rate": 1.4354226243495384e-05, - "loss": 0.4263, + "learning_rate": 2.436303496481675e-05, + "loss": 0.3212, "step": 61215 }, { "epoch": 2.86, - "learning_rate": 1.4353757442220244e-05, - "loss": 0.0426, + "learning_rate": 2.436256689498073e-05, + "loss": 0.068, "step": 61220 }, { "epoch": 2.86, - "learning_rate": 1.4353288640945104e-05, - "loss": 0.0636, + "learning_rate": 2.4362098825144714e-05, + "loss": 0.0279, "step": 61225 }, { "epoch": 2.86, - "learning_rate": 1.4352819839669965e-05, - "loss": 0.0451, + "learning_rate": 2.4361630755308694e-05, + "loss": 0.0346, "step": 61230 }, { "epoch": 2.86, - "learning_rate": 1.4352351038394825e-05, - "loss": 0.0919, + "learning_rate": 2.436116268547267e-05, + "loss": 0.043, "step": 61235 }, { "epoch": 2.86, - "learning_rate": 1.4351882237119685e-05, - "loss": 0.069, + "learning_rate": 2.4360694615636654e-05, + "loss": 0.0752, "step": 61240 }, { "epoch": 2.86, - "learning_rate": 1.4351413435844545e-05, - "loss": 0.0685, + "learning_rate": 2.4360226545800634e-05, + "loss": 0.075, "step": 61245 }, { "epoch": 2.86, - "learning_rate": 1.4350944634569409e-05, - "loss": 0.0752, + "learning_rate": 2.4359758475964613e-05, + "loss": 0.1264, "step": 61250 }, { "epoch": 2.86, - "learning_rate": 1.4350475833294269e-05, - "loss": 0.1631, + "learning_rate": 2.4359290406128593e-05, + "loss": 0.1511, "step": 61255 }, { "epoch": 2.86, - "learning_rate": 1.4350007032019128e-05, - "loss": 0.2122, + "learning_rate": 2.4358822336292576e-05, + "loss": 0.1892, "step": 61260 }, { "epoch": 2.86, - "learning_rate": 1.4349538230743988e-05, - "loss": 0.251, + "learning_rate": 2.4358354266456556e-05, + "loss": 0.2657, "step": 61265 }, { "epoch": 2.86, - "learning_rate": 1.434906942946885e-05, - "loss": 0.07, + "learning_rate": 2.4357886196620536e-05, + "loss": 0.0295, "step": 61270 }, { "epoch": 2.86, - "learning_rate": 1.434860062819371e-05, - "loss": 0.0372, + "learning_rate": 2.4357418126784516e-05, + "loss": 0.0323, "step": 61275 }, { "epoch": 2.86, - "learning_rate": 1.434813182691857e-05, - "loss": 0.035, + "learning_rate": 2.43569500569485e-05, + "loss": 0.0585, "step": 61280 }, { "epoch": 2.86, - "learning_rate": 1.434766302564343e-05, - "loss": 0.0704, + "learning_rate": 2.435648198711248e-05, + "loss": 0.1003, "step": 61285 }, { "epoch": 2.86, - "learning_rate": 1.434719422436829e-05, - "loss": 0.0953, + "learning_rate": 2.435601391727646e-05, + "loss": 0.124, "step": 61290 }, { "epoch": 2.86, - "learning_rate": 1.4346725423093153e-05, - "loss": 0.0836, + "learning_rate": 2.4355545847440442e-05, + "loss": 0.0432, "step": 61295 }, { "epoch": 2.86, - "learning_rate": 1.4346256621818013e-05, - "loss": 0.0872, + "learning_rate": 2.4355077777604418e-05, + "loss": 0.153, "step": 61300 }, { "epoch": 2.86, - "learning_rate": 1.4345787820542873e-05, - "loss": 0.1247, + "learning_rate": 2.4354609707768398e-05, + "loss": 0.2346, "step": 61305 }, { "epoch": 2.86, - "learning_rate": 1.4345319019267735e-05, - "loss": 0.1821, + "learning_rate": 2.4354141637932378e-05, + "loss": 0.3643, "step": 61310 }, { "epoch": 2.86, - "learning_rate": 1.4344850217992595e-05, - "loss": 0.2211, + "learning_rate": 2.435367356809636e-05, + "loss": 0.2115, "step": 61315 }, { "epoch": 2.86, - "learning_rate": 1.4344381416717454e-05, - "loss": 0.0347, + "learning_rate": 2.435320549826034e-05, + "loss": 0.0354, "step": 61320 }, { "epoch": 2.86, - "learning_rate": 1.4343912615442314e-05, - "loss": 0.059, + "learning_rate": 2.435273742842432e-05, + "loss": 0.0395, "step": 61325 }, { "epoch": 2.86, - "learning_rate": 1.4343443814167174e-05, - "loss": 0.0936, + "learning_rate": 2.43522693585883e-05, + "loss": 0.0449, "step": 61330 }, { "epoch": 2.86, - "learning_rate": 1.4342975012892036e-05, - "loss": 0.0936, + "learning_rate": 2.4351801288752284e-05, + "loss": 0.0886, "step": 61335 }, { "epoch": 2.86, - "learning_rate": 1.4342506211616898e-05, - "loss": 0.0566, + "learning_rate": 2.4351333218916264e-05, + "loss": 0.0702, "step": 61340 }, { "epoch": 2.86, - "learning_rate": 1.4342037410341758e-05, - "loss": 0.1761, + "learning_rate": 2.4350865149080243e-05, + "loss": 0.0505, "step": 61345 }, { "epoch": 2.86, - "learning_rate": 1.4341568609066619e-05, - "loss": 0.1749, + "learning_rate": 2.4350397079244227e-05, + "loss": 0.1158, "step": 61350 }, { "epoch": 2.86, - "learning_rate": 1.4341099807791479e-05, - "loss": 0.1305, + "learning_rate": 2.4349929009408206e-05, + "loss": 0.0956, "step": 61355 }, { "epoch": 2.86, - "learning_rate": 1.4340631006516339e-05, - "loss": 0.2353, + "learning_rate": 2.4349460939572183e-05, + "loss": 0.1982, "step": 61360 }, { "epoch": 2.86, - "learning_rate": 1.4340162205241199e-05, - "loss": 0.2172, + "learning_rate": 2.4348992869736163e-05, + "loss": 0.3243, "step": 61365 }, { "epoch": 2.86, - "learning_rate": 1.4339693403966059e-05, - "loss": 0.0978, + "learning_rate": 2.4348524799900146e-05, + "loss": 0.0685, "step": 61370 }, { "epoch": 2.86, - "learning_rate": 1.433922460269092e-05, - "loss": 0.06, + "learning_rate": 2.4348056730064126e-05, + "loss": 0.0371, "step": 61375 }, { "epoch": 2.86, - "learning_rate": 1.433875580141578e-05, - "loss": 0.0539, + "learning_rate": 2.4347588660228106e-05, + "loss": 0.0439, "step": 61380 }, { "epoch": 2.86, - "learning_rate": 1.433828700014064e-05, - "loss": 0.0406, + "learning_rate": 2.4347120590392085e-05, + "loss": 0.0727, "step": 61385 }, { "epoch": 2.86, - "learning_rate": 1.4337818198865504e-05, - "loss": 0.0755, + "learning_rate": 2.434665252055607e-05, + "loss": 0.1047, "step": 61390 }, { "epoch": 2.86, - "learning_rate": 1.4337349397590364e-05, - "loss": 0.119, + "learning_rate": 2.434618445072005e-05, + "loss": 0.0731, "step": 61395 }, { "epoch": 2.87, - "learning_rate": 1.4336880596315224e-05, - "loss": 0.1196, + "learning_rate": 2.4345716380884028e-05, + "loss": 0.1474, "step": 61400 }, { "epoch": 2.87, - "learning_rate": 1.4336411795040083e-05, - "loss": 0.0995, + "learning_rate": 2.434524831104801e-05, + "loss": 0.1503, "step": 61405 }, { "epoch": 2.87, - "learning_rate": 1.4335942993764943e-05, - "loss": 0.1492, + "learning_rate": 2.434478024121199e-05, + "loss": 0.1643, "step": 61410 }, { "epoch": 2.87, - "learning_rate": 1.4335474192489805e-05, - "loss": 0.2199, + "learning_rate": 2.434431217137597e-05, + "loss": 0.2506, "step": 61415 }, { "epoch": 2.87, - "learning_rate": 1.4335005391214665e-05, - "loss": 0.0173, + "learning_rate": 2.434384410153995e-05, + "loss": 0.0481, "step": 61420 }, { "epoch": 2.87, - "learning_rate": 1.4334536589939525e-05, - "loss": 0.034, + "learning_rate": 2.434337603170393e-05, + "loss": 0.0373, "step": 61425 }, { "epoch": 2.87, - "learning_rate": 1.4334067788664385e-05, - "loss": 0.0801, + "learning_rate": 2.434290796186791e-05, + "loss": 0.0473, "step": 61430 }, { "epoch": 2.87, - "learning_rate": 1.4333598987389248e-05, - "loss": 0.1636, + "learning_rate": 2.434243989203189e-05, + "loss": 0.0797, "step": 61435 }, { "epoch": 2.87, - "learning_rate": 1.4333130186114108e-05, - "loss": 0.0911, + "learning_rate": 2.434197182219587e-05, + "loss": 0.0475, "step": 61440 }, { "epoch": 2.87, - "learning_rate": 1.4332661384838968e-05, - "loss": 0.0715, + "learning_rate": 2.4341503752359853e-05, + "loss": 0.058, "step": 61445 }, { "epoch": 2.87, - "learning_rate": 1.4332192583563828e-05, - "loss": 0.1519, + "learning_rate": 2.4341035682523833e-05, + "loss": 0.1259, "step": 61450 }, { "epoch": 2.87, - "learning_rate": 1.433172378228869e-05, - "loss": 0.1616, + "learning_rate": 2.4340567612687813e-05, + "loss": 0.1367, "step": 61455 }, { "epoch": 2.87, - "learning_rate": 1.433125498101355e-05, - "loss": 0.2893, + "learning_rate": 2.4340099542851793e-05, + "loss": 0.2598, "step": 61460 }, { "epoch": 2.87, - "learning_rate": 1.433078617973841e-05, - "loss": 0.2141, + "learning_rate": 2.4339631473015776e-05, + "loss": 0.3622, "step": 61465 }, { "epoch": 2.87, - "learning_rate": 1.433031737846327e-05, - "loss": 0.0291, + "learning_rate": 2.4339163403179756e-05, + "loss": 0.0815, "step": 61470 }, { "epoch": 2.87, - "learning_rate": 1.432984857718813e-05, - "loss": 0.0565, + "learning_rate": 2.4338695333343736e-05, + "loss": 0.0363, "step": 61475 }, { "epoch": 2.87, - "learning_rate": 1.4329379775912993e-05, - "loss": 0.0623, + "learning_rate": 2.433822726350772e-05, + "loss": 0.0565, "step": 61480 }, { "epoch": 2.87, - "learning_rate": 1.4328910974637853e-05, - "loss": 0.1416, + "learning_rate": 2.43377591936717e-05, + "loss": 0.0606, "step": 61485 }, { "epoch": 2.87, - "learning_rate": 1.4328442173362713e-05, - "loss": 0.0452, + "learning_rate": 2.4337291123835675e-05, + "loss": 0.0921, "step": 61490 }, { "epoch": 2.87, - "learning_rate": 1.4327973372087574e-05, - "loss": 0.0532, + "learning_rate": 2.4336823053999655e-05, + "loss": 0.1805, "step": 61495 }, { "epoch": 2.87, - "learning_rate": 1.4327504570812434e-05, - "loss": 0.1677, + "learning_rate": 2.4336354984163638e-05, + "loss": 0.1077, "step": 61500 }, { "epoch": 2.87, - "learning_rate": 1.4327035769537294e-05, - "loss": 0.1528, + "learning_rate": 2.4335886914327618e-05, + "loss": 0.1032, "step": 61505 }, { "epoch": 2.87, - "learning_rate": 1.4326566968262154e-05, - "loss": 0.2106, + "learning_rate": 2.4335418844491598e-05, + "loss": 0.1976, "step": 61510 }, { "epoch": 2.87, - "learning_rate": 1.4326098166987014e-05, - "loss": 0.2591, + "learning_rate": 2.4334950774655578e-05, + "loss": 0.2539, "step": 61515 }, { "epoch": 2.87, - "learning_rate": 1.4325629365711876e-05, - "loss": 0.085, + "learning_rate": 2.433448270481956e-05, + "loss": 0.0383, "step": 61520 }, { "epoch": 2.87, - "learning_rate": 1.4325160564436737e-05, - "loss": 0.0254, + "learning_rate": 2.433401463498354e-05, + "loss": 0.0519, "step": 61525 }, { "epoch": 2.87, - "learning_rate": 1.4324691763161597e-05, - "loss": 0.0941, + "learning_rate": 2.433354656514752e-05, + "loss": 0.0411, "step": 61530 }, { "epoch": 2.87, - "learning_rate": 1.4324222961886459e-05, - "loss": 0.051, + "learning_rate": 2.4333078495311504e-05, + "loss": 0.045, "step": 61535 }, { "epoch": 2.87, - "learning_rate": 1.4323754160611319e-05, - "loss": 0.0773, + "learning_rate": 2.4332610425475483e-05, + "loss": 0.0384, "step": 61540 }, { "epoch": 2.87, - "learning_rate": 1.4323285359336179e-05, - "loss": 0.1572, + "learning_rate": 2.4332142355639463e-05, + "loss": 0.1072, "step": 61545 }, { "epoch": 2.87, - "learning_rate": 1.4322816558061039e-05, - "loss": 0.1354, + "learning_rate": 2.433167428580344e-05, + "loss": 0.1773, "step": 61550 }, { "epoch": 2.87, - "learning_rate": 1.4322347756785898e-05, - "loss": 0.1235, + "learning_rate": 2.4331206215967423e-05, + "loss": 0.1756, "step": 61555 }, { "epoch": 2.87, - "learning_rate": 1.432187895551076e-05, - "loss": 0.3151, + "learning_rate": 2.4330738146131403e-05, + "loss": 0.3195, "step": 61560 }, { "epoch": 2.87, - "learning_rate": 1.432141015423562e-05, - "loss": 0.1998, + "learning_rate": 2.4330270076295383e-05, + "loss": 0.3297, "step": 61565 }, { "epoch": 2.87, - "learning_rate": 1.432094135296048e-05, - "loss": 0.0537, + "learning_rate": 2.4329802006459362e-05, + "loss": 0.0287, "step": 61570 }, { "epoch": 2.87, - "learning_rate": 1.4320472551685343e-05, - "loss": 0.0522, + "learning_rate": 2.4329333936623346e-05, + "loss": 0.0272, "step": 61575 }, { "epoch": 2.87, - "learning_rate": 1.4320003750410203e-05, - "loss": 0.0644, + "learning_rate": 2.4328865866787325e-05, + "loss": 0.0611, "step": 61580 }, { "epoch": 2.87, - "learning_rate": 1.4319534949135063e-05, - "loss": 0.0724, + "learning_rate": 2.4328397796951305e-05, + "loss": 0.0606, "step": 61585 }, { "epoch": 2.87, - "learning_rate": 1.4319066147859923e-05, - "loss": 0.0751, + "learning_rate": 2.432792972711529e-05, + "loss": 0.0713, "step": 61590 }, { "epoch": 2.87, - "learning_rate": 1.4318597346584785e-05, - "loss": 0.0635, + "learning_rate": 2.4327461657279268e-05, + "loss": 0.1068, "step": 61595 }, { "epoch": 2.87, - "learning_rate": 1.4318128545309645e-05, - "loss": 0.0984, + "learning_rate": 2.4326993587443248e-05, + "loss": 0.1738, "step": 61600 }, { "epoch": 2.87, - "learning_rate": 1.4317659744034505e-05, - "loss": 0.2148, + "learning_rate": 2.4326525517607228e-05, + "loss": 0.2613, "step": 61605 }, { "epoch": 2.87, - "learning_rate": 1.4317190942759364e-05, - "loss": 0.2069, + "learning_rate": 2.432605744777121e-05, + "loss": 0.1856, "step": 61610 }, { "epoch": 2.88, - "learning_rate": 1.4316722141484224e-05, - "loss": 0.1928, + "learning_rate": 2.4325589377935187e-05, + "loss": 0.2767, "step": 61615 }, { "epoch": 2.88, - "learning_rate": 1.4316253340209088e-05, - "loss": 0.0248, + "learning_rate": 2.4325121308099167e-05, + "loss": 0.0698, "step": 61620 }, { "epoch": 2.88, - "learning_rate": 1.4315784538933948e-05, - "loss": 0.0543, + "learning_rate": 2.4324653238263147e-05, + "loss": 0.0076, "step": 61625 }, { "epoch": 2.88, - "learning_rate": 1.4315315737658808e-05, - "loss": 0.0697, + "learning_rate": 2.432418516842713e-05, + "loss": 0.0321, "step": 61630 }, { "epoch": 2.88, - "learning_rate": 1.431484693638367e-05, - "loss": 0.067, + "learning_rate": 2.432371709859111e-05, + "loss": 0.0851, "step": 61635 }, { "epoch": 2.88, - "learning_rate": 1.431437813510853e-05, - "loss": 0.0684, + "learning_rate": 2.432324902875509e-05, + "loss": 0.1152, "step": 61640 }, { "epoch": 2.88, - "learning_rate": 1.4313909333833389e-05, - "loss": 0.0666, + "learning_rate": 2.432278095891907e-05, + "loss": 0.0597, "step": 61645 }, { "epoch": 2.88, - "learning_rate": 1.4313440532558249e-05, - "loss": 0.1475, + "learning_rate": 2.4322312889083053e-05, + "loss": 0.1604, "step": 61650 }, { "epoch": 2.88, - "learning_rate": 1.4312971731283109e-05, - "loss": 0.0815, + "learning_rate": 2.4321844819247033e-05, + "loss": 0.1788, "step": 61655 }, { "epoch": 2.88, - "learning_rate": 1.431250293000797e-05, - "loss": 0.432, + "learning_rate": 2.4321376749411013e-05, + "loss": 0.1336, "step": 61660 }, { "epoch": 2.88, - "learning_rate": 1.4312034128732832e-05, - "loss": 0.1714, + "learning_rate": 2.4320908679574996e-05, + "loss": 0.1934, "step": 61665 }, { "epoch": 2.88, - "learning_rate": 1.4311565327457692e-05, - "loss": 0.0722, + "learning_rate": 2.4320440609738976e-05, + "loss": 0.0273, "step": 61670 }, { "epoch": 2.88, - "learning_rate": 1.4311096526182554e-05, - "loss": 0.1003, + "learning_rate": 2.4319972539902955e-05, + "loss": 0.0323, "step": 61675 }, { "epoch": 2.88, - "learning_rate": 1.4310627724907414e-05, - "loss": 0.0671, + "learning_rate": 2.4319504470066932e-05, + "loss": 0.0202, "step": 61680 }, { "epoch": 2.88, - "learning_rate": 1.4310158923632274e-05, - "loss": 0.0371, + "learning_rate": 2.4319036400230915e-05, + "loss": 0.0769, "step": 61685 }, { "epoch": 2.88, - "learning_rate": 1.4309690122357134e-05, - "loss": 0.0837, + "learning_rate": 2.4318568330394895e-05, + "loss": 0.083, "step": 61690 }, { "epoch": 2.88, - "learning_rate": 1.4309221321081994e-05, - "loss": 0.1399, + "learning_rate": 2.4318100260558875e-05, + "loss": 0.0766, "step": 61695 }, { "epoch": 2.88, - "learning_rate": 1.4308752519806855e-05, - "loss": 0.0965, + "learning_rate": 2.4317632190722855e-05, + "loss": 0.1065, "step": 61700 }, { "epoch": 2.88, - "learning_rate": 1.4308283718531715e-05, - "loss": 0.1974, + "learning_rate": 2.4317164120886838e-05, + "loss": 0.1781, "step": 61705 }, { "epoch": 2.88, - "learning_rate": 1.4307814917256575e-05, - "loss": 0.1911, + "learning_rate": 2.4316696051050818e-05, + "loss": 0.0665, "step": 61710 }, { "epoch": 2.88, - "learning_rate": 1.4307346115981438e-05, - "loss": 0.2249, + "learning_rate": 2.4316227981214797e-05, + "loss": 0.2226, "step": 61715 }, { "epoch": 2.88, - "learning_rate": 1.4306877314706298e-05, - "loss": 0.043, + "learning_rate": 2.431575991137878e-05, + "loss": 0.081, "step": 61720 }, { "epoch": 2.88, - "learning_rate": 1.4306408513431158e-05, - "loss": 0.0711, + "learning_rate": 2.431529184154276e-05, + "loss": 0.0485, "step": 61725 }, { "epoch": 2.88, - "learning_rate": 1.4305939712156018e-05, - "loss": 0.0502, + "learning_rate": 2.431482377170674e-05, + "loss": 0.0235, "step": 61730 }, { "epoch": 2.88, - "learning_rate": 1.4305470910880878e-05, - "loss": 0.0629, + "learning_rate": 2.431435570187072e-05, + "loss": 0.069, "step": 61735 }, { "epoch": 2.88, - "learning_rate": 1.430500210960574e-05, - "loss": 0.0486, + "learning_rate": 2.43138876320347e-05, + "loss": 0.0751, "step": 61740 }, { "epoch": 2.88, - "learning_rate": 1.43045333083306e-05, - "loss": 0.111, + "learning_rate": 2.431341956219868e-05, + "loss": 0.1192, "step": 61745 }, { "epoch": 2.88, - "learning_rate": 1.430406450705546e-05, - "loss": 0.0744, + "learning_rate": 2.431295149236266e-05, + "loss": 0.1559, "step": 61750 }, { "epoch": 2.88, - "learning_rate": 1.430359570578032e-05, - "loss": 0.1504, + "learning_rate": 2.431248342252664e-05, + "loss": 0.22, "step": 61755 }, { "epoch": 2.88, - "learning_rate": 1.4303126904505183e-05, - "loss": 0.165, + "learning_rate": 2.4312015352690623e-05, + "loss": 0.2909, "step": 61760 }, { "epoch": 2.88, - "learning_rate": 1.4302658103230043e-05, - "loss": 0.3405, + "learning_rate": 2.4311547282854602e-05, + "loss": 0.2338, "step": 61765 }, { "epoch": 2.88, - "learning_rate": 1.4302189301954903e-05, - "loss": 0.0162, + "learning_rate": 2.4311079213018582e-05, + "loss": 0.0535, "step": 61770 }, { "epoch": 2.88, - "learning_rate": 1.4301720500679763e-05, - "loss": 0.0862, + "learning_rate": 2.4310611143182565e-05, + "loss": 0.0662, "step": 61775 }, { "epoch": 2.88, - "learning_rate": 1.4301251699404624e-05, - "loss": 0.0322, + "learning_rate": 2.4310143073346545e-05, + "loss": 0.0611, "step": 61780 }, { "epoch": 2.88, - "learning_rate": 1.4300782898129484e-05, - "loss": 0.048, + "learning_rate": 2.4309675003510525e-05, + "loss": 0.0273, "step": 61785 }, { "epoch": 2.88, - "learning_rate": 1.4300314096854344e-05, - "loss": 0.0931, + "learning_rate": 2.4309206933674505e-05, + "loss": 0.0856, "step": 61790 }, { "epoch": 2.88, - "learning_rate": 1.4299845295579204e-05, - "loss": 0.0647, + "learning_rate": 2.4308738863838488e-05, + "loss": 0.1327, "step": 61795 }, { "epoch": 2.88, - "learning_rate": 1.4299376494304064e-05, - "loss": 0.0786, + "learning_rate": 2.4308270794002468e-05, + "loss": 0.1152, "step": 61800 }, { "epoch": 2.88, - "learning_rate": 1.4298907693028927e-05, - "loss": 0.2143, + "learning_rate": 2.4307802724166444e-05, + "loss": 0.1681, "step": 61805 }, { "epoch": 2.88, - "learning_rate": 1.4298438891753787e-05, - "loss": 0.1282, + "learning_rate": 2.4307334654330424e-05, + "loss": 0.1321, "step": 61810 }, { "epoch": 2.88, - "learning_rate": 1.4297970090478647e-05, - "loss": 0.3249, + "learning_rate": 2.4306866584494407e-05, + "loss": 0.1941, "step": 61815 }, { "epoch": 2.88, - "learning_rate": 1.4297501289203509e-05, - "loss": 0.0299, + "learning_rate": 2.4306398514658387e-05, + "loss": 0.0509, "step": 61820 }, { "epoch": 2.88, - "learning_rate": 1.4297032487928369e-05, - "loss": 0.0226, + "learning_rate": 2.4305930444822367e-05, + "loss": 0.0275, "step": 61825 }, { "epoch": 2.89, - "learning_rate": 1.4296563686653229e-05, - "loss": 0.0279, + "learning_rate": 2.4305462374986347e-05, + "loss": 0.0283, "step": 61830 }, { "epoch": 2.89, - "learning_rate": 1.4296094885378089e-05, - "loss": 0.0813, + "learning_rate": 2.430499430515033e-05, + "loss": 0.0961, "step": 61835 }, { "epoch": 2.89, - "learning_rate": 1.4295626084102949e-05, - "loss": 0.0733, + "learning_rate": 2.430452623531431e-05, + "loss": 0.0573, "step": 61840 }, { "epoch": 2.89, - "learning_rate": 1.429515728282781e-05, - "loss": 0.0613, + "learning_rate": 2.430405816547829e-05, + "loss": 0.1085, "step": 61845 }, { "epoch": 2.89, - "learning_rate": 1.4294688481552672e-05, - "loss": 0.172, + "learning_rate": 2.4303590095642273e-05, + "loss": 0.1402, "step": 61850 }, { "epoch": 2.89, - "learning_rate": 1.4294219680277532e-05, - "loss": 0.2214, + "learning_rate": 2.4303122025806253e-05, + "loss": 0.1818, "step": 61855 }, { "epoch": 2.89, - "learning_rate": 1.4293750879002393e-05, - "loss": 0.1672, + "learning_rate": 2.4302653955970232e-05, + "loss": 0.175, "step": 61860 }, { "epoch": 2.89, - "learning_rate": 1.4293282077727253e-05, - "loss": 0.207, + "learning_rate": 2.4302185886134212e-05, + "loss": 0.3747, "step": 61865 }, { "epoch": 2.89, - "learning_rate": 1.4292813276452113e-05, - "loss": 0.0749, + "learning_rate": 2.4301717816298192e-05, + "loss": 0.0274, "step": 61870 }, { "epoch": 2.89, - "learning_rate": 1.4292344475176973e-05, - "loss": 0.0837, + "learning_rate": 2.4301249746462172e-05, + "loss": 0.0117, "step": 61875 }, { "epoch": 2.89, - "learning_rate": 1.4291875673901833e-05, - "loss": 0.0423, + "learning_rate": 2.430078167662615e-05, + "loss": 0.0528, "step": 61880 }, { "epoch": 2.89, - "learning_rate": 1.4291406872626695e-05, - "loss": 0.0607, + "learning_rate": 2.430031360679013e-05, + "loss": 0.0439, "step": 61885 }, { "epoch": 2.89, - "learning_rate": 1.4290938071351555e-05, - "loss": 0.073, + "learning_rate": 2.4299845536954115e-05, + "loss": 0.0745, "step": 61890 }, { "epoch": 2.89, - "learning_rate": 1.4290469270076415e-05, - "loss": 0.1784, + "learning_rate": 2.4299377467118095e-05, + "loss": 0.1921, "step": 61895 }, { "epoch": 2.89, - "learning_rate": 1.4290000468801278e-05, - "loss": 0.1983, + "learning_rate": 2.4298909397282074e-05, + "loss": 0.0633, "step": 61900 }, { "epoch": 2.89, - "learning_rate": 1.4289531667526138e-05, - "loss": 0.1165, + "learning_rate": 2.4298441327446058e-05, + "loss": 0.1569, "step": 61905 }, { "epoch": 2.89, - "learning_rate": 1.4289062866250998e-05, - "loss": 0.246, + "learning_rate": 2.4297973257610037e-05, + "loss": 0.1319, "step": 61910 }, { "epoch": 2.89, - "learning_rate": 1.4288594064975858e-05, - "loss": 0.4182, + "learning_rate": 2.4297505187774017e-05, + "loss": 0.1784, "step": 61915 }, { "epoch": 2.89, - "learning_rate": 1.4288125263700718e-05, - "loss": 0.0554, + "learning_rate": 2.4297037117937997e-05, + "loss": 0.0167, "step": 61920 }, { "epoch": 2.89, - "learning_rate": 1.428765646242558e-05, - "loss": 0.0374, + "learning_rate": 2.429656904810198e-05, + "loss": 0.04, "step": 61925 }, { "epoch": 2.89, - "learning_rate": 1.428718766115044e-05, - "loss": 0.0909, + "learning_rate": 2.4296100978265957e-05, + "loss": 0.0985, "step": 61930 }, { "epoch": 2.89, - "learning_rate": 1.4286718859875299e-05, - "loss": 0.0459, + "learning_rate": 2.4295632908429936e-05, + "loss": 0.0926, "step": 61935 }, { "epoch": 2.89, - "learning_rate": 1.4286250058600159e-05, - "loss": 0.0981, + "learning_rate": 2.4295164838593916e-05, + "loss": 0.0379, "step": 61940 }, { "epoch": 2.89, - "learning_rate": 1.4285781257325022e-05, - "loss": 0.173, + "learning_rate": 2.42946967687579e-05, + "loss": 0.2004, "step": 61945 }, { "epoch": 2.89, - "learning_rate": 1.4285312456049882e-05, - "loss": 0.1568, + "learning_rate": 2.429422869892188e-05, + "loss": 0.0933, "step": 61950 }, { "epoch": 2.89, - "learning_rate": 1.4284843654774742e-05, - "loss": 0.1952, + "learning_rate": 2.429376062908586e-05, + "loss": 0.168, "step": 61955 }, { "epoch": 2.89, - "learning_rate": 1.4284374853499602e-05, - "loss": 0.2491, + "learning_rate": 2.4293292559249842e-05, + "loss": 0.1649, "step": 61960 }, { "epoch": 2.89, - "learning_rate": 1.4283906052224464e-05, - "loss": 0.3357, + "learning_rate": 2.4292824489413822e-05, + "loss": 0.2429, "step": 61965 }, { "epoch": 2.89, - "learning_rate": 1.4283437250949324e-05, - "loss": 0.0521, + "learning_rate": 2.4292356419577802e-05, + "loss": 0.094, "step": 61970 }, { "epoch": 2.89, - "learning_rate": 1.4282968449674184e-05, - "loss": 0.0331, + "learning_rate": 2.4291888349741782e-05, + "loss": 0.0499, "step": 61975 }, { "epoch": 2.89, - "learning_rate": 1.4282499648399044e-05, - "loss": 0.0295, + "learning_rate": 2.4291420279905765e-05, + "loss": 0.0279, "step": 61980 }, { "epoch": 2.89, - "learning_rate": 1.4282030847123905e-05, - "loss": 0.0363, + "learning_rate": 2.4290952210069745e-05, + "loss": 0.0924, "step": 61985 }, { "epoch": 2.89, - "learning_rate": 1.4281562045848767e-05, - "loss": 0.1385, + "learning_rate": 2.4290484140233725e-05, + "loss": 0.0661, "step": 61990 }, { "epoch": 2.89, - "learning_rate": 1.4281093244573627e-05, - "loss": 0.0938, + "learning_rate": 2.42900160703977e-05, + "loss": 0.1303, "step": 61995 }, { "epoch": 2.89, - "learning_rate": 1.4280624443298487e-05, - "loss": 0.041, + "learning_rate": 2.4289548000561684e-05, + "loss": 0.1382, "step": 62000 }, { "epoch": 2.89, - "learning_rate": 1.4280155642023348e-05, - "loss": 0.1099, + "learning_rate": 2.4289079930725664e-05, + "loss": 0.0747, "step": 62005 }, { "epoch": 2.89, - "learning_rate": 1.4279686840748208e-05, - "loss": 0.2192, + "learning_rate": 2.4288611860889644e-05, + "loss": 0.1682, "step": 62010 }, { "epoch": 2.89, - "learning_rate": 1.4279218039473068e-05, - "loss": 0.203, + "learning_rate": 2.4288143791053627e-05, + "loss": 0.2472, "step": 62015 }, { "epoch": 2.89, - "learning_rate": 1.4278749238197928e-05, - "loss": 0.0887, + "learning_rate": 2.4287675721217607e-05, + "loss": 0.0919, "step": 62020 }, { "epoch": 2.89, - "learning_rate": 1.427828043692279e-05, - "loss": 0.0572, + "learning_rate": 2.4287207651381587e-05, + "loss": 0.0293, "step": 62025 }, { "epoch": 2.89, - "learning_rate": 1.427781163564765e-05, - "loss": 0.0779, + "learning_rate": 2.4286739581545567e-05, + "loss": 0.0307, "step": 62030 }, { "epoch": 2.89, - "learning_rate": 1.427734283437251e-05, - "loss": 0.0652, + "learning_rate": 2.428627151170955e-05, + "loss": 0.0959, "step": 62035 }, { "epoch": 2.89, - "learning_rate": 1.4276874033097371e-05, - "loss": 0.096, + "learning_rate": 2.428580344187353e-05, + "loss": 0.0687, "step": 62040 }, { "epoch": 2.9, - "learning_rate": 1.4276405231822233e-05, - "loss": 0.0918, + "learning_rate": 2.428533537203751e-05, + "loss": 0.0993, "step": 62045 }, { "epoch": 2.9, - "learning_rate": 1.4275936430547093e-05, - "loss": 0.1491, + "learning_rate": 2.428486730220149e-05, + "loss": 0.2027, "step": 62050 }, { "epoch": 2.9, - "learning_rate": 1.4275467629271953e-05, - "loss": 0.0714, + "learning_rate": 2.428439923236547e-05, + "loss": 0.1111, "step": 62055 }, { "epoch": 2.9, - "learning_rate": 1.4274998827996813e-05, - "loss": 0.2989, + "learning_rate": 2.428393116252945e-05, + "loss": 0.2029, "step": 62060 }, { "epoch": 2.9, - "learning_rate": 1.4274530026721674e-05, - "loss": 0.151, + "learning_rate": 2.428346309269343e-05, + "loss": 0.1714, "step": 62065 }, { "epoch": 2.9, - "learning_rate": 1.4274061225446534e-05, - "loss": 0.3291, + "learning_rate": 2.428299502285741e-05, + "loss": 0.0518, "step": 62070 }, { "epoch": 2.9, - "learning_rate": 1.4273592424171394e-05, - "loss": 0.0823, + "learning_rate": 2.428252695302139e-05, + "loss": 0.0558, "step": 62075 }, { "epoch": 2.9, - "learning_rate": 1.4273123622896254e-05, - "loss": 0.0368, + "learning_rate": 2.428205888318537e-05, + "loss": 0.0818, "step": 62080 }, { "epoch": 2.9, - "learning_rate": 1.4272654821621117e-05, - "loss": 0.0819, + "learning_rate": 2.428159081334935e-05, + "loss": 0.1121, "step": 62085 }, { "epoch": 2.9, - "learning_rate": 1.4272186020345977e-05, - "loss": 0.1533, + "learning_rate": 2.4281122743513335e-05, + "loss": 0.0769, "step": 62090 }, { "epoch": 2.9, - "learning_rate": 1.4271717219070837e-05, - "loss": 0.0694, + "learning_rate": 2.4280654673677314e-05, + "loss": 0.0717, "step": 62095 }, { "epoch": 2.9, - "learning_rate": 1.4271248417795697e-05, - "loss": 0.1272, + "learning_rate": 2.4280186603841294e-05, + "loss": 0.0822, "step": 62100 }, { "epoch": 2.9, - "learning_rate": 1.4270779616520559e-05, - "loss": 0.1179, + "learning_rate": 2.4279718534005274e-05, + "loss": 0.1347, "step": 62105 }, { "epoch": 2.9, - "learning_rate": 1.4270310815245419e-05, - "loss": 0.1788, + "learning_rate": 2.4279250464169257e-05, + "loss": 0.2519, "step": 62110 }, { "epoch": 2.9, - "learning_rate": 1.4269842013970279e-05, - "loss": 0.2079, + "learning_rate": 2.4278782394333237e-05, + "loss": 0.232, "step": 62115 }, { "epoch": 2.9, - "learning_rate": 1.4269373212695139e-05, - "loss": 0.0381, + "learning_rate": 2.4278314324497213e-05, + "loss": 0.0362, "step": 62120 }, { "epoch": 2.9, - "learning_rate": 1.4268904411419999e-05, - "loss": 0.0527, + "learning_rate": 2.4277846254661193e-05, + "loss": 0.0422, "step": 62125 }, { "epoch": 2.9, - "learning_rate": 1.4268435610144862e-05, - "loss": 0.0433, + "learning_rate": 2.4277378184825176e-05, + "loss": 0.0254, "step": 62130 }, { "epoch": 2.9, - "learning_rate": 1.4267966808869722e-05, - "loss": 0.0685, + "learning_rate": 2.4276910114989156e-05, + "loss": 0.0787, "step": 62135 }, { "epoch": 2.9, - "learning_rate": 1.4267498007594582e-05, - "loss": 0.0537, + "learning_rate": 2.4276442045153136e-05, + "loss": 0.11, "step": 62140 }, { "epoch": 2.9, - "learning_rate": 1.4267029206319443e-05, - "loss": 0.0927, + "learning_rate": 2.427597397531712e-05, + "loss": 0.101, "step": 62145 }, { "epoch": 2.9, - "learning_rate": 1.4266560405044303e-05, - "loss": 0.1351, + "learning_rate": 2.42755059054811e-05, + "loss": 0.1464, "step": 62150 }, { "epoch": 2.9, - "learning_rate": 1.4266091603769163e-05, - "loss": 0.0956, + "learning_rate": 2.427503783564508e-05, + "loss": 0.135, "step": 62155 }, { "epoch": 2.9, - "learning_rate": 1.4265622802494023e-05, - "loss": 0.1689, + "learning_rate": 2.427456976580906e-05, + "loss": 0.2666, "step": 62160 }, { "epoch": 2.9, - "learning_rate": 1.4265154001218883e-05, - "loss": 0.2929, + "learning_rate": 2.4274101695973042e-05, + "loss": 0.1826, "step": 62165 }, { "epoch": 2.9, - "learning_rate": 1.4264685199943745e-05, - "loss": 0.0412, + "learning_rate": 2.4273633626137022e-05, + "loss": 0.0506, "step": 62170 }, { "epoch": 2.9, - "learning_rate": 1.4264216398668606e-05, - "loss": 0.0285, + "learning_rate": 2.4273165556301e-05, + "loss": 0.0364, "step": 62175 }, { "epoch": 2.9, - "learning_rate": 1.4263747597393466e-05, - "loss": 0.0957, + "learning_rate": 2.427269748646498e-05, + "loss": 0.0439, "step": 62180 }, { "epoch": 2.9, - "learning_rate": 1.4263278796118328e-05, - "loss": 0.0749, + "learning_rate": 2.427222941662896e-05, + "loss": 0.0592, "step": 62185 }, { "epoch": 2.9, - "learning_rate": 1.4262809994843188e-05, - "loss": 0.11, + "learning_rate": 2.427176134679294e-05, + "loss": 0.0831, "step": 62190 }, { "epoch": 2.9, - "learning_rate": 1.4262341193568048e-05, - "loss": 0.0803, + "learning_rate": 2.427129327695692e-05, + "loss": 0.1056, "step": 62195 }, { "epoch": 2.9, - "learning_rate": 1.4261872392292908e-05, - "loss": 0.0942, + "learning_rate": 2.4270825207120904e-05, + "loss": 0.125, "step": 62200 }, { "epoch": 2.9, - "learning_rate": 1.4261403591017768e-05, - "loss": 0.1192, + "learning_rate": 2.4270357137284884e-05, + "loss": 0.2117, "step": 62205 }, { "epoch": 2.9, - "learning_rate": 1.426093478974263e-05, - "loss": 0.2655, + "learning_rate": 2.4269889067448864e-05, + "loss": 0.3004, "step": 62210 }, { "epoch": 2.9, - "learning_rate": 1.426046598846749e-05, - "loss": 0.2694, + "learning_rate": 2.4269420997612844e-05, + "loss": 0.2234, "step": 62215 }, { "epoch": 2.9, - "learning_rate": 1.425999718719235e-05, - "loss": 0.0398, + "learning_rate": 2.4268952927776827e-05, + "loss": 0.0448, "step": 62220 }, { "epoch": 2.9, - "learning_rate": 1.4259528385917213e-05, - "loss": 0.0649, + "learning_rate": 2.4268484857940807e-05, + "loss": 0.0236, "step": 62225 }, { "epoch": 2.9, - "learning_rate": 1.4259059584642072e-05, - "loss": 0.0913, + "learning_rate": 2.4268016788104786e-05, + "loss": 0.082, "step": 62230 }, { "epoch": 2.9, - "learning_rate": 1.4258590783366932e-05, - "loss": 0.1374, + "learning_rate": 2.4267548718268766e-05, + "loss": 0.0562, "step": 62235 }, { "epoch": 2.9, - "learning_rate": 1.4258121982091792e-05, - "loss": 0.0531, + "learning_rate": 2.426708064843275e-05, + "loss": 0.0415, "step": 62240 }, { "epoch": 2.9, - "learning_rate": 1.4257653180816652e-05, - "loss": 0.0716, + "learning_rate": 2.4266612578596726e-05, + "loss": 0.0682, "step": 62245 }, { "epoch": 2.9, - "learning_rate": 1.4257184379541514e-05, - "loss": 0.1004, + "learning_rate": 2.4266144508760706e-05, + "loss": 0.092, "step": 62250 }, { "epoch": 2.9, - "learning_rate": 1.4256715578266374e-05, + "learning_rate": 2.4265676438924685e-05, "loss": 0.137, "step": 62255 }, { "epoch": 2.91, - "learning_rate": 1.4256246776991234e-05, - "loss": 0.262, + "learning_rate": 2.426520836908867e-05, + "loss": 0.2428, "step": 62260 }, { "epoch": 2.91, - "learning_rate": 1.4255777975716094e-05, - "loss": 0.2588, + "learning_rate": 2.426474029925265e-05, + "loss": 0.276, "step": 62265 }, { "epoch": 2.91, - "learning_rate": 1.4255309174440957e-05, - "loss": 0.022, + "learning_rate": 2.4264272229416628e-05, + "loss": 0.0869, "step": 62270 }, { "epoch": 2.91, - "learning_rate": 1.4254840373165817e-05, - "loss": 0.0912, + "learning_rate": 2.426380415958061e-05, + "loss": 0.0545, "step": 62275 }, { "epoch": 2.91, - "learning_rate": 1.4254371571890677e-05, - "loss": 0.0856, + "learning_rate": 2.426333608974459e-05, + "loss": 0.0699, "step": 62280 }, { "epoch": 2.91, - "learning_rate": 1.4253902770615537e-05, - "loss": 0.0125, + "learning_rate": 2.426286801990857e-05, + "loss": 0.0621, "step": 62285 }, { "epoch": 2.91, - "learning_rate": 1.4253433969340398e-05, - "loss": 0.0782, + "learning_rate": 2.426239995007255e-05, + "loss": 0.0954, "step": 62290 }, { "epoch": 2.91, - "learning_rate": 1.4252965168065258e-05, - "loss": 0.1191, + "learning_rate": 2.4261931880236534e-05, + "loss": 0.1235, "step": 62295 }, { "epoch": 2.91, - "learning_rate": 1.4252496366790118e-05, - "loss": 0.1224, + "learning_rate": 2.4261463810400514e-05, + "loss": 0.1832, "step": 62300 }, { "epoch": 2.91, - "learning_rate": 1.4252027565514978e-05, - "loss": 0.1303, + "learning_rate": 2.4260995740564494e-05, + "loss": 0.1592, "step": 62305 }, { "epoch": 2.91, - "learning_rate": 1.4251558764239838e-05, - "loss": 0.2055, + "learning_rate": 2.426052767072847e-05, + "loss": 0.1928, "step": 62310 }, { "epoch": 2.91, - "learning_rate": 1.4251089962964702e-05, - "loss": 0.2296, + "learning_rate": 2.4260059600892453e-05, + "loss": 0.2388, "step": 62315 }, { "epoch": 2.91, - "learning_rate": 1.4250621161689561e-05, - "loss": 0.0625, + "learning_rate": 2.4259591531056433e-05, + "loss": 0.0617, "step": 62320 }, { "epoch": 2.91, - "learning_rate": 1.4250152360414421e-05, - "loss": 0.0877, + "learning_rate": 2.4259123461220413e-05, + "loss": 0.0122, "step": 62325 }, { "epoch": 2.91, - "learning_rate": 1.4249683559139283e-05, - "loss": 0.0508, + "learning_rate": 2.4258655391384396e-05, + "loss": 0.0475, "step": 62330 }, { "epoch": 2.91, - "learning_rate": 1.4249214757864143e-05, - "loss": 0.0208, + "learning_rate": 2.4258187321548376e-05, + "loss": 0.0282, "step": 62335 }, { "epoch": 2.91, - "learning_rate": 1.4248745956589003e-05, - "loss": 0.066, + "learning_rate": 2.4257719251712356e-05, + "loss": 0.0403, "step": 62340 }, { "epoch": 2.91, - "learning_rate": 1.4248277155313863e-05, - "loss": 0.1559, + "learning_rate": 2.4257251181876336e-05, + "loss": 0.0951, "step": 62345 }, { "epoch": 2.91, - "learning_rate": 1.4247808354038723e-05, - "loss": 0.1327, + "learning_rate": 2.425678311204032e-05, + "loss": 0.1381, "step": 62350 }, { "epoch": 2.91, - "learning_rate": 1.4247339552763584e-05, - "loss": 0.1594, + "learning_rate": 2.42563150422043e-05, + "loss": 0.1644, "step": 62355 }, { "epoch": 2.91, - "learning_rate": 1.4246870751488444e-05, - "loss": 0.2158, + "learning_rate": 2.425584697236828e-05, + "loss": 0.1559, "step": 62360 }, { "epoch": 2.91, - "learning_rate": 1.4246401950213306e-05, - "loss": 0.4769, + "learning_rate": 2.425537890253226e-05, + "loss": 0.2036, "step": 62365 }, { "epoch": 2.91, - "learning_rate": 1.4245933148938168e-05, - "loss": 0.0163, + "learning_rate": 2.425491083269624e-05, + "loss": 0.0376, "step": 62370 }, { "epoch": 2.91, - "learning_rate": 1.4245464347663028e-05, - "loss": 0.0447, + "learning_rate": 2.4254442762860218e-05, + "loss": 0.0281, "step": 62375 }, { "epoch": 2.91, - "learning_rate": 1.4244995546387887e-05, - "loss": 0.0663, + "learning_rate": 2.4253974693024198e-05, + "loss": 0.0222, "step": 62380 }, { "epoch": 2.91, - "learning_rate": 1.4244526745112747e-05, - "loss": 0.0934, + "learning_rate": 2.425350662318818e-05, + "loss": 0.06, "step": 62385 }, { "epoch": 2.91, - "learning_rate": 1.4244057943837607e-05, - "loss": 0.0503, + "learning_rate": 2.425303855335216e-05, + "loss": 0.0387, "step": 62390 }, { "epoch": 2.91, - "learning_rate": 1.4243589142562469e-05, - "loss": 0.1216, + "learning_rate": 2.425257048351614e-05, + "loss": 0.0702, "step": 62395 }, { "epoch": 2.91, - "learning_rate": 1.4243120341287329e-05, - "loss": 0.1094, + "learning_rate": 2.425210241368012e-05, + "loss": 0.062, "step": 62400 }, { "epoch": 2.91, - "learning_rate": 1.4242651540012189e-05, - "loss": 0.1975, + "learning_rate": 2.4251634343844104e-05, + "loss": 0.1915, "step": 62405 }, { "epoch": 2.91, - "learning_rate": 1.4242182738737052e-05, - "loss": 0.2689, + "learning_rate": 2.4251166274008084e-05, + "loss": 0.1869, "step": 62410 }, { "epoch": 2.91, - "learning_rate": 1.4241713937461912e-05, - "loss": 0.338, + "learning_rate": 2.4250698204172063e-05, + "loss": 0.354, "step": 62415 }, { "epoch": 2.91, - "learning_rate": 1.4241245136186772e-05, - "loss": 0.0438, + "learning_rate": 2.4250230134336043e-05, + "loss": 0.0541, "step": 62420 }, { "epoch": 2.91, - "learning_rate": 1.4240776334911632e-05, - "loss": 0.0389, + "learning_rate": 2.4249762064500026e-05, + "loss": 0.0214, "step": 62425 }, { "epoch": 2.91, - "learning_rate": 1.4240307533636492e-05, - "loss": 0.0787, + "learning_rate": 2.4249293994664006e-05, + "loss": 0.0189, "step": 62430 }, { "epoch": 2.91, - "learning_rate": 1.4239838732361353e-05, - "loss": 0.0889, + "learning_rate": 2.4248825924827983e-05, + "loss": 0.0817, "step": 62435 }, { "epoch": 2.91, - "learning_rate": 1.4239369931086213e-05, - "loss": 0.0315, + "learning_rate": 2.4248357854991962e-05, + "loss": 0.0589, "step": 62440 }, { "epoch": 2.91, - "learning_rate": 1.4238901129811073e-05, - "loss": 0.0751, + "learning_rate": 2.4247889785155946e-05, + "loss": 0.1001, "step": 62445 }, { "epoch": 2.91, - "learning_rate": 1.4238432328535933e-05, - "loss": 0.116, + "learning_rate": 2.4247421715319925e-05, + "loss": 0.1036, "step": 62450 }, { "epoch": 2.91, - "learning_rate": 1.4237963527260797e-05, - "loss": 0.1544, + "learning_rate": 2.4246953645483905e-05, + "loss": 0.1051, "step": 62455 }, { "epoch": 2.91, - "learning_rate": 1.4237494725985657e-05, - "loss": 0.1478, + "learning_rate": 2.424648557564789e-05, + "loss": 0.2078, "step": 62460 }, { "epoch": 2.91, - "learning_rate": 1.4237025924710516e-05, - "loss": 0.2487, + "learning_rate": 2.4246017505811868e-05, + "loss": 0.2313, "step": 62465 }, { "epoch": 2.91, - "learning_rate": 1.4236557123435376e-05, - "loss": 0.0402, + "learning_rate": 2.4245549435975848e-05, + "loss": 0.0199, "step": 62470 }, { "epoch": 2.92, - "learning_rate": 1.4236088322160238e-05, - "loss": 0.0696, + "learning_rate": 2.4245081366139828e-05, + "loss": 0.0428, "step": 62475 }, { "epoch": 2.92, - "learning_rate": 1.4235619520885098e-05, - "loss": 0.0629, + "learning_rate": 2.424461329630381e-05, + "loss": 0.0405, "step": 62480 }, { "epoch": 2.92, - "learning_rate": 1.4235150719609958e-05, - "loss": 0.0973, + "learning_rate": 2.424414522646779e-05, + "loss": 0.0835, "step": 62485 }, { "epoch": 2.92, - "learning_rate": 1.4234681918334818e-05, - "loss": 0.0445, + "learning_rate": 2.424367715663177e-05, + "loss": 0.0966, "step": 62490 }, { "epoch": 2.92, - "learning_rate": 1.423421311705968e-05, - "loss": 0.1361, + "learning_rate": 2.424320908679575e-05, + "loss": 0.1012, "step": 62495 }, { "epoch": 2.92, - "learning_rate": 1.4233744315784541e-05, - "loss": 0.1161, + "learning_rate": 2.424274101695973e-05, + "loss": 0.069, "step": 62500 }, { "epoch": 2.92, - "learning_rate": 1.4233275514509401e-05, - "loss": 0.185, + "learning_rate": 2.424227294712371e-05, + "loss": 0.261, "step": 62505 }, { "epoch": 2.92, - "learning_rate": 1.4232806713234261e-05, - "loss": 0.4291, + "learning_rate": 2.424180487728769e-05, + "loss": 0.4236, "step": 62510 }, { "epoch": 2.92, - "learning_rate": 1.4232337911959123e-05, - "loss": 0.2839, + "learning_rate": 2.4241336807451673e-05, + "loss": 0.2733, "step": 62515 }, { "epoch": 2.92, - "learning_rate": 1.4231869110683983e-05, - "loss": 0.0522, + "learning_rate": 2.4240868737615653e-05, + "loss": 0.0231, "step": 62520 }, { "epoch": 2.92, - "learning_rate": 1.4231400309408842e-05, - "loss": 0.0422, + "learning_rate": 2.4240400667779633e-05, + "loss": 0.0412, "step": 62525 }, { "epoch": 2.92, - "learning_rate": 1.4230931508133702e-05, - "loss": 0.115, + "learning_rate": 2.4239932597943613e-05, + "loss": 0.0379, "step": 62530 }, { "epoch": 2.92, - "learning_rate": 1.4230462706858564e-05, - "loss": 0.0269, + "learning_rate": 2.4239464528107596e-05, + "loss": 0.0517, "step": 62535 }, { "epoch": 2.92, - "learning_rate": 1.4229993905583424e-05, - "loss": 0.0985, + "learning_rate": 2.4238996458271576e-05, + "loss": 0.0839, "step": 62540 }, { "epoch": 2.92, - "learning_rate": 1.4229525104308284e-05, - "loss": 0.0298, + "learning_rate": 2.4238528388435556e-05, + "loss": 0.1312, "step": 62545 }, { "epoch": 2.92, - "learning_rate": 1.4229056303033146e-05, - "loss": 0.0855, + "learning_rate": 2.4238060318599535e-05, + "loss": 0.1746, "step": 62550 }, { "epoch": 2.92, - "learning_rate": 1.4228587501758007e-05, - "loss": 0.1481, + "learning_rate": 2.423759224876352e-05, + "loss": 0.1599, "step": 62555 }, { "epoch": 2.92, - "learning_rate": 1.4228118700482867e-05, - "loss": 0.0833, + "learning_rate": 2.4237124178927495e-05, + "loss": 0.1256, "step": 62560 }, { "epoch": 2.92, - "learning_rate": 1.4227649899207727e-05, - "loss": 0.2215, + "learning_rate": 2.4236656109091475e-05, + "loss": 0.2098, "step": 62565 }, { "epoch": 2.92, - "learning_rate": 1.4227181097932587e-05, - "loss": 0.0178, + "learning_rate": 2.4236188039255458e-05, + "loss": 0.018, "step": 62570 }, { "epoch": 2.92, - "learning_rate": 1.4226712296657449e-05, - "loss": 0.0319, + "learning_rate": 2.4235719969419438e-05, + "loss": 0.0437, "step": 62575 }, { "epoch": 2.92, - "learning_rate": 1.4226243495382308e-05, - "loss": 0.085, + "learning_rate": 2.4235251899583418e-05, + "loss": 0.0274, "step": 62580 }, { "epoch": 2.92, - "learning_rate": 1.4225774694107168e-05, - "loss": 0.0879, + "learning_rate": 2.4234783829747397e-05, + "loss": 0.0552, "step": 62585 }, { "epoch": 2.92, - "learning_rate": 1.4225305892832028e-05, - "loss": 0.0685, + "learning_rate": 2.423431575991138e-05, + "loss": 0.0478, "step": 62590 }, { "epoch": 2.92, - "learning_rate": 1.4224837091556892e-05, - "loss": 0.0989, + "learning_rate": 2.423384769007536e-05, + "loss": 0.154, "step": 62595 }, { "epoch": 2.92, - "learning_rate": 1.4224368290281752e-05, - "loss": 0.0491, + "learning_rate": 2.423337962023934e-05, + "loss": 0.0941, "step": 62600 }, { "epoch": 2.92, - "learning_rate": 1.4223899489006612e-05, - "loss": 0.1561, + "learning_rate": 2.423291155040332e-05, + "loss": 0.1631, "step": 62605 }, { "epoch": 2.92, - "learning_rate": 1.4223430687731471e-05, - "loss": 0.2575, + "learning_rate": 2.4232443480567303e-05, + "loss": 0.2482, "step": 62610 }, { "epoch": 2.92, - "learning_rate": 1.4222961886456333e-05, - "loss": 0.1737, + "learning_rate": 2.4231975410731283e-05, + "loss": 0.2467, "step": 62615 }, { "epoch": 2.92, - "learning_rate": 1.4222493085181193e-05, - "loss": 0.0793, + "learning_rate": 2.4231507340895263e-05, + "loss": 0.0199, "step": 62620 }, { "epoch": 2.92, - "learning_rate": 1.4222024283906053e-05, - "loss": 0.0559, + "learning_rate": 2.423103927105924e-05, + "loss": 0.0346, "step": 62625 }, { "epoch": 2.92, - "learning_rate": 1.4221555482630913e-05, - "loss": 0.0621, + "learning_rate": 2.4230571201223223e-05, + "loss": 0.0191, "step": 62630 }, { "epoch": 2.92, - "learning_rate": 1.4221086681355773e-05, - "loss": 0.1113, + "learning_rate": 2.4230103131387202e-05, + "loss": 0.0524, "step": 62635 }, { "epoch": 2.92, - "learning_rate": 1.4220617880080636e-05, - "loss": 0.0519, + "learning_rate": 2.4229635061551182e-05, + "loss": 0.0683, "step": 62640 }, { "epoch": 2.92, - "learning_rate": 1.4220149078805496e-05, - "loss": 0.1117, + "learning_rate": 2.4229166991715165e-05, + "loss": 0.1245, "step": 62645 }, { "epoch": 2.92, - "learning_rate": 1.4219680277530356e-05, - "loss": 0.1568, + "learning_rate": 2.4228698921879145e-05, + "loss": 0.1553, "step": 62650 }, { "epoch": 2.92, - "learning_rate": 1.4219211476255218e-05, - "loss": 0.1619, + "learning_rate": 2.4228230852043125e-05, + "loss": 0.1299, "step": 62655 }, { "epoch": 2.92, - "learning_rate": 1.4218742674980078e-05, - "loss": 0.2119, + "learning_rate": 2.4227762782207105e-05, + "loss": 0.2061, "step": 62660 }, { "epoch": 2.92, - "learning_rate": 1.4218273873704938e-05, - "loss": 0.1848, + "learning_rate": 2.4227294712371088e-05, + "loss": 0.2275, "step": 62665 }, { "epoch": 2.92, - "learning_rate": 1.4217805072429797e-05, - "loss": 0.0474, + "learning_rate": 2.4226826642535068e-05, + "loss": 0.0413, "step": 62670 }, { "epoch": 2.92, - "learning_rate": 1.4217336271154657e-05, - "loss": 0.0365, + "learning_rate": 2.4226358572699048e-05, + "loss": 0.0218, "step": 62675 }, { "epoch": 2.92, - "learning_rate": 1.4216867469879519e-05, - "loss": 0.0216, + "learning_rate": 2.4225890502863028e-05, + "loss": 0.0585, "step": 62680 }, { "epoch": 2.92, - "learning_rate": 1.4216398668604379e-05, - "loss": 0.053, + "learning_rate": 2.422542243302701e-05, + "loss": 0.0461, "step": 62685 }, { "epoch": 2.93, - "learning_rate": 1.421592986732924e-05, - "loss": 0.1091, + "learning_rate": 2.4224954363190987e-05, + "loss": 0.0453, "step": 62690 }, { "epoch": 2.93, - "learning_rate": 1.4215461066054102e-05, - "loss": 0.1586, + "learning_rate": 2.4224486293354967e-05, + "loss": 0.0638, "step": 62695 }, { "epoch": 2.93, - "learning_rate": 1.4214992264778962e-05, - "loss": 0.0618, + "learning_rate": 2.422401822351895e-05, + "loss": 0.1183, "step": 62700 }, { "epoch": 2.93, - "learning_rate": 1.4214523463503822e-05, - "loss": 0.2021, + "learning_rate": 2.422355015368293e-05, + "loss": 0.1473, "step": 62705 }, { "epoch": 2.93, - "learning_rate": 1.4214054662228682e-05, - "loss": 0.2014, + "learning_rate": 2.422308208384691e-05, + "loss": 0.2651, "step": 62710 }, { "epoch": 2.93, - "learning_rate": 1.4213585860953542e-05, - "loss": 0.24, + "learning_rate": 2.422261401401089e-05, + "loss": 0.3302, "step": 62715 }, { "epoch": 2.93, - "learning_rate": 1.4213117059678404e-05, - "loss": 0.0265, + "learning_rate": 2.4222145944174873e-05, + "loss": 0.0361, "step": 62720 }, { "epoch": 2.93, - "learning_rate": 1.4212648258403264e-05, - "loss": 0.0587, + "learning_rate": 2.4221677874338853e-05, + "loss": 0.0223, "step": 62725 }, { "epoch": 2.93, - "learning_rate": 1.4212179457128123e-05, - "loss": 0.0479, + "learning_rate": 2.4221209804502832e-05, + "loss": 0.0328, "step": 62730 }, { "epoch": 2.93, - "learning_rate": 1.4211710655852987e-05, - "loss": 0.1283, + "learning_rate": 2.4220741734666812e-05, + "loss": 0.0705, "step": 62735 }, { "epoch": 2.93, - "learning_rate": 1.4211241854577847e-05, - "loss": 0.0703, + "learning_rate": 2.4220273664830796e-05, + "loss": 0.0713, "step": 62740 }, { "epoch": 2.93, - "learning_rate": 1.4210773053302707e-05, - "loss": 0.0843, + "learning_rate": 2.4219805594994775e-05, + "loss": 0.1249, "step": 62745 }, { "epoch": 2.93, - "learning_rate": 1.4210304252027567e-05, - "loss": 0.0996, + "learning_rate": 2.4219337525158752e-05, + "loss": 0.1163, "step": 62750 }, { "epoch": 2.93, - "learning_rate": 1.4209835450752427e-05, - "loss": 0.2006, + "learning_rate": 2.4218869455322735e-05, + "loss": 0.1602, "step": 62755 }, { "epoch": 2.93, - "learning_rate": 1.4209366649477288e-05, - "loss": 0.3242, + "learning_rate": 2.4218401385486715e-05, + "loss": 0.3166, "step": 62760 }, { "epoch": 2.93, - "learning_rate": 1.4208897848202148e-05, - "loss": 0.391, + "learning_rate": 2.4217933315650695e-05, + "loss": 0.2019, "step": 62765 }, { "epoch": 2.93, - "learning_rate": 1.4208429046927008e-05, - "loss": 0.0666, + "learning_rate": 2.4217465245814674e-05, + "loss": 0.0548, "step": 62770 }, { "epoch": 2.93, - "learning_rate": 1.4207960245651868e-05, - "loss": 0.0281, + "learning_rate": 2.4216997175978658e-05, + "loss": 0.0238, "step": 62775 }, { "epoch": 2.93, - "learning_rate": 1.4207491444376731e-05, - "loss": 0.1492, + "learning_rate": 2.4216529106142637e-05, + "loss": 0.0893, "step": 62780 }, { "epoch": 2.93, - "learning_rate": 1.4207022643101591e-05, - "loss": 0.0293, + "learning_rate": 2.4216061036306617e-05, + "loss": 0.0386, "step": 62785 }, { "epoch": 2.93, - "learning_rate": 1.4206553841826451e-05, - "loss": 0.0691, + "learning_rate": 2.4215592966470597e-05, + "loss": 0.1056, "step": 62790 }, { "epoch": 2.93, - "learning_rate": 1.4206085040551311e-05, - "loss": 0.0777, + "learning_rate": 2.421512489663458e-05, + "loss": 0.1014, "step": 62795 }, { "epoch": 2.93, - "learning_rate": 1.4205616239276173e-05, - "loss": 0.2008, + "learning_rate": 2.421465682679856e-05, + "loss": 0.0761, "step": 62800 }, { "epoch": 2.93, - "learning_rate": 1.4205147438001033e-05, - "loss": 0.1873, + "learning_rate": 2.421418875696254e-05, + "loss": 0.1406, "step": 62805 }, { "epoch": 2.93, - "learning_rate": 1.4204678636725893e-05, - "loss": 0.2477, + "learning_rate": 2.4213720687126523e-05, + "loss": 0.2066, "step": 62810 }, { "epoch": 2.93, - "learning_rate": 1.4204209835450752e-05, - "loss": 0.2355, + "learning_rate": 2.42132526172905e-05, + "loss": 0.1796, "step": 62815 }, { "epoch": 2.93, - "learning_rate": 1.4203741034175612e-05, - "loss": 0.0399, + "learning_rate": 2.421278454745448e-05, + "loss": 0.0369, "step": 62820 }, { "epoch": 2.93, - "learning_rate": 1.4203272232900476e-05, - "loss": 0.05, + "learning_rate": 2.421231647761846e-05, + "loss": 0.0343, "step": 62825 }, { "epoch": 2.93, - "learning_rate": 1.4202803431625336e-05, - "loss": 0.0489, + "learning_rate": 2.4211848407782442e-05, + "loss": 0.0331, "step": 62830 }, { "epoch": 2.93, - "learning_rate": 1.4202334630350196e-05, - "loss": 0.0676, + "learning_rate": 2.4211380337946422e-05, + "loss": 0.0711, "step": 62835 }, { "epoch": 2.93, - "learning_rate": 1.4201865829075057e-05, - "loss": 0.0944, + "learning_rate": 2.4210912268110402e-05, + "loss": 0.0932, "step": 62840 }, { "epoch": 2.93, - "learning_rate": 1.4201397027799917e-05, - "loss": 0.1006, + "learning_rate": 2.4210444198274382e-05, + "loss": 0.0931, "step": 62845 }, { "epoch": 2.93, - "learning_rate": 1.4200928226524777e-05, - "loss": 0.1669, + "learning_rate": 2.4209976128438365e-05, + "loss": 0.0469, "step": 62850 }, { "epoch": 2.93, - "learning_rate": 1.4200459425249637e-05, - "loss": 0.1534, + "learning_rate": 2.4209508058602345e-05, + "loss": 0.1935, "step": 62855 }, { "epoch": 2.93, - "learning_rate": 1.4199990623974497e-05, - "loss": 0.1898, + "learning_rate": 2.4209039988766325e-05, + "loss": 0.1608, "step": 62860 }, { "epoch": 2.93, - "learning_rate": 1.4199521822699359e-05, - "loss": 0.2592, + "learning_rate": 2.4208571918930305e-05, + "loss": 0.2193, "step": 62865 }, { "epoch": 2.93, - "learning_rate": 1.4199053021424219e-05, - "loss": 0.0166, + "learning_rate": 2.4208103849094288e-05, + "loss": 0.0788, "step": 62870 }, { "epoch": 2.93, - "learning_rate": 1.419858422014908e-05, - "loss": 0.0357, + "learning_rate": 2.4207635779258268e-05, + "loss": 0.0601, "step": 62875 }, { "epoch": 2.93, - "learning_rate": 1.4198115418873942e-05, - "loss": 0.0386, + "learning_rate": 2.4207167709422244e-05, + "loss": 0.034, "step": 62880 }, { "epoch": 2.93, - "learning_rate": 1.4197646617598802e-05, - "loss": 0.0523, + "learning_rate": 2.4206699639586227e-05, + "loss": 0.0355, "step": 62885 }, { "epoch": 2.93, - "learning_rate": 1.4197177816323662e-05, - "loss": 0.1094, + "learning_rate": 2.4206231569750207e-05, + "loss": 0.0392, "step": 62890 }, { "epoch": 2.93, - "learning_rate": 1.4196709015048522e-05, - "loss": 0.1759, + "learning_rate": 2.4205763499914187e-05, + "loss": 0.0807, "step": 62895 }, { "epoch": 2.94, - "learning_rate": 1.4196240213773382e-05, - "loss": 0.0962, + "learning_rate": 2.4205295430078167e-05, + "loss": 0.1078, "step": 62900 }, { "epoch": 2.94, - "learning_rate": 1.4195771412498243e-05, - "loss": 0.1184, + "learning_rate": 2.420482736024215e-05, + "loss": 0.1747, "step": 62905 }, { "epoch": 2.94, - "learning_rate": 1.4195302611223103e-05, - "loss": 0.2146, + "learning_rate": 2.420435929040613e-05, + "loss": 0.1978, "step": 62910 }, { "epoch": 2.94, - "learning_rate": 1.4194833809947963e-05, - "loss": 0.2689, + "learning_rate": 2.420389122057011e-05, + "loss": 0.2202, "step": 62915 }, { "epoch": 2.94, - "learning_rate": 1.4194365008672826e-05, - "loss": 0.0281, + "learning_rate": 2.420342315073409e-05, + "loss": 0.0395, "step": 62920 }, { "epoch": 2.94, - "learning_rate": 1.4193896207397686e-05, - "loss": 0.0596, + "learning_rate": 2.4202955080898072e-05, + "loss": 0.0087, "step": 62925 }, { "epoch": 2.94, - "learning_rate": 1.4193427406122546e-05, - "loss": 0.0905, + "learning_rate": 2.4202487011062052e-05, + "loss": 0.0778, "step": 62930 }, { "epoch": 2.94, - "learning_rate": 1.4192958604847406e-05, - "loss": 0.0953, + "learning_rate": 2.4202018941226032e-05, + "loss": 0.0473, "step": 62935 }, { "epoch": 2.94, - "learning_rate": 1.4192489803572266e-05, - "loss": 0.0945, + "learning_rate": 2.4201550871390012e-05, + "loss": 0.1018, "step": 62940 }, { "epoch": 2.94, - "learning_rate": 1.4192021002297128e-05, - "loss": 0.1268, + "learning_rate": 2.4201082801553992e-05, + "loss": 0.0669, "step": 62945 }, { "epoch": 2.94, - "learning_rate": 1.4191552201021988e-05, - "loss": 0.1404, + "learning_rate": 2.420061473171797e-05, + "loss": 0.0629, "step": 62950 }, { "epoch": 2.94, - "learning_rate": 1.4191083399746848e-05, - "loss": 0.0784, + "learning_rate": 2.420014666188195e-05, + "loss": 0.1694, "step": 62955 }, { "epoch": 2.94, - "learning_rate": 1.4190614598471708e-05, - "loss": 0.2586, + "learning_rate": 2.4199678592045935e-05, + "loss": 0.3145, "step": 62960 }, { "epoch": 2.94, - "learning_rate": 1.419014579719657e-05, - "loss": 0.216, + "learning_rate": 2.4199210522209914e-05, + "loss": 0.2555, "step": 62965 }, { "epoch": 2.94, - "learning_rate": 1.418967699592143e-05, - "loss": 0.0615, + "learning_rate": 2.4198742452373894e-05, + "loss": 0.0611, "step": 62970 }, { "epoch": 2.94, - "learning_rate": 1.418920819464629e-05, - "loss": 0.0367, + "learning_rate": 2.4198274382537874e-05, + "loss": 0.0464, "step": 62975 }, { "epoch": 2.94, - "learning_rate": 1.418873939337115e-05, - "loss": 0.07, + "learning_rate": 2.4197806312701857e-05, + "loss": 0.0323, "step": 62980 }, { "epoch": 2.94, - "learning_rate": 1.4188270592096012e-05, - "loss": 0.0625, + "learning_rate": 2.4197338242865837e-05, + "loss": 0.0762, "step": 62985 }, { "epoch": 2.94, - "learning_rate": 1.4187801790820872e-05, - "loss": 0.0718, + "learning_rate": 2.4196870173029817e-05, + "loss": 0.0858, "step": 62990 }, { "epoch": 2.94, - "learning_rate": 1.4187332989545732e-05, - "loss": 0.1033, + "learning_rate": 2.41964021031938e-05, + "loss": 0.1115, "step": 62995 }, { "epoch": 2.94, - "learning_rate": 1.4186864188270592e-05, - "loss": 0.1077, + "learning_rate": 2.419593403335778e-05, + "loss": 0.0466, "step": 63000 }, { "epoch": 2.94, - "learning_rate": 1.4186395386995454e-05, - "loss": 0.1611, + "learning_rate": 2.4195465963521756e-05, + "loss": 0.1256, "step": 63005 }, { "epoch": 2.94, - "learning_rate": 1.4185926585720314e-05, - "loss": 0.2172, + "learning_rate": 2.4194997893685736e-05, + "loss": 0.2292, "step": 63010 }, { "epoch": 2.94, - "learning_rate": 1.4185457784445175e-05, - "loss": 0.2392, + "learning_rate": 2.419452982384972e-05, + "loss": 0.2535, "step": 63015 }, { "epoch": 2.94, - "learning_rate": 1.4184988983170037e-05, - "loss": 0.0908, + "learning_rate": 2.41940617540137e-05, + "loss": 0.0445, "step": 63020 }, { "epoch": 2.94, - "learning_rate": 1.4184520181894897e-05, - "loss": 0.0453, + "learning_rate": 2.419359368417768e-05, + "loss": 0.0609, "step": 63025 }, { "epoch": 2.94, - "learning_rate": 1.4184051380619757e-05, - "loss": 0.0317, + "learning_rate": 2.419312561434166e-05, + "loss": 0.0444, "step": 63030 }, { "epoch": 2.94, - "learning_rate": 1.4183582579344617e-05, - "loss": 0.0595, + "learning_rate": 2.4192657544505642e-05, + "loss": 0.0541, "step": 63035 }, { "epoch": 2.94, - "learning_rate": 1.4183113778069477e-05, - "loss": 0.0727, + "learning_rate": 2.4192189474669622e-05, + "loss": 0.0605, "step": 63040 }, { "epoch": 2.94, - "learning_rate": 1.4182644976794338e-05, - "loss": 0.0852, + "learning_rate": 2.41917214048336e-05, + "loss": 0.1205, "step": 63045 }, { "epoch": 2.94, - "learning_rate": 1.4182176175519198e-05, - "loss": 0.1202, + "learning_rate": 2.419125333499758e-05, + "loss": 0.0461, "step": 63050 }, { "epoch": 2.94, - "learning_rate": 1.4181707374244058e-05, - "loss": 0.1415, + "learning_rate": 2.4190785265161565e-05, + "loss": 0.168, "step": 63055 }, { "epoch": 2.94, - "learning_rate": 1.4181238572968921e-05, - "loss": 0.2081, + "learning_rate": 2.4190317195325545e-05, + "loss": 0.1311, "step": 63060 }, { "epoch": 2.94, - "learning_rate": 1.4180769771693781e-05, - "loss": 0.3448, + "learning_rate": 2.4189849125489524e-05, + "loss": 0.1908, "step": 63065 }, { "epoch": 2.94, - "learning_rate": 1.4180300970418641e-05, - "loss": 0.0215, + "learning_rate": 2.4189381055653504e-05, + "loss": 0.0581, "step": 63070 }, { "epoch": 2.94, - "learning_rate": 1.4179832169143501e-05, - "loss": 0.0511, + "learning_rate": 2.4188912985817484e-05, + "loss": 0.0383, "step": 63075 }, { "epoch": 2.94, - "learning_rate": 1.4179363367868361e-05, - "loss": 0.0203, + "learning_rate": 2.4188444915981464e-05, + "loss": 0.0243, "step": 63080 }, { "epoch": 2.94, - "learning_rate": 1.4178894566593223e-05, - "loss": 0.0664, + "learning_rate": 2.4187976846145444e-05, + "loss": 0.0817, "step": 63085 }, { "epoch": 2.94, - "learning_rate": 1.4178425765318083e-05, - "loss": 0.0897, + "learning_rate": 2.4187508776309427e-05, + "loss": 0.1185, "step": 63090 }, { "epoch": 2.94, - "learning_rate": 1.4177956964042943e-05, - "loss": 0.1132, + "learning_rate": 2.4187040706473407e-05, + "loss": 0.1229, "step": 63095 }, { "epoch": 2.94, - "learning_rate": 1.4177488162767803e-05, - "loss": 0.1802, + "learning_rate": 2.4186572636637386e-05, + "loss": 0.1517, "step": 63100 }, { "epoch": 2.94, - "learning_rate": 1.4177019361492666e-05, - "loss": 0.1214, + "learning_rate": 2.4186104566801366e-05, + "loss": 0.2088, "step": 63105 }, { "epoch": 2.94, - "learning_rate": 1.4176550560217526e-05, - "loss": 0.2478, + "learning_rate": 2.418563649696535e-05, + "loss": 0.231, "step": 63110 }, { "epoch": 2.95, - "learning_rate": 1.4176081758942386e-05, - "loss": 0.299, + "learning_rate": 2.418516842712933e-05, + "loss": 0.2014, "step": 63115 }, { "epoch": 2.95, - "learning_rate": 1.4175612957667246e-05, - "loss": 0.0547, + "learning_rate": 2.418470035729331e-05, + "loss": 0.0771, "step": 63120 }, { "epoch": 2.95, - "learning_rate": 1.4175144156392107e-05, - "loss": 0.0359, + "learning_rate": 2.4184232287457292e-05, + "loss": 0.0238, "step": 63125 }, { "epoch": 2.95, - "learning_rate": 1.4174675355116967e-05, - "loss": 0.0356, + "learning_rate": 2.418376421762127e-05, + "loss": 0.0214, "step": 63130 }, { "epoch": 2.95, - "learning_rate": 1.4174206553841827e-05, - "loss": 0.0364, + "learning_rate": 2.418329614778525e-05, + "loss": 0.0765, "step": 63135 }, { "epoch": 2.95, - "learning_rate": 1.4173737752566687e-05, - "loss": 0.0752, + "learning_rate": 2.418282807794923e-05, + "loss": 0.1006, "step": 63140 }, { "epoch": 2.95, - "learning_rate": 1.4173268951291547e-05, - "loss": 0.0793, + "learning_rate": 2.418236000811321e-05, + "loss": 0.0525, "step": 63145 }, { "epoch": 2.95, - "learning_rate": 1.417280015001641e-05, - "loss": 0.1518, + "learning_rate": 2.418189193827719e-05, + "loss": 0.0991, "step": 63150 }, { "epoch": 2.95, - "learning_rate": 1.417233134874127e-05, - "loss": 0.2003, + "learning_rate": 2.418142386844117e-05, + "loss": 0.1976, "step": 63155 }, { "epoch": 2.95, - "learning_rate": 1.417186254746613e-05, - "loss": 0.2679, + "learning_rate": 2.418095579860515e-05, + "loss": 0.1167, "step": 63160 }, { "epoch": 2.95, - "learning_rate": 1.4171393746190992e-05, - "loss": 0.3248, + "learning_rate": 2.4180487728769134e-05, + "loss": 0.3467, "step": 63165 }, { "epoch": 2.95, - "learning_rate": 1.4170924944915852e-05, - "loss": 0.0329, + "learning_rate": 2.4180019658933114e-05, + "loss": 0.0368, "step": 63170 }, { "epoch": 2.95, - "learning_rate": 1.4170456143640712e-05, - "loss": 0.0632, + "learning_rate": 2.4179551589097094e-05, + "loss": 0.0271, "step": 63175 }, { "epoch": 2.95, - "learning_rate": 1.4169987342365572e-05, - "loss": 0.1266, + "learning_rate": 2.4179083519261077e-05, + "loss": 0.0217, "step": 63180 }, { "epoch": 2.95, - "learning_rate": 1.4169518541090432e-05, - "loss": 0.0548, + "learning_rate": 2.4178615449425057e-05, + "loss": 0.0944, "step": 63185 }, { "epoch": 2.95, - "learning_rate": 1.4169049739815293e-05, - "loss": 0.0715, + "learning_rate": 2.4178147379589037e-05, + "loss": 0.0368, "step": 63190 }, { "epoch": 2.95, - "learning_rate": 1.4168580938540153e-05, - "loss": 0.0533, + "learning_rate": 2.4177679309753013e-05, + "loss": 0.1101, "step": 63195 }, { "epoch": 2.95, - "learning_rate": 1.4168112137265015e-05, - "loss": 0.1372, + "learning_rate": 2.4177211239916996e-05, + "loss": 0.0614, "step": 63200 }, { "epoch": 2.95, - "learning_rate": 1.4167643335989876e-05, - "loss": 0.119, + "learning_rate": 2.4176743170080976e-05, + "loss": 0.1767, "step": 63205 }, { "epoch": 2.95, - "learning_rate": 1.4167174534714736e-05, - "loss": 0.2117, + "learning_rate": 2.4176275100244956e-05, + "loss": 0.2781, "step": 63210 }, { "epoch": 2.95, - "learning_rate": 1.4166705733439596e-05, - "loss": 0.3411, + "learning_rate": 2.4175807030408936e-05, + "loss": 0.4507, "step": 63215 }, { "epoch": 2.95, - "learning_rate": 1.4166236932164456e-05, - "loss": 0.095, + "learning_rate": 2.417533896057292e-05, + "loss": 0.0527, "step": 63220 }, { "epoch": 2.95, - "learning_rate": 1.4165768130889316e-05, - "loss": 0.0377, + "learning_rate": 2.41748708907369e-05, + "loss": 0.0222, "step": 63225 }, { "epoch": 2.95, - "learning_rate": 1.4165299329614178e-05, - "loss": 0.042, + "learning_rate": 2.417440282090088e-05, + "loss": 0.0543, "step": 63230 }, { "epoch": 2.95, - "learning_rate": 1.4164830528339038e-05, - "loss": 0.0263, + "learning_rate": 2.417393475106486e-05, + "loss": 0.0463, "step": 63235 }, { "epoch": 2.95, - "learning_rate": 1.4164361727063898e-05, - "loss": 0.1153, + "learning_rate": 2.417346668122884e-05, + "loss": 0.0956, "step": 63240 }, { "epoch": 2.95, - "learning_rate": 1.4163892925788761e-05, - "loss": 0.0538, + "learning_rate": 2.417299861139282e-05, + "loss": 0.0393, "step": 63245 }, { "epoch": 2.95, - "learning_rate": 1.4163424124513621e-05, - "loss": 0.184, + "learning_rate": 2.41725305415568e-05, + "loss": 0.0857, "step": 63250 }, { "epoch": 2.95, - "learning_rate": 1.4162955323238481e-05, - "loss": 0.145, + "learning_rate": 2.417206247172078e-05, + "loss": 0.1616, "step": 63255 }, { "epoch": 2.95, - "learning_rate": 1.416248652196334e-05, - "loss": 0.1811, + "learning_rate": 2.417159440188476e-05, + "loss": 0.2549, "step": 63260 }, { "epoch": 2.95, - "learning_rate": 1.41620177206882e-05, - "loss": 0.322, + "learning_rate": 2.417112633204874e-05, + "loss": 0.3067, "step": 63265 }, { "epoch": 2.95, - "learning_rate": 1.4161548919413062e-05, - "loss": 0.0432, + "learning_rate": 2.417065826221272e-05, + "loss": 0.0083, "step": 63270 }, { "epoch": 2.95, - "learning_rate": 1.4161080118137922e-05, - "loss": 0.0354, + "learning_rate": 2.4170190192376704e-05, + "loss": 0.0653, "step": 63275 }, { "epoch": 2.95, - "learning_rate": 1.4160611316862782e-05, - "loss": 0.0424, + "learning_rate": 2.4169722122540684e-05, + "loss": 0.0114, "step": 63280 }, { "epoch": 2.95, - "learning_rate": 1.4160142515587642e-05, - "loss": 0.0593, + "learning_rate": 2.4169254052704663e-05, + "loss": 0.0451, "step": 63285 }, { "epoch": 2.95, - "learning_rate": 1.4159673714312505e-05, - "loss": 0.0574, + "learning_rate": 2.4168785982868643e-05, + "loss": 0.0472, "step": 63290 }, { "epoch": 2.95, - "learning_rate": 1.4159204913037365e-05, - "loss": 0.0868, + "learning_rate": 2.4168317913032626e-05, + "loss": 0.0913, "step": 63295 }, { "epoch": 2.95, - "learning_rate": 1.4158736111762225e-05, - "loss": 0.1314, + "learning_rate": 2.4167849843196606e-05, + "loss": 0.1354, "step": 63300 }, { "epoch": 2.95, - "learning_rate": 1.4158267310487085e-05, - "loss": 0.085, + "learning_rate": 2.4167381773360586e-05, + "loss": 0.0834, "step": 63305 }, { "epoch": 2.95, - "learning_rate": 1.4157798509211947e-05, - "loss": 0.1749, + "learning_rate": 2.416691370352457e-05, + "loss": 0.1842, "step": 63310 }, { "epoch": 2.95, - "learning_rate": 1.4157329707936807e-05, - "loss": 0.263, + "learning_rate": 2.416644563368855e-05, + "loss": 0.2099, "step": 63315 }, { "epoch": 2.95, - "learning_rate": 1.4156860906661667e-05, - "loss": 0.0535, + "learning_rate": 2.4165977563852526e-05, + "loss": 0.1157, "step": 63320 }, { "epoch": 2.95, - "learning_rate": 1.4156392105386527e-05, - "loss": 0.0341, + "learning_rate": 2.4165509494016505e-05, + "loss": 0.0709, "step": 63325 }, { "epoch": 2.96, - "learning_rate": 1.4155923304111388e-05, - "loss": 0.0705, + "learning_rate": 2.416504142418049e-05, + "loss": 0.0374, "step": 63330 }, { "epoch": 2.96, - "learning_rate": 1.4155454502836248e-05, - "loss": 0.0689, + "learning_rate": 2.416457335434447e-05, + "loss": 0.1054, "step": 63335 }, { "epoch": 2.96, - "learning_rate": 1.415498570156111e-05, - "loss": 0.0664, + "learning_rate": 2.4164105284508448e-05, + "loss": 0.0839, "step": 63340 }, { "epoch": 2.96, - "learning_rate": 1.415451690028597e-05, - "loss": 0.0963, + "learning_rate": 2.4163637214672428e-05, + "loss": 0.1125, "step": 63345 }, { "epoch": 2.96, - "learning_rate": 1.4154048099010831e-05, - "loss": 0.1253, + "learning_rate": 2.416316914483641e-05, + "loss": 0.0766, "step": 63350 }, { "epoch": 2.96, - "learning_rate": 1.4153579297735691e-05, - "loss": 0.1531, + "learning_rate": 2.416270107500039e-05, + "loss": 0.1371, "step": 63355 }, { "epoch": 2.96, - "learning_rate": 1.4153110496460551e-05, - "loss": 0.3743, + "learning_rate": 2.416223300516437e-05, + "loss": 0.1878, "step": 63360 }, { "epoch": 2.96, - "learning_rate": 1.4152641695185411e-05, - "loss": 0.3862, + "learning_rate": 2.4161764935328354e-05, + "loss": 0.3318, "step": 63365 }, { "epoch": 2.96, - "learning_rate": 1.4152172893910273e-05, - "loss": 0.0714, + "learning_rate": 2.4161296865492334e-05, + "loss": 0.0314, "step": 63370 }, { "epoch": 2.96, - "learning_rate": 1.4151704092635133e-05, - "loss": 0.0333, + "learning_rate": 2.4160828795656314e-05, + "loss": 0.049, "step": 63375 }, { "epoch": 2.96, - "learning_rate": 1.4151235291359993e-05, - "loss": 0.0459, + "learning_rate": 2.4160360725820293e-05, + "loss": 0.0562, "step": 63380 }, { "epoch": 2.96, - "learning_rate": 1.4150766490084854e-05, - "loss": 0.024, + "learning_rate": 2.4159892655984273e-05, + "loss": 0.0936, "step": 63385 }, { "epoch": 2.96, - "learning_rate": 1.4150297688809716e-05, - "loss": 0.0285, + "learning_rate": 2.4159424586148253e-05, + "loss": 0.1393, "step": 63390 }, { "epoch": 2.96, - "learning_rate": 1.4149828887534576e-05, - "loss": 0.0494, + "learning_rate": 2.4158956516312233e-05, + "loss": 0.1414, "step": 63395 }, { "epoch": 2.96, - "learning_rate": 1.4149360086259436e-05, - "loss": 0.1061, + "learning_rate": 2.4158488446476213e-05, + "loss": 0.1436, "step": 63400 }, { "epoch": 2.96, - "learning_rate": 1.4148891284984296e-05, - "loss": 0.1774, + "learning_rate": 2.4158020376640196e-05, + "loss": 0.1471, "step": 63405 }, { "epoch": 2.96, - "learning_rate": 1.4148422483709157e-05, - "loss": 0.2098, + "learning_rate": 2.4157552306804176e-05, + "loss": 0.2445, "step": 63410 }, { "epoch": 2.96, - "learning_rate": 1.4147953682434017e-05, - "loss": 0.2257, + "learning_rate": 2.4157084236968156e-05, + "loss": 0.2649, "step": 63415 }, { "epoch": 2.96, - "learning_rate": 1.4147484881158877e-05, - "loss": 0.0405, + "learning_rate": 2.415661616713214e-05, + "loss": 0.0298, "step": 63420 }, { "epoch": 2.96, - "learning_rate": 1.4147016079883737e-05, - "loss": 0.0221, + "learning_rate": 2.415614809729612e-05, + "loss": 0.0304, "step": 63425 }, { "epoch": 2.96, - "learning_rate": 1.41465472786086e-05, - "loss": 0.0382, + "learning_rate": 2.41556800274601e-05, + "loss": 0.0624, "step": 63430 }, { "epoch": 2.96, - "learning_rate": 1.414607847733346e-05, - "loss": 0.0391, + "learning_rate": 2.4155211957624078e-05, + "loss": 0.0395, "step": 63435 }, { "epoch": 2.96, - "learning_rate": 1.414560967605832e-05, - "loss": 0.0743, + "learning_rate": 2.415474388778806e-05, + "loss": 0.1157, "step": 63440 }, { "epoch": 2.96, - "learning_rate": 1.414514087478318e-05, - "loss": 0.106, + "learning_rate": 2.4154275817952038e-05, + "loss": 0.1552, "step": 63445 }, { "epoch": 2.96, - "learning_rate": 1.4144672073508042e-05, - "loss": 0.1007, + "learning_rate": 2.4153807748116018e-05, + "loss": 0.1065, "step": 63450 }, { "epoch": 2.96, - "learning_rate": 1.4144203272232902e-05, - "loss": 0.1935, + "learning_rate": 2.4153339678279998e-05, + "loss": 0.106, "step": 63455 }, { "epoch": 2.96, - "learning_rate": 1.4143734470957762e-05, - "loss": 0.1602, + "learning_rate": 2.415287160844398e-05, + "loss": 0.2909, "step": 63460 }, { "epoch": 2.96, - "learning_rate": 1.4143265669682622e-05, - "loss": 0.3175, + "learning_rate": 2.415240353860796e-05, + "loss": 0.2588, "step": 63465 }, { "epoch": 2.96, - "learning_rate": 1.4142796868407482e-05, - "loss": 0.0462, + "learning_rate": 2.415193546877194e-05, + "loss": 0.067, "step": 63470 }, { "epoch": 2.96, - "learning_rate": 1.4142328067132345e-05, - "loss": 0.0184, + "learning_rate": 2.415146739893592e-05, + "loss": 0.0726, "step": 63475 }, { "epoch": 2.96, - "learning_rate": 1.4141859265857205e-05, - "loss": 0.027, + "learning_rate": 2.4150999329099903e-05, + "loss": 0.0086, "step": 63480 }, { "epoch": 2.96, - "learning_rate": 1.4141390464582065e-05, - "loss": 0.0426, + "learning_rate": 2.4150531259263883e-05, + "loss": 0.122, "step": 63485 }, { "epoch": 2.96, - "learning_rate": 1.4140921663306927e-05, - "loss": 0.0563, + "learning_rate": 2.4150063189427863e-05, + "loss": 0.0687, "step": 63490 }, { "epoch": 2.96, - "learning_rate": 1.4140452862031786e-05, - "loss": 0.1309, + "learning_rate": 2.4149595119591846e-05, + "loss": 0.0654, "step": 63495 }, { "epoch": 2.96, - "learning_rate": 1.4139984060756646e-05, - "loss": 0.1223, + "learning_rate": 2.4149127049755826e-05, + "loss": 0.0585, "step": 63500 }, { "epoch": 2.96, - "learning_rate": 1.4139515259481506e-05, - "loss": 0.1788, + "learning_rate": 2.4148658979919806e-05, + "loss": 0.1256, "step": 63505 }, { "epoch": 2.96, - "learning_rate": 1.4139046458206366e-05, - "loss": 0.3226, + "learning_rate": 2.4148190910083782e-05, + "loss": 0.2653, "step": 63510 }, { "epoch": 2.96, - "learning_rate": 1.4138577656931228e-05, - "loss": 0.278, + "learning_rate": 2.4147722840247766e-05, + "loss": 0.2189, "step": 63515 }, { "epoch": 2.96, - "learning_rate": 1.4138108855656088e-05, - "loss": 0.0424, + "learning_rate": 2.4147254770411745e-05, + "loss": 0.0423, "step": 63520 }, { "epoch": 2.96, - "learning_rate": 1.413764005438095e-05, - "loss": 0.0399, + "learning_rate": 2.4146786700575725e-05, + "loss": 0.0367, "step": 63525 }, { "epoch": 2.96, - "learning_rate": 1.4137171253105811e-05, - "loss": 0.0638, + "learning_rate": 2.4146318630739705e-05, + "loss": 0.044, "step": 63530 }, { "epoch": 2.96, - "learning_rate": 1.4136702451830671e-05, - "loss": 0.0928, + "learning_rate": 2.4145850560903688e-05, + "loss": 0.0619, "step": 63535 }, { "epoch": 2.96, - "learning_rate": 1.4136233650555531e-05, - "loss": 0.0727, + "learning_rate": 2.4145382491067668e-05, + "loss": 0.0601, "step": 63540 }, { "epoch": 2.97, - "learning_rate": 1.4135764849280391e-05, - "loss": 0.1228, + "learning_rate": 2.4144914421231648e-05, + "loss": 0.0659, "step": 63545 }, { "epoch": 2.97, - "learning_rate": 1.413529604800525e-05, - "loss": 0.1091, + "learning_rate": 2.414444635139563e-05, + "loss": 0.1026, "step": 63550 }, { "epoch": 2.97, - "learning_rate": 1.4134827246730112e-05, - "loss": 0.0921, + "learning_rate": 2.414397828155961e-05, + "loss": 0.1708, "step": 63555 }, { "epoch": 2.97, - "learning_rate": 1.4134358445454972e-05, - "loss": 0.3803, + "learning_rate": 2.414351021172359e-05, + "loss": 0.1861, "step": 63560 }, { "epoch": 2.97, - "learning_rate": 1.4133889644179832e-05, - "loss": 0.1736, + "learning_rate": 2.414304214188757e-05, + "loss": 0.2588, "step": 63565 }, { "epoch": 2.97, - "learning_rate": 1.4133420842904696e-05, - "loss": 0.0774, + "learning_rate": 2.4142574072051554e-05, + "loss": 0.0468, "step": 63570 }, { "epoch": 2.97, - "learning_rate": 1.4132952041629556e-05, - "loss": 0.1033, + "learning_rate": 2.414210600221553e-05, + "loss": 0.0252, "step": 63575 }, { "epoch": 2.97, - "learning_rate": 1.4132483240354416e-05, - "loss": 0.0655, + "learning_rate": 2.414163793237951e-05, + "loss": 0.0415, "step": 63580 }, { "epoch": 2.97, - "learning_rate": 1.4132014439079275e-05, - "loss": 0.1123, + "learning_rate": 2.414116986254349e-05, + "loss": 0.0742, "step": 63585 }, { "epoch": 2.97, - "learning_rate": 1.4131545637804135e-05, - "loss": 0.075, + "learning_rate": 2.4140701792707473e-05, + "loss": 0.0556, "step": 63590 }, { "epoch": 2.97, - "learning_rate": 1.4131076836528997e-05, - "loss": 0.1171, + "learning_rate": 2.4140233722871453e-05, + "loss": 0.0523, "step": 63595 }, { "epoch": 2.97, - "learning_rate": 1.4130608035253857e-05, - "loss": 0.1863, + "learning_rate": 2.4139765653035433e-05, + "loss": 0.129, "step": 63600 }, { "epoch": 2.97, - "learning_rate": 1.4130139233978717e-05, - "loss": 0.0909, + "learning_rate": 2.4139297583199416e-05, + "loss": 0.1882, "step": 63605 }, { "epoch": 2.97, - "learning_rate": 1.4129670432703577e-05, - "loss": 0.3155, + "learning_rate": 2.4138829513363396e-05, + "loss": 0.2611, "step": 63610 }, { "epoch": 2.97, - "learning_rate": 1.412920163142844e-05, - "loss": 0.1992, + "learning_rate": 2.4138361443527375e-05, + "loss": 0.1537, "step": 63615 }, { "epoch": 2.97, - "learning_rate": 1.41287328301533e-05, - "loss": 0.0821, + "learning_rate": 2.4137893373691355e-05, + "loss": 0.0419, "step": 63620 }, { "epoch": 2.97, - "learning_rate": 1.412826402887816e-05, - "loss": 0.1078, + "learning_rate": 2.413742530385534e-05, + "loss": 0.0177, "step": 63625 }, { "epoch": 2.97, - "learning_rate": 1.412779522760302e-05, - "loss": 0.0328, + "learning_rate": 2.4136957234019318e-05, + "loss": 0.0815, "step": 63630 }, { "epoch": 2.97, - "learning_rate": 1.4127326426327882e-05, - "loss": 0.0879, + "learning_rate": 2.4136489164183295e-05, + "loss": 0.057, "step": 63635 }, { "epoch": 2.97, - "learning_rate": 1.4126857625052741e-05, - "loss": 0.03, + "learning_rate": 2.4136021094347275e-05, + "loss": 0.0947, "step": 63640 }, { "epoch": 2.97, - "learning_rate": 1.4126388823777601e-05, - "loss": 0.1541, + "learning_rate": 2.4135553024511258e-05, + "loss": 0.1252, "step": 63645 }, { "epoch": 2.97, - "learning_rate": 1.4125920022502461e-05, - "loss": 0.0869, + "learning_rate": 2.4135084954675238e-05, + "loss": 0.0784, "step": 63650 }, { "epoch": 2.97, - "learning_rate": 1.4125451221227321e-05, - "loss": 0.1071, + "learning_rate": 2.4134616884839217e-05, + "loss": 0.0991, "step": 63655 }, { "epoch": 2.97, - "learning_rate": 1.4124982419952183e-05, - "loss": 0.2699, + "learning_rate": 2.4134148815003197e-05, + "loss": 0.1926, "step": 63660 }, { "epoch": 2.97, - "learning_rate": 1.4124513618677045e-05, - "loss": 0.1667, + "learning_rate": 2.413368074516718e-05, + "loss": 0.4223, "step": 63665 }, { "epoch": 2.97, - "learning_rate": 1.4124044817401904e-05, - "loss": 0.0572, + "learning_rate": 2.413321267533116e-05, + "loss": 0.0536, "step": 63670 }, { "epoch": 2.97, - "learning_rate": 1.4123576016126766e-05, - "loss": 0.0256, + "learning_rate": 2.413274460549514e-05, + "loss": 0.0268, "step": 63675 }, { "epoch": 2.97, - "learning_rate": 1.4123107214851626e-05, - "loss": 0.0349, + "learning_rate": 2.4132276535659123e-05, + "loss": 0.0167, "step": 63680 }, { "epoch": 2.97, - "learning_rate": 1.4122638413576486e-05, - "loss": 0.0604, + "learning_rate": 2.4131808465823103e-05, + "loss": 0.0705, "step": 63685 }, { "epoch": 2.97, - "learning_rate": 1.4122169612301346e-05, - "loss": 0.0297, + "learning_rate": 2.4131340395987083e-05, + "loss": 0.0285, "step": 63690 }, { "epoch": 2.97, - "learning_rate": 1.4121700811026206e-05, - "loss": 0.1569, + "learning_rate": 2.4130872326151063e-05, + "loss": 0.0866, "step": 63695 }, { "epoch": 2.97, - "learning_rate": 1.4121232009751067e-05, - "loss": 0.0974, + "learning_rate": 2.4130404256315042e-05, + "loss": 0.0753, "step": 63700 }, { "epoch": 2.97, - "learning_rate": 1.4120763208475927e-05, - "loss": 0.1207, + "learning_rate": 2.4129936186479022e-05, + "loss": 0.1502, "step": 63705 }, { "epoch": 2.97, - "learning_rate": 1.4120294407200789e-05, - "loss": 0.2527, + "learning_rate": 2.4129468116643002e-05, + "loss": 0.2183, "step": 63710 }, { "epoch": 2.97, - "learning_rate": 1.411982560592565e-05, - "loss": 0.2932, + "learning_rate": 2.4129000046806982e-05, + "loss": 0.2109, "step": 63715 }, { "epoch": 2.97, - "learning_rate": 1.411935680465051e-05, - "loss": 0.0372, + "learning_rate": 2.4128531976970965e-05, + "loss": 0.084, "step": 63720 }, { "epoch": 2.97, - "learning_rate": 1.411888800337537e-05, - "loss": 0.0309, + "learning_rate": 2.4128063907134945e-05, + "loss": 0.0534, "step": 63725 }, { "epoch": 2.97, - "learning_rate": 1.411841920210023e-05, - "loss": 0.019, + "learning_rate": 2.4127595837298925e-05, + "loss": 0.0606, "step": 63730 }, { "epoch": 2.97, - "learning_rate": 1.411795040082509e-05, - "loss": 0.0686, + "learning_rate": 2.4127127767462908e-05, + "loss": 0.0455, "step": 63735 }, { "epoch": 2.97, - "learning_rate": 1.4117481599549952e-05, - "loss": 0.1975, + "learning_rate": 2.4126659697626888e-05, + "loss": 0.0354, "step": 63740 }, { "epoch": 2.97, - "learning_rate": 1.4117012798274812e-05, - "loss": 0.1837, + "learning_rate": 2.4126191627790868e-05, + "loss": 0.0265, "step": 63745 }, { "epoch": 2.97, - "learning_rate": 1.4116543996999672e-05, - "loss": 0.1234, + "learning_rate": 2.4125723557954847e-05, + "loss": 0.156, "step": 63750 }, { "epoch": 2.97, - "learning_rate": 1.4116075195724535e-05, - "loss": 0.1988, + "learning_rate": 2.412525548811883e-05, + "loss": 0.1119, "step": 63755 }, { "epoch": 2.98, - "learning_rate": 1.4115606394449395e-05, - "loss": 0.2116, + "learning_rate": 2.4124787418282807e-05, + "loss": 0.1648, "step": 63760 }, { "epoch": 2.98, - "learning_rate": 1.4115137593174255e-05, - "loss": 0.2059, + "learning_rate": 2.4124319348446787e-05, + "loss": 0.2408, "step": 63765 }, { "epoch": 2.98, - "learning_rate": 1.4114668791899115e-05, - "loss": 0.0628, + "learning_rate": 2.4123851278610767e-05, + "loss": 0.0432, "step": 63770 }, { "epoch": 2.98, - "learning_rate": 1.4114199990623975e-05, - "loss": 0.0345, + "learning_rate": 2.412338320877475e-05, + "loss": 0.0339, "step": 63775 }, { "epoch": 2.98, - "learning_rate": 1.4113731189348837e-05, - "loss": 0.0374, + "learning_rate": 2.412291513893873e-05, + "loss": 0.0596, "step": 63780 }, { "epoch": 2.98, - "learning_rate": 1.4113262388073697e-05, - "loss": 0.1135, + "learning_rate": 2.412244706910271e-05, + "loss": 0.0279, "step": 63785 }, { "epoch": 2.98, - "learning_rate": 1.4112793586798556e-05, - "loss": 0.101, + "learning_rate": 2.4121978999266693e-05, + "loss": 0.0724, "step": 63790 }, { "epoch": 2.98, - "learning_rate": 1.4112324785523416e-05, - "loss": 0.0271, + "learning_rate": 2.4121510929430673e-05, + "loss": 0.0428, "step": 63795 }, { "epoch": 2.98, - "learning_rate": 1.411185598424828e-05, - "loss": 0.0826, + "learning_rate": 2.4121042859594652e-05, + "loss": 0.1633, "step": 63800 }, { "epoch": 2.98, - "learning_rate": 1.411138718297314e-05, - "loss": 0.1655, + "learning_rate": 2.4120574789758632e-05, + "loss": 0.1748, "step": 63805 }, { "epoch": 2.98, - "learning_rate": 1.4110918381698e-05, - "loss": 0.2046, + "learning_rate": 2.4120106719922615e-05, + "loss": 0.2044, "step": 63810 }, { "epoch": 2.98, - "learning_rate": 1.411044958042286e-05, - "loss": 0.2569, + "learning_rate": 2.4119638650086595e-05, + "loss": 0.3987, "step": 63815 }, { "epoch": 2.98, - "learning_rate": 1.4109980779147721e-05, - "loss": 0.0889, + "learning_rate": 2.4119170580250575e-05, + "loss": 0.053, "step": 63820 }, { "epoch": 2.98, - "learning_rate": 1.4109511977872581e-05, - "loss": 0.024, + "learning_rate": 2.411870251041455e-05, + "loss": 0.0649, "step": 63825 }, { "epoch": 2.98, - "learning_rate": 1.4109043176597441e-05, - "loss": 0.0664, + "learning_rate": 2.4118234440578535e-05, + "loss": 0.0428, "step": 63830 }, { "epoch": 2.98, - "learning_rate": 1.4108574375322301e-05, - "loss": 0.0197, + "learning_rate": 2.4117766370742514e-05, + "loss": 0.0502, "step": 63835 }, { "epoch": 2.98, - "learning_rate": 1.4108105574047163e-05, - "loss": 0.0726, + "learning_rate": 2.4117298300906494e-05, + "loss": 0.0718, "step": 63840 }, { "epoch": 2.98, - "learning_rate": 1.4107636772772022e-05, - "loss": 0.087, + "learning_rate": 2.4116830231070474e-05, + "loss": 0.0835, "step": 63845 }, { "epoch": 2.98, - "learning_rate": 1.4107167971496884e-05, - "loss": 0.0947, + "learning_rate": 2.4116362161234457e-05, + "loss": 0.11, "step": 63850 }, { "epoch": 2.98, - "learning_rate": 1.4106699170221744e-05, - "loss": 0.1588, + "learning_rate": 2.4115894091398437e-05, + "loss": 0.1532, "step": 63855 }, { "epoch": 2.98, - "learning_rate": 1.4106230368946606e-05, - "loss": 0.2632, + "learning_rate": 2.4115426021562417e-05, + "loss": 0.2339, "step": 63860 }, { "epoch": 2.98, - "learning_rate": 1.4105761567671466e-05, - "loss": 0.2116, + "learning_rate": 2.41149579517264e-05, + "loss": 0.342, "step": 63865 }, { "epoch": 2.98, - "learning_rate": 1.4105292766396326e-05, - "loss": 0.0705, + "learning_rate": 2.411448988189038e-05, + "loss": 0.0502, "step": 63870 }, { "epoch": 2.98, - "learning_rate": 1.4104823965121185e-05, - "loss": 0.0129, + "learning_rate": 2.411402181205436e-05, + "loss": 0.0223, "step": 63875 }, { "epoch": 2.98, - "learning_rate": 1.4104355163846047e-05, - "loss": 0.0476, + "learning_rate": 2.411355374221834e-05, + "loss": 0.026, "step": 63880 }, { "epoch": 2.98, - "learning_rate": 1.4103886362570907e-05, - "loss": 0.0482, + "learning_rate": 2.4113085672382323e-05, + "loss": 0.0227, "step": 63885 }, { "epoch": 2.98, - "learning_rate": 1.4103417561295767e-05, - "loss": 0.0601, + "learning_rate": 2.41126176025463e-05, + "loss": 0.1067, "step": 63890 }, { "epoch": 2.98, - "learning_rate": 1.4102948760020629e-05, - "loss": 0.1474, + "learning_rate": 2.411214953271028e-05, + "loss": 0.0624, "step": 63895 }, { "epoch": 2.98, - "learning_rate": 1.410247995874549e-05, - "loss": 0.16, + "learning_rate": 2.411168146287426e-05, + "loss": 0.1022, "step": 63900 }, { "epoch": 2.98, - "learning_rate": 1.410201115747035e-05, - "loss": 0.2903, + "learning_rate": 2.4111213393038242e-05, + "loss": 0.1, "step": 63905 }, { "epoch": 2.98, - "learning_rate": 1.410154235619521e-05, - "loss": 0.2707, + "learning_rate": 2.4110745323202222e-05, + "loss": 0.3413, "step": 63910 }, { "epoch": 2.98, - "learning_rate": 1.410107355492007e-05, - "loss": 0.315, + "learning_rate": 2.4110277253366202e-05, + "loss": 0.1548, "step": 63915 }, { "epoch": 2.98, - "learning_rate": 1.4100604753644932e-05, - "loss": 0.0473, + "learning_rate": 2.4109809183530185e-05, + "loss": 0.0298, "step": 63920 }, { "epoch": 2.98, - "learning_rate": 1.4100135952369792e-05, - "loss": 0.045, + "learning_rate": 2.4109341113694165e-05, + "loss": 0.0499, "step": 63925 }, { "epoch": 2.98, - "learning_rate": 1.4099667151094652e-05, - "loss": 0.0782, + "learning_rate": 2.4108873043858145e-05, + "loss": 0.0825, "step": 63930 }, { "epoch": 2.98, - "learning_rate": 1.4099198349819511e-05, - "loss": 0.0812, + "learning_rate": 2.4108404974022124e-05, + "loss": 0.0281, "step": 63935 }, { "epoch": 2.98, - "learning_rate": 1.4098729548544375e-05, - "loss": 0.0824, + "learning_rate": 2.4107936904186108e-05, + "loss": 0.0823, "step": 63940 }, { "epoch": 2.98, - "learning_rate": 1.4098260747269235e-05, - "loss": 0.1176, + "learning_rate": 2.4107468834350087e-05, + "loss": 0.0811, "step": 63945 }, { "epoch": 2.98, - "learning_rate": 1.4097791945994095e-05, - "loss": 0.151, + "learning_rate": 2.4107000764514064e-05, + "loss": 0.1565, "step": 63950 }, { "epoch": 2.98, - "learning_rate": 1.4097323144718955e-05, - "loss": 0.1791, + "learning_rate": 2.4106532694678044e-05, + "loss": 0.1216, "step": 63955 }, { "epoch": 2.98, - "learning_rate": 1.4096854343443816e-05, - "loss": 0.3215, + "learning_rate": 2.4106064624842027e-05, + "loss": 0.3768, "step": 63960 }, { "epoch": 2.98, - "learning_rate": 1.4096385542168676e-05, - "loss": 0.2454, + "learning_rate": 2.4105596555006007e-05, + "loss": 0.2631, "step": 63965 }, { "epoch": 2.98, - "learning_rate": 1.4095916740893536e-05, - "loss": 0.047, + "learning_rate": 2.4105128485169987e-05, + "loss": 0.0358, "step": 63970 }, { "epoch": 2.99, - "learning_rate": 1.4095447939618396e-05, - "loss": 0.0286, + "learning_rate": 2.410466041533397e-05, + "loss": 0.0213, "step": 63975 }, { "epoch": 2.99, - "learning_rate": 1.4094979138343256e-05, - "loss": 0.0482, + "learning_rate": 2.410419234549795e-05, + "loss": 0.0607, "step": 63980 }, { "epoch": 2.99, - "learning_rate": 1.4094510337068118e-05, - "loss": 0.0548, + "learning_rate": 2.410372427566193e-05, + "loss": 0.0393, "step": 63985 }, { "epoch": 2.99, - "learning_rate": 1.409404153579298e-05, - "loss": 0.0564, + "learning_rate": 2.410325620582591e-05, + "loss": 0.076, "step": 63990 }, { "epoch": 2.99, - "learning_rate": 1.4093572734517839e-05, - "loss": 0.0845, + "learning_rate": 2.4102788135989892e-05, + "loss": 0.1323, "step": 63995 }, { "epoch": 2.99, - "learning_rate": 1.40931039332427e-05, - "loss": 0.0653, + "learning_rate": 2.4102320066153872e-05, + "loss": 0.1455, "step": 64000 }, { "epoch": 2.99, - "learning_rate": 1.409263513196756e-05, - "loss": 0.1316, + "learning_rate": 2.4101851996317852e-05, + "loss": 0.2264, "step": 64005 }, { "epoch": 2.99, - "learning_rate": 1.409216633069242e-05, - "loss": 0.2763, + "learning_rate": 2.4101383926481832e-05, + "loss": 0.1147, "step": 64010 }, { "epoch": 2.99, - "learning_rate": 1.409169752941728e-05, - "loss": 0.2955, + "learning_rate": 2.410091585664581e-05, + "loss": 0.21, "step": 64015 }, { "epoch": 2.99, - "learning_rate": 1.409122872814214e-05, - "loss": 0.0321, + "learning_rate": 2.410044778680979e-05, + "loss": 0.0451, "step": 64020 }, { "epoch": 2.99, - "learning_rate": 1.4090759926867002e-05, - "loss": 0.0718, + "learning_rate": 2.409997971697377e-05, + "loss": 0.029, "step": 64025 }, { "epoch": 2.99, - "learning_rate": 1.4090291125591862e-05, - "loss": 0.0463, + "learning_rate": 2.409951164713775e-05, + "loss": 0.069, "step": 64030 }, { "epoch": 2.99, - "learning_rate": 1.4089822324316724e-05, - "loss": 0.0631, + "learning_rate": 2.4099043577301734e-05, + "loss": 0.0565, "step": 64035 }, { "epoch": 2.99, - "learning_rate": 1.4089353523041585e-05, - "loss": 0.0756, + "learning_rate": 2.4098575507465714e-05, + "loss": 0.0509, "step": 64040 }, { "epoch": 2.99, - "learning_rate": 1.4088884721766445e-05, - "loss": 0.0511, + "learning_rate": 2.4098107437629694e-05, + "loss": 0.1369, "step": 64045 }, { "epoch": 2.99, - "learning_rate": 1.4088415920491305e-05, - "loss": 0.1322, + "learning_rate": 2.4097639367793677e-05, + "loss": 0.2191, "step": 64050 }, { "epoch": 2.99, - "learning_rate": 1.4087947119216165e-05, - "loss": 0.1484, + "learning_rate": 2.4097171297957657e-05, + "loss": 0.1317, "step": 64055 }, { "epoch": 2.99, - "learning_rate": 1.4087478317941025e-05, - "loss": 0.2167, + "learning_rate": 2.4096703228121637e-05, + "loss": 0.1902, "step": 64060 }, { "epoch": 2.99, - "learning_rate": 1.4087009516665887e-05, - "loss": 0.4035, + "learning_rate": 2.4096235158285617e-05, + "loss": 0.2301, "step": 64065 }, { "epoch": 2.99, - "learning_rate": 1.4086540715390747e-05, - "loss": 0.0353, + "learning_rate": 2.40957670884496e-05, + "loss": 0.0941, "step": 64070 }, { "epoch": 2.99, - "learning_rate": 1.4086071914115607e-05, - "loss": 0.0812, + "learning_rate": 2.409529901861358e-05, + "loss": 0.0223, "step": 64075 }, { "epoch": 2.99, - "learning_rate": 1.408560311284047e-05, - "loss": 0.0497, + "learning_rate": 2.4094830948777556e-05, + "loss": 0.0256, "step": 64080 }, { "epoch": 2.99, - "learning_rate": 1.408513431156533e-05, - "loss": 0.0927, + "learning_rate": 2.4094362878941536e-05, + "loss": 0.0912, "step": 64085 }, { "epoch": 2.99, - "learning_rate": 1.408466551029019e-05, - "loss": 0.1061, + "learning_rate": 2.409389480910552e-05, + "loss": 0.0955, "step": 64090 }, { "epoch": 2.99, - "learning_rate": 1.408419670901505e-05, - "loss": 0.0803, + "learning_rate": 2.40934267392695e-05, + "loss": 0.0974, "step": 64095 }, { "epoch": 2.99, - "learning_rate": 1.408372790773991e-05, - "loss": 0.0983, + "learning_rate": 2.409295866943348e-05, + "loss": 0.0965, "step": 64100 }, { "epoch": 2.99, - "learning_rate": 1.4083259106464771e-05, - "loss": 0.1288, + "learning_rate": 2.4092490599597462e-05, + "loss": 0.1043, "step": 64105 }, { "epoch": 2.99, - "learning_rate": 1.4082790305189631e-05, - "loss": 0.1537, + "learning_rate": 2.4092022529761442e-05, + "loss": 0.1681, "step": 64110 }, { "epoch": 2.99, - "learning_rate": 1.4082321503914491e-05, - "loss": 0.2068, + "learning_rate": 2.409155445992542e-05, + "loss": 0.3309, "step": 64115 }, { "epoch": 2.99, - "learning_rate": 1.4081852702639351e-05, - "loss": 0.0357, + "learning_rate": 2.40910863900894e-05, + "loss": 0.0548, "step": 64120 }, { "epoch": 2.99, - "learning_rate": 1.4081383901364214e-05, - "loss": 0.0367, + "learning_rate": 2.4090618320253385e-05, + "loss": 0.0593, "step": 64125 }, { "epoch": 2.99, - "learning_rate": 1.4080915100089074e-05, - "loss": 0.0455, + "learning_rate": 2.4090150250417364e-05, + "loss": 0.02, "step": 64130 }, { "epoch": 2.99, - "learning_rate": 1.4080446298813934e-05, - "loss": 0.1085, + "learning_rate": 2.4089682180581344e-05, + "loss": 0.0772, "step": 64135 }, { "epoch": 2.99, - "learning_rate": 1.4079977497538794e-05, - "loss": 0.1128, + "learning_rate": 2.408921411074532e-05, + "loss": 0.0398, "step": 64140 }, { "epoch": 2.99, - "learning_rate": 1.4079508696263656e-05, - "loss": 0.0766, + "learning_rate": 2.4088746040909304e-05, + "loss": 0.0954, "step": 64145 }, { "epoch": 2.99, - "learning_rate": 1.4079039894988516e-05, - "loss": 0.0769, + "learning_rate": 2.4088277971073284e-05, + "loss": 0.0995, "step": 64150 }, { "epoch": 2.99, - "learning_rate": 1.4078571093713376e-05, - "loss": 0.1373, + "learning_rate": 2.4087809901237263e-05, + "loss": 0.11, "step": 64155 }, { "epoch": 2.99, - "learning_rate": 1.4078102292438236e-05, - "loss": 0.2616, + "learning_rate": 2.4087341831401247e-05, + "loss": 0.1444, "step": 64160 }, { "epoch": 2.99, - "learning_rate": 1.4077633491163096e-05, - "loss": 0.4626, + "learning_rate": 2.4086873761565227e-05, + "loss": 0.2479, "step": 64165 }, { "epoch": 2.99, - "learning_rate": 1.4077164689887957e-05, - "loss": 0.0197, + "learning_rate": 2.4086405691729206e-05, + "loss": 0.0511, "step": 64170 }, { "epoch": 2.99, - "learning_rate": 1.4076695888612819e-05, - "loss": 0.0197, + "learning_rate": 2.4085937621893186e-05, + "loss": 0.0359, "step": 64175 }, { "epoch": 2.99, - "learning_rate": 1.4076227087337679e-05, - "loss": 0.0383, + "learning_rate": 2.408546955205717e-05, + "loss": 0.0519, "step": 64180 }, { "epoch": 2.99, - "learning_rate": 1.407575828606254e-05, - "loss": 0.1042, + "learning_rate": 2.408500148222115e-05, + "loss": 0.0396, "step": 64185 }, { "epoch": 3.0, - "learning_rate": 1.40752894847874e-05, - "loss": 0.0343, + "learning_rate": 2.408453341238513e-05, + "loss": 0.091, "step": 64190 }, { "epoch": 3.0, - "learning_rate": 1.407482068351226e-05, - "loss": 0.0483, + "learning_rate": 2.408406534254911e-05, + "loss": 0.1266, "step": 64195 }, { "epoch": 3.0, - "learning_rate": 1.407435188223712e-05, - "loss": 0.0665, + "learning_rate": 2.4083597272713092e-05, + "loss": 0.1414, "step": 64200 }, { "epoch": 3.0, - "learning_rate": 1.407388308096198e-05, - "loss": 0.2019, + "learning_rate": 2.408312920287707e-05, + "loss": 0.1365, "step": 64205 }, { "epoch": 3.0, - "learning_rate": 1.4073414279686842e-05, - "loss": 0.231, + "learning_rate": 2.4082661133041048e-05, + "loss": 0.2704, "step": 64210 }, { "epoch": 3.0, - "learning_rate": 1.4072945478411702e-05, - "loss": 0.338, + "learning_rate": 2.408219306320503e-05, + "loss": 0.248, "step": 64215 }, { "epoch": 3.0, - "learning_rate": 1.4072476677136563e-05, - "loss": 0.0589, + "learning_rate": 2.408172499336901e-05, + "loss": 0.0238, "step": 64220 }, { "epoch": 3.0, - "learning_rate": 1.4072007875861425e-05, - "loss": 0.0093, + "learning_rate": 2.408125692353299e-05, + "loss": 0.041, "step": 64225 }, { "epoch": 3.0, - "learning_rate": 1.4071539074586285e-05, - "loss": 0.0857, + "learning_rate": 2.408078885369697e-05, + "loss": 0.0768, "step": 64230 }, { "epoch": 3.0, - "learning_rate": 1.4071070273311145e-05, - "loss": 0.0601, + "learning_rate": 2.4080320783860954e-05, + "loss": 0.1015, "step": 64235 }, { "epoch": 3.0, - "learning_rate": 1.4070601472036005e-05, - "loss": 0.1184, + "learning_rate": 2.4079852714024934e-05, + "loss": 0.0356, "step": 64240 }, { "epoch": 3.0, - "learning_rate": 1.4070132670760865e-05, - "loss": 0.0687, + "learning_rate": 2.4079384644188914e-05, + "loss": 0.0705, "step": 64245 }, { "epoch": 3.0, - "learning_rate": 1.4069663869485726e-05, - "loss": 0.0938, + "learning_rate": 2.4078916574352894e-05, + "loss": 0.1228, "step": 64250 }, { "epoch": 3.0, - "learning_rate": 1.4069195068210586e-05, - "loss": 0.0997, + "learning_rate": 2.4078448504516877e-05, + "loss": 0.1553, "step": 64255 }, { "epoch": 3.0, - "learning_rate": 1.4068726266935446e-05, - "loss": 0.2678, + "learning_rate": 2.4077980434680857e-05, + "loss": 0.1956, "step": 64260 }, { "epoch": 3.0, - "learning_rate": 1.406825746566031e-05, - "loss": 0.3459, + "learning_rate": 2.4077512364844836e-05, + "loss": 0.3237, "step": 64265 }, { "epoch": 3.0, - "learning_rate": 1.406778866438517e-05, - "loss": 0.0474, + "learning_rate": 2.4077044295008813e-05, + "loss": 0.0495, "step": 64270 }, { "epoch": 3.0, - "learning_rate": 1.406731986311003e-05, - "loss": 0.0988, + "learning_rate": 2.4076576225172796e-05, + "loss": 0.0439, "step": 64275 }, { "epoch": 3.0, - "learning_rate": 1.406685106183489e-05, - "loss": 0.0423, + "learning_rate": 2.4076108155336776e-05, + "loss": 0.0511, "step": 64280 }, { "epoch": 3.0, - "learning_rate": 1.406638226055975e-05, - "loss": 0.0893, + "learning_rate": 2.4075640085500756e-05, + "loss": 0.0824, "step": 64285 }, { "epoch": 3.0, - "learning_rate": 1.406591345928461e-05, - "loss": 0.1688, + "learning_rate": 2.407517201566474e-05, + "loss": 0.1603, "step": 64290 }, { "epoch": 3.0, - "eval_cer": 0.011432097636787493, - "eval_loss": 0.03627169877290726, - "eval_runtime": 381.0741, - "eval_samples_per_second": 49.99, - "eval_steps_per_second": 12.499, - "eval_wer": 0.09668894174500699, + "eval_cer": 0.011836236095430231, + "eval_loss": 0.024219496175646782, + "eval_runtime": 393.8928, + "eval_samples_per_second": 48.363, + "eval_steps_per_second": 12.092, + "eval_wer": 0.10190570947497321, "step": 64293 }, { "epoch": 3.0, - "learning_rate": 1.406544465800947e-05, - "loss": 0.2315, + "learning_rate": 2.407470394582872e-05, + "loss": 0.2661, "step": 64295 }, { "epoch": 3.0, - "learning_rate": 1.406497585673433e-05, - "loss": 0.1021, + "learning_rate": 2.40742358759927e-05, + "loss": 0.0736, "step": 64300 }, { "epoch": 3.0, - "learning_rate": 1.406450705545919e-05, - "loss": 0.0167, + "learning_rate": 2.407376780615668e-05, + "loss": 0.0304, "step": 64305 }, { "epoch": 3.0, - "learning_rate": 1.4064038254184052e-05, - "loss": 0.0554, + "learning_rate": 2.407329973632066e-05, + "loss": 0.0756, "step": 64310 }, { "epoch": 3.0, - "learning_rate": 1.4063569452908914e-05, - "loss": 0.0683, + "learning_rate": 2.407283166648464e-05, + "loss": 0.0803, "step": 64315 }, { "epoch": 3.0, - "learning_rate": 1.4063100651633774e-05, - "loss": 0.0589, + "learning_rate": 2.407236359664862e-05, + "loss": 0.0426, "step": 64320 }, { "epoch": 3.0, - "learning_rate": 1.4062631850358634e-05, - "loss": 0.1134, + "learning_rate": 2.40718955268126e-05, + "loss": 0.0728, "step": 64325 }, { "epoch": 3.0, - "learning_rate": 1.4062163049083495e-05, - "loss": 0.1114, + "learning_rate": 2.407142745697658e-05, + "loss": 0.0431, "step": 64330 }, { "epoch": 3.0, - "learning_rate": 1.4061694247808355e-05, - "loss": 0.1605, + "learning_rate": 2.407095938714056e-05, + "loss": 0.2122, "step": 64335 }, { "epoch": 3.0, - "learning_rate": 1.4061225446533215e-05, - "loss": 0.2171, + "learning_rate": 2.407049131730454e-05, + "loss": 0.2329, "step": 64340 }, { "epoch": 3.0, - "learning_rate": 1.4060756645258075e-05, - "loss": 0.2604, + "learning_rate": 2.4070023247468524e-05, + "loss": 0.2952, "step": 64345 }, { "epoch": 3.0, - "learning_rate": 1.4060287843982937e-05, - "loss": 0.0621, + "learning_rate": 2.4069555177632503e-05, + "loss": 0.0852, "step": 64350 }, { "epoch": 3.0, - "learning_rate": 1.4059819042707797e-05, - "loss": 0.0456, + "learning_rate": 2.4069087107796483e-05, + "loss": 0.0394, "step": 64355 }, { "epoch": 3.0, - "learning_rate": 1.4059350241432658e-05, - "loss": 0.0587, + "learning_rate": 2.4068619037960463e-05, + "loss": 0.0369, "step": 64360 }, { "epoch": 3.0, - "learning_rate": 1.4058881440157518e-05, - "loss": 0.0389, + "learning_rate": 2.4068150968124446e-05, + "loss": 0.0732, "step": 64365 }, { "epoch": 3.0, - "learning_rate": 1.405841263888238e-05, - "loss": 0.0616, + "learning_rate": 2.4067682898288426e-05, + "loss": 0.0406, "step": 64370 }, { "epoch": 3.0, - "learning_rate": 1.405794383760724e-05, - "loss": 0.0693, + "learning_rate": 2.4067214828452406e-05, + "loss": 0.0686, "step": 64375 }, { "epoch": 3.0, - "learning_rate": 1.40574750363321e-05, - "loss": 0.0516, + "learning_rate": 2.4066746758616386e-05, + "loss": 0.1074, "step": 64380 }, { "epoch": 3.0, - "learning_rate": 1.405700623505696e-05, - "loss": 0.1313, + "learning_rate": 2.406627868878037e-05, + "loss": 0.1855, "step": 64385 }, { "epoch": 3.0, - "learning_rate": 1.4056537433781821e-05, - "loss": 0.3616, + "learning_rate": 2.406581061894435e-05, + "loss": 0.3514, "step": 64390 }, { "epoch": 3.0, - "learning_rate": 1.4056068632506681e-05, - "loss": 0.2899, + "learning_rate": 2.4065342549108325e-05, + "loss": 0.2543, "step": 64395 }, { "epoch": 3.0, - "learning_rate": 1.4055599831231541e-05, - "loss": 0.0599, + "learning_rate": 2.406487447927231e-05, + "loss": 0.0924, "step": 64400 }, { "epoch": 3.01, - "learning_rate": 1.4055131029956403e-05, - "loss": 0.0295, + "learning_rate": 2.4064406409436288e-05, + "loss": 0.0407, "step": 64405 }, { "epoch": 3.01, - "learning_rate": 1.4054662228681264e-05, - "loss": 0.0761, + "learning_rate": 2.4063938339600268e-05, + "loss": 0.0544, "step": 64410 }, { "epoch": 3.01, - "learning_rate": 1.4054193427406124e-05, - "loss": 0.0633, + "learning_rate": 2.4063470269764248e-05, + "loss": 0.0208, "step": 64415 }, { "epoch": 3.01, - "learning_rate": 1.4053724626130984e-05, - "loss": 0.0678, + "learning_rate": 2.406300219992823e-05, + "loss": 0.0418, "step": 64420 }, { "epoch": 3.01, - "learning_rate": 1.4053255824855844e-05, - "loss": 0.081, + "learning_rate": 2.406253413009221e-05, + "loss": 0.1326, "step": 64425 }, { "epoch": 3.01, - "learning_rate": 1.4052787023580706e-05, - "loss": 0.1015, + "learning_rate": 2.406206606025619e-05, + "loss": 0.1135, "step": 64430 }, { "epoch": 3.01, - "learning_rate": 1.4052318222305566e-05, - "loss": 0.1764, + "learning_rate": 2.406159799042017e-05, + "loss": 0.1758, "step": 64435 }, { "epoch": 3.01, - "learning_rate": 1.4051849421030426e-05, - "loss": 0.1692, + "learning_rate": 2.4061129920584154e-05, + "loss": 0.1194, "step": 64440 }, { "epoch": 3.01, - "learning_rate": 1.4051380619755286e-05, - "loss": 0.3618, + "learning_rate": 2.4060661850748134e-05, + "loss": 0.2236, "step": 64445 }, { "epoch": 3.01, - "learning_rate": 1.4050911818480149e-05, - "loss": 0.0466, + "learning_rate": 2.4060193780912113e-05, + "loss": 0.0462, "step": 64450 }, { "epoch": 3.01, - "learning_rate": 1.4050443017205009e-05, - "loss": 0.0402, + "learning_rate": 2.4059725711076093e-05, + "loss": 0.0365, "step": 64455 }, { "epoch": 3.01, - "learning_rate": 1.4049974215929869e-05, - "loss": 0.0041, + "learning_rate": 2.4059257641240073e-05, + "loss": 0.0308, "step": 64460 }, { "epoch": 3.01, - "learning_rate": 1.4049505414654729e-05, - "loss": 0.0794, + "learning_rate": 2.4058789571404053e-05, + "loss": 0.0773, "step": 64465 }, { "epoch": 3.01, - "learning_rate": 1.404903661337959e-05, - "loss": 0.1197, + "learning_rate": 2.4058321501568033e-05, + "loss": 0.0885, "step": 64470 }, { "epoch": 3.01, - "learning_rate": 1.404856781210445e-05, - "loss": 0.0774, + "learning_rate": 2.4057853431732016e-05, + "loss": 0.1536, "step": 64475 }, { "epoch": 3.01, - "learning_rate": 1.404809901082931e-05, - "loss": 0.1861, + "learning_rate": 2.4057385361895996e-05, + "loss": 0.1161, "step": 64480 }, { "epoch": 3.01, - "learning_rate": 1.404763020955417e-05, - "loss": 0.1428, + "learning_rate": 2.4056917292059975e-05, + "loss": 0.1125, "step": 64485 }, { "epoch": 3.01, - "learning_rate": 1.404716140827903e-05, - "loss": 0.2123, + "learning_rate": 2.4056449222223955e-05, + "loss": 0.1425, "step": 64490 }, { "epoch": 3.01, - "learning_rate": 1.4046692607003892e-05, - "loss": 0.254, + "learning_rate": 2.405598115238794e-05, + "loss": 0.2037, "step": 64495 }, { "epoch": 3.01, - "learning_rate": 1.4046223805728753e-05, - "loss": 0.0496, + "learning_rate": 2.405551308255192e-05, + "loss": 0.04, "step": 64500 }, { "epoch": 3.01, - "learning_rate": 1.4045755004453613e-05, - "loss": 0.0437, + "learning_rate": 2.4055045012715898e-05, + "loss": 0.0305, "step": 64505 }, { "epoch": 3.01, - "learning_rate": 1.4045286203178475e-05, - "loss": 0.0811, + "learning_rate": 2.4054576942879878e-05, + "loss": 0.0713, "step": 64510 }, { "epoch": 3.01, - "learning_rate": 1.4044817401903335e-05, - "loss": 0.0684, + "learning_rate": 2.405410887304386e-05, + "loss": 0.0466, "step": 64515 }, { "epoch": 3.01, - "learning_rate": 1.4044348600628195e-05, - "loss": 0.0883, + "learning_rate": 2.4053640803207838e-05, + "loss": 0.0713, "step": 64520 }, { "epoch": 3.01, - "learning_rate": 1.4043879799353055e-05, - "loss": 0.0396, + "learning_rate": 2.4053172733371817e-05, + "loss": 0.0616, "step": 64525 }, { "epoch": 3.01, - "learning_rate": 1.4043410998077915e-05, - "loss": 0.0864, + "learning_rate": 2.40527046635358e-05, + "loss": 0.0961, "step": 64530 }, { "epoch": 3.01, - "learning_rate": 1.4042942196802776e-05, - "loss": 0.1839, + "learning_rate": 2.405223659369978e-05, + "loss": 0.1488, "step": 64535 }, { "epoch": 3.01, - "learning_rate": 1.4042473395527636e-05, - "loss": 0.2589, + "learning_rate": 2.405176852386376e-05, + "loss": 0.1579, "step": 64540 }, { "epoch": 3.01, - "learning_rate": 1.4042004594252498e-05, - "loss": 0.3049, + "learning_rate": 2.405130045402774e-05, + "loss": 0.218, "step": 64545 }, { "epoch": 3.01, - "learning_rate": 1.404153579297736e-05, - "loss": 0.0369, + "learning_rate": 2.4050832384191723e-05, + "loss": 0.0249, "step": 64550 }, { "epoch": 3.01, - "learning_rate": 1.404106699170222e-05, - "loss": 0.0151, + "learning_rate": 2.4050364314355703e-05, + "loss": 0.013, "step": 64555 }, { "epoch": 3.01, - "learning_rate": 1.404059819042708e-05, - "loss": 0.0752, + "learning_rate": 2.4049896244519683e-05, + "loss": 0.0139, "step": 64560 }, { "epoch": 3.01, - "learning_rate": 1.404012938915194e-05, - "loss": 0.0677, + "learning_rate": 2.4049428174683663e-05, + "loss": 0.0399, "step": 64565 }, { "epoch": 3.01, - "learning_rate": 1.40396605878768e-05, - "loss": 0.0562, + "learning_rate": 2.4048960104847646e-05, + "loss": 0.0323, "step": 64570 }, { "epoch": 3.01, - "learning_rate": 1.4039191786601661e-05, - "loss": 0.0675, + "learning_rate": 2.4048492035011626e-05, + "loss": 0.1049, "step": 64575 }, { "epoch": 3.01, - "learning_rate": 1.403872298532652e-05, - "loss": 0.2245, + "learning_rate": 2.4048023965175606e-05, + "loss": 0.072, "step": 64580 }, { "epoch": 3.01, - "learning_rate": 1.403825418405138e-05, - "loss": 0.0523, + "learning_rate": 2.4047555895339585e-05, + "loss": 0.185, "step": 64585 }, { "epoch": 3.01, - "learning_rate": 1.4037785382776244e-05, - "loss": 0.1616, + "learning_rate": 2.4047087825503565e-05, + "loss": 0.3518, "step": 64590 }, { "epoch": 3.01, - "learning_rate": 1.4037316581501104e-05, - "loss": 0.3289, + "learning_rate": 2.4046619755667545e-05, + "loss": 0.427, "step": 64595 }, { "epoch": 3.01, - "learning_rate": 1.4036847780225964e-05, - "loss": 0.067, + "learning_rate": 2.4046151685831525e-05, + "loss": 0.0305, "step": 64600 }, { "epoch": 3.01, - "learning_rate": 1.4036378978950824e-05, - "loss": 0.0411, + "learning_rate": 2.4045683615995508e-05, + "loss": 0.0394, "step": 64605 }, { "epoch": 3.01, - "learning_rate": 1.4035910177675684e-05, - "loss": 0.0761, + "learning_rate": 2.4045215546159488e-05, + "loss": 0.0725, "step": 64610 }, { "epoch": 3.02, - "learning_rate": 1.4035441376400545e-05, - "loss": 0.0312, + "learning_rate": 2.4044747476323468e-05, + "loss": 0.0384, "step": 64615 }, { "epoch": 3.02, - "learning_rate": 1.4034972575125405e-05, - "loss": 0.0734, + "learning_rate": 2.4044279406487448e-05, + "loss": 0.1229, "step": 64620 }, { "epoch": 3.02, - "learning_rate": 1.4034503773850265e-05, - "loss": 0.1185, + "learning_rate": 2.404381133665143e-05, + "loss": 0.1434, "step": 64625 }, { "epoch": 3.02, - "learning_rate": 1.4034034972575125e-05, - "loss": 0.1229, + "learning_rate": 2.404334326681541e-05, + "loss": 0.1293, "step": 64630 }, { "epoch": 3.02, - "learning_rate": 1.4033566171299985e-05, - "loss": 0.0916, + "learning_rate": 2.404287519697939e-05, + "loss": 0.1563, "step": 64635 }, { "epoch": 3.02, - "learning_rate": 1.4033097370024848e-05, - "loss": 0.174, + "learning_rate": 2.404240712714337e-05, + "loss": 0.3345, "step": 64640 }, { "epoch": 3.02, - "learning_rate": 1.4032628568749708e-05, - "loss": 0.3041, + "learning_rate": 2.404193905730735e-05, + "loss": 0.3985, "step": 64645 }, { "epoch": 3.02, - "learning_rate": 1.4032159767474568e-05, - "loss": 0.0549, + "learning_rate": 2.404147098747133e-05, + "loss": 0.0744, "step": 64650 }, { "epoch": 3.02, - "learning_rate": 1.403169096619943e-05, - "loss": 0.053, + "learning_rate": 2.404100291763531e-05, + "loss": 0.0122, "step": 64655 }, { "epoch": 3.02, - "learning_rate": 1.403122216492429e-05, - "loss": 0.0258, + "learning_rate": 2.4040534847799293e-05, + "loss": 0.0668, "step": 64660 }, { "epoch": 3.02, - "learning_rate": 1.403075336364915e-05, - "loss": 0.0875, + "learning_rate": 2.4040066777963273e-05, + "loss": 0.1537, "step": 64665 }, { "epoch": 3.02, - "learning_rate": 1.403028456237401e-05, - "loss": 0.0554, + "learning_rate": 2.4039598708127252e-05, + "loss": 0.0562, "step": 64670 }, { "epoch": 3.02, - "learning_rate": 1.402981576109887e-05, - "loss": 0.0884, + "learning_rate": 2.4039130638291232e-05, + "loss": 0.1346, "step": 64675 }, { "epoch": 3.02, - "learning_rate": 1.4029346959823731e-05, - "loss": 0.1799, + "learning_rate": 2.4038662568455215e-05, + "loss": 0.1182, "step": 64680 }, { "epoch": 3.02, - "learning_rate": 1.4028878158548593e-05, - "loss": 0.1663, + "learning_rate": 2.4038194498619195e-05, + "loss": 0.091, "step": 64685 }, { "epoch": 3.02, - "learning_rate": 1.4028409357273453e-05, - "loss": 0.208, + "learning_rate": 2.4037726428783175e-05, + "loss": 0.1375, "step": 64690 }, { "epoch": 3.02, - "learning_rate": 1.4027940555998315e-05, - "loss": 0.462, + "learning_rate": 2.4037258358947155e-05, + "loss": 0.1974, "step": 64695 }, { "epoch": 3.02, - "learning_rate": 1.4027471754723174e-05, - "loss": 0.0676, + "learning_rate": 2.4036790289111138e-05, + "loss": 0.0714, "step": 64700 }, { "epoch": 3.02, - "learning_rate": 1.4027002953448034e-05, - "loss": 0.0337, + "learning_rate": 2.4036322219275118e-05, + "loss": 0.0577, "step": 64705 }, { "epoch": 3.02, - "learning_rate": 1.4026534152172894e-05, - "loss": 0.0228, + "learning_rate": 2.4035854149439094e-05, + "loss": 0.0317, "step": 64710 }, { "epoch": 3.02, - "learning_rate": 1.4026065350897754e-05, - "loss": 0.0328, + "learning_rate": 2.4035386079603078e-05, + "loss": 0.0354, "step": 64715 }, { "epoch": 3.02, - "learning_rate": 1.4025596549622616e-05, - "loss": 0.0326, + "learning_rate": 2.4034918009767057e-05, + "loss": 0.1302, "step": 64720 }, { "epoch": 3.02, - "learning_rate": 1.4025127748347476e-05, - "loss": 0.0464, + "learning_rate": 2.4034449939931037e-05, + "loss": 0.1018, "step": 64725 }, { "epoch": 3.02, - "learning_rate": 1.4024658947072337e-05, - "loss": 0.1157, + "learning_rate": 2.4033981870095017e-05, + "loss": 0.1755, "step": 64730 }, { "epoch": 3.02, - "learning_rate": 1.4024190145797199e-05, - "loss": 0.1096, + "learning_rate": 2.4033513800259e-05, + "loss": 0.0861, "step": 64735 }, { "epoch": 3.02, - "learning_rate": 1.4023721344522059e-05, - "loss": 0.2395, + "learning_rate": 2.403304573042298e-05, + "loss": 0.189, "step": 64740 }, { "epoch": 3.02, - "learning_rate": 1.4023252543246919e-05, - "loss": 0.2545, + "learning_rate": 2.403257766058696e-05, + "loss": 0.1081, "step": 64745 }, { "epoch": 3.02, - "learning_rate": 1.4022783741971779e-05, - "loss": 0.0549, + "learning_rate": 2.403210959075094e-05, + "loss": 0.071, "step": 64750 }, { "epoch": 3.02, - "learning_rate": 1.402231494069664e-05, - "loss": 0.0309, + "learning_rate": 2.4031641520914923e-05, + "loss": 0.0398, "step": 64755 }, { "epoch": 3.02, - "learning_rate": 1.40218461394215e-05, - "loss": 0.0336, + "learning_rate": 2.4031173451078903e-05, + "loss": 0.0524, "step": 64760 }, { "epoch": 3.02, - "learning_rate": 1.402137733814636e-05, - "loss": 0.0846, + "learning_rate": 2.4030705381242883e-05, + "loss": 0.0661, "step": 64765 }, { "epoch": 3.02, - "learning_rate": 1.402090853687122e-05, - "loss": 0.0801, + "learning_rate": 2.4030237311406866e-05, + "loss": 0.0828, "step": 64770 }, { "epoch": 3.02, - "learning_rate": 1.4020439735596084e-05, - "loss": 0.0981, + "learning_rate": 2.4029769241570842e-05, + "loss": 0.024, "step": 64775 }, { "epoch": 3.02, - "learning_rate": 1.4019970934320944e-05, - "loss": 0.0964, + "learning_rate": 2.4029301171734822e-05, + "loss": 0.1556, "step": 64780 }, { "epoch": 3.02, - "learning_rate": 1.4019502133045804e-05, - "loss": 0.1676, + "learning_rate": 2.4028833101898802e-05, + "loss": 0.0791, "step": 64785 }, { "epoch": 3.02, - "learning_rate": 1.4019033331770663e-05, - "loss": 0.2262, + "learning_rate": 2.4028365032062785e-05, + "loss": 0.2287, "step": 64790 }, { "epoch": 3.02, - "learning_rate": 1.4018564530495525e-05, - "loss": 0.3382, + "learning_rate": 2.4027896962226765e-05, + "loss": 0.3312, "step": 64795 }, { "epoch": 3.02, - "learning_rate": 1.4018095729220385e-05, - "loss": 0.1401, + "learning_rate": 2.4027428892390745e-05, + "loss": 0.0829, "step": 64800 }, { "epoch": 3.02, - "learning_rate": 1.4017626927945245e-05, - "loss": 0.0242, + "learning_rate": 2.4026960822554724e-05, + "loss": 0.043, "step": 64805 }, { "epoch": 3.02, - "learning_rate": 1.4017158126670105e-05, - "loss": 0.0722, + "learning_rate": 2.4026492752718708e-05, + "loss": 0.0425, "step": 64810 }, { "epoch": 3.02, - "learning_rate": 1.4016689325394965e-05, - "loss": 0.0581, + "learning_rate": 2.4026024682882688e-05, + "loss": 0.038, "step": 64815 }, { "epoch": 3.02, - "learning_rate": 1.4016220524119826e-05, - "loss": 0.0565, + "learning_rate": 2.4025556613046667e-05, + "loss": 0.1446, "step": 64820 }, { "epoch": 3.02, - "learning_rate": 1.4015751722844688e-05, - "loss": 0.0378, + "learning_rate": 2.402508854321065e-05, + "loss": 0.08, "step": 64825 }, { "epoch": 3.03, - "learning_rate": 1.4015282921569548e-05, - "loss": 0.1204, + "learning_rate": 2.402462047337463e-05, + "loss": 0.1038, "step": 64830 }, { "epoch": 3.03, - "learning_rate": 1.401481412029441e-05, - "loss": 0.0845, + "learning_rate": 2.4024152403538607e-05, + "loss": 0.1653, "step": 64835 }, { "epoch": 3.03, - "learning_rate": 1.401434531901927e-05, - "loss": 0.1998, + "learning_rate": 2.4023684333702587e-05, + "loss": 0.1564, "step": 64840 }, { "epoch": 3.03, - "learning_rate": 1.401387651774413e-05, - "loss": 0.2978, + "learning_rate": 2.402321626386657e-05, + "loss": 0.1972, "step": 64845 }, { "epoch": 3.03, - "learning_rate": 1.401340771646899e-05, - "loss": 0.0672, + "learning_rate": 2.402274819403055e-05, + "loss": 0.0771, "step": 64850 }, { "epoch": 3.03, - "learning_rate": 1.401293891519385e-05, - "loss": 0.0436, + "learning_rate": 2.402228012419453e-05, + "loss": 0.0421, "step": 64855 }, { "epoch": 3.03, - "learning_rate": 1.4012470113918711e-05, - "loss": 0.0702, + "learning_rate": 2.402181205435851e-05, + "loss": 0.0837, "step": 64860 }, { "epoch": 3.03, - "learning_rate": 1.4012001312643571e-05, - "loss": 0.0439, + "learning_rate": 2.4021343984522492e-05, + "loss": 0.073, "step": 64865 }, { "epoch": 3.03, - "learning_rate": 1.4011532511368433e-05, - "loss": 0.0986, + "learning_rate": 2.4020875914686472e-05, + "loss": 0.065, "step": 64870 }, { "epoch": 3.03, - "learning_rate": 1.4011063710093294e-05, - "loss": 0.05, + "learning_rate": 2.4020407844850452e-05, + "loss": 0.0497, "step": 64875 }, { "epoch": 3.03, - "learning_rate": 1.4010594908818154e-05, - "loss": 0.1149, + "learning_rate": 2.4019939775014432e-05, + "loss": 0.1016, "step": 64880 }, { "epoch": 3.03, - "learning_rate": 1.4010126107543014e-05, - "loss": 0.1209, + "learning_rate": 2.4019471705178415e-05, + "loss": 0.0923, "step": 64885 }, { "epoch": 3.03, - "learning_rate": 1.4009657306267874e-05, - "loss": 0.1562, + "learning_rate": 2.4019003635342395e-05, + "loss": 0.149, "step": 64890 }, { "epoch": 3.03, - "learning_rate": 1.4009188504992734e-05, - "loss": 0.2224, + "learning_rate": 2.4018535565506375e-05, + "loss": 0.2703, "step": 64895 }, { "epoch": 3.03, - "learning_rate": 1.4008719703717596e-05, - "loss": 0.1391, + "learning_rate": 2.4018067495670355e-05, + "loss": 0.0324, "step": 64900 }, { "epoch": 3.03, - "learning_rate": 1.4008250902442455e-05, - "loss": 0.0347, + "learning_rate": 2.4017599425834334e-05, + "loss": 0.0473, "step": 64905 }, { "epoch": 3.03, - "learning_rate": 1.4007782101167315e-05, - "loss": 0.0537, + "learning_rate": 2.4017131355998314e-05, + "loss": 0.079, "step": 64910 }, { "epoch": 3.03, - "learning_rate": 1.4007313299892179e-05, - "loss": 0.0274, + "learning_rate": 2.4016663286162294e-05, + "loss": 0.0536, "step": 64915 }, { "epoch": 3.03, - "learning_rate": 1.4006844498617039e-05, - "loss": 0.049, + "learning_rate": 2.4016195216326277e-05, + "loss": 0.0943, "step": 64920 }, { "epoch": 3.03, - "learning_rate": 1.4006375697341899e-05, - "loss": 0.0963, + "learning_rate": 2.4015727146490257e-05, + "loss": 0.1196, "step": 64925 }, { "epoch": 3.03, - "learning_rate": 1.4005906896066759e-05, - "loss": 0.0633, + "learning_rate": 2.4015259076654237e-05, + "loss": 0.1189, "step": 64930 }, { "epoch": 3.03, - "learning_rate": 1.4005438094791618e-05, - "loss": 0.0924, + "learning_rate": 2.4014791006818217e-05, + "loss": 0.2347, "step": 64935 }, { "epoch": 3.03, - "learning_rate": 1.400496929351648e-05, - "loss": 0.2455, + "learning_rate": 2.40143229369822e-05, + "loss": 0.1777, "step": 64940 }, { "epoch": 3.03, - "learning_rate": 1.400450049224134e-05, - "loss": 0.229, + "learning_rate": 2.401385486714618e-05, + "loss": 0.2343, "step": 64945 }, { "epoch": 3.03, - "learning_rate": 1.40040316909662e-05, - "loss": 0.0981, + "learning_rate": 2.401338679731016e-05, + "loss": 0.078, "step": 64950 }, { "epoch": 3.03, - "learning_rate": 1.400356288969106e-05, - "loss": 0.0606, + "learning_rate": 2.4012918727474143e-05, + "loss": 0.0282, "step": 64955 }, { "epoch": 3.03, - "learning_rate": 1.400309408841592e-05, - "loss": 0.0343, + "learning_rate": 2.4012450657638123e-05, + "loss": 0.1023, "step": 64960 }, { "epoch": 3.03, - "learning_rate": 1.4002625287140783e-05, - "loss": 0.0352, + "learning_rate": 2.40119825878021e-05, + "loss": 0.0813, "step": 64965 }, { "epoch": 3.03, - "learning_rate": 1.4002156485865643e-05, - "loss": 0.0588, + "learning_rate": 2.401151451796608e-05, + "loss": 0.0717, "step": 64970 }, { "epoch": 3.03, - "learning_rate": 1.4001687684590503e-05, - "loss": 0.0514, + "learning_rate": 2.4011046448130062e-05, + "loss": 0.0378, "step": 64975 }, { "epoch": 3.03, - "learning_rate": 1.4001218883315365e-05, - "loss": 0.1118, + "learning_rate": 2.4010578378294042e-05, + "loss": 0.0434, "step": 64980 }, { "epoch": 3.03, - "learning_rate": 1.4000750082040225e-05, - "loss": 0.1195, + "learning_rate": 2.401011030845802e-05, + "loss": 0.0728, "step": 64985 }, { "epoch": 3.03, - "learning_rate": 1.4000281280765085e-05, - "loss": 0.1169, + "learning_rate": 2.4009642238622e-05, + "loss": 0.2031, "step": 64990 }, { "epoch": 3.03, - "learning_rate": 1.3999812479489944e-05, - "loss": 0.2991, + "learning_rate": 2.4009174168785985e-05, + "loss": 0.3233, "step": 64995 }, { "epoch": 3.03, - "learning_rate": 1.3999343678214804e-05, - "loss": 0.0672, + "learning_rate": 2.4008706098949964e-05, + "loss": 0.0408, "step": 65000 }, { "epoch": 3.03, - "learning_rate": 1.3998874876939666e-05, - "loss": 0.0248, + "learning_rate": 2.4008238029113944e-05, + "loss": 0.0596, "step": 65005 }, { "epoch": 3.03, - "learning_rate": 1.3998406075664528e-05, - "loss": 0.0432, + "learning_rate": 2.4007769959277928e-05, + "loss": 0.0227, "step": 65010 }, { "epoch": 3.03, - "learning_rate": 1.3997937274389388e-05, - "loss": 0.0714, + "learning_rate": 2.4007301889441907e-05, + "loss": 0.0575, "step": 65015 }, { "epoch": 3.03, - "learning_rate": 1.399746847311425e-05, - "loss": 0.1364, + "learning_rate": 2.4006833819605887e-05, + "loss": 0.0682, "step": 65020 }, { "epoch": 3.03, - "learning_rate": 1.3996999671839109e-05, - "loss": 0.0531, + "learning_rate": 2.4006365749769864e-05, + "loss": 0.0263, "step": 65025 }, { "epoch": 3.03, - "learning_rate": 1.3996530870563969e-05, - "loss": 0.1025, + "learning_rate": 2.4005897679933847e-05, + "loss": 0.1422, "step": 65030 }, { "epoch": 3.03, - "learning_rate": 1.3996062069288829e-05, - "loss": 0.1427, + "learning_rate": 2.4005429610097827e-05, + "loss": 0.0933, "step": 65035 }, { "epoch": 3.03, - "learning_rate": 1.3995593268013689e-05, - "loss": 0.1708, + "learning_rate": 2.4004961540261806e-05, + "loss": 0.2428, "step": 65040 }, { "epoch": 3.04, - "learning_rate": 1.399512446673855e-05, - "loss": 0.3074, + "learning_rate": 2.4004493470425786e-05, + "loss": 0.3646, "step": 65045 }, { "epoch": 3.04, - "learning_rate": 1.399465566546341e-05, - "loss": 0.1141, + "learning_rate": 2.400402540058977e-05, + "loss": 0.0364, "step": 65050 }, { "epoch": 3.04, - "learning_rate": 1.3994186864188272e-05, - "loss": 0.0437, + "learning_rate": 2.400355733075375e-05, + "loss": 0.0704, "step": 65055 }, { "epoch": 3.04, - "learning_rate": 1.3993718062913134e-05, - "loss": 0.047, + "learning_rate": 2.400308926091773e-05, + "loss": 0.0176, "step": 65060 }, { "epoch": 3.04, - "learning_rate": 1.3993249261637994e-05, - "loss": 0.0521, + "learning_rate": 2.400262119108171e-05, + "loss": 0.0855, "step": 65065 }, { "epoch": 3.04, - "learning_rate": 1.3992780460362854e-05, - "loss": 0.04, + "learning_rate": 2.4002153121245692e-05, + "loss": 0.0665, "step": 65070 }, { "epoch": 3.04, - "learning_rate": 1.3992311659087714e-05, - "loss": 0.132, + "learning_rate": 2.4001685051409672e-05, + "loss": 0.0518, "step": 65075 }, { "epoch": 3.04, - "learning_rate": 1.3991842857812573e-05, - "loss": 0.0976, + "learning_rate": 2.4001216981573652e-05, + "loss": 0.121, "step": 65080 }, { "epoch": 3.04, - "learning_rate": 1.3991374056537435e-05, - "loss": 0.1697, + "learning_rate": 2.4000748911737635e-05, + "loss": 0.183, "step": 65085 }, { "epoch": 3.04, - "learning_rate": 1.3990905255262295e-05, - "loss": 0.164, + "learning_rate": 2.400028084190161e-05, + "loss": 0.1685, "step": 65090 }, { "epoch": 3.04, - "learning_rate": 1.3990436453987155e-05, - "loss": 0.175, + "learning_rate": 2.399981277206559e-05, + "loss": 0.1591, "step": 65095 }, { "epoch": 3.04, - "learning_rate": 1.3989967652712018e-05, - "loss": 0.0484, + "learning_rate": 2.399934470222957e-05, + "loss": 0.0276, "step": 65100 }, { "epoch": 3.04, - "learning_rate": 1.3989498851436878e-05, - "loss": 0.0495, + "learning_rate": 2.3998876632393554e-05, + "loss": 0.0711, "step": 65105 }, { "epoch": 3.04, - "learning_rate": 1.3989030050161738e-05, - "loss": 0.0101, + "learning_rate": 2.3998408562557534e-05, + "loss": 0.0244, "step": 65110 }, { "epoch": 3.04, - "learning_rate": 1.3988561248886598e-05, - "loss": 0.0512, + "learning_rate": 2.3997940492721514e-05, + "loss": 0.0595, "step": 65115 }, { "epoch": 3.04, - "learning_rate": 1.3988092447611458e-05, - "loss": 0.0571, + "learning_rate": 2.3997472422885494e-05, + "loss": 0.0496, "step": 65120 }, { "epoch": 3.04, - "learning_rate": 1.398762364633632e-05, - "loss": 0.1731, + "learning_rate": 2.3997004353049477e-05, + "loss": 0.146, "step": 65125 }, { "epoch": 3.04, - "learning_rate": 1.398715484506118e-05, - "loss": 0.1818, + "learning_rate": 2.3996536283213457e-05, + "loss": 0.096, "step": 65130 }, { "epoch": 3.04, - "learning_rate": 1.398668604378604e-05, - "loss": 0.1292, + "learning_rate": 2.3996068213377436e-05, + "loss": 0.1773, "step": 65135 }, { "epoch": 3.04, - "learning_rate": 1.39862172425109e-05, - "loss": 0.314, + "learning_rate": 2.399560014354142e-05, + "loss": 0.3202, "step": 65140 }, { "epoch": 3.04, - "learning_rate": 1.3985748441235761e-05, - "loss": 0.2696, + "learning_rate": 2.39951320737054e-05, + "loss": 0.3204, "step": 65145 }, { "epoch": 3.04, - "learning_rate": 1.3985279639960623e-05, - "loss": 0.0891, + "learning_rate": 2.3994664003869376e-05, + "loss": 0.0631, "step": 65150 }, { "epoch": 3.04, - "learning_rate": 1.3984810838685483e-05, - "loss": 0.0257, + "learning_rate": 2.3994195934033356e-05, + "loss": 0.0224, "step": 65155 }, { "epoch": 3.04, - "learning_rate": 1.3984342037410343e-05, - "loss": 0.0532, + "learning_rate": 2.399372786419734e-05, + "loss": 0.0326, "step": 65160 }, { "epoch": 3.04, - "learning_rate": 1.3983873236135204e-05, - "loss": 0.0476, + "learning_rate": 2.399325979436132e-05, + "loss": 0.0495, "step": 65165 }, { "epoch": 3.04, - "learning_rate": 1.3983404434860064e-05, - "loss": 0.0654, + "learning_rate": 2.39927917245253e-05, + "loss": 0.1167, "step": 65170 }, { "epoch": 3.04, - "learning_rate": 1.3982935633584924e-05, - "loss": 0.0515, + "learning_rate": 2.399232365468928e-05, + "loss": 0.0982, "step": 65175 }, { "epoch": 3.04, - "learning_rate": 1.3982466832309784e-05, - "loss": 0.1277, + "learning_rate": 2.399185558485326e-05, + "loss": 0.1403, "step": 65180 }, { "epoch": 3.04, - "learning_rate": 1.3981998031034646e-05, - "loss": 0.0926, + "learning_rate": 2.399138751501724e-05, + "loss": 0.0675, "step": 65185 }, { "epoch": 3.04, - "learning_rate": 1.3981529229759506e-05, - "loss": 0.1293, + "learning_rate": 2.399091944518122e-05, + "loss": 0.1292, "step": 65190 }, { "epoch": 3.04, - "learning_rate": 1.3981060428484367e-05, - "loss": 0.343, + "learning_rate": 2.3990451375345204e-05, + "loss": 0.3208, "step": 65195 }, { "epoch": 3.04, - "learning_rate": 1.3980591627209227e-05, - "loss": 0.0822, + "learning_rate": 2.3989983305509184e-05, + "loss": 0.0662, "step": 65200 }, { "epoch": 3.04, - "learning_rate": 1.3980122825934089e-05, - "loss": 0.0387, + "learning_rate": 2.3989515235673164e-05, + "loss": 0.0247, "step": 65205 }, { "epoch": 3.04, - "learning_rate": 1.3979654024658949e-05, - "loss": 0.02, + "learning_rate": 2.3989047165837144e-05, + "loss": 0.0706, "step": 65210 }, { "epoch": 3.04, - "learning_rate": 1.3979185223383809e-05, - "loss": 0.0687, + "learning_rate": 2.3988579096001124e-05, + "loss": 0.0441, "step": 65215 }, { "epoch": 3.04, - "learning_rate": 1.3978716422108669e-05, - "loss": 0.1311, + "learning_rate": 2.3988111026165104e-05, + "loss": 0.0527, "step": 65220 }, { "epoch": 3.04, - "learning_rate": 1.397824762083353e-05, - "loss": 0.0949, + "learning_rate": 2.3987642956329083e-05, + "loss": 0.0816, "step": 65225 }, { "epoch": 3.04, - "learning_rate": 1.397777881955839e-05, - "loss": 0.0685, + "learning_rate": 2.3987174886493063e-05, + "loss": 0.074, "step": 65230 }, { "epoch": 3.04, - "learning_rate": 1.397731001828325e-05, - "loss": 0.0929, + "learning_rate": 2.3986706816657046e-05, + "loss": 0.1659, "step": 65235 }, { "epoch": 3.04, - "learning_rate": 1.3976841217008112e-05, - "loss": 0.2325, + "learning_rate": 2.3986238746821026e-05, + "loss": 0.2156, "step": 65240 }, { "epoch": 3.04, - "learning_rate": 1.3976372415732973e-05, - "loss": 0.2006, + "learning_rate": 2.3985770676985006e-05, + "loss": 0.2172, "step": 65245 }, { "epoch": 3.04, - "learning_rate": 1.3975903614457833e-05, - "loss": 0.0695, + "learning_rate": 2.3985302607148986e-05, + "loss": 0.0979, "step": 65250 }, { "epoch": 3.04, - "learning_rate": 1.3975434813182693e-05, - "loss": 0.0692, + "learning_rate": 2.398483453731297e-05, + "loss": 0.0541, "step": 65255 }, { "epoch": 3.05, - "learning_rate": 1.3974966011907553e-05, - "loss": 0.0339, + "learning_rate": 2.398436646747695e-05, + "loss": 0.0368, "step": 65260 }, { "epoch": 3.05, - "learning_rate": 1.3974497210632415e-05, - "loss": 0.023, + "learning_rate": 2.398389839764093e-05, + "loss": 0.0618, "step": 65265 }, { "epoch": 3.05, - "learning_rate": 1.3974028409357275e-05, - "loss": 0.1405, + "learning_rate": 2.3983430327804912e-05, + "loss": 0.1031, "step": 65270 }, { "epoch": 3.05, - "learning_rate": 1.3973559608082135e-05, - "loss": 0.1026, + "learning_rate": 2.3982962257968892e-05, + "loss": 0.0578, "step": 65275 }, { "epoch": 3.05, - "learning_rate": 1.3973090806806995e-05, - "loss": 0.1097, + "learning_rate": 2.3982494188132868e-05, + "loss": 0.0752, "step": 65280 }, { "epoch": 3.05, - "learning_rate": 1.3972622005531854e-05, - "loss": 0.2419, + "learning_rate": 2.3982026118296848e-05, + "loss": 0.1096, "step": 65285 }, { "epoch": 3.05, - "learning_rate": 1.3972153204256718e-05, - "loss": 0.1716, + "learning_rate": 2.398155804846083e-05, + "loss": 0.1155, "step": 65290 }, { "epoch": 3.05, - "learning_rate": 1.3971684402981578e-05, - "loss": 0.2981, + "learning_rate": 2.398108997862481e-05, + "loss": 0.2076, "step": 65295 }, { "epoch": 3.05, - "learning_rate": 1.3971215601706438e-05, - "loss": 0.0676, + "learning_rate": 2.398062190878879e-05, + "loss": 0.0557, "step": 65300 }, { "epoch": 3.05, - "learning_rate": 1.39707468004313e-05, - "loss": 0.0832, + "learning_rate": 2.398015383895277e-05, + "loss": 0.0363, "step": 65305 }, { "epoch": 3.05, - "learning_rate": 1.397027799915616e-05, - "loss": 0.0183, + "learning_rate": 2.3979685769116754e-05, + "loss": 0.0965, "step": 65310 }, { "epoch": 3.05, - "learning_rate": 1.396980919788102e-05, - "loss": 0.0464, + "learning_rate": 2.3979217699280734e-05, + "loss": 0.0371, "step": 65315 }, { "epoch": 3.05, - "learning_rate": 1.3969340396605879e-05, - "loss": 0.0661, + "learning_rate": 2.3978749629444713e-05, + "loss": 0.0716, "step": 65320 }, { "epoch": 3.05, - "learning_rate": 1.3968871595330739e-05, - "loss": 0.0894, + "learning_rate": 2.3978281559608697e-05, + "loss": 0.0749, "step": 65325 }, { "epoch": 3.05, - "learning_rate": 1.39684027940556e-05, - "loss": 0.1564, + "learning_rate": 2.3977813489772676e-05, + "loss": 0.1213, "step": 65330 }, { "epoch": 3.05, - "learning_rate": 1.3967933992780462e-05, - "loss": 0.0455, + "learning_rate": 2.3977345419936656e-05, + "loss": 0.1031, "step": 65335 }, { "epoch": 3.05, - "learning_rate": 1.3967465191505322e-05, - "loss": 0.1823, + "learning_rate": 2.3976877350100633e-05, + "loss": 0.2017, "step": 65340 }, { "epoch": 3.05, - "learning_rate": 1.3966996390230184e-05, - "loss": 0.2856, + "learning_rate": 2.3976409280264616e-05, + "loss": 0.3182, "step": 65345 }, { "epoch": 3.05, - "learning_rate": 1.3966527588955044e-05, - "loss": 0.0937, + "learning_rate": 2.3975941210428596e-05, + "loss": 0.1117, "step": 65350 }, { "epoch": 3.05, - "learning_rate": 1.3966058787679904e-05, - "loss": 0.0362, + "learning_rate": 2.3975473140592576e-05, + "loss": 0.0854, "step": 65355 }, { "epoch": 3.05, - "learning_rate": 1.3965589986404764e-05, - "loss": 0.0686, + "learning_rate": 2.3975005070756555e-05, + "loss": 0.1, "step": 65360 }, { "epoch": 3.05, - "learning_rate": 1.3965121185129624e-05, - "loss": 0.0787, + "learning_rate": 2.397453700092054e-05, + "loss": 0.0419, "step": 65365 }, { "epoch": 3.05, - "learning_rate": 1.3964652383854485e-05, - "loss": 0.109, + "learning_rate": 2.397406893108452e-05, + "loss": 0.0734, "step": 65370 }, { "epoch": 3.05, - "learning_rate": 1.3964183582579345e-05, - "loss": 0.0994, + "learning_rate": 2.3973600861248498e-05, + "loss": 0.1185, "step": 65375 }, { "epoch": 3.05, - "learning_rate": 1.3963714781304207e-05, - "loss": 0.1041, + "learning_rate": 2.397313279141248e-05, + "loss": 0.1313, "step": 65380 }, { "epoch": 3.05, - "learning_rate": 1.3963245980029068e-05, - "loss": 0.0703, + "learning_rate": 2.397266472157646e-05, + "loss": 0.2009, "step": 65385 }, { "epoch": 3.05, - "learning_rate": 1.3962777178753928e-05, - "loss": 0.2074, + "learning_rate": 2.397219665174044e-05, + "loss": 0.2362, "step": 65390 }, { "epoch": 3.05, - "learning_rate": 1.3962308377478788e-05, - "loss": 0.2059, + "learning_rate": 2.397172858190442e-05, + "loss": 0.2012, "step": 65395 }, { "epoch": 3.05, - "learning_rate": 1.3961839576203648e-05, - "loss": 0.0528, + "learning_rate": 2.3971260512068404e-05, + "loss": 0.0686, "step": 65400 }, { "epoch": 3.05, - "learning_rate": 1.3961370774928508e-05, - "loss": 0.1225, + "learning_rate": 2.397079244223238e-05, + "loss": 0.0373, "step": 65405 }, { "epoch": 3.05, - "learning_rate": 1.396090197365337e-05, - "loss": 0.0581, + "learning_rate": 2.397032437239636e-05, + "loss": 0.042, "step": 65410 }, { "epoch": 3.05, - "learning_rate": 1.396043317237823e-05, - "loss": 0.0427, + "learning_rate": 2.396985630256034e-05, + "loss": 0.0756, "step": 65415 }, { "epoch": 3.05, - "learning_rate": 1.395996437110309e-05, - "loss": 0.0252, + "learning_rate": 2.3969388232724323e-05, + "loss": 0.0467, "step": 65420 }, { "epoch": 3.05, - "learning_rate": 1.3959495569827953e-05, - "loss": 0.0547, + "learning_rate": 2.3968920162888303e-05, + "loss": 0.0702, "step": 65425 }, { "epoch": 3.05, - "learning_rate": 1.3959026768552813e-05, - "loss": 0.0974, + "learning_rate": 2.3968452093052283e-05, + "loss": 0.1068, "step": 65430 }, { "epoch": 3.05, - "learning_rate": 1.3958557967277673e-05, - "loss": 0.1627, + "learning_rate": 2.3967984023216263e-05, + "loss": 0.0891, "step": 65435 }, { "epoch": 3.05, - "learning_rate": 1.3958089166002533e-05, - "loss": 0.2218, + "learning_rate": 2.3967515953380246e-05, + "loss": 0.18, "step": 65440 }, { "epoch": 3.05, - "learning_rate": 1.3957620364727393e-05, - "loss": 0.3212, + "learning_rate": 2.3967047883544226e-05, + "loss": 0.1971, "step": 65445 }, { "epoch": 3.05, - "learning_rate": 1.3957151563452254e-05, - "loss": 0.075, + "learning_rate": 2.3966579813708206e-05, + "loss": 0.0535, "step": 65450 }, { "epoch": 3.05, - "learning_rate": 1.3956682762177114e-05, - "loss": 0.0222, + "learning_rate": 2.396611174387219e-05, + "loss": 0.0225, "step": 65455 }, { "epoch": 3.05, - "learning_rate": 1.3956213960901974e-05, - "loss": 0.0682, + "learning_rate": 2.396564367403617e-05, + "loss": 0.0284, "step": 65460 }, { "epoch": 3.05, - "learning_rate": 1.3955745159626834e-05, - "loss": 0.0816, + "learning_rate": 2.396517560420015e-05, + "loss": 0.0714, "step": 65465 }, { "epoch": 3.05, - "learning_rate": 1.3955276358351694e-05, - "loss": 0.1097, + "learning_rate": 2.3964707534364125e-05, + "loss": 0.0963, "step": 65470 }, { "epoch": 3.06, - "learning_rate": 1.3954807557076557e-05, - "loss": 0.0523, + "learning_rate": 2.3964239464528108e-05, + "loss": 0.1264, "step": 65475 }, { "epoch": 3.06, - "learning_rate": 1.3954338755801417e-05, - "loss": 0.08, + "learning_rate": 2.3963771394692088e-05, + "loss": 0.0836, "step": 65480 }, { "epoch": 3.06, - "learning_rate": 1.3953869954526277e-05, - "loss": 0.163, + "learning_rate": 2.3963303324856068e-05, + "loss": 0.1145, "step": 65485 }, { "epoch": 3.06, - "learning_rate": 1.3953401153251139e-05, - "loss": 0.0968, + "learning_rate": 2.3962835255020048e-05, + "loss": 0.1625, "step": 65490 }, { "epoch": 3.06, - "learning_rate": 1.3952932351975999e-05, - "loss": 0.2553, + "learning_rate": 2.396236718518403e-05, + "loss": 0.273, "step": 65495 }, { "epoch": 3.06, - "learning_rate": 1.3952463550700859e-05, - "loss": 0.0787, + "learning_rate": 2.396189911534801e-05, + "loss": 0.0755, "step": 65500 }, { "epoch": 3.06, - "learning_rate": 1.3951994749425719e-05, - "loss": 0.0242, + "learning_rate": 2.396143104551199e-05, + "loss": 0.0184, "step": 65505 }, { "epoch": 3.06, - "learning_rate": 1.3951525948150579e-05, - "loss": 0.0399, + "learning_rate": 2.3960962975675974e-05, + "loss": 0.0235, "step": 65510 }, { "epoch": 3.06, - "learning_rate": 1.395105714687544e-05, - "loss": 0.0426, + "learning_rate": 2.3960494905839953e-05, + "loss": 0.0845, "step": 65515 }, { "epoch": 3.06, - "learning_rate": 1.3950588345600302e-05, - "loss": 0.1501, + "learning_rate": 2.3960026836003933e-05, + "loss": 0.0795, "step": 65520 }, { "epoch": 3.06, - "learning_rate": 1.3950119544325162e-05, - "loss": 0.0746, + "learning_rate": 2.3959558766167913e-05, + "loss": 0.1228, "step": 65525 }, { "epoch": 3.06, - "learning_rate": 1.3949650743050023e-05, - "loss": 0.1603, + "learning_rate": 2.3959090696331893e-05, + "loss": 0.0823, "step": 65530 }, { "epoch": 3.06, - "learning_rate": 1.3949181941774883e-05, - "loss": 0.1407, + "learning_rate": 2.3958622626495873e-05, + "loss": 0.1894, "step": 65535 }, { "epoch": 3.06, - "learning_rate": 1.3948713140499743e-05, - "loss": 0.2105, + "learning_rate": 2.3958154556659853e-05, + "loss": 0.162, "step": 65540 }, { "epoch": 3.06, - "learning_rate": 1.3948244339224603e-05, - "loss": 0.2679, + "learning_rate": 2.3957686486823832e-05, + "loss": 0.308, "step": 65545 }, { "epoch": 3.06, - "learning_rate": 1.3947775537949463e-05, - "loss": 0.0636, + "learning_rate": 2.3957218416987816e-05, + "loss": 0.0924, "step": 65550 }, { "epoch": 3.06, - "learning_rate": 1.3947306736674325e-05, - "loss": 0.0332, + "learning_rate": 2.3956750347151795e-05, + "loss": 0.0077, "step": 65555 }, { "epoch": 3.06, - "learning_rate": 1.3946837935399185e-05, - "loss": 0.0484, + "learning_rate": 2.3956282277315775e-05, + "loss": 0.0396, "step": 65560 }, { "epoch": 3.06, - "learning_rate": 1.3946369134124046e-05, - "loss": 0.0443, + "learning_rate": 2.395581420747976e-05, + "loss": 0.0459, "step": 65565 }, { "epoch": 3.06, - "learning_rate": 1.3945900332848908e-05, - "loss": 0.0765, + "learning_rate": 2.3955346137643738e-05, + "loss": 0.0866, "step": 65570 }, { "epoch": 3.06, - "learning_rate": 1.3945431531573768e-05, - "loss": 0.087, + "learning_rate": 2.3954878067807718e-05, + "loss": 0.1221, "step": 65575 }, { "epoch": 3.06, - "learning_rate": 1.3944962730298628e-05, - "loss": 0.1172, + "learning_rate": 2.3954409997971698e-05, + "loss": 0.0897, "step": 65580 }, { "epoch": 3.06, - "learning_rate": 1.3944493929023488e-05, - "loss": 0.1665, + "learning_rate": 2.395394192813568e-05, + "loss": 0.0828, "step": 65585 }, { "epoch": 3.06, - "learning_rate": 1.3944025127748348e-05, - "loss": 0.258, + "learning_rate": 2.395347385829966e-05, + "loss": 0.1199, "step": 65590 }, { "epoch": 3.06, - "learning_rate": 1.394355632647321e-05, - "loss": 0.3192, + "learning_rate": 2.3953005788463637e-05, + "loss": 0.3063, "step": 65595 }, { "epoch": 3.06, - "learning_rate": 1.394308752519807e-05, - "loss": 0.0779, + "learning_rate": 2.3952537718627617e-05, + "loss": 0.0408, "step": 65600 }, { "epoch": 3.06, - "learning_rate": 1.394261872392293e-05, - "loss": 0.0351, + "learning_rate": 2.39520696487916e-05, + "loss": 0.0307, "step": 65605 }, { "epoch": 3.06, - "learning_rate": 1.3942149922647789e-05, - "loss": 0.0668, + "learning_rate": 2.395160157895558e-05, + "loss": 0.0725, "step": 65610 }, { "epoch": 3.06, - "learning_rate": 1.3941681121372652e-05, - "loss": 0.0433, + "learning_rate": 2.395113350911956e-05, + "loss": 0.0362, "step": 65615 }, { "epoch": 3.06, - "learning_rate": 1.3941212320097512e-05, - "loss": 0.0631, + "learning_rate": 2.3950665439283543e-05, + "loss": 0.085, "step": 65620 }, { "epoch": 3.06, - "learning_rate": 1.3940743518822372e-05, - "loss": 0.1735, + "learning_rate": 2.3950197369447523e-05, + "loss": 0.1472, "step": 65625 }, { "epoch": 3.06, - "learning_rate": 1.3940274717547232e-05, - "loss": 0.154, + "learning_rate": 2.3949729299611503e-05, + "loss": 0.1104, "step": 65630 }, { "epoch": 3.06, - "learning_rate": 1.3939805916272094e-05, - "loss": 0.1654, + "learning_rate": 2.3949261229775483e-05, + "loss": 0.2074, "step": 65635 }, { "epoch": 3.06, - "learning_rate": 1.3939337114996954e-05, - "loss": 0.1955, + "learning_rate": 2.3948793159939466e-05, + "loss": 0.1352, "step": 65640 }, { "epoch": 3.06, - "learning_rate": 1.3938868313721814e-05, - "loss": 0.2966, + "learning_rate": 2.3948325090103446e-05, + "loss": 0.3035, "step": 65645 }, { "epoch": 3.06, - "learning_rate": 1.3938399512446674e-05, - "loss": 0.0721, + "learning_rate": 2.3947857020267425e-05, + "loss": 0.0611, "step": 65650 }, { "epoch": 3.06, - "learning_rate": 1.3937930711171535e-05, - "loss": 0.106, + "learning_rate": 2.3947388950431405e-05, + "loss": 0.0347, "step": 65655 }, { "epoch": 3.06, - "learning_rate": 1.3937461909896397e-05, - "loss": 0.0779, + "learning_rate": 2.3946920880595385e-05, + "loss": 0.0389, "step": 65660 }, { "epoch": 3.06, - "learning_rate": 1.3936993108621257e-05, - "loss": 0.0368, + "learning_rate": 2.3946452810759365e-05, + "loss": 0.0757, "step": 65665 }, { "epoch": 3.06, - "learning_rate": 1.3936524307346117e-05, - "loss": 0.0609, + "learning_rate": 2.3945984740923345e-05, + "loss": 0.0737, "step": 65670 }, { "epoch": 3.06, - "learning_rate": 1.3936055506070978e-05, - "loss": 0.073, + "learning_rate": 2.3945516671087325e-05, + "loss": 0.1378, "step": 65675 }, { "epoch": 3.06, - "learning_rate": 1.3935586704795838e-05, - "loss": 0.1176, + "learning_rate": 2.3945048601251308e-05, + "loss": 0.1356, "step": 65680 }, { "epoch": 3.06, - "learning_rate": 1.3935117903520698e-05, - "loss": 0.2023, + "learning_rate": 2.3944580531415288e-05, + "loss": 0.1128, "step": 65685 }, { "epoch": 3.07, - "learning_rate": 1.3934649102245558e-05, - "loss": 0.1856, + "learning_rate": 2.3944112461579267e-05, + "loss": 0.2542, "step": 65690 }, { "epoch": 3.07, - "learning_rate": 1.393418030097042e-05, - "loss": 0.2884, + "learning_rate": 2.394364439174325e-05, + "loss": 0.2818, "step": 65695 }, { "epoch": 3.07, - "learning_rate": 1.393371149969528e-05, - "loss": 0.0588, + "learning_rate": 2.394317632190723e-05, + "loss": 0.0441, "step": 65700 }, { "epoch": 3.07, - "learning_rate": 1.3933242698420141e-05, - "loss": 0.0409, + "learning_rate": 2.394270825207121e-05, + "loss": 0.0138, "step": 65705 }, { "epoch": 3.07, - "learning_rate": 1.3932773897145001e-05, - "loss": 0.0542, + "learning_rate": 2.394224018223519e-05, + "loss": 0.0353, "step": 65710 }, { "epoch": 3.07, - "learning_rate": 1.3932305095869863e-05, - "loss": 0.0748, + "learning_rate": 2.3941772112399173e-05, + "loss": 0.0604, "step": 65715 }, { "epoch": 3.07, - "learning_rate": 1.3931836294594723e-05, - "loss": 0.0393, + "learning_rate": 2.394130404256315e-05, + "loss": 0.1403, "step": 65720 }, { "epoch": 3.07, - "learning_rate": 1.3931367493319583e-05, - "loss": 0.0428, + "learning_rate": 2.394083597272713e-05, + "loss": 0.1039, "step": 65725 }, { "epoch": 3.07, - "learning_rate": 1.3930898692044443e-05, - "loss": 0.1729, + "learning_rate": 2.394036790289111e-05, + "loss": 0.1394, "step": 65730 }, { "epoch": 3.07, - "learning_rate": 1.3930429890769304e-05, - "loss": 0.1347, + "learning_rate": 2.3939899833055093e-05, + "loss": 0.1129, "step": 65735 }, { "epoch": 3.07, - "learning_rate": 1.3929961089494164e-05, - "loss": 0.1727, + "learning_rate": 2.3939431763219072e-05, + "loss": 0.1712, "step": 65740 }, { "epoch": 3.07, - "learning_rate": 1.3929492288219024e-05, - "loss": 0.3092, + "learning_rate": 2.3938963693383052e-05, + "loss": 0.2501, "step": 65745 }, { "epoch": 3.07, - "learning_rate": 1.3929023486943886e-05, - "loss": 0.0617, + "learning_rate": 2.3938495623547035e-05, + "loss": 0.0447, "step": 65750 }, { "epoch": 3.07, - "learning_rate": 1.3928554685668748e-05, - "loss": 0.0284, + "learning_rate": 2.3938027553711015e-05, + "loss": 0.0243, "step": 65755 }, { "epoch": 3.07, - "learning_rate": 1.3928085884393607e-05, - "loss": 0.0499, + "learning_rate": 2.3937559483874995e-05, + "loss": 0.0689, "step": 65760 }, { "epoch": 3.07, - "learning_rate": 1.3927617083118467e-05, - "loss": 0.0908, + "learning_rate": 2.3937091414038975e-05, + "loss": 0.0681, "step": 65765 }, { "epoch": 3.07, - "learning_rate": 1.3927148281843327e-05, - "loss": 0.0661, + "learning_rate": 2.3936623344202958e-05, + "loss": 0.083, "step": 65770 }, { "epoch": 3.07, - "learning_rate": 1.3926679480568189e-05, - "loss": 0.0754, + "learning_rate": 2.3936155274366938e-05, + "loss": 0.1083, "step": 65775 }, { "epoch": 3.07, - "learning_rate": 1.3926210679293049e-05, - "loss": 0.2077, + "learning_rate": 2.3935687204530918e-05, + "loss": 0.1524, "step": 65780 }, { "epoch": 3.07, - "learning_rate": 1.3925741878017909e-05, - "loss": 0.2175, + "learning_rate": 2.3935219134694894e-05, + "loss": 0.149, "step": 65785 }, { "epoch": 3.07, - "learning_rate": 1.3925273076742769e-05, - "loss": 0.1797, + "learning_rate": 2.3934751064858877e-05, + "loss": 0.1533, "step": 65790 }, { "epoch": 3.07, - "learning_rate": 1.3924804275467629e-05, - "loss": 0.2083, + "learning_rate": 2.3934282995022857e-05, + "loss": 0.2894, "step": 65795 }, { "epoch": 3.07, - "learning_rate": 1.3924335474192492e-05, - "loss": 0.0658, + "learning_rate": 2.3933814925186837e-05, + "loss": 0.0775, "step": 65800 }, { "epoch": 3.07, - "learning_rate": 1.3923866672917352e-05, - "loss": 0.0514, + "learning_rate": 2.393334685535082e-05, + "loss": 0.0298, "step": 65805 }, { "epoch": 3.07, - "learning_rate": 1.3923397871642212e-05, - "loss": 0.06, + "learning_rate": 2.39328787855148e-05, + "loss": 0.0632, "step": 65810 }, { "epoch": 3.07, - "learning_rate": 1.3922929070367074e-05, - "loss": 0.0353, + "learning_rate": 2.393241071567878e-05, + "loss": 0.0409, "step": 65815 }, { "epoch": 3.07, - "learning_rate": 1.3922460269091933e-05, - "loss": 0.0438, + "learning_rate": 2.393194264584276e-05, + "loss": 0.0471, "step": 65820 }, { "epoch": 3.07, - "learning_rate": 1.3921991467816793e-05, - "loss": 0.1012, + "learning_rate": 2.3931474576006743e-05, + "loss": 0.0748, "step": 65825 }, { "epoch": 3.07, - "learning_rate": 1.3921522666541653e-05, - "loss": 0.0892, + "learning_rate": 2.3931006506170723e-05, + "loss": 0.0793, "step": 65830 }, { "epoch": 3.07, - "learning_rate": 1.3921053865266513e-05, - "loss": 0.151, + "learning_rate": 2.3930538436334702e-05, + "loss": 0.071, "step": 65835 }, { "epoch": 3.07, - "learning_rate": 1.3920585063991375e-05, - "loss": 0.2743, + "learning_rate": 2.3930070366498682e-05, + "loss": 0.1585, "step": 65840 }, { "epoch": 3.07, - "learning_rate": 1.3920116262716236e-05, - "loss": 0.3603, + "learning_rate": 2.3929602296662662e-05, + "loss": 0.3125, "step": 65845 }, { "epoch": 3.07, - "learning_rate": 1.3919647461441096e-05, - "loss": 0.0907, + "learning_rate": 2.3929134226826642e-05, + "loss": 0.1144, "step": 65850 }, { "epoch": 3.07, - "learning_rate": 1.3919178660165958e-05, - "loss": 0.0014, + "learning_rate": 2.3928666156990622e-05, + "loss": 0.0657, "step": 65855 }, { "epoch": 3.07, - "learning_rate": 1.3918709858890818e-05, - "loss": 0.0464, + "learning_rate": 2.39281980871546e-05, + "loss": 0.0504, "step": 65860 }, { "epoch": 3.07, - "learning_rate": 1.3918241057615678e-05, - "loss": 0.0833, + "learning_rate": 2.3927730017318585e-05, + "loss": 0.0371, "step": 65865 }, { "epoch": 3.07, - "learning_rate": 1.3917772256340538e-05, - "loss": 0.0198, + "learning_rate": 2.3927261947482565e-05, + "loss": 0.0423, "step": 65870 }, { "epoch": 3.07, - "learning_rate": 1.3917303455065398e-05, - "loss": 0.0867, + "learning_rate": 2.3926793877646544e-05, + "loss": 0.1003, "step": 65875 }, { "epoch": 3.07, - "learning_rate": 1.391683465379026e-05, - "loss": 0.1497, + "learning_rate": 2.3926325807810528e-05, + "loss": 0.0709, "step": 65880 }, { "epoch": 3.07, - "learning_rate": 1.391636585251512e-05, - "loss": 0.1157, + "learning_rate": 2.3925857737974507e-05, + "loss": 0.1189, "step": 65885 }, { "epoch": 3.07, - "learning_rate": 1.3915897051239981e-05, - "loss": 0.1333, + "learning_rate": 2.3925389668138487e-05, + "loss": 0.2205, "step": 65890 }, { "epoch": 3.07, - "learning_rate": 1.3915428249964843e-05, - "loss": 0.3905, + "learning_rate": 2.3924921598302467e-05, + "loss": 0.2774, "step": 65895 }, { "epoch": 3.07, - "learning_rate": 1.3914959448689703e-05, - "loss": 0.0356, + "learning_rate": 2.392445352846645e-05, + "loss": 0.0886, "step": 65900 }, { "epoch": 3.08, - "learning_rate": 1.3914490647414562e-05, - "loss": 0.0283, + "learning_rate": 2.392398545863043e-05, + "loss": 0.0093, "step": 65905 }, { "epoch": 3.08, - "learning_rate": 1.3914021846139422e-05, - "loss": 0.0624, + "learning_rate": 2.3923517388794406e-05, + "loss": 0.0463, "step": 65910 }, { "epoch": 3.08, - "learning_rate": 1.3913553044864282e-05, - "loss": 0.0387, + "learning_rate": 2.3923049318958386e-05, + "loss": 0.0254, "step": 65915 }, { "epoch": 3.08, - "learning_rate": 1.3913084243589144e-05, - "loss": 0.1258, + "learning_rate": 2.392258124912237e-05, + "loss": 0.043, "step": 65920 }, { "epoch": 3.08, - "learning_rate": 1.3912615442314004e-05, - "loss": 0.0784, + "learning_rate": 2.392211317928635e-05, + "loss": 0.0593, "step": 65925 }, { "epoch": 3.08, - "learning_rate": 1.3912146641038864e-05, - "loss": 0.0804, + "learning_rate": 2.392164510945033e-05, + "loss": 0.0684, "step": 65930 }, { "epoch": 3.08, - "learning_rate": 1.3911677839763724e-05, - "loss": 0.1573, + "learning_rate": 2.3921177039614312e-05, + "loss": 0.2234, "step": 65935 }, { "epoch": 3.08, - "learning_rate": 1.3911209038488587e-05, - "loss": 0.2832, + "learning_rate": 2.3920708969778292e-05, + "loss": 0.2472, "step": 65940 }, { "epoch": 3.08, - "learning_rate": 1.3910740237213447e-05, - "loss": 0.328, + "learning_rate": 2.3920240899942272e-05, + "loss": 0.2372, "step": 65945 }, { "epoch": 3.08, - "learning_rate": 1.3910271435938307e-05, - "loss": 0.0674, + "learning_rate": 2.3919772830106252e-05, + "loss": 0.0973, "step": 65950 }, { "epoch": 3.08, - "learning_rate": 1.3909802634663167e-05, - "loss": 0.0282, + "learning_rate": 2.3919304760270235e-05, + "loss": 0.0231, "step": 65955 }, { "epoch": 3.08, - "learning_rate": 1.3909333833388029e-05, - "loss": 0.018, + "learning_rate": 2.3918836690434215e-05, + "loss": 0.0362, "step": 65960 }, { "epoch": 3.08, - "learning_rate": 1.3908865032112888e-05, - "loss": 0.0294, + "learning_rate": 2.3918368620598195e-05, + "loss": 0.0614, "step": 65965 }, { "epoch": 3.08, - "learning_rate": 1.3908396230837748e-05, - "loss": 0.1143, + "learning_rate": 2.3917900550762174e-05, + "loss": 0.0415, "step": 65970 }, { "epoch": 3.08, - "learning_rate": 1.3907927429562608e-05, - "loss": 0.0887, + "learning_rate": 2.3917432480926154e-05, + "loss": 0.0329, "step": 65975 }, { "epoch": 3.08, - "learning_rate": 1.3907458628287468e-05, - "loss": 0.1257, + "learning_rate": 2.3916964411090134e-05, + "loss": 0.0963, "step": 65980 }, { "epoch": 3.08, - "learning_rate": 1.3906989827012332e-05, - "loss": 0.1163, + "learning_rate": 2.3916496341254114e-05, + "loss": 0.1146, "step": 65985 }, { "epoch": 3.08, - "learning_rate": 1.3906521025737192e-05, - "loss": 0.1974, + "learning_rate": 2.3916028271418097e-05, + "loss": 0.2195, "step": 65990 }, { "epoch": 3.08, - "learning_rate": 1.3906052224462051e-05, - "loss": 0.2213, + "learning_rate": 2.3915560201582077e-05, + "loss": 0.2738, "step": 65995 }, { "epoch": 3.08, - "learning_rate": 1.3905583423186913e-05, - "loss": 0.1337, + "learning_rate": 2.3915092131746057e-05, + "loss": 0.058, "step": 66000 }, { "epoch": 3.08, - "learning_rate": 1.3905114621911773e-05, - "loss": 0.0456, + "learning_rate": 2.3914624061910037e-05, + "loss": 0.0307, "step": 66005 }, { "epoch": 3.08, - "learning_rate": 1.3904645820636633e-05, - "loss": 0.0686, + "learning_rate": 2.391415599207402e-05, + "loss": 0.029, "step": 66010 }, { "epoch": 3.08, - "learning_rate": 1.3904177019361493e-05, - "loss": 0.0671, + "learning_rate": 2.3913687922238e-05, + "loss": 0.0934, "step": 66015 }, { "epoch": 3.08, - "learning_rate": 1.3903708218086353e-05, - "loss": 0.0511, + "learning_rate": 2.391321985240198e-05, + "loss": 0.0763, "step": 66020 }, { "epoch": 3.08, - "learning_rate": 1.3903239416811214e-05, - "loss": 0.1142, + "learning_rate": 2.391275178256596e-05, + "loss": 0.0804, "step": 66025 }, { "epoch": 3.08, - "learning_rate": 1.3902770615536076e-05, - "loss": 0.1178, + "learning_rate": 2.3912283712729942e-05, + "loss": 0.1793, "step": 66030 }, { "epoch": 3.08, - "learning_rate": 1.3902301814260936e-05, - "loss": 0.1264, + "learning_rate": 2.391181564289392e-05, + "loss": 0.1332, "step": 66035 }, { "epoch": 3.08, - "learning_rate": 1.3901833012985798e-05, - "loss": 0.1253, + "learning_rate": 2.39113475730579e-05, + "loss": 0.2716, "step": 66040 }, { "epoch": 3.08, - "learning_rate": 1.3901364211710658e-05, - "loss": 0.272, + "learning_rate": 2.391087950322188e-05, + "loss": 0.3148, "step": 66045 }, { "epoch": 3.08, - "learning_rate": 1.3900895410435517e-05, - "loss": 0.072, + "learning_rate": 2.3910411433385862e-05, + "loss": 0.0678, "step": 66050 }, { "epoch": 3.08, - "learning_rate": 1.3900426609160377e-05, - "loss": 0.1135, + "learning_rate": 2.390994336354984e-05, + "loss": 0.0251, "step": 66055 }, { "epoch": 3.08, - "learning_rate": 1.3899957807885237e-05, - "loss": 0.1074, + "learning_rate": 2.390947529371382e-05, + "loss": 0.0156, "step": 66060 }, { "epoch": 3.08, - "learning_rate": 1.3899489006610099e-05, - "loss": 0.0989, + "learning_rate": 2.3909007223877805e-05, + "loss": 0.0901, "step": 66065 }, { "epoch": 3.08, - "learning_rate": 1.3899020205334959e-05, - "loss": 0.068, + "learning_rate": 2.3908539154041784e-05, + "loss": 0.0174, "step": 66070 }, { "epoch": 3.08, - "learning_rate": 1.389855140405982e-05, - "loss": 0.053, + "learning_rate": 2.3908071084205764e-05, + "loss": 0.0894, "step": 66075 }, { "epoch": 3.08, - "learning_rate": 1.3898082602784682e-05, - "loss": 0.1008, + "learning_rate": 2.3907603014369744e-05, + "loss": 0.1206, "step": 66080 }, { "epoch": 3.08, - "learning_rate": 1.3897613801509542e-05, - "loss": 0.0979, + "learning_rate": 2.3907134944533727e-05, + "loss": 0.1085, "step": 66085 }, { "epoch": 3.08, - "learning_rate": 1.3897145000234402e-05, - "loss": 0.1121, + "learning_rate": 2.3906666874697707e-05, + "loss": 0.2234, "step": 66090 }, { "epoch": 3.08, - "learning_rate": 1.3896676198959262e-05, - "loss": 0.2136, + "learning_rate": 2.3906198804861687e-05, + "loss": 0.2585, "step": 66095 }, { "epoch": 3.08, - "learning_rate": 1.3896207397684122e-05, - "loss": 0.0855, + "learning_rate": 2.3905730735025663e-05, + "loss": 0.0557, "step": 66100 }, { "epoch": 3.08, - "learning_rate": 1.3895738596408984e-05, - "loss": 0.0216, + "learning_rate": 2.3905262665189646e-05, + "loss": 0.0414, "step": 66105 }, { "epoch": 3.08, - "learning_rate": 1.3895269795133843e-05, - "loss": 0.0825, + "learning_rate": 2.3904794595353626e-05, + "loss": 0.0444, "step": 66110 }, { "epoch": 3.09, - "learning_rate": 1.3894800993858703e-05, - "loss": 0.019, + "learning_rate": 2.3904326525517606e-05, + "loss": 0.0734, "step": 66115 }, { "epoch": 3.09, - "learning_rate": 1.3894332192583563e-05, - "loss": 0.0268, + "learning_rate": 2.390385845568159e-05, + "loss": 0.052, "step": 66120 }, { "epoch": 3.09, - "learning_rate": 1.3893863391308427e-05, - "loss": 0.0669, + "learning_rate": 2.390339038584557e-05, + "loss": 0.0708, "step": 66125 }, { "epoch": 3.09, - "learning_rate": 1.3893394590033287e-05, - "loss": 0.0652, + "learning_rate": 2.390292231600955e-05, + "loss": 0.1005, "step": 66130 }, { "epoch": 3.09, - "learning_rate": 1.3892925788758147e-05, - "loss": 0.1235, + "learning_rate": 2.390245424617353e-05, + "loss": 0.1568, "step": 66135 }, { "epoch": 3.09, - "learning_rate": 1.3892456987483006e-05, - "loss": 0.2344, + "learning_rate": 2.3901986176337512e-05, + "loss": 0.1586, "step": 66140 }, { "epoch": 3.09, - "learning_rate": 1.3891988186207868e-05, - "loss": 0.2408, + "learning_rate": 2.3901518106501492e-05, + "loss": 0.3318, "step": 66145 }, { "epoch": 3.09, - "learning_rate": 1.3891519384932728e-05, - "loss": 0.0598, + "learning_rate": 2.390105003666547e-05, + "loss": 0.0798, "step": 66150 }, { "epoch": 3.09, - "learning_rate": 1.3891050583657588e-05, - "loss": 0.0141, + "learning_rate": 2.390058196682945e-05, + "loss": 0.0282, "step": 66155 }, { "epoch": 3.09, - "learning_rate": 1.3890581782382448e-05, - "loss": 0.0588, + "learning_rate": 2.3900113896993435e-05, + "loss": 0.0585, "step": 66160 }, { "epoch": 3.09, - "learning_rate": 1.389011298110731e-05, - "loss": 0.0216, + "learning_rate": 2.389964582715741e-05, + "loss": 0.0643, "step": 66165 }, { "epoch": 3.09, - "learning_rate": 1.3889644179832171e-05, - "loss": 0.102, + "learning_rate": 2.389917775732139e-05, + "loss": 0.0433, "step": 66170 }, { "epoch": 3.09, - "learning_rate": 1.3889175378557031e-05, - "loss": 0.058, + "learning_rate": 2.3898709687485374e-05, + "loss": 0.0466, "step": 66175 }, { "epoch": 3.09, - "learning_rate": 1.3888706577281893e-05, - "loss": 0.0824, + "learning_rate": 2.3898241617649354e-05, + "loss": 0.0902, "step": 66180 }, { "epoch": 3.09, - "learning_rate": 1.3888237776006753e-05, - "loss": 0.177, + "learning_rate": 2.3897773547813334e-05, + "loss": 0.1312, "step": 66185 }, { "epoch": 3.09, - "learning_rate": 1.3887768974731613e-05, - "loss": 0.2118, + "learning_rate": 2.3897305477977314e-05, + "loss": 0.1867, "step": 66190 }, { "epoch": 3.09, - "learning_rate": 1.3887300173456473e-05, - "loss": 0.2732, + "learning_rate": 2.3896837408141297e-05, + "loss": 0.2533, "step": 66195 }, { "epoch": 3.09, - "learning_rate": 1.3886831372181332e-05, - "loss": 0.0782, + "learning_rate": 2.3896369338305277e-05, + "loss": 0.1085, "step": 66200 }, { "epoch": 3.09, - "learning_rate": 1.3886362570906194e-05, - "loss": 0.0291, + "learning_rate": 2.3895901268469256e-05, + "loss": 0.0316, "step": 66205 }, { "epoch": 3.09, - "learning_rate": 1.3885893769631054e-05, - "loss": 0.0364, + "learning_rate": 2.3895433198633236e-05, + "loss": 0.0606, "step": 66210 }, { "epoch": 3.09, - "learning_rate": 1.3885424968355916e-05, - "loss": 0.041, + "learning_rate": 2.389496512879722e-05, + "loss": 0.0534, "step": 66215 }, { "epoch": 3.09, - "learning_rate": 1.3884956167080777e-05, - "loss": 0.0625, + "learning_rate": 2.38944970589612e-05, + "loss": 0.1082, "step": 66220 }, { "epoch": 3.09, - "learning_rate": 1.3884487365805637e-05, - "loss": 0.0589, + "learning_rate": 2.3894028989125176e-05, + "loss": 0.0929, "step": 66225 }, { "epoch": 3.09, - "learning_rate": 1.3884018564530497e-05, - "loss": 0.1163, + "learning_rate": 2.389356091928916e-05, + "loss": 0.1482, "step": 66230 }, { "epoch": 3.09, - "learning_rate": 1.3883549763255357e-05, - "loss": 0.0797, + "learning_rate": 2.389309284945314e-05, + "loss": 0.1856, "step": 66235 }, { "epoch": 3.09, - "learning_rate": 1.3883080961980217e-05, - "loss": 0.1096, + "learning_rate": 2.389262477961712e-05, + "loss": 0.1485, "step": 66240 }, { "epoch": 3.09, - "learning_rate": 1.3882612160705079e-05, - "loss": 0.1487, + "learning_rate": 2.38921567097811e-05, + "loss": 0.1826, "step": 66245 }, { "epoch": 3.09, - "learning_rate": 1.3882143359429939e-05, - "loss": 0.0677, + "learning_rate": 2.389168863994508e-05, + "loss": 0.0503, "step": 66250 }, { "epoch": 3.09, - "learning_rate": 1.3881674558154798e-05, - "loss": 0.0646, + "learning_rate": 2.389122057010906e-05, + "loss": 0.0185, "step": 66255 }, { "epoch": 3.09, - "learning_rate": 1.3881205756879658e-05, - "loss": 0.0805, + "learning_rate": 2.389075250027304e-05, + "loss": 0.0451, "step": 66260 }, { "epoch": 3.09, - "learning_rate": 1.3880736955604522e-05, - "loss": 0.1717, + "learning_rate": 2.389028443043702e-05, + "loss": 0.0513, "step": 66265 }, { "epoch": 3.09, - "learning_rate": 1.3880268154329382e-05, - "loss": 0.0432, + "learning_rate": 2.3889816360601004e-05, + "loss": 0.0599, "step": 66270 }, { "epoch": 3.09, - "learning_rate": 1.3879799353054242e-05, - "loss": 0.1104, + "learning_rate": 2.3889348290764984e-05, + "loss": 0.0679, "step": 66275 }, { "epoch": 3.09, - "learning_rate": 1.3879330551779102e-05, - "loss": 0.1144, + "learning_rate": 2.3888880220928964e-05, + "loss": 0.1015, "step": 66280 }, { "epoch": 3.09, - "learning_rate": 1.3878861750503963e-05, - "loss": 0.1326, + "learning_rate": 2.3888412151092944e-05, + "loss": 0.1409, "step": 66285 }, { "epoch": 3.09, - "learning_rate": 1.3878392949228823e-05, - "loss": 0.4025, + "learning_rate": 2.3887944081256923e-05, + "loss": 0.2931, "step": 66290 }, { "epoch": 3.09, - "learning_rate": 1.3877924147953683e-05, - "loss": 0.3015, + "learning_rate": 2.3887476011420903e-05, + "loss": 0.1602, "step": 66295 }, { "epoch": 3.09, - "learning_rate": 1.3877455346678543e-05, - "loss": 0.0762, + "learning_rate": 2.3887007941584883e-05, + "loss": 0.1009, "step": 66300 }, { "epoch": 3.09, - "learning_rate": 1.3876986545403403e-05, - "loss": 0.0335, + "learning_rate": 2.3886539871748866e-05, + "loss": 0.0339, "step": 66305 }, { "epoch": 3.09, - "learning_rate": 1.3876517744128266e-05, - "loss": 0.0364, + "learning_rate": 2.3886071801912846e-05, + "loss": 0.0247, "step": 66310 }, { "epoch": 3.09, - "learning_rate": 1.3876048942853126e-05, - "loss": 0.0831, + "learning_rate": 2.3885603732076826e-05, + "loss": 0.0357, "step": 66315 }, { "epoch": 3.09, - "learning_rate": 1.3875580141577986e-05, - "loss": 0.0909, + "learning_rate": 2.3885135662240806e-05, + "loss": 0.0391, "step": 66320 }, { "epoch": 3.09, - "learning_rate": 1.3875111340302848e-05, - "loss": 0.0958, + "learning_rate": 2.388466759240479e-05, + "loss": 0.0529, "step": 66325 }, { "epoch": 3.1, - "learning_rate": 1.3874642539027708e-05, - "loss": 0.109, + "learning_rate": 2.388419952256877e-05, + "loss": 0.1234, "step": 66330 }, { "epoch": 3.1, - "learning_rate": 1.3874173737752568e-05, - "loss": 0.1368, + "learning_rate": 2.388373145273275e-05, + "loss": 0.1476, "step": 66335 }, { "epoch": 3.1, - "learning_rate": 1.3873704936477428e-05, - "loss": 0.2237, + "learning_rate": 2.388326338289673e-05, + "loss": 0.0748, "step": 66340 }, { "epoch": 3.1, - "learning_rate": 1.3873236135202287e-05, - "loss": 0.3289, + "learning_rate": 2.388279531306071e-05, + "loss": 0.4396, "step": 66345 }, { "epoch": 3.1, - "learning_rate": 1.3872767333927149e-05, - "loss": 0.0626, + "learning_rate": 2.388232724322469e-05, + "loss": 0.0475, "step": 66350 }, { "epoch": 3.1, - "learning_rate": 1.387229853265201e-05, - "loss": 0.0189, + "learning_rate": 2.3881859173388668e-05, + "loss": 0.0214, "step": 66355 }, { "epoch": 3.1, - "learning_rate": 1.387182973137687e-05, - "loss": 0.0907, + "learning_rate": 2.388139110355265e-05, + "loss": 0.0561, "step": 66360 }, { "epoch": 3.1, - "learning_rate": 1.3871360930101732e-05, - "loss": 0.0416, + "learning_rate": 2.388092303371663e-05, + "loss": 0.0989, "step": 66365 }, { "epoch": 3.1, - "learning_rate": 1.3870892128826592e-05, - "loss": 0.0711, + "learning_rate": 2.388045496388061e-05, + "loss": 0.0512, "step": 66370 }, { "epoch": 3.1, - "learning_rate": 1.3870423327551452e-05, - "loss": 0.1234, + "learning_rate": 2.387998689404459e-05, + "loss": 0.0431, "step": 66375 }, { "epoch": 3.1, - "learning_rate": 1.3869954526276312e-05, - "loss": 0.1226, + "learning_rate": 2.3879518824208574e-05, + "loss": 0.1604, "step": 66380 }, { "epoch": 3.1, - "learning_rate": 1.3869485725001172e-05, - "loss": 0.1751, + "learning_rate": 2.3879050754372554e-05, + "loss": 0.0805, "step": 66385 }, { "epoch": 3.1, - "learning_rate": 1.3869016923726034e-05, - "loss": 0.2324, + "learning_rate": 2.3878582684536533e-05, + "loss": 0.2093, "step": 66390 }, { "epoch": 3.1, - "learning_rate": 1.3868548122450894e-05, - "loss": 0.2637, + "learning_rate": 2.3878114614700513e-05, + "loss": 0.3215, "step": 66395 }, { "epoch": 3.1, - "learning_rate": 1.3868079321175755e-05, - "loss": 0.0733, + "learning_rate": 2.3877646544864496e-05, + "loss": 0.0551, "step": 66400 }, { "epoch": 3.1, - "learning_rate": 1.3867610519900617e-05, - "loss": 0.0436, + "learning_rate": 2.3877178475028476e-05, + "loss": 0.0188, "step": 66405 }, { "epoch": 3.1, - "learning_rate": 1.3867141718625477e-05, - "loss": 0.0274, + "learning_rate": 2.3876710405192456e-05, + "loss": 0.0139, "step": 66410 }, { "epoch": 3.1, - "learning_rate": 1.3866672917350337e-05, - "loss": 0.049, + "learning_rate": 2.3876242335356436e-05, + "loss": 0.083, "step": 66415 }, { "epoch": 3.1, - "learning_rate": 1.3866204116075197e-05, - "loss": 0.0902, + "learning_rate": 2.3875774265520416e-05, + "loss": 0.03, "step": 66420 }, { "epoch": 3.1, - "learning_rate": 1.3865735314800057e-05, - "loss": 0.1148, + "learning_rate": 2.3875306195684395e-05, + "loss": 0.0661, "step": 66425 }, { "epoch": 3.1, - "learning_rate": 1.3865266513524918e-05, - "loss": 0.1224, + "learning_rate": 2.3874838125848375e-05, + "loss": 0.1342, "step": 66430 }, { "epoch": 3.1, - "learning_rate": 1.3864797712249778e-05, - "loss": 0.2542, + "learning_rate": 2.387437005601236e-05, + "loss": 0.281, "step": 66435 }, { "epoch": 3.1, - "learning_rate": 1.3864328910974638e-05, - "loss": 0.1721, + "learning_rate": 2.387390198617634e-05, + "loss": 0.2226, "step": 66440 }, { "epoch": 3.1, - "learning_rate": 1.3863860109699498e-05, - "loss": 0.314, + "learning_rate": 2.3873433916340318e-05, + "loss": 0.2609, "step": 66445 }, { "epoch": 3.1, - "learning_rate": 1.3863391308424361e-05, - "loss": 0.0618, + "learning_rate": 2.3872965846504298e-05, + "loss": 0.0536, "step": 66450 }, { "epoch": 3.1, - "learning_rate": 1.3862922507149221e-05, - "loss": 0.0659, + "learning_rate": 2.387249777666828e-05, + "loss": 0.0542, "step": 66455 }, { "epoch": 3.1, - "learning_rate": 1.3862453705874081e-05, - "loss": 0.0642, + "learning_rate": 2.387202970683226e-05, + "loss": 0.0082, "step": 66460 }, { "epoch": 3.1, - "learning_rate": 1.3861984904598941e-05, - "loss": 0.0547, + "learning_rate": 2.387156163699624e-05, + "loss": 0.0605, "step": 66465 }, { "epoch": 3.1, - "learning_rate": 1.3861516103323803e-05, - "loss": 0.0489, + "learning_rate": 2.387109356716022e-05, + "loss": 0.0464, "step": 66470 }, { "epoch": 3.1, - "learning_rate": 1.3861047302048663e-05, - "loss": 0.1195, + "learning_rate": 2.3870625497324204e-05, + "loss": 0.0606, "step": 66475 }, { "epoch": 3.1, - "learning_rate": 1.3860578500773523e-05, - "loss": 0.2332, + "learning_rate": 2.387015742748818e-05, + "loss": 0.0748, "step": 66480 }, { "epoch": 3.1, - "learning_rate": 1.3860109699498383e-05, - "loss": 0.1851, + "learning_rate": 2.386968935765216e-05, + "loss": 0.1158, "step": 66485 }, { "epoch": 3.1, - "learning_rate": 1.3859640898223242e-05, - "loss": 0.142, + "learning_rate": 2.3869221287816143e-05, + "loss": 0.2137, "step": 66490 }, { "epoch": 3.1, - "learning_rate": 1.3859172096948106e-05, - "loss": 0.2409, + "learning_rate": 2.3868753217980123e-05, + "loss": 0.3546, "step": 66495 }, { "epoch": 3.1, - "learning_rate": 1.3858703295672966e-05, - "loss": 0.0916, + "learning_rate": 2.3868285148144103e-05, + "loss": 0.0519, "step": 66500 }, { "epoch": 3.1, - "learning_rate": 1.3858234494397826e-05, - "loss": 0.025, + "learning_rate": 2.3867817078308083e-05, + "loss": 0.0583, "step": 66505 }, { "epoch": 3.1, - "learning_rate": 1.3857765693122687e-05, - "loss": 0.0263, + "learning_rate": 2.3867349008472066e-05, + "loss": 0.0505, "step": 66510 }, { "epoch": 3.1, - "learning_rate": 1.3857296891847547e-05, - "loss": 0.1125, + "learning_rate": 2.3866880938636046e-05, + "loss": 0.0721, "step": 66515 }, { "epoch": 3.1, - "learning_rate": 1.3856828090572407e-05, - "loss": 0.0423, + "learning_rate": 2.3866412868800026e-05, + "loss": 0.1071, "step": 66520 }, { "epoch": 3.1, - "learning_rate": 1.3856359289297267e-05, - "loss": 0.1117, + "learning_rate": 2.3865944798964005e-05, + "loss": 0.0612, "step": 66525 }, { "epoch": 3.1, - "learning_rate": 1.3855890488022129e-05, - "loss": 0.1736, + "learning_rate": 2.386547672912799e-05, + "loss": 0.1165, "step": 66530 }, { "epoch": 3.1, - "learning_rate": 1.3855421686746989e-05, - "loss": 0.1654, + "learning_rate": 2.386500865929197e-05, + "loss": 0.2055, "step": 66535 }, { "epoch": 3.1, - "learning_rate": 1.385495288547185e-05, - "loss": 0.2988, + "learning_rate": 2.3864540589455945e-05, + "loss": 0.1883, "step": 66540 }, { "epoch": 3.11, - "learning_rate": 1.385448408419671e-05, - "loss": 0.4467, + "learning_rate": 2.3864072519619928e-05, + "loss": 0.229, "step": 66545 }, { "epoch": 3.11, - "learning_rate": 1.3854015282921572e-05, - "loss": 0.0604, + "learning_rate": 2.3863604449783908e-05, + "loss": 0.0818, "step": 66550 }, { "epoch": 3.11, - "learning_rate": 1.3853546481646432e-05, - "loss": 0.029, + "learning_rate": 2.3863136379947888e-05, + "loss": 0.022, "step": 66555 }, { "epoch": 3.11, - "learning_rate": 1.3853077680371292e-05, - "loss": 0.0642, + "learning_rate": 2.3862668310111867e-05, + "loss": 0.054, "step": 66560 }, { "epoch": 3.11, - "learning_rate": 1.3852608879096152e-05, - "loss": 0.0539, + "learning_rate": 2.386220024027585e-05, + "loss": 0.0657, "step": 66565 }, { "epoch": 3.11, - "learning_rate": 1.3852140077821013e-05, - "loss": 0.054, + "learning_rate": 2.386173217043983e-05, + "loss": 0.0774, "step": 66570 }, { "epoch": 3.11, - "learning_rate": 1.3851671276545873e-05, - "loss": 0.0307, + "learning_rate": 2.386126410060381e-05, + "loss": 0.0853, "step": 66575 }, { "epoch": 3.11, - "learning_rate": 1.3851202475270733e-05, - "loss": 0.0699, + "learning_rate": 2.386079603076779e-05, + "loss": 0.1299, "step": 66580 }, { "epoch": 3.11, - "learning_rate": 1.3850733673995593e-05, - "loss": 0.0521, + "learning_rate": 2.3860327960931773e-05, + "loss": 0.054, "step": 66585 }, { "epoch": 3.11, - "learning_rate": 1.3850264872720456e-05, - "loss": 0.2003, + "learning_rate": 2.3859859891095753e-05, + "loss": 0.2467, "step": 66590 }, { "epoch": 3.11, - "learning_rate": 1.3849796071445316e-05, - "loss": 0.2092, + "learning_rate": 2.3859391821259733e-05, + "loss": 0.2199, "step": 66595 }, { "epoch": 3.11, - "learning_rate": 1.3849327270170176e-05, - "loss": 0.0655, + "learning_rate": 2.3858923751423716e-05, + "loss": 0.0498, "step": 66600 }, { "epoch": 3.11, - "learning_rate": 1.3848858468895036e-05, - "loss": 0.057, + "learning_rate": 2.3858455681587693e-05, + "loss": 0.0339, "step": 66605 }, { "epoch": 3.11, - "learning_rate": 1.3848389667619898e-05, - "loss": 0.0298, + "learning_rate": 2.3857987611751672e-05, + "loss": 0.0318, "step": 66610 }, { "epoch": 3.11, - "learning_rate": 1.3847920866344758e-05, - "loss": 0.0235, + "learning_rate": 2.3857519541915652e-05, + "loss": 0.0529, "step": 66615 }, { "epoch": 3.11, - "learning_rate": 1.3847452065069618e-05, - "loss": 0.1046, + "learning_rate": 2.3857051472079635e-05, + "loss": 0.0675, "step": 66620 }, { "epoch": 3.11, - "learning_rate": 1.3846983263794478e-05, - "loss": 0.0526, + "learning_rate": 2.3856583402243615e-05, + "loss": 0.0741, "step": 66625 }, { "epoch": 3.11, - "learning_rate": 1.3846514462519338e-05, - "loss": 0.1352, + "learning_rate": 2.3856115332407595e-05, + "loss": 0.1601, "step": 66630 }, { "epoch": 3.11, - "learning_rate": 1.3846045661244201e-05, - "loss": 0.1467, + "learning_rate": 2.3855647262571575e-05, + "loss": 0.2362, "step": 66635 }, { "epoch": 3.11, - "learning_rate": 1.384557685996906e-05, - "loss": 0.3234, + "learning_rate": 2.3855179192735558e-05, + "loss": 0.1321, "step": 66640 }, { "epoch": 3.11, - "learning_rate": 1.384510805869392e-05, - "loss": 0.4519, + "learning_rate": 2.3854711122899538e-05, + "loss": 0.2361, "step": 66645 }, { "epoch": 3.11, - "learning_rate": 1.3844639257418782e-05, - "loss": 0.0615, + "learning_rate": 2.3854243053063518e-05, + "loss": 0.0329, "step": 66650 }, { "epoch": 3.11, - "learning_rate": 1.3844170456143642e-05, - "loss": 0.0226, + "learning_rate": 2.3853774983227498e-05, + "loss": 0.0214, "step": 66655 }, { "epoch": 3.11, - "learning_rate": 1.3843701654868502e-05, - "loss": 0.0299, + "learning_rate": 2.385330691339148e-05, + "loss": 0.1279, "step": 66660 }, { "epoch": 3.11, - "learning_rate": 1.3843232853593362e-05, - "loss": 0.0906, + "learning_rate": 2.385283884355546e-05, + "loss": 0.0511, "step": 66665 }, { "epoch": 3.11, - "learning_rate": 1.3842764052318222e-05, - "loss": 0.1091, + "learning_rate": 2.3852370773719437e-05, + "loss": 0.0549, "step": 66670 }, { "epoch": 3.11, - "learning_rate": 1.3842295251043084e-05, - "loss": 0.0796, + "learning_rate": 2.385190270388342e-05, + "loss": 0.1078, "step": 66675 }, { "epoch": 3.11, - "learning_rate": 1.3841826449767945e-05, - "loss": 0.0716, + "learning_rate": 2.38514346340474e-05, + "loss": 0.1392, "step": 66680 }, { "epoch": 3.11, - "learning_rate": 1.3841357648492805e-05, - "loss": 0.2111, + "learning_rate": 2.385096656421138e-05, + "loss": 0.1236, "step": 66685 }, { "epoch": 3.11, - "learning_rate": 1.3840888847217667e-05, - "loss": 0.2017, + "learning_rate": 2.385049849437536e-05, + "loss": 0.2692, "step": 66690 }, { "epoch": 3.11, - "learning_rate": 1.3840420045942527e-05, - "loss": 0.2499, + "learning_rate": 2.3850030424539343e-05, + "loss": 0.3092, "step": 66695 }, { "epoch": 3.11, - "learning_rate": 1.3839951244667387e-05, - "loss": 0.0438, + "learning_rate": 2.3849562354703323e-05, + "loss": 0.0941, "step": 66700 }, { "epoch": 3.11, - "learning_rate": 1.3839482443392247e-05, - "loss": 0.0432, + "learning_rate": 2.3849094284867303e-05, + "loss": 0.0253, "step": 66705 }, { "epoch": 3.11, - "learning_rate": 1.3839013642117107e-05, - "loss": 0.0451, + "learning_rate": 2.3848626215031282e-05, + "loss": 0.0316, "step": 66710 }, { "epoch": 3.11, - "learning_rate": 1.3838544840841968e-05, - "loss": 0.0343, + "learning_rate": 2.3848158145195266e-05, + "loss": 0.0708, "step": 66715 }, { "epoch": 3.11, - "learning_rate": 1.3838076039566828e-05, - "loss": 0.0664, + "learning_rate": 2.3847690075359245e-05, + "loss": 0.1272, "step": 66720 }, { "epoch": 3.11, - "learning_rate": 1.383760723829169e-05, - "loss": 0.0706, + "learning_rate": 2.3847222005523225e-05, + "loss": 0.0487, "step": 66725 }, { "epoch": 3.11, - "learning_rate": 1.3837138437016551e-05, - "loss": 0.1731, + "learning_rate": 2.3846753935687205e-05, + "loss": 0.0975, "step": 66730 }, { "epoch": 3.11, - "learning_rate": 1.3836669635741411e-05, - "loss": 0.1815, + "learning_rate": 2.3846285865851185e-05, + "loss": 0.1878, "step": 66735 }, { "epoch": 3.11, - "learning_rate": 1.3836200834466271e-05, - "loss": 0.1428, + "learning_rate": 2.3845817796015165e-05, + "loss": 0.2166, "step": 66740 }, { "epoch": 3.11, - "learning_rate": 1.3835732033191131e-05, - "loss": 0.3716, + "learning_rate": 2.3845349726179144e-05, + "loss": 0.3496, "step": 66745 }, { "epoch": 3.11, - "learning_rate": 1.3835263231915991e-05, - "loss": 0.0278, + "learning_rate": 2.3844881656343128e-05, + "loss": 0.0579, "step": 66750 }, { "epoch": 3.11, - "learning_rate": 1.3834794430640853e-05, - "loss": 0.046, + "learning_rate": 2.3844413586507107e-05, + "loss": 0.0446, "step": 66755 }, { "epoch": 3.12, - "learning_rate": 1.3834325629365713e-05, - "loss": 0.1035, + "learning_rate": 2.3843945516671087e-05, + "loss": 0.0131, "step": 66760 }, { "epoch": 3.12, - "learning_rate": 1.3833856828090573e-05, - "loss": 0.1144, + "learning_rate": 2.3843477446835067e-05, + "loss": 0.0579, "step": 66765 }, { "epoch": 3.12, - "learning_rate": 1.3833388026815433e-05, - "loss": 0.0783, + "learning_rate": 2.384300937699905e-05, + "loss": 0.0523, "step": 66770 }, { "epoch": 3.12, - "learning_rate": 1.3832919225540296e-05, - "loss": 0.0529, + "learning_rate": 2.384254130716303e-05, + "loss": 0.0555, "step": 66775 }, { "epoch": 3.12, - "learning_rate": 1.3832450424265156e-05, - "loss": 0.089, + "learning_rate": 2.384207323732701e-05, + "loss": 0.0622, "step": 66780 }, { "epoch": 3.12, - "learning_rate": 1.3831981622990016e-05, - "loss": 0.1363, + "learning_rate": 2.3841605167490993e-05, + "loss": 0.0857, "step": 66785 }, { "epoch": 3.12, - "learning_rate": 1.3831512821714876e-05, - "loss": 0.212, + "learning_rate": 2.3841137097654973e-05, + "loss": 0.2491, "step": 66790 }, { "epoch": 3.12, - "learning_rate": 1.3831044020439737e-05, - "loss": 0.1733, + "learning_rate": 2.384066902781895e-05, + "loss": 0.3227, "step": 66795 }, { "epoch": 3.12, - "learning_rate": 1.3830575219164597e-05, - "loss": 0.067, + "learning_rate": 2.384020095798293e-05, + "loss": 0.0567, "step": 66800 }, { "epoch": 3.12, - "learning_rate": 1.3830106417889457e-05, - "loss": 0.0803, + "learning_rate": 2.3839732888146912e-05, + "loss": 0.0209, "step": 66805 }, { "epoch": 3.12, - "learning_rate": 1.3829637616614317e-05, - "loss": 0.0371, + "learning_rate": 2.3839264818310892e-05, + "loss": 0.0227, "step": 66810 }, { "epoch": 3.12, - "learning_rate": 1.3829168815339177e-05, - "loss": 0.0515, + "learning_rate": 2.3838796748474872e-05, + "loss": 0.0561, "step": 66815 }, { "epoch": 3.12, - "learning_rate": 1.382870001406404e-05, - "loss": 0.0837, + "learning_rate": 2.3838328678638852e-05, + "loss": 0.0754, "step": 66820 }, { "epoch": 3.12, - "learning_rate": 1.38282312127889e-05, - "loss": 0.0697, + "learning_rate": 2.3837860608802835e-05, + "loss": 0.0486, "step": 66825 }, { "epoch": 3.12, - "learning_rate": 1.382776241151376e-05, - "loss": 0.1489, + "learning_rate": 2.3837392538966815e-05, + "loss": 0.0991, "step": 66830 }, { "epoch": 3.12, - "learning_rate": 1.3827293610238622e-05, - "loss": 0.1778, + "learning_rate": 2.3836924469130795e-05, + "loss": 0.1332, "step": 66835 }, { "epoch": 3.12, - "learning_rate": 1.3826824808963482e-05, - "loss": 0.1401, + "learning_rate": 2.3836456399294778e-05, + "loss": 0.2104, "step": 66840 }, { "epoch": 3.12, - "learning_rate": 1.3826356007688342e-05, - "loss": 0.3149, + "learning_rate": 2.3835988329458758e-05, + "loss": 0.3672, "step": 66845 }, { "epoch": 3.12, - "learning_rate": 1.3825887206413202e-05, - "loss": 0.0503, + "learning_rate": 2.3835520259622738e-05, + "loss": 0.0556, "step": 66850 }, { "epoch": 3.12, - "learning_rate": 1.3825418405138062e-05, - "loss": 0.027, + "learning_rate": 2.3835052189786717e-05, + "loss": 0.0496, "step": 66855 }, { "epoch": 3.12, - "learning_rate": 1.3824949603862923e-05, - "loss": 0.0344, + "learning_rate": 2.3834584119950697e-05, + "loss": 0.0398, "step": 66860 }, { "epoch": 3.12, - "learning_rate": 1.3824480802587785e-05, - "loss": 0.0539, + "learning_rate": 2.3834116050114677e-05, + "loss": 0.0653, "step": 66865 }, { "epoch": 3.12, - "learning_rate": 1.3824012001312645e-05, - "loss": 0.0438, + "learning_rate": 2.3833647980278657e-05, + "loss": 0.0466, "step": 66870 }, { "epoch": 3.12, - "learning_rate": 1.3823543200037506e-05, - "loss": 0.0387, + "learning_rate": 2.3833179910442637e-05, + "loss": 0.0604, "step": 66875 }, { "epoch": 3.12, - "learning_rate": 1.3823074398762366e-05, - "loss": 0.1085, + "learning_rate": 2.383271184060662e-05, + "loss": 0.1241, "step": 66880 }, { "epoch": 3.12, - "learning_rate": 1.3822605597487226e-05, - "loss": 0.1261, + "learning_rate": 2.38322437707706e-05, + "loss": 0.1355, "step": 66885 }, { "epoch": 3.12, - "learning_rate": 1.3822136796212086e-05, - "loss": 0.2468, + "learning_rate": 2.383177570093458e-05, + "loss": 0.1424, "step": 66890 }, { "epoch": 3.12, - "learning_rate": 1.3821667994936946e-05, - "loss": 0.2092, + "learning_rate": 2.383130763109856e-05, + "loss": 0.2721, "step": 66895 }, { "epoch": 3.12, - "learning_rate": 1.3821199193661808e-05, - "loss": 0.0453, + "learning_rate": 2.3830839561262543e-05, + "loss": 0.0688, "step": 66900 }, { "epoch": 3.12, - "learning_rate": 1.3820730392386668e-05, - "loss": 0.0149, + "learning_rate": 2.3830371491426522e-05, + "loss": 0.0192, "step": 66905 }, { "epoch": 3.12, - "learning_rate": 1.3820261591111528e-05, - "loss": 0.0878, + "learning_rate": 2.3829903421590502e-05, + "loss": 0.0467, "step": 66910 }, { "epoch": 3.12, - "learning_rate": 1.3819792789836391e-05, - "loss": 0.1053, + "learning_rate": 2.3829435351754485e-05, + "loss": 0.019, "step": 66915 }, { "epoch": 3.12, - "learning_rate": 1.3819323988561251e-05, - "loss": 0.1154, + "learning_rate": 2.3828967281918462e-05, + "loss": 0.0432, "step": 66920 }, { "epoch": 3.12, - "learning_rate": 1.3818855187286111e-05, - "loss": 0.0957, + "learning_rate": 2.382849921208244e-05, + "loss": 0.0599, "step": 66925 }, { "epoch": 3.12, - "learning_rate": 1.3818386386010971e-05, - "loss": 0.1194, + "learning_rate": 2.382803114224642e-05, + "loss": 0.096, "step": 66930 }, { "epoch": 3.12, - "learning_rate": 1.381791758473583e-05, - "loss": 0.1477, + "learning_rate": 2.3827563072410405e-05, + "loss": 0.1109, "step": 66935 }, { "epoch": 3.12, - "learning_rate": 1.3817448783460692e-05, - "loss": 0.1528, + "learning_rate": 2.3827095002574384e-05, + "loss": 0.2118, "step": 66940 }, { "epoch": 3.12, - "learning_rate": 1.3816979982185552e-05, - "loss": 0.3295, + "learning_rate": 2.3826626932738364e-05, + "loss": 0.3333, "step": 66945 }, { "epoch": 3.12, - "learning_rate": 1.3816511180910412e-05, - "loss": 0.0486, + "learning_rate": 2.3826158862902344e-05, + "loss": 0.0554, "step": 66950 }, { "epoch": 3.12, - "learning_rate": 1.3816042379635272e-05, - "loss": 0.0517, + "learning_rate": 2.3825690793066327e-05, + "loss": 0.0199, "step": 66955 }, { "epoch": 3.12, - "learning_rate": 1.3815573578360136e-05, - "loss": 0.0239, + "learning_rate": 2.3825222723230307e-05, + "loss": 0.0254, "step": 66960 }, { "epoch": 3.12, - "learning_rate": 1.3815104777084995e-05, - "loss": 0.0443, + "learning_rate": 2.3824754653394287e-05, + "loss": 0.0234, "step": 66965 }, { "epoch": 3.12, - "learning_rate": 1.3814635975809855e-05, - "loss": 0.083, + "learning_rate": 2.382428658355827e-05, + "loss": 0.0679, "step": 66970 }, { "epoch": 3.13, - "learning_rate": 1.3814167174534715e-05, - "loss": 0.0842, + "learning_rate": 2.382381851372225e-05, + "loss": 0.0674, "step": 66975 }, { "epoch": 3.13, - "learning_rate": 1.3813698373259577e-05, - "loss": 0.071, + "learning_rate": 2.382335044388623e-05, + "loss": 0.091, "step": 66980 }, { "epoch": 3.13, - "learning_rate": 1.3813229571984437e-05, - "loss": 0.1215, + "learning_rate": 2.3822882374050206e-05, + "loss": 0.1218, "step": 66985 }, { "epoch": 3.13, - "learning_rate": 1.3812760770709297e-05, - "loss": 0.1975, + "learning_rate": 2.382241430421419e-05, + "loss": 0.1443, "step": 66990 }, { "epoch": 3.13, - "learning_rate": 1.3812291969434157e-05, - "loss": 0.312, + "learning_rate": 2.382194623437817e-05, + "loss": 0.2643, "step": 66995 }, { "epoch": 3.13, - "learning_rate": 1.3811823168159018e-05, - "loss": 0.0263, + "learning_rate": 2.382147816454215e-05, + "loss": 0.1286, "step": 67000 }, { "epoch": 3.13, - "learning_rate": 1.381135436688388e-05, - "loss": 0.022, + "learning_rate": 2.382101009470613e-05, + "loss": 0.0283, "step": 67005 }, { "epoch": 3.13, - "learning_rate": 1.381088556560874e-05, - "loss": 0.0698, + "learning_rate": 2.3820542024870112e-05, + "loss": 0.068, "step": 67010 }, { "epoch": 3.13, - "learning_rate": 1.38104167643336e-05, - "loss": 0.0665, + "learning_rate": 2.3820073955034092e-05, + "loss": 0.1003, "step": 67015 }, { "epoch": 3.13, - "learning_rate": 1.3809947963058462e-05, - "loss": 0.0565, + "learning_rate": 2.3819605885198072e-05, + "loss": 0.0291, "step": 67020 }, { "epoch": 3.13, - "learning_rate": 1.3809479161783321e-05, - "loss": 0.0716, + "learning_rate": 2.3819137815362055e-05, + "loss": 0.0663, "step": 67025 }, { "epoch": 3.13, - "learning_rate": 1.3809010360508181e-05, - "loss": 0.0787, + "learning_rate": 2.3818669745526035e-05, + "loss": 0.1684, "step": 67030 }, { "epoch": 3.13, - "learning_rate": 1.3808541559233041e-05, - "loss": 0.1136, + "learning_rate": 2.3818201675690015e-05, + "loss": 0.1427, "step": 67035 }, { "epoch": 3.13, - "learning_rate": 1.3808072757957903e-05, - "loss": 0.1218, + "learning_rate": 2.3817733605853994e-05, + "loss": 0.2737, "step": 67040 }, { "epoch": 3.13, - "learning_rate": 1.3807603956682763e-05, - "loss": 0.2986, + "learning_rate": 2.3817265536017974e-05, + "loss": 0.2323, "step": 67045 }, { "epoch": 3.13, - "learning_rate": 1.3807135155407625e-05, - "loss": 0.0709, + "learning_rate": 2.3816797466181954e-05, + "loss": 0.0971, "step": 67050 }, { "epoch": 3.13, - "learning_rate": 1.3806666354132484e-05, - "loss": 0.0823, + "learning_rate": 2.3816329396345934e-05, + "loss": 0.0467, "step": 67055 }, { "epoch": 3.13, - "learning_rate": 1.3806197552857346e-05, - "loss": 0.0219, + "learning_rate": 2.3815861326509914e-05, + "loss": 0.3666, "step": 67060 }, { "epoch": 3.13, - "learning_rate": 1.3805728751582206e-05, - "loss": 0.0453, + "learning_rate": 2.3815393256673897e-05, + "loss": 0.0345, "step": 67065 }, { "epoch": 3.13, - "learning_rate": 1.3805259950307066e-05, - "loss": 0.0388, + "learning_rate": 2.3814925186837877e-05, + "loss": 0.1221, "step": 67070 }, { "epoch": 3.13, - "learning_rate": 1.3804791149031926e-05, - "loss": 0.1246, + "learning_rate": 2.3814457117001856e-05, + "loss": 0.1157, "step": 67075 }, { "epoch": 3.13, - "learning_rate": 1.3804322347756787e-05, - "loss": 0.1001, + "learning_rate": 2.3813989047165836e-05, + "loss": 0.1358, "step": 67080 }, { "epoch": 3.13, - "learning_rate": 1.3803853546481647e-05, - "loss": 0.1526, + "learning_rate": 2.381352097732982e-05, + "loss": 0.1155, "step": 67085 }, { "epoch": 3.13, - "learning_rate": 1.3803384745206507e-05, - "loss": 0.2058, + "learning_rate": 2.38130529074938e-05, + "loss": 0.2144, "step": 67090 }, { "epoch": 3.13, - "learning_rate": 1.3802915943931367e-05, - "loss": 0.3274, + "learning_rate": 2.381258483765778e-05, + "loss": 0.2574, "step": 67095 }, { "epoch": 3.13, - "learning_rate": 1.380244714265623e-05, - "loss": 0.0589, + "learning_rate": 2.3812116767821762e-05, + "loss": 0.0336, "step": 67100 }, { "epoch": 3.13, - "learning_rate": 1.380197834138109e-05, - "loss": 0.0554, + "learning_rate": 2.3811648697985742e-05, + "loss": 0.0331, "step": 67105 }, { "epoch": 3.13, - "learning_rate": 1.380150954010595e-05, - "loss": 0.1107, + "learning_rate": 2.381118062814972e-05, + "loss": 0.0531, "step": 67110 }, { "epoch": 3.13, - "learning_rate": 1.380104073883081e-05, - "loss": 0.0373, + "learning_rate": 2.38107125583137e-05, + "loss": 0.0893, "step": 67115 }, { "epoch": 3.13, - "learning_rate": 1.3800571937555672e-05, - "loss": 0.1125, + "learning_rate": 2.381024448847768e-05, + "loss": 0.037, "step": 67120 }, { "epoch": 3.13, - "learning_rate": 1.3800103136280532e-05, - "loss": 0.0933, + "learning_rate": 2.380977641864166e-05, + "loss": 0.1595, "step": 67125 }, { "epoch": 3.13, - "learning_rate": 1.3799634335005392e-05, - "loss": 0.0467, + "learning_rate": 2.380930834880564e-05, + "loss": 0.0874, "step": 67130 }, { "epoch": 3.13, - "learning_rate": 1.3799165533730252e-05, - "loss": 0.1961, + "learning_rate": 2.380884027896962e-05, + "loss": 0.1572, "step": 67135 }, { "epoch": 3.13, - "learning_rate": 1.3798696732455112e-05, - "loss": 0.1006, + "learning_rate": 2.3808372209133604e-05, + "loss": 0.2248, "step": 67140 }, { "epoch": 3.13, - "learning_rate": 1.3798227931179975e-05, - "loss": 0.2407, + "learning_rate": 2.3807904139297584e-05, + "loss": 0.4436, "step": 67145 }, { "epoch": 3.13, - "learning_rate": 1.3797759129904835e-05, - "loss": 0.0181, + "learning_rate": 2.3807436069461564e-05, + "loss": 0.0987, "step": 67150 }, { "epoch": 3.13, - "learning_rate": 1.3797290328629695e-05, - "loss": 0.0193, + "learning_rate": 2.3806967999625547e-05, + "loss": 0.036, "step": 67155 }, { "epoch": 3.13, - "learning_rate": 1.3796821527354557e-05, - "loss": 0.0602, + "learning_rate": 2.3806499929789527e-05, + "loss": 0.0322, "step": 67160 }, { "epoch": 3.13, - "learning_rate": 1.3796352726079417e-05, - "loss": 0.0818, + "learning_rate": 2.3806031859953507e-05, + "loss": 0.0594, "step": 67165 }, { "epoch": 3.13, - "learning_rate": 1.3795883924804276e-05, - "loss": 0.0914, + "learning_rate": 2.3805563790117487e-05, + "loss": 0.0666, "step": 67170 }, { "epoch": 3.13, - "learning_rate": 1.3795415123529136e-05, - "loss": 0.0872, + "learning_rate": 2.3805095720281466e-05, + "loss": 0.0725, "step": 67175 }, { "epoch": 3.13, - "learning_rate": 1.3794946322253996e-05, - "loss": 0.1196, + "learning_rate": 2.3804627650445446e-05, + "loss": 0.1632, "step": 67180 }, { "epoch": 3.13, - "learning_rate": 1.3794477520978858e-05, - "loss": 0.1093, + "learning_rate": 2.3804159580609426e-05, + "loss": 0.145, "step": 67185 }, { "epoch": 3.14, - "learning_rate": 1.379400871970372e-05, - "loss": 0.1819, + "learning_rate": 2.3803691510773406e-05, + "loss": 0.1489, "step": 67190 }, { "epoch": 3.14, - "learning_rate": 1.379353991842858e-05, - "loss": 0.3717, + "learning_rate": 2.380322344093739e-05, + "loss": 0.173, "step": 67195 }, { "epoch": 3.14, - "learning_rate": 1.3793071117153441e-05, - "loss": 0.0776, + "learning_rate": 2.380275537110137e-05, + "loss": 0.0832, "step": 67200 }, { "epoch": 3.14, - "learning_rate": 1.3792602315878301e-05, - "loss": 0.026, + "learning_rate": 2.380228730126535e-05, + "loss": 0.0178, "step": 67205 }, { "epoch": 3.14, - "learning_rate": 1.3792133514603161e-05, - "loss": 0.0606, + "learning_rate": 2.3801819231429332e-05, + "loss": 0.1049, "step": 67210 }, { "epoch": 3.14, - "learning_rate": 1.3791664713328021e-05, - "loss": 0.0693, + "learning_rate": 2.3801351161593312e-05, + "loss": 0.0215, "step": 67215 }, { "epoch": 3.14, - "learning_rate": 1.3791195912052881e-05, - "loss": 0.0327, + "learning_rate": 2.380088309175729e-05, + "loss": 0.0686, "step": 67220 }, { "epoch": 3.14, - "learning_rate": 1.3790727110777743e-05, - "loss": 0.0602, + "learning_rate": 2.380041502192127e-05, + "loss": 0.0688, "step": 67225 }, { "epoch": 3.14, - "learning_rate": 1.3790258309502602e-05, - "loss": 0.0975, + "learning_rate": 2.3799946952085255e-05, + "loss": 0.1738, "step": 67230 }, { "epoch": 3.14, - "learning_rate": 1.3789789508227462e-05, - "loss": 0.1581, + "learning_rate": 2.379947888224923e-05, + "loss": 0.1301, "step": 67235 }, { "epoch": 3.14, - "learning_rate": 1.3789320706952326e-05, - "loss": 0.1922, + "learning_rate": 2.379901081241321e-05, + "loss": 0.186, "step": 67240 }, { "epoch": 3.14, - "learning_rate": 1.3788851905677186e-05, - "loss": 0.1326, + "learning_rate": 2.379854274257719e-05, + "loss": 0.2531, "step": 67245 }, { "epoch": 3.14, - "learning_rate": 1.3788383104402046e-05, - "loss": 0.0908, + "learning_rate": 2.3798074672741174e-05, + "loss": 0.0657, "step": 67250 }, { "epoch": 3.14, - "learning_rate": 1.3787914303126905e-05, - "loss": 0.0064, + "learning_rate": 2.3797606602905154e-05, + "loss": 0.059, "step": 67255 }, { "epoch": 3.14, - "learning_rate": 1.3787445501851765e-05, - "loss": 0.0279, + "learning_rate": 2.3797138533069133e-05, + "loss": 0.1246, "step": 67260 }, { "epoch": 3.14, - "learning_rate": 1.3786976700576627e-05, - "loss": 0.0307, + "learning_rate": 2.3796670463233113e-05, + "loss": 0.0766, "step": 67265 }, { "epoch": 3.14, - "learning_rate": 1.3786507899301487e-05, - "loss": 0.0717, + "learning_rate": 2.3796202393397096e-05, + "loss": 0.089, "step": 67270 }, { "epoch": 3.14, - "learning_rate": 1.3786039098026347e-05, - "loss": 0.0845, + "learning_rate": 2.3795734323561076e-05, + "loss": 0.0678, "step": 67275 }, { "epoch": 3.14, - "learning_rate": 1.3785570296751207e-05, - "loss": 0.1032, + "learning_rate": 2.3795266253725056e-05, + "loss": 0.1219, "step": 67280 }, { "epoch": 3.14, - "learning_rate": 1.378510149547607e-05, - "loss": 0.1643, + "learning_rate": 2.379479818388904e-05, + "loss": 0.1767, "step": 67285 }, { "epoch": 3.14, - "learning_rate": 1.378463269420093e-05, - "loss": 0.243, + "learning_rate": 2.379433011405302e-05, + "loss": 0.1375, "step": 67290 }, { "epoch": 3.14, - "learning_rate": 1.378416389292579e-05, - "loss": 0.3675, + "learning_rate": 2.3793862044217e-05, + "loss": 0.3152, "step": 67295 }, { "epoch": 3.14, - "learning_rate": 1.378369509165065e-05, - "loss": 0.0633, + "learning_rate": 2.3793393974380975e-05, + "loss": 0.0478, "step": 67300 }, { "epoch": 3.14, - "learning_rate": 1.3783226290375512e-05, - "loss": 0.0273, + "learning_rate": 2.379292590454496e-05, + "loss": 0.0263, "step": 67305 }, { "epoch": 3.14, - "learning_rate": 1.3782757489100372e-05, - "loss": 0.047, + "learning_rate": 2.379245783470894e-05, + "loss": 0.0362, "step": 67310 }, { "epoch": 3.14, - "learning_rate": 1.3782288687825231e-05, - "loss": 0.0805, + "learning_rate": 2.3791989764872918e-05, + "loss": 0.0162, "step": 67315 }, { "epoch": 3.14, - "learning_rate": 1.3781819886550091e-05, - "loss": 0.1093, + "learning_rate": 2.3791521695036898e-05, + "loss": 0.0348, "step": 67320 }, { "epoch": 3.14, - "learning_rate": 1.3781351085274951e-05, - "loss": 0.0728, + "learning_rate": 2.379105362520088e-05, + "loss": 0.087, "step": 67325 }, { "epoch": 3.14, - "learning_rate": 1.3780882283999815e-05, - "loss": 0.0528, + "learning_rate": 2.379058555536486e-05, + "loss": 0.0805, "step": 67330 }, { "epoch": 3.14, - "learning_rate": 1.3780413482724675e-05, - "loss": 0.1021, + "learning_rate": 2.379011748552884e-05, + "loss": 0.167, "step": 67335 }, { "epoch": 3.14, - "learning_rate": 1.3779944681449535e-05, - "loss": 0.2603, + "learning_rate": 2.3789649415692824e-05, + "loss": 0.1758, "step": 67340 }, { "epoch": 3.14, - "learning_rate": 1.3779475880174396e-05, - "loss": 0.2996, + "learning_rate": 2.3789181345856804e-05, + "loss": 0.2723, "step": 67345 }, { "epoch": 3.14, - "learning_rate": 1.3779007078899256e-05, - "loss": 0.0848, + "learning_rate": 2.3788713276020784e-05, + "loss": 0.0407, "step": 67350 }, { "epoch": 3.14, - "learning_rate": 1.3778538277624116e-05, - "loss": 0.0372, + "learning_rate": 2.3788245206184764e-05, + "loss": 0.0185, "step": 67355 }, { "epoch": 3.14, - "learning_rate": 1.3778069476348976e-05, - "loss": 0.0614, + "learning_rate": 2.3787777136348747e-05, + "loss": 0.0506, "step": 67360 }, { "epoch": 3.14, - "learning_rate": 1.3777600675073836e-05, - "loss": 0.0727, + "learning_rate": 2.3787309066512723e-05, + "loss": 0.0587, "step": 67365 }, { "epoch": 3.14, - "learning_rate": 1.3777131873798698e-05, - "loss": 0.0873, + "learning_rate": 2.3786840996676703e-05, + "loss": 0.1134, "step": 67370 }, { "epoch": 3.14, - "learning_rate": 1.3776663072523559e-05, - "loss": 0.1126, + "learning_rate": 2.3786372926840683e-05, + "loss": 0.0328, "step": 67375 }, { "epoch": 3.14, - "learning_rate": 1.3776194271248419e-05, - "loss": 0.1982, + "learning_rate": 2.3785904857004666e-05, + "loss": 0.09, "step": 67380 }, { "epoch": 3.14, - "learning_rate": 1.377572546997328e-05, - "loss": 0.2663, + "learning_rate": 2.3785436787168646e-05, + "loss": 0.2605, "step": 67385 }, { "epoch": 3.14, - "learning_rate": 1.377525666869814e-05, - "loss": 0.2309, + "learning_rate": 2.3784968717332626e-05, + "loss": 0.3733, "step": 67390 }, { "epoch": 3.14, - "learning_rate": 1.3774787867423e-05, - "loss": 0.3462, + "learning_rate": 2.378450064749661e-05, + "loss": 0.3259, "step": 67395 }, { "epoch": 3.14, - "learning_rate": 1.377431906614786e-05, - "loss": 0.0791, + "learning_rate": 2.378403257766059e-05, + "loss": 0.0238, "step": 67400 }, { "epoch": 3.15, - "learning_rate": 1.377385026487272e-05, - "loss": 0.0226, + "learning_rate": 2.378356450782457e-05, + "loss": 0.0608, "step": 67405 }, { "epoch": 3.15, - "learning_rate": 1.3773381463597582e-05, - "loss": 0.0631, + "learning_rate": 2.3783096437988548e-05, + "loss": 0.0344, "step": 67410 }, { "epoch": 3.15, - "learning_rate": 1.3772912662322442e-05, - "loss": 0.0799, + "learning_rate": 2.378262836815253e-05, + "loss": 0.0943, "step": 67415 }, { "epoch": 3.15, - "learning_rate": 1.3772443861047302e-05, - "loss": 0.0407, + "learning_rate": 2.378216029831651e-05, + "loss": 0.0393, "step": 67420 }, { "epoch": 3.15, - "learning_rate": 1.3771975059772165e-05, - "loss": 0.0385, + "learning_rate": 2.3781692228480488e-05, + "loss": 0.0639, "step": 67425 }, { "epoch": 3.15, - "learning_rate": 1.3771506258497025e-05, - "loss": 0.0817, + "learning_rate": 2.3781224158644468e-05, + "loss": 0.0661, "step": 67430 }, { "epoch": 3.15, - "learning_rate": 1.3771037457221885e-05, - "loss": 0.1169, + "learning_rate": 2.378075608880845e-05, + "loss": 0.0785, "step": 67435 }, { "epoch": 3.15, - "learning_rate": 1.3770568655946745e-05, - "loss": 0.1531, + "learning_rate": 2.378028801897243e-05, + "loss": 0.1502, "step": 67440 }, { "epoch": 3.15, - "learning_rate": 1.3770099854671605e-05, - "loss": 0.1626, + "learning_rate": 2.377981994913641e-05, + "loss": 0.2903, "step": 67445 }, { "epoch": 3.15, - "learning_rate": 1.3769631053396467e-05, - "loss": 0.0748, + "learning_rate": 2.377935187930039e-05, + "loss": 0.0515, "step": 67450 }, { "epoch": 3.15, - "learning_rate": 1.3769162252121327e-05, - "loss": 0.0773, + "learning_rate": 2.3778883809464373e-05, + "loss": 0.0942, "step": 67455 }, { "epoch": 3.15, - "learning_rate": 1.3768693450846186e-05, - "loss": 0.0212, + "learning_rate": 2.3778415739628353e-05, + "loss": 0.055, "step": 67460 }, { "epoch": 3.15, - "learning_rate": 1.3768224649571046e-05, - "loss": 0.0787, + "learning_rate": 2.3777947669792333e-05, + "loss": 0.0439, "step": 67465 }, { "epoch": 3.15, - "learning_rate": 1.376775584829591e-05, - "loss": 0.0926, + "learning_rate": 2.3777479599956316e-05, + "loss": 0.1091, "step": 67470 }, { "epoch": 3.15, - "learning_rate": 1.376728704702077e-05, - "loss": 0.1528, + "learning_rate": 2.3777011530120296e-05, + "loss": 0.0628, "step": 67475 }, { "epoch": 3.15, - "learning_rate": 1.376681824574563e-05, - "loss": 0.2009, + "learning_rate": 2.3776543460284276e-05, + "loss": 0.1702, "step": 67480 }, { "epoch": 3.15, - "learning_rate": 1.376634944447049e-05, - "loss": 0.1525, + "learning_rate": 2.3776075390448256e-05, + "loss": 0.1167, "step": 67485 }, { "epoch": 3.15, - "learning_rate": 1.3765880643195351e-05, - "loss": 0.2424, + "learning_rate": 2.3775607320612236e-05, + "loss": 0.2041, "step": 67490 }, { "epoch": 3.15, - "learning_rate": 1.3765411841920211e-05, - "loss": 0.4591, + "learning_rate": 2.3775139250776215e-05, + "loss": 0.193, "step": 67495 }, { "epoch": 3.15, - "learning_rate": 1.3764943040645071e-05, - "loss": 0.0752, + "learning_rate": 2.3774671180940195e-05, + "loss": 0.0637, "step": 67500 }, { "epoch": 3.15, - "learning_rate": 1.3764474239369931e-05, - "loss": 0.009, + "learning_rate": 2.3774203111104175e-05, + "loss": 0.0709, "step": 67505 }, { "epoch": 3.15, - "learning_rate": 1.3764005438094793e-05, - "loss": 0.087, + "learning_rate": 2.3773735041268158e-05, + "loss": 0.0383, "step": 67510 }, { "epoch": 3.15, - "learning_rate": 1.3763536636819654e-05, - "loss": 0.0634, + "learning_rate": 2.3773266971432138e-05, + "loss": 0.051, "step": 67515 }, { "epoch": 3.15, - "learning_rate": 1.3763067835544514e-05, - "loss": 0.0393, + "learning_rate": 2.3772798901596118e-05, + "loss": 0.127, "step": 67520 }, { "epoch": 3.15, - "learning_rate": 1.3762599034269374e-05, - "loss": 0.0283, + "learning_rate": 2.37723308317601e-05, + "loss": 0.1225, "step": 67525 }, { "epoch": 3.15, - "learning_rate": 1.3762130232994236e-05, - "loss": 0.1217, + "learning_rate": 2.377186276192408e-05, + "loss": 0.1131, "step": 67530 }, { "epoch": 3.15, - "learning_rate": 1.3761661431719096e-05, - "loss": 0.1377, + "learning_rate": 2.377139469208806e-05, + "loss": 0.1179, "step": 67535 }, { "epoch": 3.15, - "learning_rate": 1.3761192630443956e-05, - "loss": 0.2084, + "learning_rate": 2.377092662225204e-05, + "loss": 0.388, "step": 67540 }, { "epoch": 3.15, - "learning_rate": 1.3760723829168816e-05, - "loss": 0.2998, + "learning_rate": 2.3770458552416024e-05, + "loss": 0.1508, "step": 67545 }, { "epoch": 3.15, - "learning_rate": 1.3760255027893677e-05, - "loss": 0.0727, + "learning_rate": 2.3769990482580004e-05, + "loss": 0.065, "step": 67550 }, { "epoch": 3.15, - "learning_rate": 1.3759786226618537e-05, - "loss": 0.0368, + "learning_rate": 2.376952241274398e-05, + "loss": 0.029, "step": 67555 }, { "epoch": 3.15, - "learning_rate": 1.3759317425343397e-05, - "loss": 0.0344, + "learning_rate": 2.376905434290796e-05, + "loss": 0.0257, "step": 67560 }, { "epoch": 3.15, - "learning_rate": 1.3758848624068259e-05, - "loss": 0.0686, + "learning_rate": 2.3768586273071943e-05, + "loss": 0.0371, "step": 67565 }, { "epoch": 3.15, - "learning_rate": 1.375837982279312e-05, - "loss": 0.1097, + "learning_rate": 2.3768118203235923e-05, + "loss": 0.0516, "step": 67570 }, { "epoch": 3.15, - "learning_rate": 1.375791102151798e-05, - "loss": 0.2716, + "learning_rate": 2.3767650133399903e-05, + "loss": 0.1494, "step": 67575 }, { "epoch": 3.15, - "learning_rate": 1.375744222024284e-05, - "loss": 0.1313, + "learning_rate": 2.3767182063563886e-05, + "loss": 0.1095, "step": 67580 }, { "epoch": 3.15, - "learning_rate": 1.37569734189677e-05, - "loss": 0.117, + "learning_rate": 2.3766713993727866e-05, + "loss": 0.1596, "step": 67585 }, { "epoch": 3.15, - "learning_rate": 1.3756504617692562e-05, - "loss": 0.1824, + "learning_rate": 2.3766245923891845e-05, + "loss": 0.1266, "step": 67590 }, { "epoch": 3.15, - "learning_rate": 1.3756035816417422e-05, - "loss": 0.1543, + "learning_rate": 2.3765777854055825e-05, + "loss": 0.2497, "step": 67595 }, { "epoch": 3.15, - "learning_rate": 1.3755567015142282e-05, - "loss": 0.122, + "learning_rate": 2.376530978421981e-05, + "loss": 0.0552, "step": 67600 }, { "epoch": 3.15, - "learning_rate": 1.3755098213867142e-05, - "loss": 0.0074, + "learning_rate": 2.3764841714383788e-05, + "loss": 0.0487, "step": 67605 }, { "epoch": 3.15, - "learning_rate": 1.3754629412592005e-05, - "loss": 0.0307, + "learning_rate": 2.3764373644547768e-05, + "loss": 0.0359, "step": 67610 }, { "epoch": 3.16, - "learning_rate": 1.3754160611316865e-05, - "loss": 0.0396, + "learning_rate": 2.3763905574711745e-05, + "loss": 0.033, "step": 67615 }, { "epoch": 3.16, - "learning_rate": 1.3753691810041725e-05, - "loss": 0.0495, + "learning_rate": 2.3763437504875728e-05, + "loss": 0.0672, "step": 67620 }, { "epoch": 3.16, - "learning_rate": 1.3753223008766585e-05, - "loss": 0.0975, + "learning_rate": 2.3762969435039708e-05, + "loss": 0.1235, "step": 67625 }, { "epoch": 3.16, - "learning_rate": 1.3752754207491446e-05, - "loss": 0.1073, + "learning_rate": 2.3762501365203687e-05, + "loss": 0.0986, "step": 67630 }, { "epoch": 3.16, - "learning_rate": 1.3752285406216306e-05, - "loss": 0.1438, + "learning_rate": 2.376203329536767e-05, + "loss": 0.0818, "step": 67635 }, { "epoch": 3.16, - "learning_rate": 1.3751816604941166e-05, - "loss": 0.2011, + "learning_rate": 2.376156522553165e-05, + "loss": 0.1686, "step": 67640 }, { "epoch": 3.16, - "learning_rate": 1.3751347803666026e-05, - "loss": 0.3405, + "learning_rate": 2.376109715569563e-05, + "loss": 0.2837, "step": 67645 }, { "epoch": 3.16, - "learning_rate": 1.3750879002390886e-05, - "loss": 0.0957, + "learning_rate": 2.376062908585961e-05, + "loss": 0.1005, "step": 67650 }, { "epoch": 3.16, - "learning_rate": 1.375041020111575e-05, - "loss": 0.0259, + "learning_rate": 2.3760161016023593e-05, + "loss": 0.0205, "step": 67655 }, { "epoch": 3.16, - "learning_rate": 1.374994139984061e-05, - "loss": 0.0578, + "learning_rate": 2.3759692946187573e-05, + "loss": 0.0335, "step": 67660 }, { "epoch": 3.16, - "learning_rate": 1.374947259856547e-05, - "loss": 0.1437, + "learning_rate": 2.3759224876351553e-05, + "loss": 0.0388, "step": 67665 }, { "epoch": 3.16, - "learning_rate": 1.374900379729033e-05, - "loss": 0.0596, + "learning_rate": 2.3758756806515533e-05, + "loss": 0.0717, "step": 67670 }, { "epoch": 3.16, - "learning_rate": 1.374853499601519e-05, - "loss": 0.0759, + "learning_rate": 2.3758288736679516e-05, + "loss": 0.0655, "step": 67675 }, { "epoch": 3.16, - "learning_rate": 1.374806619474005e-05, - "loss": 0.0801, + "learning_rate": 2.3757820666843492e-05, + "loss": 0.0707, "step": 67680 }, { "epoch": 3.16, - "learning_rate": 1.374759739346491e-05, - "loss": 0.1489, + "learning_rate": 2.3757352597007472e-05, + "loss": 0.1287, "step": 67685 }, { "epoch": 3.16, - "learning_rate": 1.374712859218977e-05, - "loss": 0.0848, + "learning_rate": 2.3756884527171452e-05, + "loss": 0.1605, "step": 67690 }, { "epoch": 3.16, - "learning_rate": 1.3746659790914632e-05, - "loss": 0.237, + "learning_rate": 2.3756416457335435e-05, + "loss": 0.2086, "step": 67695 }, { "epoch": 3.16, - "learning_rate": 1.3746190989639492e-05, - "loss": 0.0866, + "learning_rate": 2.3755948387499415e-05, + "loss": 0.0836, "step": 67700 }, { "epoch": 3.16, - "learning_rate": 1.3745722188364354e-05, - "loss": 0.0325, + "learning_rate": 2.3755480317663395e-05, + "loss": 0.0223, "step": 67705 }, { "epoch": 3.16, - "learning_rate": 1.3745253387089215e-05, - "loss": 0.0331, + "learning_rate": 2.3755012247827378e-05, + "loss": 0.0275, "step": 67710 }, { "epoch": 3.16, - "learning_rate": 1.3744784585814075e-05, - "loss": 0.0228, + "learning_rate": 2.3754544177991358e-05, + "loss": 0.0636, "step": 67715 }, { "epoch": 3.16, - "learning_rate": 1.3744315784538935e-05, - "loss": 0.1033, + "learning_rate": 2.3754076108155338e-05, + "loss": 0.0293, "step": 67720 }, { "epoch": 3.16, - "learning_rate": 1.3743846983263795e-05, - "loss": 0.1214, + "learning_rate": 2.3753608038319317e-05, + "loss": 0.0887, "step": 67725 }, { "epoch": 3.16, - "learning_rate": 1.3743378181988655e-05, - "loss": 0.0933, + "learning_rate": 2.37531399684833e-05, + "loss": 0.1367, "step": 67730 }, { "epoch": 3.16, - "learning_rate": 1.3742909380713517e-05, - "loss": 0.1463, + "learning_rate": 2.375267189864728e-05, + "loss": 0.1909, "step": 67735 }, { "epoch": 3.16, - "learning_rate": 1.3742440579438377e-05, - "loss": 0.2893, + "learning_rate": 2.375220382881126e-05, + "loss": 0.264, "step": 67740 }, { "epoch": 3.16, - "learning_rate": 1.3741971778163237e-05, - "loss": 0.3865, + "learning_rate": 2.3751735758975237e-05, + "loss": 0.2295, "step": 67745 }, { "epoch": 3.16, - "learning_rate": 1.37415029768881e-05, - "loss": 0.0846, + "learning_rate": 2.375126768913922e-05, + "loss": 0.1168, "step": 67750 }, { "epoch": 3.16, - "learning_rate": 1.374103417561296e-05, - "loss": 0.0302, + "learning_rate": 2.37507996193032e-05, + "loss": 0.0403, "step": 67755 }, { "epoch": 3.16, - "learning_rate": 1.374056537433782e-05, - "loss": 0.0473, + "learning_rate": 2.375033154946718e-05, + "loss": 0.0761, "step": 67760 }, { "epoch": 3.16, - "learning_rate": 1.374009657306268e-05, - "loss": 0.018, + "learning_rate": 2.3749863479631163e-05, + "loss": 0.0457, "step": 67765 }, { "epoch": 3.16, - "learning_rate": 1.373962777178754e-05, - "loss": 0.1362, + "learning_rate": 2.3749395409795143e-05, + "loss": 0.0857, "step": 67770 }, { "epoch": 3.16, - "learning_rate": 1.3739158970512401e-05, - "loss": 0.057, + "learning_rate": 2.3748927339959122e-05, + "loss": 0.0675, "step": 67775 }, { "epoch": 3.16, - "learning_rate": 1.3738690169237261e-05, - "loss": 0.1749, + "learning_rate": 2.3748459270123102e-05, + "loss": 0.148, "step": 67780 }, { "epoch": 3.16, - "learning_rate": 1.3738221367962121e-05, - "loss": 0.1016, + "learning_rate": 2.3747991200287085e-05, + "loss": 0.1699, "step": 67785 }, { "epoch": 3.16, - "learning_rate": 1.3737752566686981e-05, - "loss": 0.1584, + "learning_rate": 2.3747523130451065e-05, + "loss": 0.1654, "step": 67790 }, { "epoch": 3.16, - "learning_rate": 1.3737283765411844e-05, - "loss": 0.357, + "learning_rate": 2.3747055060615045e-05, + "loss": 0.3544, "step": 67795 }, { "epoch": 3.16, - "learning_rate": 1.3736814964136704e-05, - "loss": 0.0303, + "learning_rate": 2.3746586990779025e-05, + "loss": 0.0849, "step": 67800 }, { "epoch": 3.16, - "learning_rate": 1.3736346162861564e-05, - "loss": 0.0423, + "learning_rate": 2.3746118920943005e-05, + "loss": 0.0669, "step": 67805 }, { "epoch": 3.16, - "learning_rate": 1.3735877361586424e-05, - "loss": 0.0347, + "learning_rate": 2.3745650851106985e-05, + "loss": 0.0418, "step": 67810 }, { "epoch": 3.16, - "learning_rate": 1.3735408560311286e-05, - "loss": 0.0268, + "learning_rate": 2.3745182781270964e-05, + "loss": 0.0484, "step": 67815 }, { "epoch": 3.16, - "learning_rate": 1.3734939759036146e-05, - "loss": 0.0234, + "learning_rate": 2.3744714711434948e-05, + "loss": 0.0757, "step": 67820 }, { "epoch": 3.16, - "learning_rate": 1.3734470957761006e-05, - "loss": 0.0924, + "learning_rate": 2.3744246641598927e-05, + "loss": 0.1378, "step": 67825 }, { "epoch": 3.17, - "learning_rate": 1.3734002156485866e-05, - "loss": 0.0546, + "learning_rate": 2.3743778571762907e-05, + "loss": 0.1436, "step": 67830 }, { "epoch": 3.17, - "learning_rate": 1.3733533355210726e-05, - "loss": 0.0996, + "learning_rate": 2.3743310501926887e-05, + "loss": 0.0679, "step": 67835 }, { "epoch": 3.17, - "learning_rate": 1.3733064553935589e-05, - "loss": 0.127, + "learning_rate": 2.374284243209087e-05, + "loss": 0.1999, "step": 67840 }, { "epoch": 3.17, - "learning_rate": 1.3732595752660449e-05, - "loss": 0.2586, + "learning_rate": 2.374237436225485e-05, + "loss": 0.2928, "step": 67845 }, { "epoch": 3.17, - "learning_rate": 1.3732126951385309e-05, - "loss": 0.0788, + "learning_rate": 2.374190629241883e-05, + "loss": 0.0898, "step": 67850 }, { "epoch": 3.17, - "learning_rate": 1.373165815011017e-05, - "loss": 0.0596, + "learning_rate": 2.374143822258281e-05, + "loss": 0.0348, "step": 67855 }, { "epoch": 3.17, - "learning_rate": 1.373118934883503e-05, - "loss": 0.0302, + "learning_rate": 2.3740970152746793e-05, + "loss": 0.0331, "step": 67860 }, { "epoch": 3.17, - "learning_rate": 1.373072054755989e-05, - "loss": 0.0224, + "learning_rate": 2.3740502082910773e-05, + "loss": 0.041, "step": 67865 }, { "epoch": 3.17, - "learning_rate": 1.373025174628475e-05, - "loss": 0.0651, + "learning_rate": 2.374003401307475e-05, + "loss": 0.0522, "step": 67870 }, { "epoch": 3.17, - "learning_rate": 1.372978294500961e-05, - "loss": 0.1478, + "learning_rate": 2.373956594323873e-05, + "loss": 0.1023, "step": 67875 }, { "epoch": 3.17, - "learning_rate": 1.3729314143734472e-05, - "loss": 0.0709, + "learning_rate": 2.3739097873402712e-05, + "loss": 0.0968, "step": 67880 }, { "epoch": 3.17, - "learning_rate": 1.3728845342459332e-05, - "loss": 0.1736, + "learning_rate": 2.3738629803566692e-05, + "loss": 0.1362, "step": 67885 }, { "epoch": 3.17, - "learning_rate": 1.3728376541184193e-05, - "loss": 0.2604, + "learning_rate": 2.3738161733730672e-05, + "loss": 0.1835, "step": 67890 }, { "epoch": 3.17, - "learning_rate": 1.3727907739909055e-05, - "loss": 0.3258, + "learning_rate": 2.3737693663894655e-05, + "loss": 0.2923, "step": 67895 }, { "epoch": 3.17, - "learning_rate": 1.3727438938633915e-05, - "loss": 0.1177, + "learning_rate": 2.3737225594058635e-05, + "loss": 0.0503, "step": 67900 }, { "epoch": 3.17, - "learning_rate": 1.3726970137358775e-05, - "loss": 0.0203, + "learning_rate": 2.3736757524222615e-05, + "loss": 0.0243, "step": 67905 }, { "epoch": 3.17, - "learning_rate": 1.3726501336083635e-05, - "loss": 0.0373, + "learning_rate": 2.3736289454386594e-05, + "loss": 0.023, "step": 67910 }, { "epoch": 3.17, - "learning_rate": 1.3726032534808495e-05, - "loss": 0.0882, + "learning_rate": 2.3735821384550578e-05, + "loss": 0.0768, "step": 67915 }, { "epoch": 3.17, - "learning_rate": 1.3725563733533356e-05, - "loss": 0.1121, + "learning_rate": 2.3735353314714557e-05, + "loss": 0.063, "step": 67920 }, { "epoch": 3.17, - "learning_rate": 1.3725094932258216e-05, - "loss": 0.1319, + "learning_rate": 2.3734885244878537e-05, + "loss": 0.1207, "step": 67925 }, { "epoch": 3.17, - "learning_rate": 1.3724626130983076e-05, - "loss": 0.1484, + "learning_rate": 2.3734417175042514e-05, + "loss": 0.0634, "step": 67930 }, { "epoch": 3.17, - "learning_rate": 1.372415732970794e-05, - "loss": 0.1881, + "learning_rate": 2.3733949105206497e-05, + "loss": 0.1652, "step": 67935 }, { "epoch": 3.17, - "learning_rate": 1.37236885284328e-05, - "loss": 0.1164, + "learning_rate": 2.3733481035370477e-05, + "loss": 0.2386, "step": 67940 }, { "epoch": 3.17, - "learning_rate": 1.372321972715766e-05, - "loss": 0.3235, + "learning_rate": 2.3733012965534457e-05, + "loss": 0.2734, "step": 67945 }, { "epoch": 3.17, - "learning_rate": 1.372275092588252e-05, - "loss": 0.0919, + "learning_rate": 2.373254489569844e-05, + "loss": 0.075, "step": 67950 }, { "epoch": 3.17, - "learning_rate": 1.3722282124607381e-05, - "loss": 0.0481, + "learning_rate": 2.373207682586242e-05, + "loss": 0.046, "step": 67955 }, { "epoch": 3.17, - "learning_rate": 1.372181332333224e-05, - "loss": 0.0297, + "learning_rate": 2.37316087560264e-05, + "loss": 0.0276, "step": 67960 }, { "epoch": 3.17, - "learning_rate": 1.37213445220571e-05, - "loss": 0.0337, + "learning_rate": 2.373114068619038e-05, + "loss": 0.0683, "step": 67965 }, { "epoch": 3.17, - "learning_rate": 1.372087572078196e-05, - "loss": 0.0748, + "learning_rate": 2.3730672616354362e-05, + "loss": 0.0357, "step": 67970 }, { "epoch": 3.17, - "learning_rate": 1.372040691950682e-05, - "loss": 0.0674, + "learning_rate": 2.3730204546518342e-05, + "loss": 0.0862, "step": 67975 }, { "epoch": 3.17, - "learning_rate": 1.3719938118231684e-05, - "loss": 0.08, + "learning_rate": 2.3729736476682322e-05, + "loss": 0.2357, "step": 67980 }, { "epoch": 3.17, - "learning_rate": 1.3719469316956544e-05, - "loss": 0.1805, + "learning_rate": 2.3729268406846302e-05, + "loss": 0.1544, "step": 67985 }, { "epoch": 3.17, - "learning_rate": 1.3719000515681404e-05, - "loss": 0.1943, + "learning_rate": 2.3728800337010285e-05, + "loss": 0.093, "step": 67990 }, { "epoch": 3.17, - "learning_rate": 1.3718531714406265e-05, - "loss": 0.2545, + "learning_rate": 2.372833226717426e-05, + "loss": 0.2396, "step": 67995 }, { "epoch": 3.17, - "learning_rate": 1.3718062913131125e-05, - "loss": 0.077, + "learning_rate": 2.372786419733824e-05, + "loss": 0.0416, "step": 68000 }, { "epoch": 3.17, - "learning_rate": 1.3717594111855985e-05, - "loss": 0.0349, + "learning_rate": 2.3727396127502225e-05, + "loss": 0.0239, "step": 68005 }, { "epoch": 3.17, - "learning_rate": 1.3717125310580845e-05, - "loss": 0.0293, + "learning_rate": 2.3726928057666204e-05, + "loss": 0.0538, "step": 68010 }, { "epoch": 3.17, - "learning_rate": 1.3716656509305705e-05, - "loss": 0.1356, + "learning_rate": 2.3726459987830184e-05, + "loss": 0.0711, "step": 68015 }, { "epoch": 3.17, - "learning_rate": 1.3716187708030567e-05, - "loss": 0.1144, + "learning_rate": 2.3725991917994164e-05, + "loss": 0.0723, "step": 68020 }, { "epoch": 3.17, - "learning_rate": 1.3715718906755427e-05, - "loss": 0.1098, + "learning_rate": 2.3725523848158147e-05, + "loss": 0.1521, "step": 68025 }, { "epoch": 3.17, - "learning_rate": 1.3715250105480288e-05, - "loss": 0.1073, + "learning_rate": 2.3725055778322127e-05, + "loss": 0.1095, "step": 68030 }, { "epoch": 3.17, - "learning_rate": 1.371478130420515e-05, - "loss": 0.1827, + "learning_rate": 2.3724587708486107e-05, + "loss": 0.1553, "step": 68035 }, { "epoch": 3.17, - "learning_rate": 1.371431250293001e-05, - "loss": 0.2102, + "learning_rate": 2.3724119638650087e-05, + "loss": 0.1655, "step": 68040 }, { "epoch": 3.18, - "learning_rate": 1.371384370165487e-05, - "loss": 0.2809, + "learning_rate": 2.372365156881407e-05, + "loss": 0.2012, "step": 68045 }, { "epoch": 3.18, - "learning_rate": 1.371337490037973e-05, - "loss": 0.0934, + "learning_rate": 2.372318349897805e-05, + "loss": 0.061, "step": 68050 }, { "epoch": 3.18, - "learning_rate": 1.371290609910459e-05, - "loss": 0.0633, + "learning_rate": 2.372271542914203e-05, + "loss": 0.0344, "step": 68055 }, { "epoch": 3.18, - "learning_rate": 1.3712437297829451e-05, - "loss": 0.0299, + "learning_rate": 2.3722247359306006e-05, + "loss": 0.0521, "step": 68060 }, { "epoch": 3.18, - "learning_rate": 1.3711968496554311e-05, - "loss": 0.0281, + "learning_rate": 2.372177928946999e-05, + "loss": 0.07, "step": 68065 }, { "epoch": 3.18, - "learning_rate": 1.3711499695279171e-05, - "loss": 0.0962, + "learning_rate": 2.372131121963397e-05, + "loss": 0.0942, "step": 68070 }, { "epoch": 3.18, - "learning_rate": 1.3711030894004035e-05, - "loss": 0.0608, + "learning_rate": 2.372084314979795e-05, + "loss": 0.0627, "step": 68075 }, { "epoch": 3.18, - "learning_rate": 1.3710562092728894e-05, - "loss": 0.1047, + "learning_rate": 2.3720375079961932e-05, + "loss": 0.1349, "step": 68080 }, { "epoch": 3.18, - "learning_rate": 1.3710093291453754e-05, - "loss": 0.1032, + "learning_rate": 2.3719907010125912e-05, + "loss": 0.1144, "step": 68085 }, { "epoch": 3.18, - "learning_rate": 1.3709624490178614e-05, - "loss": 0.2665, + "learning_rate": 2.371943894028989e-05, + "loss": 0.1913, "step": 68090 }, { "epoch": 3.18, - "learning_rate": 1.3709155688903474e-05, - "loss": 0.4392, + "learning_rate": 2.371897087045387e-05, + "loss": 0.2131, "step": 68095 }, { "epoch": 3.18, - "learning_rate": 1.3708686887628336e-05, - "loss": 0.06, + "learning_rate": 2.3718502800617855e-05, + "loss": 0.0327, "step": 68100 }, { "epoch": 3.18, - "learning_rate": 1.3708218086353196e-05, - "loss": 0.0181, + "learning_rate": 2.3718034730781834e-05, + "loss": 0.0291, "step": 68105 }, { "epoch": 3.18, - "learning_rate": 1.3707749285078056e-05, - "loss": 0.0404, + "learning_rate": 2.3717566660945814e-05, + "loss": 0.0325, "step": 68110 }, { "epoch": 3.18, - "learning_rate": 1.3707280483802916e-05, - "loss": 0.0858, + "learning_rate": 2.3717098591109794e-05, + "loss": 0.1472, "step": 68115 }, { "epoch": 3.18, - "learning_rate": 1.3706811682527779e-05, - "loss": 0.091, + "learning_rate": 2.3716630521273774e-05, + "loss": 0.0531, "step": 68120 }, { "epoch": 3.18, - "learning_rate": 1.3706342881252639e-05, - "loss": 0.1108, + "learning_rate": 2.3716162451437754e-05, + "loss": 0.0513, "step": 68125 }, { "epoch": 3.18, - "learning_rate": 1.3705874079977499e-05, - "loss": 0.1246, + "learning_rate": 2.3715694381601734e-05, + "loss": 0.078, "step": 68130 }, { "epoch": 3.18, - "learning_rate": 1.3705405278702359e-05, - "loss": 0.0982, + "learning_rate": 2.3715226311765717e-05, + "loss": 0.1212, "step": 68135 }, { "epoch": 3.18, - "learning_rate": 1.370493647742722e-05, - "loss": 0.2201, + "learning_rate": 2.3714758241929697e-05, + "loss": 0.1769, "step": 68140 }, { "epoch": 3.18, - "learning_rate": 1.370446767615208e-05, - "loss": 0.1963, + "learning_rate": 2.3714290172093676e-05, + "loss": 0.2778, "step": 68145 }, { "epoch": 3.18, - "learning_rate": 1.370399887487694e-05, - "loss": 0.0729, + "learning_rate": 2.3713822102257656e-05, + "loss": 0.1223, "step": 68150 }, { "epoch": 3.18, - "learning_rate": 1.37035300736018e-05, - "loss": 0.0392, + "learning_rate": 2.371335403242164e-05, + "loss": 0.0529, "step": 68155 }, { "epoch": 3.18, - "learning_rate": 1.370306127232666e-05, - "loss": 0.0932, + "learning_rate": 2.371288596258562e-05, + "loss": 0.0431, "step": 68160 }, { "epoch": 3.18, - "learning_rate": 1.3702592471051524e-05, - "loss": 0.0605, + "learning_rate": 2.37124178927496e-05, + "loss": 0.0582, "step": 68165 }, { "epoch": 3.18, - "learning_rate": 1.3702123669776383e-05, - "loss": 0.0771, + "learning_rate": 2.371194982291358e-05, + "loss": 0.0792, "step": 68170 }, { "epoch": 3.18, - "learning_rate": 1.3701654868501243e-05, - "loss": 0.1162, + "learning_rate": 2.3711481753077562e-05, + "loss": 0.0678, "step": 68175 }, { "epoch": 3.18, - "learning_rate": 1.3701186067226105e-05, - "loss": 0.0961, + "learning_rate": 2.3711013683241542e-05, + "loss": 0.1182, "step": 68180 }, { "epoch": 3.18, - "learning_rate": 1.3700717265950965e-05, - "loss": 0.1156, + "learning_rate": 2.3710545613405518e-05, + "loss": 0.0971, "step": 68185 }, { "epoch": 3.18, - "learning_rate": 1.3700248464675825e-05, - "loss": 0.148, + "learning_rate": 2.37100775435695e-05, + "loss": 0.1843, "step": 68190 }, { "epoch": 3.18, - "learning_rate": 1.3699779663400685e-05, - "loss": 0.2526, + "learning_rate": 2.370960947373348e-05, + "loss": 0.3974, "step": 68195 }, { "epoch": 3.18, - "learning_rate": 1.3699310862125545e-05, - "loss": 0.0837, + "learning_rate": 2.370914140389746e-05, + "loss": 0.0813, "step": 68200 }, { "epoch": 3.18, - "learning_rate": 1.3698842060850406e-05, - "loss": 0.0659, + "learning_rate": 2.370867333406144e-05, + "loss": 0.0208, "step": 68205 }, { "epoch": 3.18, - "learning_rate": 1.3698373259575266e-05, - "loss": 0.0308, + "learning_rate": 2.3708205264225424e-05, + "loss": 0.0717, "step": 68210 }, { "epoch": 3.18, - "learning_rate": 1.3697904458300128e-05, - "loss": 0.0924, + "learning_rate": 2.3707737194389404e-05, + "loss": 0.0583, "step": 68215 }, { "epoch": 3.18, - "learning_rate": 1.369743565702499e-05, - "loss": 0.0603, + "learning_rate": 2.3707269124553384e-05, + "loss": 0.0459, "step": 68220 }, { "epoch": 3.18, - "learning_rate": 1.369696685574985e-05, - "loss": 0.0701, + "learning_rate": 2.3706801054717364e-05, + "loss": 0.0378, "step": 68225 }, { "epoch": 3.18, - "learning_rate": 1.369649805447471e-05, - "loss": 0.0573, + "learning_rate": 2.3706332984881347e-05, + "loss": 0.1722, "step": 68230 }, { "epoch": 3.18, - "learning_rate": 1.369602925319957e-05, - "loss": 0.1506, + "learning_rate": 2.3705864915045327e-05, + "loss": 0.1869, "step": 68235 }, { "epoch": 3.18, - "learning_rate": 1.369556045192443e-05, - "loss": 0.6045, + "learning_rate": 2.3705396845209306e-05, + "loss": 0.2312, "step": 68240 }, { "epoch": 3.18, - "learning_rate": 1.3695091650649291e-05, - "loss": 0.303, + "learning_rate": 2.3704928775373286e-05, + "loss": 0.2877, "step": 68245 }, { "epoch": 3.18, - "learning_rate": 1.3694622849374151e-05, - "loss": 0.0841, + "learning_rate": 2.3704460705537266e-05, + "loss": 0.0592, "step": 68250 }, { "epoch": 3.18, - "learning_rate": 1.369415404809901e-05, - "loss": 0.0456, + "learning_rate": 2.3703992635701246e-05, + "loss": 0.0488, "step": 68255 }, { "epoch": 3.19, - "learning_rate": 1.3693685246823874e-05, - "loss": 0.0308, + "learning_rate": 2.3703524565865226e-05, + "loss": 0.0224, "step": 68260 }, { "epoch": 3.19, - "learning_rate": 1.3693216445548734e-05, - "loss": 0.0283, + "learning_rate": 2.370305649602921e-05, + "loss": 0.0387, "step": 68265 }, { "epoch": 3.19, - "learning_rate": 1.3692747644273594e-05, - "loss": 0.069, + "learning_rate": 2.370258842619319e-05, + "loss": 0.0904, "step": 68270 }, { "epoch": 3.19, - "learning_rate": 1.3692278842998454e-05, - "loss": 0.1158, + "learning_rate": 2.370212035635717e-05, + "loss": 0.0659, "step": 68275 }, { "epoch": 3.19, - "learning_rate": 1.3691810041723314e-05, - "loss": 0.1096, + "learning_rate": 2.370165228652115e-05, + "loss": 0.196, "step": 68280 }, { "epoch": 3.19, - "learning_rate": 1.3691341240448175e-05, - "loss": 0.1556, + "learning_rate": 2.370118421668513e-05, + "loss": 0.1272, "step": 68285 }, { "epoch": 3.19, - "learning_rate": 1.3690872439173035e-05, - "loss": 0.1588, + "learning_rate": 2.370071614684911e-05, + "loss": 0.1402, "step": 68290 }, { "epoch": 3.19, - "learning_rate": 1.3690403637897895e-05, - "loss": 0.3156, + "learning_rate": 2.370024807701309e-05, + "loss": 0.2599, "step": 68295 }, { "epoch": 3.19, - "learning_rate": 1.3689934836622755e-05, - "loss": 0.1504, + "learning_rate": 2.369978000717707e-05, + "loss": 0.0541, "step": 68300 }, { "epoch": 3.19, - "learning_rate": 1.3689466035347619e-05, - "loss": 0.0297, + "learning_rate": 2.3699311937341054e-05, + "loss": 0.0487, "step": 68305 }, { "epoch": 3.19, - "learning_rate": 1.3688997234072479e-05, - "loss": 0.0265, + "learning_rate": 2.369884386750503e-05, + "loss": 0.0568, "step": 68310 }, { "epoch": 3.19, - "learning_rate": 1.3688528432797338e-05, - "loss": 0.043, + "learning_rate": 2.369837579766901e-05, + "loss": 0.0463, "step": 68315 }, { "epoch": 3.19, - "learning_rate": 1.3688059631522198e-05, - "loss": 0.2241, + "learning_rate": 2.3697907727832994e-05, + "loss": 0.1055, "step": 68320 }, { "epoch": 3.19, - "learning_rate": 1.368759083024706e-05, - "loss": 0.1192, + "learning_rate": 2.3697439657996974e-05, + "loss": 0.1359, "step": 68325 }, { "epoch": 3.19, - "learning_rate": 1.368712202897192e-05, - "loss": 0.0722, + "learning_rate": 2.3696971588160953e-05, + "loss": 0.0715, "step": 68330 }, { "epoch": 3.19, - "learning_rate": 1.368665322769678e-05, - "loss": 0.17, + "learning_rate": 2.3696503518324933e-05, + "loss": 0.0895, "step": 68335 }, { "epoch": 3.19, - "learning_rate": 1.368618442642164e-05, - "loss": 0.171, + "learning_rate": 2.3696035448488916e-05, + "loss": 0.2457, "step": 68340 }, { "epoch": 3.19, - "learning_rate": 1.3685715625146501e-05, - "loss": 0.2061, + "learning_rate": 2.3695567378652896e-05, + "loss": 0.2251, "step": 68345 }, { "epoch": 3.19, - "learning_rate": 1.3685246823871361e-05, - "loss": 0.0677, + "learning_rate": 2.3695099308816876e-05, + "loss": 0.0943, "step": 68350 }, { "epoch": 3.19, - "learning_rate": 1.3684778022596223e-05, - "loss": 0.022, + "learning_rate": 2.3694631238980856e-05, + "loss": 0.0124, "step": 68355 }, { "epoch": 3.19, - "learning_rate": 1.3684309221321083e-05, - "loss": 0.0457, + "learning_rate": 2.369416316914484e-05, + "loss": 0.0188, "step": 68360 }, { "epoch": 3.19, - "learning_rate": 1.3683840420045945e-05, - "loss": 0.0643, + "learning_rate": 2.369369509930882e-05, + "loss": 0.0502, "step": 68365 }, { "epoch": 3.19, - "learning_rate": 1.3683371618770805e-05, - "loss": 0.0821, + "learning_rate": 2.36932270294728e-05, + "loss": 0.0444, "step": 68370 }, { "epoch": 3.19, - "learning_rate": 1.3682902817495664e-05, - "loss": 0.0636, + "learning_rate": 2.369275895963678e-05, + "loss": 0.1426, "step": 68375 }, { "epoch": 3.19, - "learning_rate": 1.3682434016220524e-05, - "loss": 0.1447, + "learning_rate": 2.3692290889800758e-05, + "loss": 0.0651, "step": 68380 }, { "epoch": 3.19, - "learning_rate": 1.3681965214945386e-05, - "loss": 0.1489, + "learning_rate": 2.3691822819964738e-05, + "loss": 0.1487, "step": 68385 }, { "epoch": 3.19, - "learning_rate": 1.3681496413670246e-05, - "loss": 0.1166, + "learning_rate": 2.3691354750128718e-05, + "loss": 0.2652, "step": 68390 }, { "epoch": 3.19, - "learning_rate": 1.3681027612395106e-05, - "loss": 0.4197, + "learning_rate": 2.36908866802927e-05, + "loss": 0.3836, "step": 68395 }, { "epoch": 3.19, - "learning_rate": 1.3680558811119968e-05, - "loss": 0.0833, + "learning_rate": 2.369041861045668e-05, + "loss": 0.096, "step": 68400 }, { "epoch": 3.19, - "learning_rate": 1.3680090009844829e-05, - "loss": 0.0463, + "learning_rate": 2.368995054062066e-05, + "loss": 0.052, "step": 68405 }, { "epoch": 3.19, - "learning_rate": 1.3679621208569689e-05, - "loss": 0.051, + "learning_rate": 2.368948247078464e-05, + "loss": 0.0243, "step": 68410 }, { "epoch": 3.19, - "learning_rate": 1.3679152407294549e-05, - "loss": 0.0393, + "learning_rate": 2.3689014400948624e-05, + "loss": 0.0392, "step": 68415 }, { "epoch": 3.19, - "learning_rate": 1.3678683606019409e-05, - "loss": 0.0909, + "learning_rate": 2.3688546331112604e-05, + "loss": 0.0626, "step": 68420 }, { "epoch": 3.19, - "learning_rate": 1.367821480474427e-05, - "loss": 0.2237, + "learning_rate": 2.3688078261276583e-05, + "loss": 0.0596, "step": 68425 }, { "epoch": 3.19, - "learning_rate": 1.367774600346913e-05, - "loss": 0.0817, + "learning_rate": 2.3687610191440567e-05, + "loss": 0.137, "step": 68430 }, { "epoch": 3.19, - "learning_rate": 1.367727720219399e-05, - "loss": 0.0923, + "learning_rate": 2.3687142121604543e-05, + "loss": 0.1126, "step": 68435 }, { "epoch": 3.19, - "learning_rate": 1.367680840091885e-05, - "loss": 0.188, + "learning_rate": 2.3686674051768523e-05, + "loss": 0.1159, "step": 68440 }, { "epoch": 3.19, - "learning_rate": 1.3676339599643714e-05, - "loss": 0.3209, + "learning_rate": 2.3686205981932503e-05, + "loss": 0.1933, "step": 68445 }, { "epoch": 3.19, - "learning_rate": 1.3675870798368574e-05, - "loss": 0.0419, + "learning_rate": 2.3685737912096486e-05, + "loss": 0.0643, "step": 68450 }, { "epoch": 3.19, - "learning_rate": 1.3675401997093434e-05, - "loss": 0.0279, + "learning_rate": 2.3685269842260466e-05, + "loss": 0.0158, "step": 68455 }, { "epoch": 3.19, - "learning_rate": 1.3674933195818294e-05, - "loss": 0.0472, + "learning_rate": 2.3684801772424446e-05, + "loss": 0.0642, "step": 68460 }, { "epoch": 3.19, - "learning_rate": 1.3674464394543155e-05, - "loss": 0.0767, + "learning_rate": 2.3684333702588425e-05, + "loss": 0.0229, "step": 68465 }, { "epoch": 3.19, - "learning_rate": 1.3673995593268015e-05, - "loss": 0.039, + "learning_rate": 2.368386563275241e-05, + "loss": 0.1011, "step": 68470 }, { "epoch": 3.2, - "learning_rate": 1.3673526791992875e-05, - "loss": 0.1062, + "learning_rate": 2.368339756291639e-05, + "loss": 0.1019, "step": 68475 }, { "epoch": 3.2, - "learning_rate": 1.3673057990717735e-05, - "loss": 0.0748, + "learning_rate": 2.3682929493080368e-05, + "loss": 0.0926, "step": 68480 }, { "epoch": 3.2, - "learning_rate": 1.3672589189442595e-05, - "loss": 0.2356, + "learning_rate": 2.3682461423244348e-05, + "loss": 0.1488, "step": 68485 }, { "epoch": 3.2, - "learning_rate": 1.3672120388167458e-05, - "loss": 0.2333, + "learning_rate": 2.368199335340833e-05, + "loss": 0.2475, "step": 68490 }, { "epoch": 3.2, - "learning_rate": 1.3671651586892318e-05, - "loss": 0.2404, + "learning_rate": 2.368152528357231e-05, + "loss": 0.2636, "step": 68495 }, { "epoch": 3.2, - "learning_rate": 1.3671182785617178e-05, - "loss": 0.0869, + "learning_rate": 2.3681057213736287e-05, + "loss": 0.0633, "step": 68500 }, { "epoch": 3.2, - "learning_rate": 1.367071398434204e-05, - "loss": 0.0531, + "learning_rate": 2.368058914390027e-05, + "loss": 0.002, "step": 68505 }, { "epoch": 3.2, - "learning_rate": 1.36702451830669e-05, - "loss": 0.075, + "learning_rate": 2.368012107406425e-05, + "loss": 0.0605, "step": 68510 }, { "epoch": 3.2, - "learning_rate": 1.366977638179176e-05, - "loss": 0.0588, + "learning_rate": 2.367965300422823e-05, + "loss": 0.0681, "step": 68515 }, { "epoch": 3.2, - "learning_rate": 1.366930758051662e-05, - "loss": 0.0472, + "learning_rate": 2.367918493439221e-05, + "loss": 0.0526, "step": 68520 }, { "epoch": 3.2, - "learning_rate": 1.366883877924148e-05, - "loss": 0.0844, + "learning_rate": 2.3678716864556193e-05, + "loss": 0.0684, "step": 68525 }, { "epoch": 3.2, - "learning_rate": 1.3668369977966341e-05, - "loss": 0.1068, + "learning_rate": 2.3678248794720173e-05, + "loss": 0.1471, "step": 68530 }, { "epoch": 3.2, - "learning_rate": 1.3667901176691201e-05, - "loss": 0.2066, + "learning_rate": 2.3677780724884153e-05, + "loss": 0.174, "step": 68535 }, { "epoch": 3.2, - "learning_rate": 1.3667432375416063e-05, - "loss": 0.2088, + "learning_rate": 2.3677312655048133e-05, + "loss": 0.1618, "step": 68540 }, { "epoch": 3.2, - "learning_rate": 1.3666963574140924e-05, - "loss": 0.3059, + "learning_rate": 2.3676844585212116e-05, + "loss": 0.3303, "step": 68545 }, { "epoch": 3.2, - "learning_rate": 1.3666494772865784e-05, - "loss": 0.0681, + "learning_rate": 2.3676376515376096e-05, + "loss": 0.0703, "step": 68550 }, { "epoch": 3.2, - "learning_rate": 1.3666025971590644e-05, - "loss": 0.0341, + "learning_rate": 2.3675908445540076e-05, + "loss": 0.026, "step": 68555 }, { "epoch": 3.2, - "learning_rate": 1.3665557170315504e-05, - "loss": 0.0286, + "learning_rate": 2.367544037570406e-05, + "loss": 0.0494, "step": 68560 }, { "epoch": 3.2, - "learning_rate": 1.3665088369040364e-05, - "loss": 0.0394, + "learning_rate": 2.3674972305868035e-05, + "loss": 0.0563, "step": 68565 }, { "epoch": 3.2, - "learning_rate": 1.3664619567765226e-05, - "loss": 0.0279, + "learning_rate": 2.3674504236032015e-05, + "loss": 0.0581, "step": 68570 }, { "epoch": 3.2, - "learning_rate": 1.3664150766490086e-05, - "loss": 0.0567, + "learning_rate": 2.3674036166195995e-05, + "loss": 0.0913, "step": 68575 }, { "epoch": 3.2, - "learning_rate": 1.3663681965214945e-05, - "loss": 0.0947, + "learning_rate": 2.3673568096359978e-05, + "loss": 0.1486, "step": 68580 }, { "epoch": 3.2, - "learning_rate": 1.3663213163939809e-05, - "loss": 0.126, + "learning_rate": 2.3673100026523958e-05, + "loss": 0.0617, "step": 68585 }, { "epoch": 3.2, - "learning_rate": 1.3662744362664669e-05, - "loss": 0.1452, + "learning_rate": 2.3672631956687938e-05, + "loss": 0.2663, "step": 68590 }, { "epoch": 3.2, - "learning_rate": 1.3662275561389529e-05, - "loss": 0.2892, + "learning_rate": 2.3672163886851918e-05, + "loss": 0.2799, "step": 68595 }, { "epoch": 3.2, - "learning_rate": 1.3661806760114389e-05, - "loss": 0.0578, + "learning_rate": 2.36716958170159e-05, + "loss": 0.0775, "step": 68600 }, { "epoch": 3.2, - "learning_rate": 1.3661337958839249e-05, - "loss": 0.0673, + "learning_rate": 2.367122774717988e-05, + "loss": 0.0362, "step": 68605 }, { "epoch": 3.2, - "learning_rate": 1.366086915756411e-05, - "loss": 0.1013, + "learning_rate": 2.367075967734386e-05, + "loss": 0.0363, "step": 68610 }, { "epoch": 3.2, - "learning_rate": 1.366040035628897e-05, - "loss": 0.0439, + "learning_rate": 2.3670291607507844e-05, + "loss": 0.0373, "step": 68615 }, { "epoch": 3.2, - "learning_rate": 1.365993155501383e-05, - "loss": 0.0907, + "learning_rate": 2.3669823537671823e-05, + "loss": 0.0579, "step": 68620 }, { "epoch": 3.2, - "learning_rate": 1.365946275373869e-05, - "loss": 0.1349, + "learning_rate": 2.36693554678358e-05, + "loss": 0.0782, "step": 68625 }, { "epoch": 3.2, - "learning_rate": 1.3658993952463553e-05, - "loss": 0.0859, + "learning_rate": 2.366888739799978e-05, + "loss": 0.1258, "step": 68630 }, { "epoch": 3.2, - "learning_rate": 1.3658525151188413e-05, - "loss": 0.1666, + "learning_rate": 2.3668419328163763e-05, + "loss": 0.0947, "step": 68635 }, { "epoch": 3.2, - "learning_rate": 1.3658056349913273e-05, - "loss": 0.1575, + "learning_rate": 2.3667951258327743e-05, + "loss": 0.1514, "step": 68640 }, { "epoch": 3.2, - "learning_rate": 1.3657587548638133e-05, - "loss": 0.321, + "learning_rate": 2.3667483188491723e-05, + "loss": 0.2676, "step": 68645 }, { "epoch": 3.2, - "learning_rate": 1.3657118747362995e-05, - "loss": 0.0574, + "learning_rate": 2.3667015118655702e-05, + "loss": 0.0799, "step": 68650 }, { "epoch": 3.2, - "learning_rate": 1.3656649946087855e-05, - "loss": 0.054, + "learning_rate": 2.3666547048819686e-05, + "loss": 0.0165, "step": 68655 }, { "epoch": 3.2, - "learning_rate": 1.3656181144812715e-05, - "loss": 0.0162, + "learning_rate": 2.3666078978983665e-05, + "loss": 0.0794, "step": 68660 }, { "epoch": 3.2, - "learning_rate": 1.3655712343537574e-05, - "loss": 0.0822, + "learning_rate": 2.3665610909147645e-05, + "loss": 0.0132, "step": 68665 }, { "epoch": 3.2, - "learning_rate": 1.3655243542262434e-05, - "loss": 0.0454, + "learning_rate": 2.3665142839311625e-05, + "loss": 0.0376, "step": 68670 }, { "epoch": 3.2, - "learning_rate": 1.3654774740987296e-05, - "loss": 0.0711, + "learning_rate": 2.3664674769475608e-05, + "loss": 0.1045, "step": 68675 }, { "epoch": 3.2, - "learning_rate": 1.3654305939712158e-05, - "loss": 0.1444, + "learning_rate": 2.3664206699639588e-05, + "loss": 0.0904, "step": 68680 }, { "epoch": 3.2, - "learning_rate": 1.3653837138437018e-05, - "loss": 0.0962, + "learning_rate": 2.3663738629803568e-05, + "loss": 0.2205, "step": 68685 }, { "epoch": 3.21, - "learning_rate": 1.365336833716188e-05, - "loss": 0.1342, + "learning_rate": 2.3663270559967548e-05, + "loss": 0.1803, "step": 68690 }, { "epoch": 3.21, - "learning_rate": 1.365289953588674e-05, - "loss": 0.2585, + "learning_rate": 2.3662802490131527e-05, + "loss": 0.2258, "step": 68695 }, { "epoch": 3.21, - "learning_rate": 1.3652430734611599e-05, - "loss": 0.0507, + "learning_rate": 2.3662334420295507e-05, + "loss": 0.028, "step": 68700 }, { "epoch": 3.21, - "learning_rate": 1.3651961933336459e-05, - "loss": 0.0703, + "learning_rate": 2.3661866350459487e-05, + "loss": 0.0369, "step": 68705 }, { "epoch": 3.21, - "learning_rate": 1.3651493132061319e-05, - "loss": 0.0602, + "learning_rate": 2.366139828062347e-05, + "loss": 0.0325, "step": 68710 }, { "epoch": 3.21, - "learning_rate": 1.365102433078618e-05, - "loss": 0.0916, + "learning_rate": 2.366093021078745e-05, + "loss": 0.0642, "step": 68715 }, { "epoch": 3.21, - "learning_rate": 1.365055552951104e-05, - "loss": 0.0549, + "learning_rate": 2.366046214095143e-05, + "loss": 0.0518, "step": 68720 }, { "epoch": 3.21, - "learning_rate": 1.3650086728235902e-05, - "loss": 0.0701, + "learning_rate": 2.365999407111541e-05, + "loss": 0.0648, "step": 68725 }, { "epoch": 3.21, - "learning_rate": 1.3649617926960764e-05, - "loss": 0.0628, + "learning_rate": 2.3659526001279393e-05, + "loss": 0.0734, "step": 68730 }, { "epoch": 3.21, - "learning_rate": 1.3649149125685624e-05, - "loss": 0.0906, + "learning_rate": 2.3659057931443373e-05, + "loss": 0.1715, "step": 68735 }, { "epoch": 3.21, - "learning_rate": 1.3648680324410484e-05, - "loss": 0.1594, + "learning_rate": 2.3658589861607353e-05, + "loss": 0.2198, "step": 68740 }, { "epoch": 3.21, - "learning_rate": 1.3648211523135344e-05, - "loss": 0.2705, + "learning_rate": 2.3658121791771336e-05, + "loss": 0.4353, "step": 68745 }, { "epoch": 3.21, - "learning_rate": 1.3647742721860204e-05, - "loss": 0.0764, + "learning_rate": 2.3657653721935316e-05, + "loss": 0.0382, "step": 68750 }, { "epoch": 3.21, - "learning_rate": 1.3647273920585065e-05, - "loss": 0.0282, + "learning_rate": 2.3657185652099292e-05, + "loss": 0.0394, "step": 68755 }, { "epoch": 3.21, - "learning_rate": 1.3646805119309925e-05, - "loss": 0.068, + "learning_rate": 2.3656717582263272e-05, + "loss": 0.0267, "step": 68760 }, { "epoch": 3.21, - "learning_rate": 1.3646336318034785e-05, - "loss": 0.0627, + "learning_rate": 2.3656249512427255e-05, + "loss": 0.0397, "step": 68765 }, { "epoch": 3.21, - "learning_rate": 1.3645867516759648e-05, - "loss": 0.0624, + "learning_rate": 2.3655781442591235e-05, + "loss": 0.0521, "step": 68770 }, { "epoch": 3.21, - "learning_rate": 1.3645398715484508e-05, - "loss": 0.0597, + "learning_rate": 2.3655313372755215e-05, + "loss": 0.0791, "step": 68775 }, { "epoch": 3.21, - "learning_rate": 1.3644929914209368e-05, - "loss": 0.1637, + "learning_rate": 2.3654845302919195e-05, + "loss": 0.0774, "step": 68780 }, { "epoch": 3.21, - "learning_rate": 1.3644461112934228e-05, - "loss": 0.1517, + "learning_rate": 2.3654377233083178e-05, + "loss": 0.1089, "step": 68785 }, { "epoch": 3.21, - "learning_rate": 1.3643992311659088e-05, - "loss": 0.2563, + "learning_rate": 2.3653909163247158e-05, + "loss": 0.1368, "step": 68790 }, { "epoch": 3.21, - "learning_rate": 1.364352351038395e-05, - "loss": 0.2644, + "learning_rate": 2.3653441093411137e-05, + "loss": 0.2883, "step": 68795 }, { "epoch": 3.21, - "learning_rate": 1.364305470910881e-05, - "loss": 0.0429, + "learning_rate": 2.365297302357512e-05, + "loss": 0.0769, "step": 68800 }, { "epoch": 3.21, - "learning_rate": 1.364258590783367e-05, - "loss": 0.008, + "learning_rate": 2.36525049537391e-05, + "loss": 0.0372, "step": 68805 }, { "epoch": 3.21, - "learning_rate": 1.364211710655853e-05, - "loss": 0.0423, + "learning_rate": 2.365203688390308e-05, + "loss": 0.0623, "step": 68810 }, { "epoch": 3.21, - "learning_rate": 1.3641648305283393e-05, - "loss": 0.0706, + "learning_rate": 2.3651568814067057e-05, + "loss": 0.0352, "step": 68815 }, { "epoch": 3.21, - "learning_rate": 1.3641179504008253e-05, - "loss": 0.0768, + "learning_rate": 2.365110074423104e-05, + "loss": 0.0378, "step": 68820 }, { "epoch": 3.21, - "learning_rate": 1.3640710702733113e-05, - "loss": 0.0976, + "learning_rate": 2.365063267439502e-05, + "loss": 0.0713, "step": 68825 }, { "epoch": 3.21, - "learning_rate": 1.3640241901457973e-05, - "loss": 0.0463, + "learning_rate": 2.3650164604559e-05, + "loss": 0.0852, "step": 68830 }, { "epoch": 3.21, - "learning_rate": 1.3639773100182834e-05, - "loss": 0.1814, + "learning_rate": 2.364969653472298e-05, + "loss": 0.2813, "step": 68835 }, { "epoch": 3.21, - "learning_rate": 1.3639304298907694e-05, - "loss": 0.0591, + "learning_rate": 2.3649228464886963e-05, + "loss": 0.24, "step": 68840 }, { "epoch": 3.21, - "learning_rate": 1.3638835497632554e-05, - "loss": 0.302, + "learning_rate": 2.3648760395050942e-05, + "loss": 0.3329, "step": 68845 }, { "epoch": 3.21, - "learning_rate": 1.3638366696357414e-05, - "loss": 0.0831, + "learning_rate": 2.3648292325214922e-05, + "loss": 0.0844, "step": 68850 }, { "epoch": 3.21, - "learning_rate": 1.3637897895082276e-05, - "loss": 0.0241, + "learning_rate": 2.3647824255378902e-05, + "loss": 0.0374, "step": 68855 }, { "epoch": 3.21, - "learning_rate": 1.3637429093807136e-05, - "loss": 0.0345, + "learning_rate": 2.3647356185542885e-05, + "loss": 0.0779, "step": 68860 }, { "epoch": 3.21, - "learning_rate": 1.3636960292531997e-05, - "loss": 0.0299, + "learning_rate": 2.3646888115706865e-05, + "loss": 0.0385, "step": 68865 }, { "epoch": 3.21, - "learning_rate": 1.3636491491256857e-05, - "loss": 0.1228, + "learning_rate": 2.3646420045870845e-05, + "loss": 0.1392, "step": 68870 }, { "epoch": 3.21, - "learning_rate": 1.3636022689981719e-05, - "loss": 0.0709, + "learning_rate": 2.3645951976034828e-05, + "loss": 0.1512, "step": 68875 }, { "epoch": 3.21, - "learning_rate": 1.3635553888706579e-05, - "loss": 0.2013, + "learning_rate": 2.3645483906198804e-05, + "loss": 0.0741, "step": 68880 }, { "epoch": 3.21, - "learning_rate": 1.3635085087431439e-05, - "loss": 0.1109, + "learning_rate": 2.3645015836362784e-05, + "loss": 0.1657, "step": 68885 }, { "epoch": 3.21, - "learning_rate": 1.3634616286156299e-05, - "loss": 0.1237, + "learning_rate": 2.3644547766526764e-05, + "loss": 0.3095, "step": 68890 }, { "epoch": 3.21, - "learning_rate": 1.363414748488116e-05, - "loss": 0.2264, + "learning_rate": 2.3644079696690747e-05, + "loss": 0.1881, "step": 68895 }, { "epoch": 3.21, - "learning_rate": 1.363367868360602e-05, - "loss": 0.0495, + "learning_rate": 2.3643611626854727e-05, + "loss": 0.0749, "step": 68900 }, { "epoch": 3.22, - "learning_rate": 1.363320988233088e-05, - "loss": 0.0278, + "learning_rate": 2.3643143557018707e-05, + "loss": 0.0381, "step": 68905 }, { "epoch": 3.22, - "learning_rate": 1.3632741081055742e-05, - "loss": 0.0226, + "learning_rate": 2.3642675487182687e-05, + "loss": 0.0324, "step": 68910 }, { "epoch": 3.22, - "learning_rate": 1.3632272279780603e-05, - "loss": 0.0756, + "learning_rate": 2.364220741734667e-05, + "loss": 0.0644, "step": 68915 }, { "epoch": 3.22, - "learning_rate": 1.3631803478505463e-05, - "loss": 0.0944, + "learning_rate": 2.364173934751065e-05, + "loss": 0.049, "step": 68920 }, { "epoch": 3.22, - "learning_rate": 1.3631334677230323e-05, - "loss": 0.1001, + "learning_rate": 2.364127127767463e-05, + "loss": 0.0706, "step": 68925 }, { "epoch": 3.22, - "learning_rate": 1.3630865875955183e-05, - "loss": 0.0958, + "learning_rate": 2.3640803207838613e-05, + "loss": 0.0813, "step": 68930 }, { "epoch": 3.22, - "learning_rate": 1.3630397074680045e-05, - "loss": 0.0488, + "learning_rate": 2.3640335138002593e-05, + "loss": 0.1689, "step": 68935 }, { "epoch": 3.22, - "learning_rate": 1.3629928273404905e-05, - "loss": 0.2112, + "learning_rate": 2.3639867068166572e-05, + "loss": 0.156, "step": 68940 }, { "epoch": 3.22, - "learning_rate": 1.3629459472129765e-05, - "loss": 0.311, + "learning_rate": 2.363939899833055e-05, + "loss": 0.1814, "step": 68945 }, { "epoch": 3.22, - "learning_rate": 1.3628990670854625e-05, - "loss": 0.0435, + "learning_rate": 2.3638930928494532e-05, + "loss": 0.0504, "step": 68950 }, { "epoch": 3.22, - "learning_rate": 1.3628521869579488e-05, - "loss": 0.0489, + "learning_rate": 2.3638462858658512e-05, + "loss": 0.0238, "step": 68955 }, { "epoch": 3.22, - "learning_rate": 1.3628053068304348e-05, - "loss": 0.0378, + "learning_rate": 2.363799478882249e-05, + "loss": 0.0613, "step": 68960 }, { "epoch": 3.22, - "learning_rate": 1.3627584267029208e-05, - "loss": 0.0346, + "learning_rate": 2.363752671898647e-05, + "loss": 0.0723, "step": 68965 }, { "epoch": 3.22, - "learning_rate": 1.3627115465754068e-05, - "loss": 0.0223, + "learning_rate": 2.3637058649150455e-05, + "loss": 0.0508, "step": 68970 }, { "epoch": 3.22, - "learning_rate": 1.362664666447893e-05, - "loss": 0.0454, + "learning_rate": 2.3636590579314435e-05, + "loss": 0.049, "step": 68975 }, { "epoch": 3.22, - "learning_rate": 1.362617786320379e-05, - "loss": 0.0608, + "learning_rate": 2.3636122509478414e-05, + "loss": 0.1705, "step": 68980 }, { "epoch": 3.22, - "learning_rate": 1.362570906192865e-05, - "loss": 0.132, + "learning_rate": 2.3635654439642398e-05, + "loss": 0.1424, "step": 68985 }, { "epoch": 3.22, - "learning_rate": 1.3625240260653509e-05, - "loss": 0.1778, + "learning_rate": 2.3635186369806377e-05, + "loss": 0.1512, "step": 68990 }, { "epoch": 3.22, - "learning_rate": 1.3624771459378369e-05, - "loss": 0.3151, + "learning_rate": 2.3634718299970357e-05, + "loss": 0.3093, "step": 68995 }, { "epoch": 3.22, - "learning_rate": 1.362430265810323e-05, - "loss": 0.0527, + "learning_rate": 2.3634250230134337e-05, + "loss": 0.0608, "step": 69000 }, { "epoch": 3.22, - "learning_rate": 1.3623833856828092e-05, - "loss": 0.0827, + "learning_rate": 2.3633782160298317e-05, + "loss": 0.0151, "step": 69005 }, { "epoch": 3.22, - "learning_rate": 1.3623365055552952e-05, - "loss": 0.0368, + "learning_rate": 2.3633314090462297e-05, + "loss": 0.0349, "step": 69010 }, { "epoch": 3.22, - "learning_rate": 1.3622896254277814e-05, - "loss": 0.1629, + "learning_rate": 2.3632846020626276e-05, + "loss": 0.0419, "step": 69015 }, { "epoch": 3.22, - "learning_rate": 1.3622427453002674e-05, - "loss": 0.1078, + "learning_rate": 2.3632377950790256e-05, + "loss": 0.0632, "step": 69020 }, { "epoch": 3.22, - "learning_rate": 1.3621958651727534e-05, - "loss": 0.1879, + "learning_rate": 2.363190988095424e-05, + "loss": 0.1073, "step": 69025 }, { "epoch": 3.22, - "learning_rate": 1.3621489850452394e-05, - "loss": 0.1383, + "learning_rate": 2.363144181111822e-05, + "loss": 0.1258, "step": 69030 }, { "epoch": 3.22, - "learning_rate": 1.3621021049177254e-05, - "loss": 0.1159, + "learning_rate": 2.36309737412822e-05, + "loss": 0.1289, "step": 69035 }, { "epoch": 3.22, - "learning_rate": 1.3620552247902115e-05, - "loss": 0.3463, + "learning_rate": 2.3630505671446182e-05, + "loss": 0.269, "step": 69040 }, { "epoch": 3.22, - "learning_rate": 1.3620083446626975e-05, - "loss": 0.2195, + "learning_rate": 2.3630037601610162e-05, + "loss": 0.2948, "step": 69045 }, { "epoch": 3.22, - "learning_rate": 1.3619614645351837e-05, - "loss": 0.0743, + "learning_rate": 2.3629569531774142e-05, + "loss": 0.0486, "step": 69050 }, { "epoch": 3.22, - "learning_rate": 1.3619145844076698e-05, - "loss": 0.031, + "learning_rate": 2.3629101461938122e-05, + "loss": 0.0168, "step": 69055 }, { "epoch": 3.22, - "learning_rate": 1.3618677042801558e-05, - "loss": 0.0395, + "learning_rate": 2.3628633392102105e-05, + "loss": 0.103, "step": 69060 }, { "epoch": 3.22, - "learning_rate": 1.3618208241526418e-05, - "loss": 0.0818, + "learning_rate": 2.3628165322266085e-05, + "loss": 0.014, "step": 69065 }, { "epoch": 3.22, - "learning_rate": 1.3617739440251278e-05, - "loss": 0.0429, + "learning_rate": 2.362769725243006e-05, + "loss": 0.0649, "step": 69070 }, { "epoch": 3.22, - "learning_rate": 1.3617270638976138e-05, - "loss": 0.077, + "learning_rate": 2.362722918259404e-05, + "loss": 0.0723, "step": 69075 }, { "epoch": 3.22, - "learning_rate": 1.3616801837701e-05, - "loss": 0.1415, + "learning_rate": 2.3626761112758024e-05, + "loss": 0.054, "step": 69080 }, { "epoch": 3.22, - "learning_rate": 1.361633303642586e-05, - "loss": 0.1771, + "learning_rate": 2.3626293042922004e-05, + "loss": 0.075, "step": 69085 }, { "epoch": 3.22, - "learning_rate": 1.361586423515072e-05, - "loss": 0.2503, + "learning_rate": 2.3625824973085984e-05, + "loss": 0.1369, "step": 69090 }, { "epoch": 3.22, - "learning_rate": 1.3615395433875583e-05, - "loss": 0.2144, + "learning_rate": 2.3625356903249964e-05, + "loss": 0.4244, "step": 69095 }, { "epoch": 3.22, - "learning_rate": 1.3614926632600443e-05, - "loss": 0.0527, + "learning_rate": 2.3624888833413947e-05, + "loss": 0.1103, "step": 69100 }, { "epoch": 3.22, - "learning_rate": 1.3614457831325303e-05, - "loss": 0.0181, + "learning_rate": 2.3624420763577927e-05, + "loss": 0.0445, "step": 69105 }, { "epoch": 3.22, - "learning_rate": 1.3613989030050163e-05, - "loss": 0.0112, + "learning_rate": 2.3623952693741907e-05, + "loss": 0.0337, "step": 69110 }, { "epoch": 3.23, - "learning_rate": 1.3613520228775023e-05, - "loss": 0.1278, + "learning_rate": 2.362348462390589e-05, + "loss": 0.0429, "step": 69115 }, { "epoch": 3.23, - "learning_rate": 1.3613051427499884e-05, - "loss": 0.0671, + "learning_rate": 2.362301655406987e-05, + "loss": 0.0601, "step": 69120 }, { "epoch": 3.23, - "learning_rate": 1.3612582626224744e-05, - "loss": 0.0654, + "learning_rate": 2.362254848423385e-05, + "loss": 0.0699, "step": 69125 }, { "epoch": 3.23, - "learning_rate": 1.3612113824949604e-05, - "loss": 0.1508, + "learning_rate": 2.362208041439783e-05, + "loss": 0.1007, "step": 69130 }, { "epoch": 3.23, - "learning_rate": 1.3611645023674464e-05, - "loss": 0.1947, + "learning_rate": 2.362161234456181e-05, + "loss": 0.1248, "step": 69135 }, { "epoch": 3.23, - "learning_rate": 1.3611176222399327e-05, - "loss": 0.2565, + "learning_rate": 2.362114427472579e-05, + "loss": 0.2458, "step": 69140 }, { "epoch": 3.23, - "learning_rate": 1.3610707421124187e-05, - "loss": 0.2243, + "learning_rate": 2.362067620488977e-05, + "loss": 0.2299, "step": 69145 }, { "epoch": 3.23, - "learning_rate": 1.3610238619849047e-05, - "loss": 0.0372, + "learning_rate": 2.362020813505375e-05, + "loss": 0.0997, "step": 69150 }, { "epoch": 3.23, - "learning_rate": 1.3609769818573907e-05, - "loss": 0.025, + "learning_rate": 2.361974006521773e-05, + "loss": 0.0231, "step": 69155 }, { "epoch": 3.23, - "learning_rate": 1.3609301017298769e-05, - "loss": 0.0489, + "learning_rate": 2.361927199538171e-05, + "loss": 0.0331, "step": 69160 }, { "epoch": 3.23, - "learning_rate": 1.3608832216023629e-05, - "loss": 0.0977, + "learning_rate": 2.361880392554569e-05, + "loss": 0.0943, "step": 69165 }, { "epoch": 3.23, - "learning_rate": 1.3608363414748489e-05, - "loss": 0.0823, + "learning_rate": 2.3618335855709675e-05, + "loss": 0.0297, "step": 69170 }, { "epoch": 3.23, - "learning_rate": 1.3607894613473349e-05, - "loss": 0.0635, + "learning_rate": 2.3617867785873654e-05, + "loss": 0.073, "step": 69175 }, { "epoch": 3.23, - "learning_rate": 1.3607425812198209e-05, - "loss": 0.0701, + "learning_rate": 2.3617399716037634e-05, + "loss": 0.1791, "step": 69180 }, { "epoch": 3.23, - "learning_rate": 1.360695701092307e-05, - "loss": 0.1179, + "learning_rate": 2.3616931646201614e-05, + "loss": 0.1171, "step": 69185 }, { "epoch": 3.23, - "learning_rate": 1.3606488209647932e-05, - "loss": 0.118, + "learning_rate": 2.3616463576365597e-05, + "loss": 0.2477, "step": 69190 }, { "epoch": 3.23, - "learning_rate": 1.3606019408372792e-05, - "loss": 0.2901, + "learning_rate": 2.3615995506529574e-05, + "loss": 0.2315, "step": 69195 }, { "epoch": 3.23, - "learning_rate": 1.3605550607097653e-05, - "loss": 0.082, + "learning_rate": 2.3615527436693553e-05, + "loss": 0.0704, "step": 69200 }, { "epoch": 3.23, - "learning_rate": 1.3605081805822513e-05, - "loss": 0.0496, + "learning_rate": 2.3615059366857533e-05, + "loss": 0.0921, "step": 69205 }, { "epoch": 3.23, - "learning_rate": 1.3604613004547373e-05, - "loss": 0.0609, + "learning_rate": 2.3614591297021516e-05, + "loss": 0.0296, "step": 69210 }, { "epoch": 3.23, - "learning_rate": 1.3604144203272233e-05, - "loss": 0.1, + "learning_rate": 2.3614123227185496e-05, + "loss": 0.0447, "step": 69215 }, { "epoch": 3.23, - "learning_rate": 1.3603675401997093e-05, - "loss": 0.0398, + "learning_rate": 2.3613655157349476e-05, + "loss": 0.0722, "step": 69220 }, { "epoch": 3.23, - "learning_rate": 1.3603206600721955e-05, - "loss": 0.097, + "learning_rate": 2.361318708751346e-05, + "loss": 0.0718, "step": 69225 }, { "epoch": 3.23, - "learning_rate": 1.3602737799446815e-05, - "loss": 0.135, + "learning_rate": 2.361271901767744e-05, + "loss": 0.0921, "step": 69230 }, { "epoch": 3.23, - "learning_rate": 1.3602268998171676e-05, - "loss": 0.1163, + "learning_rate": 2.361225094784142e-05, + "loss": 0.1169, "step": 69235 }, { "epoch": 3.23, - "learning_rate": 1.3601800196896538e-05, - "loss": 0.2599, + "learning_rate": 2.36117828780054e-05, + "loss": 0.1596, "step": 69240 }, { "epoch": 3.23, - "learning_rate": 1.3601331395621398e-05, - "loss": 0.2447, + "learning_rate": 2.3611314808169382e-05, + "loss": 0.2162, "step": 69245 }, { "epoch": 3.23, - "learning_rate": 1.3600862594346258e-05, - "loss": 0.0495, + "learning_rate": 2.3610846738333362e-05, + "loss": 0.0738, "step": 69250 }, { "epoch": 3.23, - "learning_rate": 1.3600393793071118e-05, - "loss": 0.0333, + "learning_rate": 2.361037866849734e-05, + "loss": 0.052, "step": 69255 }, { "epoch": 3.23, - "learning_rate": 1.3599924991795978e-05, - "loss": 0.0179, + "learning_rate": 2.3609910598661318e-05, + "loss": 0.0361, "step": 69260 }, { "epoch": 3.23, - "learning_rate": 1.359945619052084e-05, - "loss": 0.061, + "learning_rate": 2.36094425288253e-05, + "loss": 0.0917, "step": 69265 }, { "epoch": 3.23, - "learning_rate": 1.35989873892457e-05, - "loss": 0.1018, + "learning_rate": 2.360897445898928e-05, + "loss": 0.1065, "step": 69270 }, { "epoch": 3.23, - "learning_rate": 1.359851858797056e-05, - "loss": 0.0976, + "learning_rate": 2.360850638915326e-05, + "loss": 0.0586, "step": 69275 }, { "epoch": 3.23, - "learning_rate": 1.3598049786695423e-05, - "loss": 0.1241, + "learning_rate": 2.360803831931724e-05, + "loss": 0.0394, "step": 69280 }, { "epoch": 3.23, - "learning_rate": 1.3597580985420283e-05, - "loss": 0.1119, + "learning_rate": 2.3607570249481224e-05, + "loss": 0.1449, "step": 69285 }, { "epoch": 3.23, - "learning_rate": 1.3597112184145142e-05, - "loss": 0.1587, + "learning_rate": 2.3607102179645204e-05, + "loss": 0.1011, "step": 69290 }, { "epoch": 3.23, - "learning_rate": 1.3596643382870002e-05, - "loss": 0.1354, + "learning_rate": 2.3606634109809184e-05, + "loss": 0.4263, "step": 69295 }, { "epoch": 3.23, - "learning_rate": 1.3596174581594862e-05, - "loss": 0.0734, + "learning_rate": 2.3606166039973167e-05, + "loss": 0.052, "step": 69300 }, { "epoch": 3.23, - "learning_rate": 1.3595705780319724e-05, - "loss": 0.0203, + "learning_rate": 2.3605697970137147e-05, + "loss": 0.0171, "step": 69305 }, { "epoch": 3.23, - "learning_rate": 1.3595236979044584e-05, - "loss": 0.0254, + "learning_rate": 2.3605229900301126e-05, + "loss": 0.0635, "step": 69310 }, { "epoch": 3.23, - "learning_rate": 1.3594768177769444e-05, - "loss": 0.0609, + "learning_rate": 2.3604761830465106e-05, + "loss": 0.055, "step": 69315 }, { "epoch": 3.23, - "learning_rate": 1.3594299376494304e-05, - "loss": 0.1144, + "learning_rate": 2.3604293760629086e-05, + "loss": 0.0389, "step": 69320 }, { "epoch": 3.23, - "learning_rate": 1.3593830575219165e-05, - "loss": 0.1696, + "learning_rate": 2.3603825690793066e-05, + "loss": 0.0951, "step": 69325 }, { "epoch": 3.24, - "learning_rate": 1.3593361773944027e-05, - "loss": 0.1502, + "learning_rate": 2.3603357620957046e-05, + "loss": 0.1193, "step": 69330 }, { "epoch": 3.24, - "learning_rate": 1.3592892972668887e-05, - "loss": 0.103, + "learning_rate": 2.3602889551121025e-05, + "loss": 0.1097, "step": 69335 }, { "epoch": 3.24, - "learning_rate": 1.3592424171393747e-05, - "loss": 0.3154, + "learning_rate": 2.360242148128501e-05, + "loss": 0.21, "step": 69340 }, { "epoch": 3.24, - "learning_rate": 1.3591955370118608e-05, - "loss": 0.2723, + "learning_rate": 2.360195341144899e-05, + "loss": 0.3132, "step": 69345 }, { "epoch": 3.24, - "learning_rate": 1.3591486568843468e-05, - "loss": 0.0576, + "learning_rate": 2.3601485341612968e-05, + "loss": 0.0613, "step": 69350 }, { "epoch": 3.24, - "learning_rate": 1.3591017767568328e-05, - "loss": 0.0225, + "learning_rate": 2.360101727177695e-05, + "loss": 0.0076, "step": 69355 }, { "epoch": 3.24, - "learning_rate": 1.3590548966293188e-05, - "loss": 0.0424, + "learning_rate": 2.360054920194093e-05, + "loss": 0.0277, "step": 69360 }, { "epoch": 3.24, - "learning_rate": 1.359008016501805e-05, - "loss": 0.0495, + "learning_rate": 2.360008113210491e-05, + "loss": 0.0903, "step": 69365 }, { "epoch": 3.24, - "learning_rate": 1.358961136374291e-05, - "loss": 0.0497, + "learning_rate": 2.359961306226889e-05, + "loss": 0.0825, "step": 69370 }, { "epoch": 3.24, - "learning_rate": 1.3589142562467771e-05, - "loss": 0.0975, + "learning_rate": 2.3599144992432874e-05, + "loss": 0.1125, "step": 69375 }, { "epoch": 3.24, - "learning_rate": 1.3588673761192633e-05, - "loss": 0.1527, + "learning_rate": 2.3598676922596854e-05, + "loss": 0.0841, "step": 69380 }, { "epoch": 3.24, - "learning_rate": 1.3588204959917493e-05, - "loss": 0.1149, + "learning_rate": 2.359820885276083e-05, + "loss": 0.104, "step": 69385 }, { "epoch": 3.24, - "learning_rate": 1.3587736158642353e-05, - "loss": 0.1807, + "learning_rate": 2.359774078292481e-05, + "loss": 0.2057, "step": 69390 }, { "epoch": 3.24, - "learning_rate": 1.3587267357367213e-05, - "loss": 0.1963, + "learning_rate": 2.3597272713088793e-05, + "loss": 0.2698, "step": 69395 }, { "epoch": 3.24, - "learning_rate": 1.3586798556092073e-05, - "loss": 0.0231, + "learning_rate": 2.3596804643252773e-05, + "loss": 0.0506, "step": 69400 }, { "epoch": 3.24, - "learning_rate": 1.3586329754816934e-05, - "loss": 0.0145, + "learning_rate": 2.3596336573416753e-05, + "loss": 0.012, "step": 69405 }, { "epoch": 3.24, - "learning_rate": 1.3585860953541794e-05, - "loss": 0.026, + "learning_rate": 2.3595868503580736e-05, + "loss": 0.0381, "step": 69410 }, { "epoch": 3.24, - "learning_rate": 1.3585392152266654e-05, - "loss": 0.0397, + "learning_rate": 2.3595400433744716e-05, + "loss": 0.0578, "step": 69415 }, { "epoch": 3.24, - "learning_rate": 1.3584923350991518e-05, - "loss": 0.0624, + "learning_rate": 2.3594932363908696e-05, + "loss": 0.0741, "step": 69420 }, { "epoch": 3.24, - "learning_rate": 1.3584454549716378e-05, - "loss": 0.1085, + "learning_rate": 2.3594464294072676e-05, + "loss": 0.1458, "step": 69425 }, { "epoch": 3.24, - "learning_rate": 1.3583985748441238e-05, - "loss": 0.1823, + "learning_rate": 2.359399622423666e-05, + "loss": 0.1615, "step": 69430 }, { "epoch": 3.24, - "learning_rate": 1.3583516947166097e-05, - "loss": 0.1782, + "learning_rate": 2.359352815440064e-05, + "loss": 0.1027, "step": 69435 }, { "epoch": 3.24, - "learning_rate": 1.3583048145890957e-05, - "loss": 0.2047, + "learning_rate": 2.359306008456462e-05, + "loss": 0.126, "step": 69440 }, { "epoch": 3.24, - "learning_rate": 1.3582579344615819e-05, - "loss": 0.3008, + "learning_rate": 2.35925920147286e-05, + "loss": 0.2118, "step": 69445 }, { "epoch": 3.24, - "learning_rate": 1.3582110543340679e-05, - "loss": 0.0884, + "learning_rate": 2.3592123944892578e-05, + "loss": 0.1057, "step": 69450 }, { "epoch": 3.24, - "learning_rate": 1.3581641742065539e-05, - "loss": 0.0262, + "learning_rate": 2.3591655875056558e-05, + "loss": 0.0298, "step": 69455 }, { "epoch": 3.24, - "learning_rate": 1.3581172940790399e-05, - "loss": 0.0235, + "learning_rate": 2.3591187805220538e-05, + "loss": 0.0155, "step": 69460 }, { "epoch": 3.24, - "learning_rate": 1.3580704139515262e-05, - "loss": 0.0497, + "learning_rate": 2.3590719735384518e-05, + "loss": 0.0378, "step": 69465 }, { "epoch": 3.24, - "learning_rate": 1.3580235338240122e-05, - "loss": 0.037, + "learning_rate": 2.35902516655485e-05, + "loss": 0.1025, "step": 69470 }, { "epoch": 3.24, - "learning_rate": 1.3579766536964982e-05, - "loss": 0.0763, + "learning_rate": 2.358978359571248e-05, + "loss": 0.0652, "step": 69475 }, { "epoch": 3.24, - "learning_rate": 1.3579297735689842e-05, - "loss": 0.1038, + "learning_rate": 2.358931552587646e-05, + "loss": 0.0589, "step": 69480 }, { "epoch": 3.24, - "learning_rate": 1.3578828934414704e-05, - "loss": 0.2086, + "learning_rate": 2.3588847456040444e-05, + "loss": 0.1236, "step": 69485 }, { "epoch": 3.24, - "learning_rate": 1.3578360133139563e-05, - "loss": 0.1528, + "learning_rate": 2.3588379386204424e-05, + "loss": 0.1658, "step": 69490 }, { "epoch": 3.24, - "learning_rate": 1.3577891331864423e-05, - "loss": 0.2077, + "learning_rate": 2.3587911316368403e-05, + "loss": 0.2628, "step": 69495 }, { "epoch": 3.24, - "learning_rate": 1.3577422530589283e-05, - "loss": 0.0728, + "learning_rate": 2.3587443246532383e-05, + "loss": 0.0581, "step": 69500 }, { "epoch": 3.24, - "learning_rate": 1.3576953729314143e-05, - "loss": 0.0365, + "learning_rate": 2.3586975176696366e-05, + "loss": 0.0699, "step": 69505 }, { "epoch": 3.24, - "learning_rate": 1.3576484928039005e-05, - "loss": 0.031, + "learning_rate": 2.3586507106860343e-05, + "loss": 0.067, "step": 69510 }, { "epoch": 3.24, - "learning_rate": 1.3576016126763867e-05, - "loss": 0.039, + "learning_rate": 2.3586039037024323e-05, + "loss": 0.0523, "step": 69515 }, { "epoch": 3.24, - "learning_rate": 1.3575547325488726e-05, - "loss": 0.0433, + "learning_rate": 2.3585570967188302e-05, + "loss": 0.0593, "step": 69520 }, { "epoch": 3.24, - "learning_rate": 1.3575078524213588e-05, - "loss": 0.0939, + "learning_rate": 2.3585102897352286e-05, + "loss": 0.0986, "step": 69525 }, { "epoch": 3.24, - "learning_rate": 1.3574609722938448e-05, - "loss": 0.1011, + "learning_rate": 2.3584634827516265e-05, + "loss": 0.1765, "step": 69530 }, { "epoch": 3.24, - "learning_rate": 1.3574140921663308e-05, - "loss": 0.1292, + "learning_rate": 2.3584166757680245e-05, + "loss": 0.203, "step": 69535 }, { "epoch": 3.24, - "learning_rate": 1.3573672120388168e-05, - "loss": 0.2253, + "learning_rate": 2.358369868784423e-05, + "loss": 0.2439, "step": 69540 }, { "epoch": 3.25, - "learning_rate": 1.3573203319113028e-05, - "loss": 0.3289, + "learning_rate": 2.3583230618008208e-05, + "loss": 0.2671, "step": 69545 }, { "epoch": 3.25, - "learning_rate": 1.357273451783789e-05, - "loss": 0.0613, + "learning_rate": 2.3582762548172188e-05, + "loss": 0.0425, "step": 69550 }, { "epoch": 3.25, - "learning_rate": 1.357226571656275e-05, - "loss": 0.0207, + "learning_rate": 2.3582294478336168e-05, + "loss": 0.051, "step": 69555 }, { "epoch": 3.25, - "learning_rate": 1.3571796915287611e-05, - "loss": 0.0707, + "learning_rate": 2.358182640850015e-05, + "loss": 0.0553, "step": 69560 }, { "epoch": 3.25, - "learning_rate": 1.3571328114012473e-05, - "loss": 0.1089, + "learning_rate": 2.358135833866413e-05, + "loss": 0.076, "step": 69565 }, { "epoch": 3.25, - "learning_rate": 1.3570859312737333e-05, - "loss": 0.0599, + "learning_rate": 2.358089026882811e-05, + "loss": 0.0695, "step": 69570 }, { "epoch": 3.25, - "learning_rate": 1.3570390511462193e-05, - "loss": 0.1222, + "learning_rate": 2.3580422198992087e-05, + "loss": 0.0521, "step": 69575 }, { "epoch": 3.25, - "learning_rate": 1.3569921710187052e-05, - "loss": 0.1018, + "learning_rate": 2.357995412915607e-05, + "loss": 0.209, "step": 69580 }, { "epoch": 3.25, - "learning_rate": 1.3569452908911912e-05, - "loss": 0.1716, + "learning_rate": 2.357948605932005e-05, + "loss": 0.176, "step": 69585 }, { "epoch": 3.25, - "learning_rate": 1.3568984107636774e-05, - "loss": 0.2636, + "learning_rate": 2.357901798948403e-05, + "loss": 0.2615, "step": 69590 }, { "epoch": 3.25, - "learning_rate": 1.3568515306361634e-05, - "loss": 0.2356, + "learning_rate": 2.3578549919648013e-05, + "loss": 0.3569, "step": 69595 }, { "epoch": 3.25, - "learning_rate": 1.3568046505086494e-05, - "loss": 0.0779, + "learning_rate": 2.3578081849811993e-05, + "loss": 0.062, "step": 69600 }, { "epoch": 3.25, - "learning_rate": 1.3567577703811357e-05, - "loss": 0.0668, + "learning_rate": 2.3577613779975973e-05, + "loss": 0.0198, "step": 69605 }, { "epoch": 3.25, - "learning_rate": 1.3567108902536217e-05, - "loss": 0.0626, + "learning_rate": 2.3577145710139953e-05, + "loss": 0.0487, "step": 69610 }, { "epoch": 3.25, - "learning_rate": 1.3566640101261077e-05, - "loss": 0.0641, + "learning_rate": 2.3576677640303936e-05, + "loss": 0.0488, "step": 69615 }, { "epoch": 3.25, - "learning_rate": 1.3566171299985937e-05, - "loss": 0.1447, + "learning_rate": 2.3576209570467916e-05, + "loss": 0.0649, "step": 69620 }, { "epoch": 3.25, - "learning_rate": 1.3565702498710797e-05, - "loss": 0.0985, + "learning_rate": 2.3575741500631896e-05, + "loss": 0.0976, "step": 69625 }, { "epoch": 3.25, - "learning_rate": 1.3565233697435659e-05, - "loss": 0.0882, + "learning_rate": 2.3575273430795875e-05, + "loss": 0.1576, "step": 69630 }, { "epoch": 3.25, - "learning_rate": 1.3564764896160519e-05, - "loss": 0.1848, + "learning_rate": 2.3574805360959855e-05, + "loss": 0.2095, "step": 69635 }, { "epoch": 3.25, - "learning_rate": 1.3564296094885378e-05, - "loss": 0.1694, + "learning_rate": 2.3574337291123835e-05, + "loss": 0.1859, "step": 69640 }, { "epoch": 3.25, - "learning_rate": 1.3563827293610238e-05, - "loss": 0.2375, + "learning_rate": 2.3573869221287815e-05, + "loss": 0.197, "step": 69645 }, { "epoch": 3.25, - "learning_rate": 1.3563358492335098e-05, - "loss": 0.0846, + "learning_rate": 2.3573401151451798e-05, + "loss": 0.0577, "step": 69650 }, { "epoch": 3.25, - "learning_rate": 1.3562889691059962e-05, - "loss": 0.0379, + "learning_rate": 2.3572933081615778e-05, + "loss": 0.0355, "step": 69655 }, { "epoch": 3.25, - "learning_rate": 1.3562420889784822e-05, - "loss": 0.0343, + "learning_rate": 2.3572465011779758e-05, + "loss": 0.039, "step": 69660 }, { "epoch": 3.25, - "learning_rate": 1.3561952088509682e-05, - "loss": 0.0637, + "learning_rate": 2.3571996941943737e-05, + "loss": 0.0169, "step": 69665 }, { "epoch": 3.25, - "learning_rate": 1.3561483287234543e-05, - "loss": 0.1055, + "learning_rate": 2.357152887210772e-05, + "loss": 0.034, "step": 69670 }, { "epoch": 3.25, - "learning_rate": 1.3561014485959403e-05, - "loss": 0.0717, + "learning_rate": 2.35710608022717e-05, + "loss": 0.0905, "step": 69675 }, { "epoch": 3.25, - "learning_rate": 1.3560545684684263e-05, - "loss": 0.1089, + "learning_rate": 2.357059273243568e-05, + "loss": 0.0913, "step": 69680 }, { "epoch": 3.25, - "learning_rate": 1.3560076883409123e-05, - "loss": 0.1697, + "learning_rate": 2.357012466259966e-05, + "loss": 0.0716, "step": 69685 }, { "epoch": 3.25, - "learning_rate": 1.3559608082133985e-05, - "loss": 0.2271, + "learning_rate": 2.3569656592763643e-05, + "loss": 0.1237, "step": 69690 }, { "epoch": 3.25, - "learning_rate": 1.3559139280858844e-05, - "loss": 0.2439, + "learning_rate": 2.3569188522927623e-05, + "loss": 0.3668, "step": 69695 }, { "epoch": 3.25, - "learning_rate": 1.3558670479583706e-05, - "loss": 0.0538, + "learning_rate": 2.35687204530916e-05, + "loss": 0.0475, "step": 69700 }, { "epoch": 3.25, - "learning_rate": 1.3558201678308566e-05, - "loss": 0.0417, + "learning_rate": 2.356825238325558e-05, + "loss": 0.0283, "step": 69705 }, { "epoch": 3.25, - "learning_rate": 1.3557732877033428e-05, - "loss": 0.0547, + "learning_rate": 2.3567784313419563e-05, + "loss": 0.0459, "step": 69710 }, { "epoch": 3.25, - "learning_rate": 1.3557264075758288e-05, - "loss": 0.1037, + "learning_rate": 2.3567316243583542e-05, + "loss": 0.035, "step": 69715 }, { "epoch": 3.25, - "learning_rate": 1.3556795274483148e-05, - "loss": 0.102, + "learning_rate": 2.3566848173747522e-05, + "loss": 0.0581, "step": 69720 }, { "epoch": 3.25, - "learning_rate": 1.3556326473208007e-05, - "loss": 0.049, + "learning_rate": 2.3566380103911505e-05, + "loss": 0.0642, "step": 69725 }, { "epoch": 3.25, - "learning_rate": 1.3555857671932869e-05, - "loss": 0.1282, + "learning_rate": 2.3565912034075485e-05, + "loss": 0.0512, "step": 69730 }, { "epoch": 3.25, - "learning_rate": 1.3555388870657729e-05, - "loss": 0.1482, + "learning_rate": 2.3565443964239465e-05, + "loss": 0.18, "step": 69735 }, { "epoch": 3.25, - "learning_rate": 1.3554920069382589e-05, - "loss": 0.1134, + "learning_rate": 2.3564975894403445e-05, + "loss": 0.1584, "step": 69740 }, { "epoch": 3.25, - "learning_rate": 1.355445126810745e-05, - "loss": 0.167, + "learning_rate": 2.3564507824567428e-05, + "loss": 0.2448, "step": 69745 }, { "epoch": 3.25, - "learning_rate": 1.3553982466832312e-05, - "loss": 0.0429, + "learning_rate": 2.3564039754731408e-05, + "loss": 0.0648, "step": 69750 }, { "epoch": 3.25, - "learning_rate": 1.3553513665557172e-05, - "loss": 0.0229, + "learning_rate": 2.3563571684895388e-05, + "loss": 0.0562, "step": 69755 }, { "epoch": 3.26, - "learning_rate": 1.3553044864282032e-05, - "loss": 0.0685, + "learning_rate": 2.3563103615059368e-05, + "loss": 0.0337, "step": 69760 }, { "epoch": 3.26, - "learning_rate": 1.3552576063006892e-05, - "loss": 0.0867, + "learning_rate": 2.3562635545223347e-05, + "loss": 0.0573, "step": 69765 }, { "epoch": 3.26, - "learning_rate": 1.3552107261731754e-05, - "loss": 0.1179, + "learning_rate": 2.3562167475387327e-05, + "loss": 0.0728, "step": 69770 }, { "epoch": 3.26, - "learning_rate": 1.3551638460456614e-05, - "loss": 0.0445, + "learning_rate": 2.3561699405551307e-05, + "loss": 0.1, "step": 69775 }, { "epoch": 3.26, - "learning_rate": 1.3551169659181474e-05, - "loss": 0.1649, + "learning_rate": 2.356123133571529e-05, + "loss": 0.0638, "step": 69780 }, { "epoch": 3.26, - "learning_rate": 1.3550700857906333e-05, - "loss": 0.1136, + "learning_rate": 2.356076326587927e-05, + "loss": 0.0686, "step": 69785 }, { "epoch": 3.26, - "learning_rate": 1.3550232056631197e-05, - "loss": 0.1493, + "learning_rate": 2.356029519604325e-05, + "loss": 0.2265, "step": 69790 }, { "epoch": 3.26, - "learning_rate": 1.3549763255356057e-05, - "loss": 0.3378, + "learning_rate": 2.355982712620723e-05, + "loss": 0.1651, "step": 69795 }, { "epoch": 3.26, - "learning_rate": 1.3549294454080917e-05, - "loss": 0.0582, + "learning_rate": 2.3559359056371213e-05, + "loss": 0.0527, "step": 69800 }, { "epoch": 3.26, - "learning_rate": 1.3548825652805777e-05, - "loss": 0.0214, + "learning_rate": 2.3558890986535193e-05, + "loss": 0.0103, "step": 69805 }, { "epoch": 3.26, - "learning_rate": 1.3548356851530638e-05, - "loss": 0.0592, + "learning_rate": 2.3558422916699172e-05, + "loss": 0.0314, "step": 69810 }, { "epoch": 3.26, - "learning_rate": 1.3547888050255498e-05, - "loss": 0.0658, + "learning_rate": 2.3557954846863152e-05, + "loss": 0.068, "step": 69815 }, { "epoch": 3.26, - "learning_rate": 1.3547419248980358e-05, - "loss": 0.0869, + "learning_rate": 2.3557486777027136e-05, + "loss": 0.0408, "step": 69820 }, { "epoch": 3.26, - "learning_rate": 1.3546950447705218e-05, - "loss": 0.1109, + "learning_rate": 2.3557018707191112e-05, + "loss": 0.0888, "step": 69825 }, { "epoch": 3.26, - "learning_rate": 1.3546481646430078e-05, - "loss": 0.1647, + "learning_rate": 2.3556550637355092e-05, + "loss": 0.0655, "step": 69830 }, { "epoch": 3.26, - "learning_rate": 1.354601284515494e-05, - "loss": 0.2254, + "learning_rate": 2.3556082567519075e-05, + "loss": 0.1836, "step": 69835 }, { "epoch": 3.26, - "learning_rate": 1.3545544043879801e-05, - "loss": 0.249, + "learning_rate": 2.3555614497683055e-05, + "loss": 0.1262, "step": 69840 }, { "epoch": 3.26, - "learning_rate": 1.3545075242604661e-05, - "loss": 0.232, + "learning_rate": 2.3555146427847035e-05, + "loss": 0.2176, "step": 69845 }, { "epoch": 3.26, - "learning_rate": 1.3544606441329523e-05, - "loss": 0.0793, + "learning_rate": 2.3554678358011014e-05, + "loss": 0.0781, "step": 69850 }, { "epoch": 3.26, - "learning_rate": 1.3544137640054383e-05, - "loss": 0.0497, + "learning_rate": 2.3554210288174998e-05, + "loss": 0.0223, "step": 69855 }, { "epoch": 3.26, - "learning_rate": 1.3543668838779243e-05, - "loss": 0.0398, + "learning_rate": 2.3553742218338977e-05, + "loss": 0.0242, "step": 69860 }, { "epoch": 3.26, - "learning_rate": 1.3543200037504103e-05, - "loss": 0.0742, + "learning_rate": 2.3553274148502957e-05, + "loss": 0.0346, "step": 69865 }, { "epoch": 3.26, - "learning_rate": 1.3542731236228963e-05, - "loss": 0.0944, + "learning_rate": 2.3552806078666937e-05, + "loss": 0.0799, "step": 69870 }, { "epoch": 3.26, - "learning_rate": 1.3542262434953824e-05, - "loss": 0.0623, + "learning_rate": 2.355233800883092e-05, + "loss": 0.0321, "step": 69875 }, { "epoch": 3.26, - "learning_rate": 1.3541793633678684e-05, - "loss": 0.088, + "learning_rate": 2.35518699389949e-05, + "loss": 0.1544, "step": 69880 }, { "epoch": 3.26, - "learning_rate": 1.3541324832403546e-05, - "loss": 0.1227, + "learning_rate": 2.355140186915888e-05, + "loss": 0.0906, "step": 69885 }, { "epoch": 3.26, - "learning_rate": 1.3540856031128407e-05, - "loss": 0.2153, + "learning_rate": 2.3550933799322856e-05, + "loss": 0.3188, "step": 69890 }, { "epoch": 3.26, - "learning_rate": 1.3540387229853267e-05, - "loss": 0.2853, + "learning_rate": 2.355046572948684e-05, + "loss": 0.2067, "step": 69895 }, { "epoch": 3.26, - "learning_rate": 1.3539918428578127e-05, - "loss": 0.0896, + "learning_rate": 2.354999765965082e-05, + "loss": 0.022, "step": 69900 }, { "epoch": 3.26, - "learning_rate": 1.3539449627302987e-05, - "loss": 0.0529, + "learning_rate": 2.35495295898148e-05, + "loss": 0.0365, "step": 69905 }, { "epoch": 3.26, - "learning_rate": 1.3538980826027847e-05, - "loss": 0.0535, + "learning_rate": 2.3549061519978782e-05, + "loss": 0.0614, "step": 69910 }, { "epoch": 3.26, - "learning_rate": 1.3538512024752709e-05, - "loss": 0.071, + "learning_rate": 2.3548593450142762e-05, + "loss": 0.0637, "step": 69915 }, { "epoch": 3.26, - "learning_rate": 1.3538043223477569e-05, - "loss": 0.0571, + "learning_rate": 2.3548125380306742e-05, + "loss": 0.0269, "step": 69920 }, { "epoch": 3.26, - "learning_rate": 1.3537574422202429e-05, - "loss": 0.0381, + "learning_rate": 2.3547657310470722e-05, + "loss": 0.1555, "step": 69925 }, { "epoch": 3.26, - "learning_rate": 1.3537105620927292e-05, - "loss": 0.174, + "learning_rate": 2.3547189240634705e-05, + "loss": 0.0927, "step": 69930 }, { "epoch": 3.26, - "learning_rate": 1.3536636819652152e-05, - "loss": 0.1899, + "learning_rate": 2.3546721170798685e-05, + "loss": 0.118, "step": 69935 }, { "epoch": 3.26, - "learning_rate": 1.3536168018377012e-05, - "loss": 0.1559, + "learning_rate": 2.3546253100962665e-05, + "loss": 0.094, "step": 69940 }, { "epoch": 3.26, - "learning_rate": 1.3535699217101872e-05, - "loss": 0.3481, + "learning_rate": 2.3545785031126645e-05, + "loss": 0.2494, "step": 69945 }, { "epoch": 3.26, - "learning_rate": 1.3535230415826732e-05, - "loss": 0.0759, + "learning_rate": 2.3545316961290628e-05, + "loss": 0.0566, "step": 69950 }, { "epoch": 3.26, - "learning_rate": 1.3534761614551593e-05, - "loss": 0.0254, + "learning_rate": 2.3544848891454604e-05, + "loss": 0.0049, "step": 69955 }, { "epoch": 3.26, - "learning_rate": 1.3534292813276453e-05, - "loss": 0.0272, + "learning_rate": 2.3544380821618584e-05, + "loss": 0.0844, "step": 69960 }, { "epoch": 3.26, - "learning_rate": 1.3533824012001313e-05, - "loss": 0.0522, + "learning_rate": 2.3543912751782567e-05, + "loss": 0.1028, "step": 69965 }, { "epoch": 3.26, - "learning_rate": 1.3533355210726173e-05, - "loss": 0.0842, + "learning_rate": 2.3543444681946547e-05, + "loss": 0.057, "step": 69970 }, { "epoch": 3.27, - "learning_rate": 1.3532886409451033e-05, - "loss": 0.1161, + "learning_rate": 2.3542976612110527e-05, + "loss": 0.0504, "step": 69975 }, { "epoch": 3.27, - "learning_rate": 1.3532417608175896e-05, - "loss": 0.0978, + "learning_rate": 2.3542508542274507e-05, + "loss": 0.0842, "step": 69980 }, { "epoch": 3.27, - "learning_rate": 1.3531948806900756e-05, - "loss": 0.0851, + "learning_rate": 2.354204047243849e-05, + "loss": 0.1559, "step": 69985 }, { "epoch": 3.27, - "learning_rate": 1.3531480005625616e-05, - "loss": 0.2412, + "learning_rate": 2.354157240260247e-05, + "loss": 0.1762, "step": 69990 }, { "epoch": 3.27, - "learning_rate": 1.3531011204350478e-05, - "loss": 0.291, + "learning_rate": 2.354110433276645e-05, + "loss": 0.1621, "step": 69995 }, { "epoch": 3.27, - "learning_rate": 1.3530542403075338e-05, - "loss": 0.0615, + "learning_rate": 2.354063626293043e-05, + "loss": 0.0452, "step": 70000 }, { "epoch": 3.27, - "learning_rate": 1.3530073601800198e-05, - "loss": 0.049, + "learning_rate": 2.3540168193094412e-05, + "loss": 0.0154, "step": 70005 }, { "epoch": 3.27, - "learning_rate": 1.3529604800525058e-05, - "loss": 0.0562, + "learning_rate": 2.3539700123258392e-05, + "loss": 0.0286, "step": 70010 }, { "epoch": 3.27, - "learning_rate": 1.3529135999249918e-05, - "loss": 0.0864, + "learning_rate": 2.353923205342237e-05, + "loss": 0.0486, "step": 70015 }, { "epoch": 3.27, - "learning_rate": 1.3528667197974779e-05, - "loss": 0.044, + "learning_rate": 2.3538763983586352e-05, + "loss": 0.041, "step": 70020 }, { "epoch": 3.27, - "learning_rate": 1.352819839669964e-05, - "loss": 0.1173, + "learning_rate": 2.3538295913750332e-05, + "loss": 0.1041, "step": 70025 }, { "epoch": 3.27, - "learning_rate": 1.35277295954245e-05, - "loss": 0.1263, + "learning_rate": 2.353782784391431e-05, + "loss": 0.0725, "step": 70030 }, { "epoch": 3.27, - "learning_rate": 1.3527260794149362e-05, - "loss": 0.0906, + "learning_rate": 2.353735977407829e-05, + "loss": 0.1816, "step": 70035 }, { "epoch": 3.27, - "learning_rate": 1.3526791992874222e-05, - "loss": 0.2683, + "learning_rate": 2.3536891704242275e-05, + "loss": 0.2183, "step": 70040 }, { "epoch": 3.27, - "learning_rate": 1.3526323191599082e-05, - "loss": 0.2091, + "learning_rate": 2.3536423634406254e-05, + "loss": 0.3107, "step": 70045 }, { "epoch": 3.27, - "learning_rate": 1.3525854390323942e-05, - "loss": 0.1217, + "learning_rate": 2.3535955564570234e-05, + "loss": 0.0468, "step": 70050 }, { "epoch": 3.27, - "learning_rate": 1.3525385589048802e-05, - "loss": 0.0359, + "learning_rate": 2.3535487494734214e-05, + "loss": 0.0184, "step": 70055 }, { "epoch": 3.27, - "learning_rate": 1.3524916787773664e-05, - "loss": 0.0479, + "learning_rate": 2.3535019424898197e-05, + "loss": 0.0519, "step": 70060 }, { "epoch": 3.27, - "learning_rate": 1.3524447986498524e-05, - "loss": 0.0724, + "learning_rate": 2.3534551355062177e-05, + "loss": 0.0564, "step": 70065 }, { "epoch": 3.27, - "learning_rate": 1.3523979185223385e-05, - "loss": 0.0834, + "learning_rate": 2.3534083285226157e-05, + "loss": 0.066, "step": 70070 }, { "epoch": 3.27, - "learning_rate": 1.3523510383948247e-05, - "loss": 0.0925, + "learning_rate": 2.3533615215390137e-05, + "loss": 0.03, "step": 70075 }, { "epoch": 3.27, - "learning_rate": 1.3523041582673107e-05, - "loss": 0.173, + "learning_rate": 2.3533147145554117e-05, + "loss": 0.107, "step": 70080 }, { "epoch": 3.27, - "learning_rate": 1.3522572781397967e-05, - "loss": 0.1308, + "learning_rate": 2.3532679075718096e-05, + "loss": 0.2029, "step": 70085 }, { "epoch": 3.27, - "learning_rate": 1.3522103980122827e-05, - "loss": 0.1221, + "learning_rate": 2.3532211005882076e-05, + "loss": 0.1106, "step": 70090 }, { "epoch": 3.27, - "learning_rate": 1.3521635178847687e-05, - "loss": 0.3521, + "learning_rate": 2.353174293604606e-05, + "loss": 0.168, "step": 70095 }, { "epoch": 3.27, - "learning_rate": 1.3521166377572548e-05, - "loss": 0.0234, + "learning_rate": 2.353127486621004e-05, + "loss": 0.0459, "step": 70100 }, { "epoch": 3.27, - "learning_rate": 1.3520697576297408e-05, - "loss": 0.0109, + "learning_rate": 2.353080679637402e-05, + "loss": 0.0213, "step": 70105 }, { "epoch": 3.27, - "learning_rate": 1.3520228775022268e-05, - "loss": 0.0234, + "learning_rate": 2.3530338726538e-05, + "loss": 0.0676, "step": 70110 }, { "epoch": 3.27, - "learning_rate": 1.3519759973747131e-05, - "loss": 0.0545, + "learning_rate": 2.3529870656701982e-05, + "loss": 0.0282, "step": 70115 }, { "epoch": 3.27, - "learning_rate": 1.3519291172471991e-05, - "loss": 0.0509, + "learning_rate": 2.3529402586865962e-05, + "loss": 0.0489, "step": 70120 }, { "epoch": 3.27, - "learning_rate": 1.3518822371196851e-05, - "loss": 0.1194, + "learning_rate": 2.352893451702994e-05, + "loss": 0.0736, "step": 70125 }, { "epoch": 3.27, - "learning_rate": 1.3518353569921711e-05, - "loss": 0.1149, + "learning_rate": 2.352846644719392e-05, + "loss": 0.1711, "step": 70130 }, { "epoch": 3.27, - "learning_rate": 1.3517884768646571e-05, - "loss": 0.1618, + "learning_rate": 2.3527998377357905e-05, + "loss": 0.1266, "step": 70135 }, { "epoch": 3.27, - "learning_rate": 1.3517415967371433e-05, - "loss": 0.1269, + "learning_rate": 2.3527530307521885e-05, + "loss": 0.1837, "step": 70140 }, { "epoch": 3.27, - "learning_rate": 1.3516947166096293e-05, - "loss": 0.3106, + "learning_rate": 2.352706223768586e-05, + "loss": 0.3447, "step": 70145 }, { "epoch": 3.27, - "learning_rate": 1.3516478364821153e-05, - "loss": 0.1003, + "learning_rate": 2.3526594167849844e-05, + "loss": 0.0445, "step": 70150 }, { "epoch": 3.27, - "learning_rate": 1.3516009563546013e-05, - "loss": 0.0187, + "learning_rate": 2.3526126098013824e-05, + "loss": 0.0134, "step": 70155 }, { "epoch": 3.27, - "learning_rate": 1.3515540762270874e-05, - "loss": 0.0811, + "learning_rate": 2.3525658028177804e-05, + "loss": 0.0255, "step": 70160 }, { "epoch": 3.27, - "learning_rate": 1.3515071960995736e-05, - "loss": 0.008, + "learning_rate": 2.3525189958341784e-05, + "loss": 0.0655, "step": 70165 }, { "epoch": 3.27, - "learning_rate": 1.3514603159720596e-05, - "loss": 0.0612, + "learning_rate": 2.3524721888505767e-05, + "loss": 0.053, "step": 70170 }, { "epoch": 3.27, - "learning_rate": 1.3514134358445456e-05, - "loss": 0.0913, + "learning_rate": 2.3524253818669747e-05, + "loss": 0.0647, "step": 70175 }, { "epoch": 3.27, - "learning_rate": 1.3513665557170317e-05, - "loss": 0.1169, + "learning_rate": 2.3523785748833726e-05, + "loss": 0.1394, "step": 70180 }, { "epoch": 3.27, - "learning_rate": 1.3513196755895177e-05, - "loss": 0.2338, + "learning_rate": 2.3523317678997706e-05, + "loss": 0.1596, "step": 70185 }, { "epoch": 3.28, - "learning_rate": 1.3512727954620037e-05, - "loss": 0.2062, + "learning_rate": 2.352284960916169e-05, + "loss": 0.277, "step": 70190 }, { "epoch": 3.28, - "learning_rate": 1.3512259153344897e-05, - "loss": 0.2692, + "learning_rate": 2.352238153932567e-05, + "loss": 0.3841, "step": 70195 }, { "epoch": 3.28, - "learning_rate": 1.3511790352069759e-05, - "loss": 0.0322, + "learning_rate": 2.352191346948965e-05, + "loss": 0.1028, "step": 70200 }, { "epoch": 3.28, - "learning_rate": 1.3511321550794619e-05, - "loss": 0.0478, + "learning_rate": 2.352144539965363e-05, + "loss": 0.0653, "step": 70205 }, { "epoch": 3.28, - "learning_rate": 1.351085274951948e-05, - "loss": 0.0719, + "learning_rate": 2.352097732981761e-05, + "loss": 0.0277, "step": 70210 }, { "epoch": 3.28, - "learning_rate": 1.351038394824434e-05, - "loss": 0.0476, + "learning_rate": 2.352050925998159e-05, + "loss": 0.0857, "step": 70215 }, { "epoch": 3.28, - "learning_rate": 1.3509915146969202e-05, - "loss": 0.0856, + "learning_rate": 2.352004119014557e-05, + "loss": 0.0624, "step": 70220 }, { "epoch": 3.28, - "learning_rate": 1.3509446345694062e-05, - "loss": 0.0642, + "learning_rate": 2.351957312030955e-05, + "loss": 0.0515, "step": 70225 }, { "epoch": 3.28, - "learning_rate": 1.3508977544418922e-05, - "loss": 0.1311, + "learning_rate": 2.351910505047353e-05, + "loss": 0.11, "step": 70230 }, { "epoch": 3.28, - "learning_rate": 1.3508508743143782e-05, - "loss": 0.0779, + "learning_rate": 2.351863698063751e-05, + "loss": 0.0995, "step": 70235 }, { "epoch": 3.28, - "learning_rate": 1.3508039941868643e-05, - "loss": 0.1567, + "learning_rate": 2.351816891080149e-05, + "loss": 0.2278, "step": 70240 }, { "epoch": 3.28, - "learning_rate": 1.3507571140593503e-05, - "loss": 0.3294, + "learning_rate": 2.3517700840965474e-05, + "loss": 0.3136, "step": 70245 }, { "epoch": 3.28, - "learning_rate": 1.3507102339318363e-05, - "loss": 0.0495, + "learning_rate": 2.3517232771129454e-05, + "loss": 0.0761, "step": 70250 }, { "epoch": 3.28, - "learning_rate": 1.3506633538043225e-05, - "loss": 0.0297, + "learning_rate": 2.3516764701293434e-05, + "loss": 0.02, "step": 70255 }, { "epoch": 3.28, - "learning_rate": 1.3506164736768086e-05, - "loss": 0.0638, + "learning_rate": 2.3516296631457414e-05, + "loss": 0.0286, "step": 70260 }, { "epoch": 3.28, - "learning_rate": 1.3505695935492946e-05, - "loss": 0.0689, + "learning_rate": 2.3515828561621397e-05, + "loss": 0.0757, "step": 70265 }, { "epoch": 3.28, - "learning_rate": 1.3505227134217806e-05, - "loss": 0.0922, + "learning_rate": 2.3515360491785373e-05, + "loss": 0.0425, "step": 70270 }, { "epoch": 3.28, - "learning_rate": 1.3504758332942666e-05, - "loss": 0.055, + "learning_rate": 2.3514892421949353e-05, + "loss": 0.0616, "step": 70275 }, { "epoch": 3.28, - "learning_rate": 1.3504289531667528e-05, - "loss": 0.0506, + "learning_rate": 2.3514424352113336e-05, + "loss": 0.1114, "step": 70280 }, { "epoch": 3.28, - "learning_rate": 1.3503820730392388e-05, - "loss": 0.2447, + "learning_rate": 2.3513956282277316e-05, + "loss": 0.1263, "step": 70285 }, { "epoch": 3.28, - "learning_rate": 1.3503351929117248e-05, - "loss": 0.152, + "learning_rate": 2.3513488212441296e-05, + "loss": 0.2403, "step": 70290 }, { "epoch": 3.28, - "learning_rate": 1.3502883127842108e-05, - "loss": 0.4533, + "learning_rate": 2.3513020142605276e-05, + "loss": 0.2312, "step": 70295 }, { "epoch": 3.28, - "learning_rate": 1.3502414326566968e-05, - "loss": 0.0593, + "learning_rate": 2.351255207276926e-05, + "loss": 0.0791, "step": 70300 }, { "epoch": 3.28, - "learning_rate": 1.3501945525291831e-05, - "loss": 0.0576, + "learning_rate": 2.351208400293324e-05, + "loss": 0.0096, "step": 70305 }, { "epoch": 3.28, - "learning_rate": 1.3501476724016691e-05, - "loss": 0.1412, + "learning_rate": 2.351161593309722e-05, + "loss": 0.0495, "step": 70310 }, { "epoch": 3.28, - "learning_rate": 1.350100792274155e-05, - "loss": 0.067, + "learning_rate": 2.35111478632612e-05, + "loss": 0.0535, "step": 70315 }, { "epoch": 3.28, - "learning_rate": 1.3500539121466412e-05, - "loss": 0.0748, + "learning_rate": 2.351067979342518e-05, + "loss": 0.0684, "step": 70320 }, { "epoch": 3.28, - "learning_rate": 1.3500070320191272e-05, - "loss": 0.0804, + "learning_rate": 2.351021172358916e-05, + "loss": 0.1341, "step": 70325 }, { "epoch": 3.28, - "learning_rate": 1.3499601518916132e-05, - "loss": 0.1018, + "learning_rate": 2.350974365375314e-05, + "loss": 0.0943, "step": 70330 }, { "epoch": 3.28, - "learning_rate": 1.3499132717640992e-05, - "loss": 0.2179, + "learning_rate": 2.350927558391712e-05, + "loss": 0.063, "step": 70335 }, { "epoch": 3.28, - "learning_rate": 1.3498663916365852e-05, - "loss": 0.1462, + "learning_rate": 2.35088075140811e-05, + "loss": 0.274, "step": 70340 }, { "epoch": 3.28, - "learning_rate": 1.3498195115090714e-05, - "loss": 0.3059, + "learning_rate": 2.350833944424508e-05, + "loss": 0.2878, "step": 70345 }, { "epoch": 3.28, - "learning_rate": 1.3497726313815575e-05, - "loss": 0.0822, + "learning_rate": 2.350787137440906e-05, + "loss": 0.0406, "step": 70350 }, { "epoch": 3.28, - "learning_rate": 1.3497257512540435e-05, - "loss": 0.0157, + "learning_rate": 2.3507403304573044e-05, + "loss": 0.0243, "step": 70355 }, { "epoch": 3.28, - "learning_rate": 1.3496788711265297e-05, - "loss": 0.0288, + "learning_rate": 2.3506935234737024e-05, + "loss": 0.0337, "step": 70360 }, { "epoch": 3.28, - "learning_rate": 1.3496319909990157e-05, - "loss": 0.0865, + "learning_rate": 2.3506467164901003e-05, + "loss": 0.0776, "step": 70365 }, { "epoch": 3.28, - "learning_rate": 1.3495851108715017e-05, - "loss": 0.0594, + "learning_rate": 2.3505999095064983e-05, + "loss": 0.0643, "step": 70370 }, { "epoch": 3.28, - "learning_rate": 1.3495382307439877e-05, - "loss": 0.0708, + "learning_rate": 2.3505531025228966e-05, + "loss": 0.069, "step": 70375 }, { "epoch": 3.28, - "learning_rate": 1.3494913506164737e-05, - "loss": 0.0819, + "learning_rate": 2.3505062955392946e-05, + "loss": 0.072, "step": 70380 }, { "epoch": 3.28, - "learning_rate": 1.3494444704889598e-05, - "loss": 0.3972, + "learning_rate": 2.3504594885556926e-05, + "loss": 0.0749, "step": 70385 }, { "epoch": 3.28, - "learning_rate": 1.3493975903614458e-05, - "loss": 0.2576, + "learning_rate": 2.350412681572091e-05, + "loss": 0.2005, "step": 70390 }, { "epoch": 3.28, - "learning_rate": 1.349350710233932e-05, - "loss": 0.3062, + "learning_rate": 2.3503658745884886e-05, + "loss": 0.1916, "step": 70395 }, { "epoch": 3.28, - "learning_rate": 1.3493038301064182e-05, - "loss": 0.0972, + "learning_rate": 2.3503190676048866e-05, + "loss": 0.0829, "step": 70400 }, { "epoch": 3.29, - "learning_rate": 1.3492569499789041e-05, - "loss": 0.0176, + "learning_rate": 2.3502722606212845e-05, + "loss": 0.0403, "step": 70405 }, { "epoch": 3.29, - "learning_rate": 1.3492100698513901e-05, - "loss": 0.0343, + "learning_rate": 2.350225453637683e-05, + "loss": 0.0308, "step": 70410 }, { "epoch": 3.29, - "learning_rate": 1.3491631897238761e-05, - "loss": 0.0536, + "learning_rate": 2.350178646654081e-05, + "loss": 0.0384, "step": 70415 }, { "epoch": 3.29, - "learning_rate": 1.3491163095963621e-05, - "loss": 0.0684, + "learning_rate": 2.3501318396704788e-05, + "loss": 0.0323, "step": 70420 }, { "epoch": 3.29, - "learning_rate": 1.3490694294688483e-05, - "loss": 0.1144, + "learning_rate": 2.3500850326868768e-05, + "loss": 0.0374, "step": 70425 }, { "epoch": 3.29, - "learning_rate": 1.3490225493413343e-05, - "loss": 0.1388, + "learning_rate": 2.350038225703275e-05, + "loss": 0.114, "step": 70430 }, { "epoch": 3.29, - "learning_rate": 1.3489756692138203e-05, - "loss": 0.1507, + "learning_rate": 2.349991418719673e-05, + "loss": 0.0716, "step": 70435 }, { "epoch": 3.29, - "learning_rate": 1.3489287890863066e-05, - "loss": 0.2201, + "learning_rate": 2.349944611736071e-05, + "loss": 0.1352, "step": 70440 }, { "epoch": 3.29, - "learning_rate": 1.3488819089587926e-05, - "loss": 0.3038, + "learning_rate": 2.3498978047524694e-05, + "loss": 0.1718, "step": 70445 }, { "epoch": 3.29, - "learning_rate": 1.3488350288312786e-05, - "loss": 0.0431, + "learning_rate": 2.3498509977688674e-05, + "loss": 0.072, "step": 70450 }, { "epoch": 3.29, - "learning_rate": 1.3487881487037646e-05, - "loss": 0.0485, + "learning_rate": 2.3498041907852654e-05, + "loss": 0.0594, "step": 70455 }, { "epoch": 3.29, - "learning_rate": 1.3487412685762506e-05, - "loss": 0.0658, + "learning_rate": 2.349757383801663e-05, + "loss": 0.0041, "step": 70460 }, { "epoch": 3.29, - "learning_rate": 1.3486943884487367e-05, - "loss": 0.0789, + "learning_rate": 2.3497105768180613e-05, + "loss": 0.0417, "step": 70465 }, { "epoch": 3.29, - "learning_rate": 1.3486475083212227e-05, - "loss": 0.0781, + "learning_rate": 2.3496637698344593e-05, + "loss": 0.0775, "step": 70470 }, { "epoch": 3.29, - "learning_rate": 1.3486006281937087e-05, - "loss": 0.0563, + "learning_rate": 2.3496169628508573e-05, + "loss": 0.08, "step": 70475 }, { "epoch": 3.29, - "learning_rate": 1.3485537480661947e-05, - "loss": 0.1637, + "learning_rate": 2.3495701558672553e-05, + "loss": 0.1252, "step": 70480 }, { "epoch": 3.29, - "learning_rate": 1.3485068679386807e-05, - "loss": 0.1455, + "learning_rate": 2.3495233488836536e-05, + "loss": 0.1889, "step": 70485 }, { "epoch": 3.29, - "learning_rate": 1.348459987811167e-05, - "loss": 0.169, + "learning_rate": 2.3494765419000516e-05, + "loss": 0.2294, "step": 70490 }, { "epoch": 3.29, - "learning_rate": 1.348413107683653e-05, - "loss": 0.3557, + "learning_rate": 2.3494297349164496e-05, + "loss": 0.3066, "step": 70495 }, { "epoch": 3.29, - "learning_rate": 1.348366227556139e-05, - "loss": 0.0485, + "learning_rate": 2.3493829279328475e-05, + "loss": 0.0799, "step": 70500 }, { "epoch": 3.29, - "learning_rate": 1.3483193474286252e-05, - "loss": 0.0281, + "learning_rate": 2.349336120949246e-05, + "loss": 0.0244, "step": 70505 }, { "epoch": 3.29, - "learning_rate": 1.3482724673011112e-05, - "loss": 0.0363, + "learning_rate": 2.349289313965644e-05, + "loss": 0.0439, "step": 70510 }, { "epoch": 3.29, - "learning_rate": 1.3482255871735972e-05, - "loss": 0.0769, + "learning_rate": 2.3492425069820418e-05, + "loss": 0.0616, "step": 70515 }, { "epoch": 3.29, - "learning_rate": 1.3481787070460832e-05, - "loss": 0.031, + "learning_rate": 2.3491956999984398e-05, + "loss": 0.0733, "step": 70520 }, { "epoch": 3.29, - "learning_rate": 1.3481318269185692e-05, - "loss": 0.081, + "learning_rate": 2.3491488930148378e-05, + "loss": 0.086, "step": 70525 }, { "epoch": 3.29, - "learning_rate": 1.3480849467910553e-05, - "loss": 0.0901, + "learning_rate": 2.3491020860312358e-05, + "loss": 0.0668, "step": 70530 }, { "epoch": 3.29, - "learning_rate": 1.3480380666635415e-05, - "loss": 0.093, + "learning_rate": 2.3490552790476338e-05, + "loss": 0.0938, "step": 70535 }, { "epoch": 3.29, - "learning_rate": 1.3479911865360275e-05, - "loss": 0.1909, + "learning_rate": 2.349008472064032e-05, + "loss": 0.2281, "step": 70540 }, { "epoch": 3.29, - "learning_rate": 1.3479443064085137e-05, - "loss": 0.2227, + "learning_rate": 2.34896166508043e-05, + "loss": 0.1502, "step": 70545 }, { "epoch": 3.29, - "learning_rate": 1.3478974262809996e-05, - "loss": 0.0348, + "learning_rate": 2.348914858096828e-05, + "loss": 0.0645, "step": 70550 }, { "epoch": 3.29, - "learning_rate": 1.3478505461534856e-05, - "loss": 0.0182, + "learning_rate": 2.348868051113226e-05, + "loss": 0.0274, "step": 70555 }, { "epoch": 3.29, - "learning_rate": 1.3478036660259716e-05, - "loss": 0.024, + "learning_rate": 2.3488212441296243e-05, + "loss": 0.0349, "step": 70560 }, { "epoch": 3.29, - "learning_rate": 1.3477567858984576e-05, - "loss": 0.0567, + "learning_rate": 2.3487744371460223e-05, + "loss": 0.0593, "step": 70565 }, { "epoch": 3.29, - "learning_rate": 1.3477099057709438e-05, - "loss": 0.0617, + "learning_rate": 2.3487276301624203e-05, + "loss": 0.0582, "step": 70570 }, { "epoch": 3.29, - "learning_rate": 1.3476630256434298e-05, - "loss": 0.0485, + "learning_rate": 2.3486808231788186e-05, + "loss": 0.1337, "step": 70575 }, { "epoch": 3.29, - "learning_rate": 1.347616145515916e-05, - "loss": 0.0487, + "learning_rate": 2.3486340161952166e-05, + "loss": 0.077, "step": 70580 }, { "epoch": 3.29, - "learning_rate": 1.3475692653884021e-05, - "loss": 0.3816, + "learning_rate": 2.3485872092116142e-05, + "loss": 0.1821, "step": 70585 }, { "epoch": 3.29, - "learning_rate": 1.3475223852608881e-05, - "loss": 0.2829, + "learning_rate": 2.3485404022280122e-05, + "loss": 0.197, "step": 70590 }, { "epoch": 3.29, - "learning_rate": 1.3474755051333741e-05, - "loss": 0.357, + "learning_rate": 2.3484935952444106e-05, + "loss": 0.3303, "step": 70595 }, { "epoch": 3.29, - "learning_rate": 1.3474286250058601e-05, - "loss": 0.0774, + "learning_rate": 2.3484467882608085e-05, + "loss": 0.0865, "step": 70600 }, { "epoch": 3.29, - "learning_rate": 1.347381744878346e-05, - "loss": 0.0361, + "learning_rate": 2.3483999812772065e-05, + "loss": 0.0298, "step": 70605 }, { "epoch": 3.29, - "learning_rate": 1.3473348647508322e-05, - "loss": 0.0453, + "learning_rate": 2.3483531742936045e-05, + "loss": 0.0468, "step": 70610 }, { "epoch": 3.29, - "learning_rate": 1.3472879846233182e-05, - "loss": 0.0449, + "learning_rate": 2.3483063673100028e-05, + "loss": 0.0857, "step": 70615 }, { "epoch": 3.3, - "learning_rate": 1.3472411044958042e-05, - "loss": 0.0499, + "learning_rate": 2.3482595603264008e-05, + "loss": 0.0868, "step": 70620 }, { "epoch": 3.3, - "learning_rate": 1.3471942243682902e-05, - "loss": 0.048, + "learning_rate": 2.3482127533427988e-05, + "loss": 0.0789, "step": 70625 }, { "epoch": 3.3, - "learning_rate": 1.3471473442407766e-05, - "loss": 0.2074, + "learning_rate": 2.348165946359197e-05, + "loss": 0.1059, "step": 70630 }, { "epoch": 3.3, - "learning_rate": 1.3471004641132626e-05, - "loss": 0.1713, + "learning_rate": 2.348119139375595e-05, + "loss": 0.1368, "step": 70635 }, { "epoch": 3.3, - "learning_rate": 1.3470535839857485e-05, - "loss": 0.281, + "learning_rate": 2.348072332391993e-05, + "loss": 0.1975, "step": 70640 }, { "epoch": 3.3, - "learning_rate": 1.3470067038582345e-05, - "loss": 0.2752, + "learning_rate": 2.348025525408391e-05, + "loss": 0.2491, "step": 70645 }, { "epoch": 3.3, - "learning_rate": 1.3469598237307207e-05, - "loss": 0.0978, + "learning_rate": 2.347978718424789e-05, + "loss": 0.0491, "step": 70650 }, { "epoch": 3.3, - "learning_rate": 1.3469129436032067e-05, - "loss": 0.0477, + "learning_rate": 2.347931911441187e-05, + "loss": 0.0317, "step": 70655 }, { "epoch": 3.3, - "learning_rate": 1.3468660634756927e-05, - "loss": 0.0398, + "learning_rate": 2.347885104457585e-05, + "loss": 0.0674, "step": 70660 }, { "epoch": 3.3, - "learning_rate": 1.3468191833481787e-05, - "loss": 0.0324, + "learning_rate": 2.347838297473983e-05, + "loss": 0.0737, "step": 70665 }, { "epoch": 3.3, - "learning_rate": 1.3467723032206648e-05, - "loss": 0.0819, + "learning_rate": 2.3477914904903813e-05, + "loss": 0.068, "step": 70670 }, { "epoch": 3.3, - "learning_rate": 1.346725423093151e-05, - "loss": 0.076, + "learning_rate": 2.3477446835067793e-05, + "loss": 0.0358, "step": 70675 }, { "epoch": 3.3, - "learning_rate": 1.346678542965637e-05, - "loss": 0.057, + "learning_rate": 2.3476978765231773e-05, + "loss": 0.046, "step": 70680 }, { "epoch": 3.3, - "learning_rate": 1.346631662838123e-05, - "loss": 0.1844, + "learning_rate": 2.3476510695395752e-05, + "loss": 0.1593, "step": 70685 }, { "epoch": 3.3, - "learning_rate": 1.3465847827106092e-05, - "loss": 0.2069, + "learning_rate": 2.3476042625559736e-05, + "loss": 0.1746, "step": 70690 }, { "epoch": 3.3, - "learning_rate": 1.3465379025830952e-05, - "loss": 0.1684, + "learning_rate": 2.3475574555723715e-05, + "loss": 0.1715, "step": 70695 }, { "epoch": 3.3, - "learning_rate": 1.3464910224555811e-05, - "loss": 0.0384, + "learning_rate": 2.3475106485887695e-05, + "loss": 0.0388, "step": 70700 }, { "epoch": 3.3, - "learning_rate": 1.3464441423280671e-05, - "loss": 0.0533, + "learning_rate": 2.347463841605168e-05, + "loss": 0.0329, "step": 70705 }, { "epoch": 3.3, - "learning_rate": 1.3463972622005533e-05, - "loss": 0.0424, + "learning_rate": 2.3474170346215655e-05, + "loss": 0.0201, "step": 70710 }, { "epoch": 3.3, - "learning_rate": 1.3463503820730393e-05, - "loss": 0.0333, + "learning_rate": 2.3473702276379635e-05, + "loss": 0.0228, "step": 70715 }, { "epoch": 3.3, - "learning_rate": 1.3463035019455255e-05, - "loss": 0.0665, + "learning_rate": 2.3473234206543615e-05, + "loss": 0.067, "step": 70720 }, { "epoch": 3.3, - "learning_rate": 1.3462566218180114e-05, - "loss": 0.0811, + "learning_rate": 2.3472766136707598e-05, + "loss": 0.1036, "step": 70725 }, { "epoch": 3.3, - "learning_rate": 1.3462097416904976e-05, - "loss": 0.0911, + "learning_rate": 2.3472298066871578e-05, + "loss": 0.108, "step": 70730 }, { "epoch": 3.3, - "learning_rate": 1.3461628615629836e-05, - "loss": 0.2091, + "learning_rate": 2.3471829997035557e-05, + "loss": 0.0983, "step": 70735 }, { "epoch": 3.3, - "learning_rate": 1.3461159814354696e-05, - "loss": 0.2925, + "learning_rate": 2.3471361927199537e-05, + "loss": 0.2356, "step": 70740 }, { "epoch": 3.3, - "learning_rate": 1.3460691013079556e-05, - "loss": 0.2814, + "learning_rate": 2.347089385736352e-05, + "loss": 0.2258, "step": 70745 }, { "epoch": 3.3, - "learning_rate": 1.3460222211804418e-05, - "loss": 0.0556, + "learning_rate": 2.34704257875275e-05, + "loss": 0.0737, "step": 70750 }, { "epoch": 3.3, - "learning_rate": 1.3459753410529277e-05, - "loss": 0.0091, + "learning_rate": 2.346995771769148e-05, + "loss": 0.0394, "step": 70755 }, { "epoch": 3.3, - "learning_rate": 1.3459284609254137e-05, - "loss": 0.0401, + "learning_rate": 2.3469489647855463e-05, + "loss": 0.0361, "step": 70760 }, { "epoch": 3.3, - "learning_rate": 1.3458815807978999e-05, - "loss": 0.0593, + "learning_rate": 2.3469021578019443e-05, + "loss": 0.1082, "step": 70765 }, { "epoch": 3.3, - "learning_rate": 1.345834700670386e-05, - "loss": 0.046, + "learning_rate": 2.3468553508183423e-05, + "loss": 0.1015, "step": 70770 }, { "epoch": 3.3, - "learning_rate": 1.345787820542872e-05, - "loss": 0.075, + "learning_rate": 2.34680854383474e-05, + "loss": 0.0732, "step": 70775 }, { "epoch": 3.3, - "learning_rate": 1.345740940415358e-05, - "loss": 0.0843, + "learning_rate": 2.3467617368511382e-05, + "loss": 0.1305, "step": 70780 }, { "epoch": 3.3, - "learning_rate": 1.345694060287844e-05, - "loss": 0.2834, + "learning_rate": 2.3467149298675362e-05, + "loss": 0.0794, "step": 70785 }, { "epoch": 3.3, - "learning_rate": 1.3456471801603302e-05, - "loss": 0.354, + "learning_rate": 2.3466681228839342e-05, + "loss": 0.1879, "step": 70790 }, { "epoch": 3.3, - "learning_rate": 1.3456003000328162e-05, - "loss": 0.2956, + "learning_rate": 2.3466213159003322e-05, + "loss": 0.2989, "step": 70795 }, { "epoch": 3.3, - "learning_rate": 1.3455534199053022e-05, - "loss": 0.044, + "learning_rate": 2.3465745089167305e-05, + "loss": 0.0802, "step": 70800 }, { "epoch": 3.3, - "learning_rate": 1.3455065397777882e-05, - "loss": 0.0318, + "learning_rate": 2.3465277019331285e-05, + "loss": 0.0744, "step": 70805 }, { "epoch": 3.3, - "learning_rate": 1.3454596596502742e-05, - "loss": 0.0744, + "learning_rate": 2.3464808949495265e-05, + "loss": 0.0391, "step": 70810 }, { "epoch": 3.3, - "learning_rate": 1.3454127795227605e-05, - "loss": 0.0909, + "learning_rate": 2.3464340879659248e-05, + "loss": 0.0548, "step": 70815 }, { "epoch": 3.3, - "learning_rate": 1.3453658993952465e-05, - "loss": 0.0814, + "learning_rate": 2.3463872809823228e-05, + "loss": 0.0612, "step": 70820 }, { "epoch": 3.3, - "learning_rate": 1.3453190192677325e-05, - "loss": 0.1356, + "learning_rate": 2.3463404739987208e-05, + "loss": 0.0949, "step": 70825 }, { "epoch": 3.31, - "learning_rate": 1.3452721391402187e-05, - "loss": 0.1235, + "learning_rate": 2.3462936670151187e-05, + "loss": 0.1288, "step": 70830 }, { "epoch": 3.31, - "learning_rate": 1.3452252590127047e-05, - "loss": 0.0741, + "learning_rate": 2.3462468600315167e-05, + "loss": 0.1713, "step": 70835 }, { "epoch": 3.31, - "learning_rate": 1.3451783788851907e-05, - "loss": 0.1165, + "learning_rate": 2.3462000530479147e-05, + "loss": 0.1532, "step": 70840 }, { "epoch": 3.31, - "learning_rate": 1.3451314987576766e-05, - "loss": 0.3005, + "learning_rate": 2.3461532460643127e-05, + "loss": 0.2368, "step": 70845 }, { "epoch": 3.31, - "learning_rate": 1.3450846186301626e-05, - "loss": 0.0731, + "learning_rate": 2.3461064390807107e-05, + "loss": 0.055, "step": 70850 }, { "epoch": 3.31, - "learning_rate": 1.3450377385026488e-05, - "loss": 0.034, + "learning_rate": 2.346059632097109e-05, + "loss": 0.0317, "step": 70855 }, { "epoch": 3.31, - "learning_rate": 1.344990858375135e-05, - "loss": 0.0277, + "learning_rate": 2.346012825113507e-05, + "loss": 0.0451, "step": 70860 }, { "epoch": 3.31, - "learning_rate": 1.344943978247621e-05, - "loss": 0.0857, + "learning_rate": 2.345966018129905e-05, + "loss": 0.0883, "step": 70865 }, { "epoch": 3.31, - "learning_rate": 1.3448970981201071e-05, - "loss": 0.0274, + "learning_rate": 2.345919211146303e-05, + "loss": 0.0585, "step": 70870 }, { "epoch": 3.31, - "learning_rate": 1.3448502179925931e-05, - "loss": 0.1408, + "learning_rate": 2.3458724041627013e-05, + "loss": 0.0913, "step": 70875 }, { "epoch": 3.31, - "learning_rate": 1.3448033378650791e-05, - "loss": 0.0956, + "learning_rate": 2.3458255971790992e-05, + "loss": 0.098, "step": 70880 }, { "epoch": 3.31, - "learning_rate": 1.3447564577375651e-05, - "loss": 0.0891, + "learning_rate": 2.3457787901954972e-05, + "loss": 0.175, "step": 70885 }, { "epoch": 3.31, - "learning_rate": 1.3447095776100511e-05, - "loss": 0.1134, + "learning_rate": 2.3457319832118955e-05, + "loss": 0.1416, "step": 70890 }, { "epoch": 3.31, - "learning_rate": 1.3446626974825373e-05, - "loss": 0.2421, + "learning_rate": 2.3456851762282935e-05, + "loss": 0.3933, "step": 70895 }, { "epoch": 3.31, - "learning_rate": 1.3446158173550233e-05, - "loss": 0.0405, + "learning_rate": 2.345638369244691e-05, + "loss": 0.0399, "step": 70900 }, { "epoch": 3.31, - "learning_rate": 1.3445689372275094e-05, - "loss": 0.0647, + "learning_rate": 2.345591562261089e-05, + "loss": 0.0196, "step": 70905 }, { "epoch": 3.31, - "learning_rate": 1.3445220570999956e-05, - "loss": 0.0569, + "learning_rate": 2.3455447552774875e-05, + "loss": 0.0387, "step": 70910 }, { "epoch": 3.31, - "learning_rate": 1.3444751769724816e-05, - "loss": 0.0657, + "learning_rate": 2.3454979482938855e-05, + "loss": 0.081, "step": 70915 }, { "epoch": 3.31, - "learning_rate": 1.3444282968449676e-05, - "loss": 0.0433, + "learning_rate": 2.3454511413102834e-05, + "loss": 0.0882, "step": 70920 }, { "epoch": 3.31, - "learning_rate": 1.3443814167174536e-05, - "loss": 0.1249, + "learning_rate": 2.3454043343266814e-05, + "loss": 0.1391, "step": 70925 }, { "epoch": 3.31, - "learning_rate": 1.3443345365899395e-05, - "loss": 0.1427, + "learning_rate": 2.3453575273430797e-05, + "loss": 0.0641, "step": 70930 }, { "epoch": 3.31, - "learning_rate": 1.3442876564624257e-05, - "loss": 0.1352, + "learning_rate": 2.3453107203594777e-05, + "loss": 0.0684, "step": 70935 }, { "epoch": 3.31, - "learning_rate": 1.3442407763349117e-05, - "loss": 0.154, + "learning_rate": 2.3452639133758757e-05, + "loss": 0.2094, "step": 70940 }, { "epoch": 3.31, - "learning_rate": 1.3441938962073977e-05, - "loss": 0.2397, + "learning_rate": 2.345217106392274e-05, + "loss": 0.1508, "step": 70945 }, { "epoch": 3.31, - "learning_rate": 1.3441470160798837e-05, - "loss": 0.0717, + "learning_rate": 2.345170299408672e-05, + "loss": 0.0768, "step": 70950 }, { "epoch": 3.31, - "learning_rate": 1.34410013595237e-05, - "loss": 0.0249, + "learning_rate": 2.34512349242507e-05, + "loss": 0.0607, "step": 70955 }, { "epoch": 3.31, - "learning_rate": 1.344053255824856e-05, - "loss": 0.076, + "learning_rate": 2.345076685441468e-05, + "loss": 0.0397, "step": 70960 }, { "epoch": 3.31, - "learning_rate": 1.344006375697342e-05, - "loss": 0.0485, + "learning_rate": 2.345029878457866e-05, + "loss": 0.0266, "step": 70965 }, { "epoch": 3.31, - "learning_rate": 1.343959495569828e-05, - "loss": 0.071, + "learning_rate": 2.344983071474264e-05, + "loss": 0.0961, "step": 70970 }, { "epoch": 3.31, - "learning_rate": 1.3439126154423142e-05, - "loss": 0.18, + "learning_rate": 2.344936264490662e-05, + "loss": 0.0656, "step": 70975 }, { "epoch": 3.31, - "learning_rate": 1.3438657353148002e-05, - "loss": 0.1468, + "learning_rate": 2.34488945750706e-05, + "loss": 0.1497, "step": 70980 }, { "epoch": 3.31, - "learning_rate": 1.3438188551872862e-05, - "loss": 0.0761, + "learning_rate": 2.3448426505234582e-05, + "loss": 0.0884, "step": 70985 }, { "epoch": 3.31, - "learning_rate": 1.3437719750597721e-05, - "loss": 0.1708, + "learning_rate": 2.3447958435398562e-05, + "loss": 0.2132, "step": 70990 }, { "epoch": 3.31, - "learning_rate": 1.3437250949322581e-05, - "loss": 0.2662, + "learning_rate": 2.3447490365562542e-05, + "loss": 0.2205, "step": 70995 }, { "epoch": 3.31, - "learning_rate": 1.3436782148047445e-05, - "loss": 0.0523, + "learning_rate": 2.3447022295726525e-05, + "loss": 0.05, "step": 71000 }, { "epoch": 3.31, - "learning_rate": 1.3436313346772305e-05, - "loss": 0.0473, + "learning_rate": 2.3446554225890505e-05, + "loss": 0.0214, "step": 71005 }, { "epoch": 3.31, - "learning_rate": 1.3435844545497165e-05, - "loss": 0.0661, + "learning_rate": 2.3446086156054485e-05, + "loss": 0.0276, "step": 71010 }, { "epoch": 3.31, - "learning_rate": 1.3435375744222026e-05, - "loss": 0.0808, + "learning_rate": 2.3445618086218464e-05, + "loss": 0.0322, "step": 71015 }, { "epoch": 3.31, - "learning_rate": 1.3434906942946886e-05, - "loss": 0.0806, + "learning_rate": 2.3445150016382448e-05, + "loss": 0.0774, "step": 71020 }, { "epoch": 3.31, - "learning_rate": 1.3434438141671746e-05, - "loss": 0.0982, + "learning_rate": 2.3444681946546424e-05, + "loss": 0.1055, "step": 71025 }, { "epoch": 3.31, - "learning_rate": 1.3433969340396606e-05, - "loss": 0.1275, + "learning_rate": 2.3444213876710404e-05, + "loss": 0.0709, "step": 71030 }, { "epoch": 3.31, - "learning_rate": 1.3433500539121466e-05, - "loss": 0.1666, + "learning_rate": 2.3443745806874384e-05, + "loss": 0.093, "step": 71035 }, { "epoch": 3.31, - "learning_rate": 1.3433031737846328e-05, - "loss": 0.1886, + "learning_rate": 2.3443277737038367e-05, + "loss": 0.1827, "step": 71040 }, { "epoch": 3.32, - "learning_rate": 1.343256293657119e-05, - "loss": 0.1818, + "learning_rate": 2.3442809667202347e-05, + "loss": 0.2909, "step": 71045 }, { "epoch": 3.32, - "learning_rate": 1.3432094135296049e-05, - "loss": 0.0647, + "learning_rate": 2.3442341597366327e-05, + "loss": 0.0742, "step": 71050 }, { "epoch": 3.32, - "learning_rate": 1.343162533402091e-05, - "loss": 0.0322, + "learning_rate": 2.344187352753031e-05, + "loss": 0.0304, "step": 71055 }, { "epoch": 3.32, - "learning_rate": 1.343115653274577e-05, - "loss": 0.0411, + "learning_rate": 2.344140545769429e-05, + "loss": 0.0346, "step": 71060 }, { "epoch": 3.32, - "learning_rate": 1.343068773147063e-05, - "loss": 0.0963, + "learning_rate": 2.344093738785827e-05, + "loss": 0.0579, "step": 71065 }, { "epoch": 3.32, - "learning_rate": 1.343021893019549e-05, - "loss": 0.1114, + "learning_rate": 2.344046931802225e-05, + "loss": 0.023, "step": 71070 }, { "epoch": 3.32, - "learning_rate": 1.342975012892035e-05, - "loss": 0.0667, + "learning_rate": 2.3440001248186232e-05, + "loss": 0.186, "step": 71075 }, { "epoch": 3.32, - "learning_rate": 1.3429281327645212e-05, - "loss": 0.0841, + "learning_rate": 2.3439533178350212e-05, + "loss": 0.1488, "step": 71080 }, { "epoch": 3.32, - "learning_rate": 1.3428812526370072e-05, - "loss": 0.1584, + "learning_rate": 2.3439065108514192e-05, + "loss": 0.1178, "step": 71085 }, { "epoch": 3.32, - "learning_rate": 1.3428343725094934e-05, - "loss": 0.1747, + "learning_rate": 2.343859703867817e-05, + "loss": 0.2035, "step": 71090 }, { "epoch": 3.32, - "learning_rate": 1.3427874923819795e-05, - "loss": 0.3115, + "learning_rate": 2.343812896884215e-05, + "loss": 0.244, "step": 71095 }, { "epoch": 3.32, - "learning_rate": 1.3427406122544655e-05, - "loss": 0.0685, + "learning_rate": 2.343766089900613e-05, + "loss": 0.0535, "step": 71100 }, { "epoch": 3.32, - "learning_rate": 1.3426937321269515e-05, - "loss": 0.0193, + "learning_rate": 2.343719282917011e-05, + "loss": 0.0407, "step": 71105 }, { "epoch": 3.32, - "learning_rate": 1.3426468519994375e-05, - "loss": 0.065, + "learning_rate": 2.343672475933409e-05, + "loss": 0.0229, "step": 71110 }, { "epoch": 3.32, - "learning_rate": 1.3425999718719237e-05, - "loss": 0.0504, + "learning_rate": 2.3436256689498074e-05, + "loss": 0.0616, "step": 71115 }, { "epoch": 3.32, - "learning_rate": 1.3425530917444097e-05, - "loss": 0.0395, + "learning_rate": 2.3435788619662054e-05, + "loss": 0.0699, "step": 71120 }, { "epoch": 3.32, - "learning_rate": 1.3425062116168957e-05, - "loss": 0.0502, + "learning_rate": 2.3435320549826034e-05, + "loss": 0.1738, "step": 71125 }, { "epoch": 3.32, - "learning_rate": 1.3424593314893817e-05, - "loss": 0.0907, + "learning_rate": 2.3434852479990017e-05, + "loss": 0.1507, "step": 71130 }, { "epoch": 3.32, - "learning_rate": 1.3424124513618676e-05, - "loss": 0.1055, + "learning_rate": 2.3434384410153997e-05, + "loss": 0.124, "step": 71135 }, { "epoch": 3.32, - "learning_rate": 1.342365571234354e-05, - "loss": 0.0928, + "learning_rate": 2.3433916340317977e-05, + "loss": 0.1636, "step": 71140 }, { "epoch": 3.32, - "learning_rate": 1.34231869110684e-05, - "loss": 0.343, + "learning_rate": 2.3433448270481957e-05, + "loss": 0.32, "step": 71145 }, { "epoch": 3.32, - "learning_rate": 1.342271810979326e-05, - "loss": 0.0368, + "learning_rate": 2.343298020064594e-05, + "loss": 0.0607, "step": 71150 }, { "epoch": 3.32, - "learning_rate": 1.3422249308518121e-05, - "loss": 0.0282, + "learning_rate": 2.3432512130809916e-05, + "loss": 0.0527, "step": 71155 }, { "epoch": 3.32, - "learning_rate": 1.3421780507242981e-05, - "loss": 0.0772, + "learning_rate": 2.3432044060973896e-05, + "loss": 0.0461, "step": 71160 }, { "epoch": 3.32, - "learning_rate": 1.3421311705967841e-05, - "loss": 0.0514, + "learning_rate": 2.3431575991137876e-05, + "loss": 0.0508, "step": 71165 }, { "epoch": 3.32, - "learning_rate": 1.3420842904692701e-05, - "loss": 0.077, + "learning_rate": 2.343110792130186e-05, + "loss": 0.0525, "step": 71170 }, { "epoch": 3.32, - "learning_rate": 1.3420374103417561e-05, - "loss": 0.1319, + "learning_rate": 2.343063985146584e-05, + "loss": 0.0447, "step": 71175 }, { "epoch": 3.32, - "learning_rate": 1.3419905302142423e-05, - "loss": 0.1325, + "learning_rate": 2.343017178162982e-05, + "loss": 0.1545, "step": 71180 }, { "epoch": 3.32, - "learning_rate": 1.3419436500867284e-05, - "loss": 0.1731, + "learning_rate": 2.3429703711793802e-05, + "loss": 0.1223, "step": 71185 }, { "epoch": 3.32, - "learning_rate": 1.3418967699592144e-05, - "loss": 0.2676, + "learning_rate": 2.3429235641957782e-05, + "loss": 0.1881, "step": 71190 }, { "epoch": 3.32, - "learning_rate": 1.3418498898317006e-05, - "loss": 0.4007, + "learning_rate": 2.342876757212176e-05, + "loss": 0.2967, "step": 71195 }, { "epoch": 3.32, - "learning_rate": 1.3418030097041866e-05, - "loss": 0.1035, + "learning_rate": 2.342829950228574e-05, + "loss": 0.062, "step": 71200 }, { "epoch": 3.32, - "learning_rate": 1.3417561295766726e-05, - "loss": 0.1052, + "learning_rate": 2.3427831432449725e-05, + "loss": 0.0077, "step": 71205 }, { "epoch": 3.32, - "learning_rate": 1.3417092494491586e-05, - "loss": 0.0426, + "learning_rate": 2.3427363362613704e-05, + "loss": 0.0673, "step": 71210 }, { "epoch": 3.32, - "learning_rate": 1.3416623693216446e-05, - "loss": 0.0513, + "learning_rate": 2.342689529277768e-05, + "loss": 0.048, "step": 71215 }, { "epoch": 3.32, - "learning_rate": 1.3416154891941307e-05, - "loss": 0.0408, + "learning_rate": 2.342642722294166e-05, + "loss": 0.1128, "step": 71220 }, { "epoch": 3.32, - "learning_rate": 1.3415686090666167e-05, - "loss": 0.1096, + "learning_rate": 2.3425959153105644e-05, + "loss": 0.2254, "step": 71225 }, { "epoch": 3.32, - "learning_rate": 1.3415217289391029e-05, - "loss": 0.0922, + "learning_rate": 2.3425491083269624e-05, + "loss": 0.0897, "step": 71230 }, { "epoch": 3.32, - "learning_rate": 1.341474848811589e-05, - "loss": 0.1576, + "learning_rate": 2.3425023013433603e-05, + "loss": 0.1356, "step": 71235 }, { "epoch": 3.32, - "learning_rate": 1.341427968684075e-05, - "loss": 0.118, + "learning_rate": 2.3424554943597587e-05, + "loss": 0.3869, "step": 71240 }, { "epoch": 3.32, - "learning_rate": 1.341381088556561e-05, - "loss": 0.2277, + "learning_rate": 2.3424086873761567e-05, + "loss": 0.267, "step": 71245 }, { "epoch": 3.32, - "learning_rate": 1.341334208429047e-05, - "loss": 0.0906, + "learning_rate": 2.3423618803925546e-05, + "loss": 0.0932, "step": 71250 }, { "epoch": 3.32, - "learning_rate": 1.341287328301533e-05, - "loss": 0.0256, + "learning_rate": 2.3423150734089526e-05, + "loss": 0.0468, "step": 71255 }, { "epoch": 3.33, - "learning_rate": 1.3412404481740192e-05, - "loss": 0.0239, + "learning_rate": 2.342268266425351e-05, + "loss": 0.1404, "step": 71260 }, { "epoch": 3.33, - "learning_rate": 1.3411935680465052e-05, - "loss": 0.021, + "learning_rate": 2.342221459441749e-05, + "loss": 0.0207, "step": 71265 }, { "epoch": 3.33, - "learning_rate": 1.3411466879189912e-05, - "loss": 0.0638, + "learning_rate": 2.342174652458147e-05, + "loss": 0.071, "step": 71270 }, { "epoch": 3.33, - "learning_rate": 1.3410998077914772e-05, - "loss": 0.0908, + "learning_rate": 2.342127845474545e-05, + "loss": 0.0403, "step": 71275 }, { "epoch": 3.33, - "learning_rate": 1.3410529276639635e-05, - "loss": 0.1054, + "learning_rate": 2.342081038490943e-05, + "loss": 0.1147, "step": 71280 }, { "epoch": 3.33, - "learning_rate": 1.3410060475364495e-05, - "loss": 0.1925, + "learning_rate": 2.342034231507341e-05, + "loss": 0.0999, "step": 71285 }, { "epoch": 3.33, - "learning_rate": 1.3409591674089355e-05, - "loss": 0.2934, + "learning_rate": 2.3419874245237388e-05, + "loss": 0.3453, "step": 71290 }, { "epoch": 3.33, - "learning_rate": 1.3409122872814215e-05, - "loss": 0.3971, + "learning_rate": 2.3419406175401368e-05, + "loss": 0.296, "step": 71295 }, { "epoch": 3.33, - "learning_rate": 1.3408654071539076e-05, - "loss": 0.0439, + "learning_rate": 2.341893810556535e-05, + "loss": 0.0562, "step": 71300 }, { "epoch": 3.33, - "learning_rate": 1.3408185270263936e-05, - "loss": 0.0476, + "learning_rate": 2.341847003572933e-05, + "loss": 0.0233, "step": 71305 }, { "epoch": 3.33, - "learning_rate": 1.3407716468988796e-05, - "loss": 0.0662, + "learning_rate": 2.341800196589331e-05, + "loss": 0.0609, "step": 71310 }, { "epoch": 3.33, - "learning_rate": 1.3407247667713656e-05, - "loss": 0.0805, + "learning_rate": 2.3417533896057294e-05, + "loss": 0.0258, "step": 71315 }, { "epoch": 3.33, - "learning_rate": 1.3406778866438516e-05, - "loss": 0.0783, + "learning_rate": 2.3417065826221274e-05, + "loss": 0.3696, "step": 71320 }, { "epoch": 3.33, - "learning_rate": 1.340631006516338e-05, - "loss": 0.047, + "learning_rate": 2.3416597756385254e-05, + "loss": 0.0482, "step": 71325 }, { "epoch": 3.33, - "learning_rate": 1.340584126388824e-05, - "loss": 0.1047, + "learning_rate": 2.3416129686549234e-05, + "loss": 0.0605, "step": 71330 }, { "epoch": 3.33, - "learning_rate": 1.34053724626131e-05, - "loss": 0.1287, + "learning_rate": 2.3415661616713217e-05, + "loss": 0.1359, "step": 71335 }, { "epoch": 3.33, - "learning_rate": 1.3404903661337961e-05, - "loss": 0.1862, + "learning_rate": 2.3415193546877197e-05, + "loss": 0.2291, "step": 71340 }, { "epoch": 3.33, - "learning_rate": 1.340443486006282e-05, - "loss": 0.2412, + "learning_rate": 2.3414725477041173e-05, + "loss": 0.1951, "step": 71345 }, { "epoch": 3.33, - "learning_rate": 1.340396605878768e-05, - "loss": 0.0788, + "learning_rate": 2.3414257407205153e-05, + "loss": 0.0574, "step": 71350 }, { "epoch": 3.33, - "learning_rate": 1.340349725751254e-05, - "loss": 0.0357, + "learning_rate": 2.3413789337369136e-05, + "loss": 0.049, "step": 71355 }, { "epoch": 3.33, - "learning_rate": 1.34030284562374e-05, - "loss": 0.0551, + "learning_rate": 2.3413321267533116e-05, + "loss": 0.0617, "step": 71360 }, { "epoch": 3.33, - "learning_rate": 1.3402559654962262e-05, - "loss": 0.0659, + "learning_rate": 2.3412853197697096e-05, + "loss": 0.0686, "step": 71365 }, { "epoch": 3.33, - "learning_rate": 1.3402090853687124e-05, - "loss": 0.0647, + "learning_rate": 2.341238512786108e-05, + "loss": 0.0619, "step": 71370 }, { "epoch": 3.33, - "learning_rate": 1.3401622052411984e-05, - "loss": 0.079, + "learning_rate": 2.341191705802506e-05, + "loss": 0.0595, "step": 71375 }, { "epoch": 3.33, - "learning_rate": 1.3401153251136845e-05, - "loss": 0.112, + "learning_rate": 2.341144898818904e-05, + "loss": 0.1534, "step": 71380 }, { "epoch": 3.33, - "learning_rate": 1.3400684449861705e-05, - "loss": 0.1248, + "learning_rate": 2.341098091835302e-05, + "loss": 0.1786, "step": 71385 }, { "epoch": 3.33, - "learning_rate": 1.3400215648586565e-05, - "loss": 0.1932, + "learning_rate": 2.3410512848517e-05, + "loss": 0.1749, "step": 71390 }, { "epoch": 3.33, - "learning_rate": 1.3399746847311425e-05, - "loss": 0.4284, + "learning_rate": 2.341004477868098e-05, + "loss": 0.2042, "step": 71395 }, { "epoch": 3.33, - "learning_rate": 1.3399278046036285e-05, - "loss": 0.0631, + "learning_rate": 2.340957670884496e-05, + "loss": 0.0871, "step": 71400 }, { "epoch": 3.33, - "learning_rate": 1.3398809244761147e-05, - "loss": 0.0085, + "learning_rate": 2.3409108639008938e-05, + "loss": 0.0394, "step": 71405 }, { "epoch": 3.33, - "learning_rate": 1.3398340443486007e-05, - "loss": 0.0515, + "learning_rate": 2.340864056917292e-05, + "loss": 0.0271, "step": 71410 }, { "epoch": 3.33, - "learning_rate": 1.3397871642210868e-05, - "loss": 0.0528, + "learning_rate": 2.34081724993369e-05, + "loss": 0.0616, "step": 71415 }, { "epoch": 3.33, - "learning_rate": 1.339740284093573e-05, - "loss": 0.1182, + "learning_rate": 2.340770442950088e-05, + "loss": 0.115, "step": 71420 }, { "epoch": 3.33, - "learning_rate": 1.339693403966059e-05, - "loss": 0.142, + "learning_rate": 2.3407236359664864e-05, + "loss": 0.0698, "step": 71425 }, { "epoch": 3.33, - "learning_rate": 1.339646523838545e-05, - "loss": 0.0624, + "learning_rate": 2.3406768289828843e-05, + "loss": 0.1632, "step": 71430 }, { "epoch": 3.33, - "learning_rate": 1.339599643711031e-05, - "loss": 0.1191, + "learning_rate": 2.3406300219992823e-05, + "loss": 0.1622, "step": 71435 }, { "epoch": 3.33, - "learning_rate": 1.339552763583517e-05, - "loss": 0.2345, + "learning_rate": 2.3405832150156803e-05, + "loss": 0.3173, "step": 71440 }, { "epoch": 3.33, - "learning_rate": 1.3395058834560031e-05, - "loss": 0.2952, + "learning_rate": 2.3405364080320786e-05, + "loss": 0.2319, "step": 71445 }, { "epoch": 3.33, - "learning_rate": 1.3394590033284891e-05, - "loss": 0.0598, + "learning_rate": 2.3404896010484766e-05, + "loss": 0.0216, "step": 71450 }, { "epoch": 3.33, - "learning_rate": 1.3394121232009751e-05, - "loss": 0.0184, + "learning_rate": 2.3404427940648746e-05, + "loss": 0.0331, "step": 71455 }, { "epoch": 3.33, - "learning_rate": 1.3393652430734611e-05, - "loss": 0.0507, + "learning_rate": 2.3403959870812726e-05, + "loss": 0.0526, "step": 71460 }, { "epoch": 3.33, - "learning_rate": 1.3393183629459474e-05, - "loss": 0.0529, + "learning_rate": 2.340349180097671e-05, + "loss": 0.0988, "step": 71465 }, { "epoch": 3.33, - "learning_rate": 1.3392714828184334e-05, - "loss": 0.0637, + "learning_rate": 2.3403023731140685e-05, + "loss": 0.0903, "step": 71470 }, { "epoch": 3.34, - "learning_rate": 1.3392246026909194e-05, - "loss": 0.1361, + "learning_rate": 2.3402555661304665e-05, + "loss": 0.0893, "step": 71475 }, { "epoch": 3.34, - "learning_rate": 1.3391777225634054e-05, - "loss": 0.0945, + "learning_rate": 2.3402087591468645e-05, + "loss": 0.1246, "step": 71480 }, { "epoch": 3.34, - "learning_rate": 1.3391308424358916e-05, - "loss": 0.0916, + "learning_rate": 2.3401619521632628e-05, + "loss": 0.0604, "step": 71485 }, { "epoch": 3.34, - "learning_rate": 1.3390839623083776e-05, - "loss": 0.0792, + "learning_rate": 2.3401151451796608e-05, + "loss": 0.1547, "step": 71490 }, { "epoch": 3.34, - "learning_rate": 1.3390370821808636e-05, - "loss": 0.3219, + "learning_rate": 2.3400683381960588e-05, + "loss": 0.2494, "step": 71495 }, { "epoch": 3.34, - "learning_rate": 1.3389902020533496e-05, - "loss": 0.0405, + "learning_rate": 2.340021531212457e-05, + "loss": 0.0688, "step": 71500 }, { "epoch": 3.34, - "learning_rate": 1.3389433219258357e-05, - "loss": 0.0338, + "learning_rate": 2.339974724228855e-05, + "loss": 0.0598, "step": 71505 }, { "epoch": 3.34, - "learning_rate": 1.3388964417983219e-05, - "loss": 0.053, + "learning_rate": 2.339927917245253e-05, + "loss": 0.0484, "step": 71510 }, { "epoch": 3.34, - "learning_rate": 1.3388495616708079e-05, - "loss": 0.0713, + "learning_rate": 2.339881110261651e-05, + "loss": 0.0265, "step": 71515 }, { "epoch": 3.34, - "learning_rate": 1.3388026815432939e-05, - "loss": 0.0541, + "learning_rate": 2.3398343032780494e-05, + "loss": 0.0876, "step": 71520 }, { "epoch": 3.34, - "learning_rate": 1.33875580141578e-05, - "loss": 0.0825, + "learning_rate": 2.3397874962944474e-05, + "loss": 0.085, "step": 71525 }, { "epoch": 3.34, - "learning_rate": 1.338708921288266e-05, - "loss": 0.1124, + "learning_rate": 2.3397406893108453e-05, + "loss": 0.1152, "step": 71530 }, { "epoch": 3.34, - "learning_rate": 1.338662041160752e-05, - "loss": 0.1191, + "learning_rate": 2.339693882327243e-05, + "loss": 0.1376, "step": 71535 }, { "epoch": 3.34, - "learning_rate": 1.338615161033238e-05, - "loss": 0.2045, + "learning_rate": 2.3396470753436413e-05, + "loss": 0.1654, "step": 71540 }, { "epoch": 3.34, - "learning_rate": 1.3385682809057242e-05, - "loss": 0.4005, + "learning_rate": 2.3396002683600393e-05, + "loss": 0.3246, "step": 71545 }, { "epoch": 3.34, - "learning_rate": 1.3385214007782102e-05, - "loss": 0.1029, + "learning_rate": 2.3395534613764373e-05, + "loss": 0.0831, "step": 71550 }, { "epoch": 3.34, - "learning_rate": 1.3384745206506963e-05, - "loss": 0.0426, + "learning_rate": 2.3395066543928356e-05, + "loss": 0.0596, "step": 71555 }, { "epoch": 3.34, - "learning_rate": 1.3384276405231823e-05, - "loss": 0.0687, + "learning_rate": 2.3394598474092336e-05, + "loss": 0.0362, "step": 71560 }, { "epoch": 3.34, - "learning_rate": 1.3383807603956685e-05, - "loss": 0.0593, + "learning_rate": 2.3394130404256316e-05, + "loss": 0.0999, "step": 71565 }, { "epoch": 3.34, - "learning_rate": 1.3383338802681545e-05, - "loss": 0.0722, + "learning_rate": 2.3393662334420295e-05, + "loss": 0.0818, "step": 71570 }, { "epoch": 3.34, - "learning_rate": 1.3382870001406405e-05, - "loss": 0.0619, + "learning_rate": 2.339319426458428e-05, + "loss": 0.0523, "step": 71575 }, { "epoch": 3.34, - "learning_rate": 1.3382401200131265e-05, - "loss": 0.144, + "learning_rate": 2.339272619474826e-05, + "loss": 0.1276, "step": 71580 }, { "epoch": 3.34, - "learning_rate": 1.3381932398856126e-05, - "loss": 0.1801, + "learning_rate": 2.3392258124912238e-05, + "loss": 0.1137, "step": 71585 }, { "epoch": 3.34, - "learning_rate": 1.3381463597580986e-05, - "loss": 0.2007, + "learning_rate": 2.3391790055076218e-05, + "loss": 0.2068, "step": 71590 }, { "epoch": 3.34, - "learning_rate": 1.3380994796305846e-05, - "loss": 0.2975, + "learning_rate": 2.3391321985240198e-05, + "loss": 0.2999, "step": 71595 }, { "epoch": 3.34, - "learning_rate": 1.3380525995030706e-05, - "loss": 0.066, + "learning_rate": 2.3390853915404178e-05, + "loss": 0.0544, "step": 71600 }, { "epoch": 3.34, - "learning_rate": 1.338005719375557e-05, - "loss": 0.0438, + "learning_rate": 2.3390385845568157e-05, + "loss": 0.0197, "step": 71605 }, { "epoch": 3.34, - "learning_rate": 1.337958839248043e-05, - "loss": 0.061, + "learning_rate": 2.338991777573214e-05, + "loss": 0.0403, "step": 71610 }, { "epoch": 3.34, - "learning_rate": 1.337911959120529e-05, - "loss": 0.0309, + "learning_rate": 2.338944970589612e-05, + "loss": 0.0978, "step": 71615 }, { "epoch": 3.34, - "learning_rate": 1.337865078993015e-05, - "loss": 0.0702, + "learning_rate": 2.33889816360601e-05, + "loss": 0.0329, "step": 71620 }, { "epoch": 3.34, - "learning_rate": 1.3378181988655011e-05, - "loss": 0.0805, + "learning_rate": 2.338851356622408e-05, + "loss": 0.1112, "step": 71625 }, { "epoch": 3.34, - "learning_rate": 1.3377713187379871e-05, - "loss": 0.0509, + "learning_rate": 2.3388045496388063e-05, + "loss": 0.0943, "step": 71630 }, { "epoch": 3.34, - "learning_rate": 1.337724438610473e-05, - "loss": 0.1516, + "learning_rate": 2.3387577426552043e-05, + "loss": 0.1737, "step": 71635 }, { "epoch": 3.34, - "learning_rate": 1.337677558482959e-05, - "loss": 0.1109, + "learning_rate": 2.3387109356716023e-05, + "loss": 0.2539, "step": 71640 }, { "epoch": 3.34, - "learning_rate": 1.337630678355445e-05, - "loss": 0.2836, + "learning_rate": 2.3386641286880003e-05, + "loss": 0.2746, "step": 71645 }, { "epoch": 3.34, - "learning_rate": 1.3375837982279314e-05, - "loss": 0.0521, + "learning_rate": 2.3386173217043986e-05, + "loss": 0.0446, "step": 71650 }, { "epoch": 3.34, - "learning_rate": 1.3375369181004174e-05, - "loss": 0.0361, + "learning_rate": 2.3385705147207966e-05, + "loss": 0.0512, "step": 71655 }, { "epoch": 3.34, - "learning_rate": 1.3374900379729034e-05, - "loss": 0.0419, + "learning_rate": 2.3385237077371942e-05, + "loss": 0.0651, "step": 71660 }, { "epoch": 3.34, - "learning_rate": 1.3374431578453896e-05, - "loss": 0.0647, + "learning_rate": 2.3384769007535925e-05, + "loss": 0.0903, "step": 71665 }, { "epoch": 3.34, - "learning_rate": 1.3373962777178755e-05, - "loss": 0.0941, + "learning_rate": 2.3384300937699905e-05, + "loss": 0.0924, "step": 71670 }, { "epoch": 3.34, - "learning_rate": 1.3373493975903615e-05, - "loss": 0.1481, + "learning_rate": 2.3383832867863885e-05, + "loss": 0.0979, "step": 71675 }, { "epoch": 3.34, - "learning_rate": 1.3373025174628475e-05, - "loss": 0.1705, + "learning_rate": 2.3383364798027865e-05, + "loss": 0.0613, "step": 71680 }, { "epoch": 3.34, - "learning_rate": 1.3372556373353335e-05, - "loss": 0.1946, + "learning_rate": 2.3382896728191848e-05, + "loss": 0.1031, "step": 71685 }, { "epoch": 3.35, - "learning_rate": 1.3372087572078197e-05, - "loss": 0.2952, + "learning_rate": 2.3382428658355828e-05, + "loss": 0.1811, "step": 71690 }, { "epoch": 3.35, - "learning_rate": 1.3371618770803059e-05, - "loss": 0.3711, + "learning_rate": 2.3381960588519808e-05, + "loss": 0.3334, "step": 71695 }, { "epoch": 3.35, - "learning_rate": 1.3371149969527918e-05, - "loss": 0.0434, + "learning_rate": 2.3381492518683788e-05, + "loss": 0.0697, "step": 71700 }, { "epoch": 3.35, - "learning_rate": 1.337068116825278e-05, - "loss": 0.0257, + "learning_rate": 2.338102444884777e-05, + "loss": 0.0336, "step": 71705 }, { "epoch": 3.35, - "learning_rate": 1.337021236697764e-05, - "loss": 0.0518, + "learning_rate": 2.338055637901175e-05, + "loss": 0.0341, "step": 71710 }, { "epoch": 3.35, - "learning_rate": 1.33697435657025e-05, - "loss": 0.041, + "learning_rate": 2.338008830917573e-05, + "loss": 0.0397, "step": 71715 }, { "epoch": 3.35, - "learning_rate": 1.336927476442736e-05, - "loss": 0.0884, + "learning_rate": 2.337962023933971e-05, + "loss": 0.1008, "step": 71720 }, { "epoch": 3.35, - "learning_rate": 1.336880596315222e-05, - "loss": 0.0258, + "learning_rate": 2.337915216950369e-05, + "loss": 0.0658, "step": 71725 }, { "epoch": 3.35, - "learning_rate": 1.3368337161877081e-05, - "loss": 0.1479, + "learning_rate": 2.337868409966767e-05, + "loss": 0.0498, "step": 71730 }, { "epoch": 3.35, - "learning_rate": 1.3367868360601941e-05, - "loss": 0.1033, + "learning_rate": 2.337821602983165e-05, + "loss": 0.0863, "step": 71735 }, { "epoch": 3.35, - "learning_rate": 1.3367399559326803e-05, - "loss": 0.2549, + "learning_rate": 2.3377747959995633e-05, + "loss": 0.2048, "step": 71740 }, { "epoch": 3.35, - "learning_rate": 1.3366930758051665e-05, - "loss": 0.1745, + "learning_rate": 2.3377279890159613e-05, + "loss": 0.2875, "step": 71745 }, { "epoch": 3.35, - "learning_rate": 1.3366461956776525e-05, - "loss": 0.0645, + "learning_rate": 2.3376811820323592e-05, + "loss": 0.0584, "step": 71750 }, { "epoch": 3.35, - "learning_rate": 1.3365993155501384e-05, - "loss": 0.0305, + "learning_rate": 2.3376343750487572e-05, + "loss": 0.0399, "step": 71755 }, { "epoch": 3.35, - "learning_rate": 1.3365524354226244e-05, - "loss": 0.0346, + "learning_rate": 2.3375875680651555e-05, + "loss": 0.0492, "step": 71760 }, { "epoch": 3.35, - "learning_rate": 1.3365055552951104e-05, - "loss": 0.0896, + "learning_rate": 2.3375407610815535e-05, + "loss": 0.0616, "step": 71765 }, { "epoch": 3.35, - "learning_rate": 1.3364586751675966e-05, - "loss": 0.1029, + "learning_rate": 2.3374939540979515e-05, + "loss": 0.056, "step": 71770 }, { "epoch": 3.35, - "learning_rate": 1.3364117950400826e-05, - "loss": 0.0547, + "learning_rate": 2.3374471471143495e-05, + "loss": 0.1811, "step": 71775 }, { "epoch": 3.35, - "learning_rate": 1.3363649149125686e-05, - "loss": 0.0774, + "learning_rate": 2.3374003401307478e-05, + "loss": 0.1952, "step": 71780 }, { "epoch": 3.35, - "learning_rate": 1.3363180347850546e-05, - "loss": 0.1384, + "learning_rate": 2.3373535331471455e-05, + "loss": 0.1197, "step": 71785 }, { "epoch": 3.35, - "learning_rate": 1.3362711546575409e-05, - "loss": 0.1239, + "learning_rate": 2.3373067261635434e-05, + "loss": 0.2021, "step": 71790 }, { "epoch": 3.35, - "learning_rate": 1.3362242745300269e-05, - "loss": 0.236, + "learning_rate": 2.3372599191799418e-05, + "loss": 0.257, "step": 71795 }, { "epoch": 3.35, - "learning_rate": 1.3361773944025129e-05, - "loss": 0.0586, + "learning_rate": 2.3372131121963397e-05, + "loss": 0.0421, "step": 71800 }, { "epoch": 3.35, - "learning_rate": 1.3361305142749989e-05, - "loss": 0.0177, + "learning_rate": 2.3371663052127377e-05, + "loss": 0.0551, "step": 71805 }, { "epoch": 3.35, - "learning_rate": 1.336083634147485e-05, - "loss": 0.0119, + "learning_rate": 2.3371194982291357e-05, + "loss": 0.0511, "step": 71810 }, { "epoch": 3.35, - "learning_rate": 1.336036754019971e-05, - "loss": 0.059, + "learning_rate": 2.337072691245534e-05, + "loss": 0.1341, "step": 71815 }, { "epoch": 3.35, - "learning_rate": 1.335989873892457e-05, - "loss": 0.0429, + "learning_rate": 2.337025884261932e-05, + "loss": 0.0566, "step": 71820 }, { "epoch": 3.35, - "learning_rate": 1.335942993764943e-05, - "loss": 0.0979, + "learning_rate": 2.33697907727833e-05, + "loss": 0.0829, "step": 71825 }, { "epoch": 3.35, - "learning_rate": 1.335896113637429e-05, - "loss": 0.1286, + "learning_rate": 2.336932270294728e-05, + "loss": 0.1324, "step": 71830 }, { "epoch": 3.35, - "learning_rate": 1.3358492335099154e-05, - "loss": 0.1293, + "learning_rate": 2.3368854633111263e-05, + "loss": 0.0875, "step": 71835 }, { "epoch": 3.35, - "learning_rate": 1.3358023533824014e-05, - "loss": 0.223, + "learning_rate": 2.3368386563275243e-05, + "loss": 0.2227, "step": 71840 }, { "epoch": 3.35, - "learning_rate": 1.3357554732548873e-05, - "loss": 0.1792, + "learning_rate": 2.3367918493439223e-05, + "loss": 0.2088, "step": 71845 }, { "epoch": 3.35, - "learning_rate": 1.3357085931273735e-05, - "loss": 0.0705, + "learning_rate": 2.3367450423603202e-05, + "loss": 0.055, "step": 71850 }, { "epoch": 3.35, - "learning_rate": 1.3356617129998595e-05, - "loss": 0.0182, + "learning_rate": 2.3366982353767182e-05, + "loss": 0.0183, "step": 71855 }, { "epoch": 3.35, - "learning_rate": 1.3356148328723455e-05, - "loss": 0.0345, + "learning_rate": 2.3366514283931162e-05, + "loss": 0.0285, "step": 71860 }, { "epoch": 3.35, - "learning_rate": 1.3355679527448315e-05, - "loss": 0.0467, + "learning_rate": 2.3366046214095142e-05, + "loss": 0.0498, "step": 71865 }, { "epoch": 3.35, - "learning_rate": 1.3355210726173175e-05, - "loss": 0.0458, + "learning_rate": 2.3365578144259125e-05, + "loss": 0.0475, "step": 71870 }, { "epoch": 3.35, - "learning_rate": 1.3354741924898036e-05, - "loss": 0.081, + "learning_rate": 2.3365110074423105e-05, + "loss": 0.1188, "step": 71875 }, { "epoch": 3.35, - "learning_rate": 1.3354273123622898e-05, - "loss": 0.1493, + "learning_rate": 2.3364642004587085e-05, + "loss": 0.0983, "step": 71880 }, { "epoch": 3.35, - "learning_rate": 1.3353804322347758e-05, - "loss": 0.1628, + "learning_rate": 2.3364173934751064e-05, + "loss": 0.1324, "step": 71885 }, { "epoch": 3.35, - "learning_rate": 1.335333552107262e-05, - "loss": 0.1667, + "learning_rate": 2.3363705864915048e-05, + "loss": 0.1894, "step": 71890 }, { "epoch": 3.35, - "learning_rate": 1.335286671979748e-05, - "loss": 0.2669, + "learning_rate": 2.3363237795079028e-05, + "loss": 0.3572, "step": 71895 }, { "epoch": 3.35, - "learning_rate": 1.335239791852234e-05, - "loss": 0.0896, + "learning_rate": 2.3362769725243007e-05, + "loss": 0.0924, "step": 71900 }, { "epoch": 3.36, - "learning_rate": 1.33519291172472e-05, - "loss": 0.0787, + "learning_rate": 2.3362301655406987e-05, + "loss": 0.0355, "step": 71905 }, { "epoch": 3.36, - "learning_rate": 1.335146031597206e-05, - "loss": 0.0165, + "learning_rate": 2.3361833585570967e-05, + "loss": 0.0664, "step": 71910 }, { "epoch": 3.36, - "learning_rate": 1.3350991514696921e-05, - "loss": 0.0706, + "learning_rate": 2.3361365515734947e-05, + "loss": 0.0302, "step": 71915 }, { "epoch": 3.36, - "learning_rate": 1.3350522713421781e-05, - "loss": 0.0208, + "learning_rate": 2.3360897445898927e-05, + "loss": 0.0453, "step": 71920 }, { "epoch": 3.36, - "learning_rate": 1.3350053912146641e-05, - "loss": 0.0542, + "learning_rate": 2.336042937606291e-05, + "loss": 0.1158, "step": 71925 }, { "epoch": 3.36, - "learning_rate": 1.3349585110871504e-05, - "loss": 0.1398, + "learning_rate": 2.335996130622689e-05, + "loss": 0.1277, "step": 71930 }, { "epoch": 3.36, - "learning_rate": 1.3349116309596364e-05, - "loss": 0.1703, + "learning_rate": 2.335949323639087e-05, + "loss": 0.1361, "step": 71935 }, { "epoch": 3.36, - "learning_rate": 1.3348647508321224e-05, - "loss": 0.1941, + "learning_rate": 2.335902516655485e-05, + "loss": 0.0989, "step": 71940 }, { "epoch": 3.36, - "learning_rate": 1.3348178707046084e-05, - "loss": 0.2125, + "learning_rate": 2.3358557096718832e-05, + "loss": 0.1417, "step": 71945 }, { "epoch": 3.36, - "learning_rate": 1.3347709905770944e-05, - "loss": 0.069, + "learning_rate": 2.3358089026882812e-05, + "loss": 0.0827, "step": 71950 }, { "epoch": 3.36, - "learning_rate": 1.3347241104495806e-05, - "loss": 0.0295, + "learning_rate": 2.3357620957046792e-05, + "loss": 0.0374, "step": 71955 }, { "epoch": 3.36, - "learning_rate": 1.3346772303220665e-05, - "loss": 0.0448, + "learning_rate": 2.3357152887210772e-05, + "loss": 0.0461, "step": 71960 }, { "epoch": 3.36, - "learning_rate": 1.3346303501945525e-05, - "loss": 0.033, + "learning_rate": 2.3356684817374755e-05, + "loss": 0.05, "step": 71965 }, { "epoch": 3.36, - "learning_rate": 1.3345834700670385e-05, - "loss": 0.0627, + "learning_rate": 2.3356216747538735e-05, + "loss": 0.0795, "step": 71970 }, { "epoch": 3.36, - "learning_rate": 1.3345365899395249e-05, - "loss": 0.1029, + "learning_rate": 2.335574867770271e-05, + "loss": 0.0569, "step": 71975 }, { "epoch": 3.36, - "learning_rate": 1.3344897098120109e-05, - "loss": 0.1823, + "learning_rate": 2.3355280607866695e-05, + "loss": 0.0794, "step": 71980 }, { "epoch": 3.36, - "learning_rate": 1.3344428296844969e-05, - "loss": 0.1794, + "learning_rate": 2.3354812538030674e-05, + "loss": 0.1814, "step": 71985 }, { "epoch": 3.36, - "learning_rate": 1.3343959495569828e-05, - "loss": 0.2201, + "learning_rate": 2.3354344468194654e-05, + "loss": 0.1816, "step": 71990 }, { "epoch": 3.36, - "learning_rate": 1.334349069429469e-05, - "loss": 0.5181, + "learning_rate": 2.3353876398358634e-05, + "loss": 0.3283, "step": 71995 }, { "epoch": 3.36, - "learning_rate": 1.334302189301955e-05, - "loss": 0.083, + "learning_rate": 2.3353408328522617e-05, + "loss": 0.0665, "step": 72000 }, { "epoch": 3.36, - "learning_rate": 1.334255309174441e-05, - "loss": 0.0247, + "learning_rate": 2.3352940258686597e-05, + "loss": 0.0175, "step": 72005 }, { "epoch": 3.36, - "learning_rate": 1.334208429046927e-05, - "loss": 0.0589, + "learning_rate": 2.3352472188850577e-05, + "loss": 0.0241, "step": 72010 }, { "epoch": 3.36, - "learning_rate": 1.3341615489194132e-05, - "loss": 0.0756, + "learning_rate": 2.3352004119014557e-05, + "loss": 0.0333, "step": 72015 }, { "epoch": 3.36, - "learning_rate": 1.3341146687918993e-05, - "loss": 0.0276, + "learning_rate": 2.335153604917854e-05, + "loss": 0.0725, "step": 72020 }, { "epoch": 3.36, - "learning_rate": 1.3340677886643853e-05, - "loss": 0.0197, + "learning_rate": 2.335106797934252e-05, + "loss": 0.0659, "step": 72025 }, { "epoch": 3.36, - "learning_rate": 1.3340209085368713e-05, - "loss": 0.0849, + "learning_rate": 2.33505999095065e-05, + "loss": 0.0577, "step": 72030 }, { "epoch": 3.36, - "learning_rate": 1.3339740284093575e-05, - "loss": 0.1106, + "learning_rate": 2.335013183967048e-05, + "loss": 0.0507, "step": 72035 }, { "epoch": 3.36, - "learning_rate": 1.3339271482818435e-05, - "loss": 0.1956, + "learning_rate": 2.334966376983446e-05, + "loss": 0.1366, "step": 72040 }, { "epoch": 3.36, - "learning_rate": 1.3338802681543295e-05, - "loss": 0.2315, + "learning_rate": 2.334919569999844e-05, + "loss": 0.2895, "step": 72045 }, { "epoch": 3.36, - "learning_rate": 1.3338333880268154e-05, - "loss": 0.0434, + "learning_rate": 2.334872763016242e-05, + "loss": 0.0849, "step": 72050 }, { "epoch": 3.36, - "learning_rate": 1.3337865078993016e-05, - "loss": 0.0338, + "learning_rate": 2.3348259560326402e-05, + "loss": 0.0294, "step": 72055 }, { "epoch": 3.36, - "learning_rate": 1.3337396277717876e-05, - "loss": 0.0314, + "learning_rate": 2.3347791490490382e-05, + "loss": 0.0317, "step": 72060 }, { "epoch": 3.36, - "learning_rate": 1.3336927476442738e-05, - "loss": 0.0524, + "learning_rate": 2.334732342065436e-05, + "loss": 0.0616, "step": 72065 }, { "epoch": 3.36, - "learning_rate": 1.3336458675167598e-05, - "loss": 0.061, + "learning_rate": 2.334685535081834e-05, + "loss": 0.0844, "step": 72070 }, { "epoch": 3.36, - "learning_rate": 1.333598987389246e-05, - "loss": 0.0573, + "learning_rate": 2.3346387280982325e-05, + "loss": 0.1276, "step": 72075 }, { "epoch": 3.36, - "learning_rate": 1.3335521072617319e-05, - "loss": 0.0893, + "learning_rate": 2.3345919211146304e-05, + "loss": 0.111, "step": 72080 }, { "epoch": 3.36, - "learning_rate": 1.3335052271342179e-05, - "loss": 0.1066, + "learning_rate": 2.3345451141310284e-05, + "loss": 0.1864, "step": 72085 }, { "epoch": 3.36, - "learning_rate": 1.3334583470067039e-05, - "loss": 0.2155, + "learning_rate": 2.3344983071474264e-05, + "loss": 0.2319, "step": 72090 }, { "epoch": 3.36, - "learning_rate": 1.33341146687919e-05, - "loss": 0.3624, + "learning_rate": 2.3344515001638247e-05, + "loss": 0.1783, "step": 72095 }, { "epoch": 3.36, - "learning_rate": 1.333364586751676e-05, - "loss": 0.0759, + "learning_rate": 2.3344046931802224e-05, + "loss": 0.1195, "step": 72100 }, { "epoch": 3.36, - "learning_rate": 1.333317706624162e-05, - "loss": 0.0728, + "learning_rate": 2.3343578861966204e-05, + "loss": 0.0113, "step": 72105 }, { "epoch": 3.36, - "learning_rate": 1.333270826496648e-05, - "loss": 0.0529, + "learning_rate": 2.3343110792130187e-05, + "loss": 0.0318, "step": 72110 }, { "epoch": 3.36, - "learning_rate": 1.3332239463691344e-05, - "loss": 0.059, + "learning_rate": 2.3342642722294167e-05, + "loss": 0.0795, "step": 72115 }, { "epoch": 3.37, - "learning_rate": 1.3331770662416204e-05, - "loss": 0.0703, + "learning_rate": 2.3342174652458146e-05, + "loss": 0.1037, "step": 72120 }, { "epoch": 3.37, - "learning_rate": 1.3331301861141064e-05, - "loss": 0.0768, + "learning_rate": 2.3341706582622126e-05, + "loss": 0.048, "step": 72125 }, { "epoch": 3.37, - "learning_rate": 1.3330833059865924e-05, - "loss": 0.127, + "learning_rate": 2.334123851278611e-05, + "loss": 0.2226, "step": 72130 }, { "epoch": 3.37, - "learning_rate": 1.3330364258590785e-05, - "loss": 0.1048, + "learning_rate": 2.334077044295009e-05, + "loss": 0.0707, "step": 72135 }, { "epoch": 3.37, - "learning_rate": 1.3329895457315645e-05, - "loss": 0.3312, + "learning_rate": 2.334030237311407e-05, + "loss": 0.1492, "step": 72140 }, { "epoch": 3.37, - "learning_rate": 1.3329426656040505e-05, - "loss": 0.3051, + "learning_rate": 2.333983430327805e-05, + "loss": 0.377, "step": 72145 }, { "epoch": 3.37, - "learning_rate": 1.3328957854765365e-05, - "loss": 0.0489, + "learning_rate": 2.3339366233442032e-05, + "loss": 0.0713, "step": 72150 }, { "epoch": 3.37, - "learning_rate": 1.3328489053490225e-05, - "loss": 0.0036, + "learning_rate": 2.3338898163606012e-05, + "loss": 0.0128, "step": 72155 }, { "epoch": 3.37, - "learning_rate": 1.3328020252215088e-05, - "loss": 0.0514, + "learning_rate": 2.3338430093769992e-05, + "loss": 0.0206, "step": 72160 }, { "epoch": 3.37, - "learning_rate": 1.3327551450939948e-05, - "loss": 0.0611, + "learning_rate": 2.333796202393397e-05, + "loss": 0.0509, "step": 72165 }, { "epoch": 3.37, - "learning_rate": 1.3327082649664808e-05, - "loss": 0.0991, + "learning_rate": 2.333749395409795e-05, + "loss": 0.0534, "step": 72170 }, { "epoch": 3.37, - "learning_rate": 1.332661384838967e-05, - "loss": 0.0367, + "learning_rate": 2.333702588426193e-05, + "loss": 0.0466, "step": 72175 }, { "epoch": 3.37, - "learning_rate": 1.332614504711453e-05, - "loss": 0.1138, + "learning_rate": 2.333655781442591e-05, + "loss": 0.2096, "step": 72180 }, { "epoch": 3.37, - "learning_rate": 1.332567624583939e-05, - "loss": 0.221, + "learning_rate": 2.3336089744589894e-05, + "loss": 0.079, "step": 72185 }, { "epoch": 3.37, - "learning_rate": 1.332520744456425e-05, - "loss": 0.1647, + "learning_rate": 2.3335621674753874e-05, + "loss": 0.2544, "step": 72190 }, { "epoch": 3.37, - "learning_rate": 1.332473864328911e-05, - "loss": 0.3998, + "learning_rate": 2.3335153604917854e-05, + "loss": 0.4536, "step": 72195 }, { "epoch": 3.37, - "learning_rate": 1.3324269842013971e-05, - "loss": 0.0491, + "learning_rate": 2.3334685535081834e-05, + "loss": 0.0833, "step": 72200 }, { "epoch": 3.37, - "learning_rate": 1.3323801040738833e-05, - "loss": 0.0598, + "learning_rate": 2.3334217465245817e-05, + "loss": 0.0592, "step": 72205 }, { "epoch": 3.37, - "learning_rate": 1.3323332239463693e-05, - "loss": 0.0408, + "learning_rate": 2.3333749395409797e-05, + "loss": 0.0331, "step": 72210 }, { "epoch": 3.37, - "learning_rate": 1.3322863438188554e-05, - "loss": 0.0376, + "learning_rate": 2.3333281325573777e-05, + "loss": 0.0372, "step": 72215 }, { "epoch": 3.37, - "learning_rate": 1.3322394636913414e-05, - "loss": 0.0411, + "learning_rate": 2.333281325573776e-05, + "loss": 0.0612, "step": 72220 }, { "epoch": 3.37, - "learning_rate": 1.3321925835638274e-05, - "loss": 0.1011, + "learning_rate": 2.3332345185901736e-05, + "loss": 0.0387, "step": 72225 }, { "epoch": 3.37, - "learning_rate": 1.3321457034363134e-05, - "loss": 0.0658, + "learning_rate": 2.3331877116065716e-05, + "loss": 0.0664, "step": 72230 }, { "epoch": 3.37, - "learning_rate": 1.3320988233087994e-05, - "loss": 0.0787, + "learning_rate": 2.3331409046229696e-05, + "loss": 0.1157, "step": 72235 }, { "epoch": 3.37, - "learning_rate": 1.3320519431812856e-05, - "loss": 0.1882, + "learning_rate": 2.333094097639368e-05, + "loss": 0.1966, "step": 72240 }, { "epoch": 3.37, - "learning_rate": 1.3320050630537716e-05, - "loss": 0.3239, + "learning_rate": 2.333047290655766e-05, + "loss": 0.269, "step": 72245 }, { "epoch": 3.37, - "learning_rate": 1.3319581829262576e-05, - "loss": 0.097, + "learning_rate": 2.333000483672164e-05, + "loss": 0.092, "step": 72250 }, { "epoch": 3.37, - "learning_rate": 1.3319113027987439e-05, - "loss": 0.0214, + "learning_rate": 2.332953676688562e-05, + "loss": 0.0379, "step": 72255 }, { "epoch": 3.37, - "learning_rate": 1.3318644226712299e-05, - "loss": 0.0313, + "learning_rate": 2.33290686970496e-05, + "loss": 0.033, "step": 72260 }, { "epoch": 3.37, - "learning_rate": 1.3318175425437159e-05, - "loss": 0.1312, + "learning_rate": 2.332860062721358e-05, + "loss": 0.0924, "step": 72265 }, { "epoch": 3.37, - "learning_rate": 1.3317706624162019e-05, - "loss": 0.0164, + "learning_rate": 2.332813255737756e-05, + "loss": 0.0532, "step": 72270 }, { "epoch": 3.37, - "learning_rate": 1.3317237822886879e-05, - "loss": 0.0828, + "learning_rate": 2.332766448754154e-05, + "loss": 0.1052, "step": 72275 }, { "epoch": 3.37, - "learning_rate": 1.331676902161174e-05, - "loss": 0.1816, + "learning_rate": 2.3327196417705524e-05, + "loss": 0.1211, "step": 72280 }, { "epoch": 3.37, - "learning_rate": 1.33163002203366e-05, - "loss": 0.1546, + "learning_rate": 2.3326728347869504e-05, + "loss": 0.0893, "step": 72285 }, { "epoch": 3.37, - "learning_rate": 1.331583141906146e-05, - "loss": 0.2239, + "learning_rate": 2.332626027803348e-05, + "loss": 0.2285, "step": 72290 }, { "epoch": 3.37, - "learning_rate": 1.331536261778632e-05, - "loss": 0.3649, + "learning_rate": 2.3325792208197464e-05, + "loss": 0.3069, "step": 72295 }, { "epoch": 3.37, - "learning_rate": 1.3314893816511183e-05, - "loss": 0.0784, + "learning_rate": 2.3325324138361444e-05, + "loss": 0.122, "step": 72300 }, { "epoch": 3.37, - "learning_rate": 1.3314425015236043e-05, - "loss": 0.0467, + "learning_rate": 2.3324856068525423e-05, + "loss": 0.0221, "step": 72305 }, { "epoch": 3.37, - "learning_rate": 1.3313956213960903e-05, - "loss": 0.0673, + "learning_rate": 2.3324387998689403e-05, + "loss": 0.0223, "step": 72310 }, { "epoch": 3.37, - "learning_rate": 1.3313487412685763e-05, - "loss": 0.0611, + "learning_rate": 2.3323919928853386e-05, + "loss": 0.0782, "step": 72315 }, { "epoch": 3.37, - "learning_rate": 1.3313018611410625e-05, - "loss": 0.054, + "learning_rate": 2.3323451859017366e-05, + "loss": 0.0478, "step": 72320 }, { "epoch": 3.37, - "learning_rate": 1.3312549810135485e-05, - "loss": 0.229, + "learning_rate": 2.3322983789181346e-05, + "loss": 0.0595, "step": 72325 }, { "epoch": 3.38, - "learning_rate": 1.3312081008860345e-05, - "loss": 0.0997, + "learning_rate": 2.3322515719345326e-05, + "loss": 0.1592, "step": 72330 }, { "epoch": 3.38, - "learning_rate": 1.3311612207585205e-05, - "loss": 0.1775, + "learning_rate": 2.332204764950931e-05, + "loss": 0.072, "step": 72335 }, { "epoch": 3.38, - "learning_rate": 1.3311143406310064e-05, - "loss": 0.2334, + "learning_rate": 2.332157957967329e-05, + "loss": 0.2619, "step": 72340 }, { "epoch": 3.38, - "learning_rate": 1.3310674605034928e-05, - "loss": 0.1633, + "learning_rate": 2.332111150983727e-05, + "loss": 0.2464, "step": 72345 }, { "epoch": 3.38, - "learning_rate": 1.3310205803759788e-05, - "loss": 0.0776, + "learning_rate": 2.3320643440001252e-05, + "loss": 0.035, "step": 72350 }, { "epoch": 3.38, - "learning_rate": 1.3309737002484648e-05, - "loss": 0.0365, + "learning_rate": 2.332017537016523e-05, + "loss": 0.0238, "step": 72355 }, { "epoch": 3.38, - "learning_rate": 1.330926820120951e-05, - "loss": 0.0346, + "learning_rate": 2.3319707300329208e-05, + "loss": 0.0287, "step": 72360 }, { "epoch": 3.38, - "learning_rate": 1.330879939993437e-05, - "loss": 0.0875, + "learning_rate": 2.3319239230493188e-05, + "loss": 0.024, "step": 72365 }, { "epoch": 3.38, - "learning_rate": 1.330833059865923e-05, - "loss": 0.0641, + "learning_rate": 2.331877116065717e-05, + "loss": 0.0295, "step": 72370 }, { "epoch": 3.38, - "learning_rate": 1.3307861797384089e-05, - "loss": 0.0793, + "learning_rate": 2.331830309082115e-05, + "loss": 0.034, "step": 72375 }, { "epoch": 3.38, - "learning_rate": 1.3307392996108949e-05, - "loss": 0.0476, + "learning_rate": 2.331783502098513e-05, + "loss": 0.0604, "step": 72380 }, { "epoch": 3.38, - "learning_rate": 1.330692419483381e-05, - "loss": 0.1841, + "learning_rate": 2.331736695114911e-05, + "loss": 0.1122, "step": 72385 }, { "epoch": 3.38, - "learning_rate": 1.3306455393558672e-05, - "loss": 0.2389, + "learning_rate": 2.3316898881313094e-05, + "loss": 0.1623, "step": 72390 }, { "epoch": 3.38, - "learning_rate": 1.3305986592283532e-05, - "loss": 0.399, + "learning_rate": 2.3316430811477074e-05, + "loss": 0.3112, "step": 72395 }, { "epoch": 3.38, - "learning_rate": 1.3305517791008394e-05, - "loss": 0.0409, + "learning_rate": 2.3315962741641053e-05, + "loss": 0.0714, "step": 72400 }, { "epoch": 3.38, - "learning_rate": 1.3305048989733254e-05, - "loss": 0.018, + "learning_rate": 2.3315494671805037e-05, + "loss": 0.0372, "step": 72405 }, { "epoch": 3.38, - "learning_rate": 1.3304580188458114e-05, - "loss": 0.0465, + "learning_rate": 2.3315026601969016e-05, + "loss": 0.0441, "step": 72410 }, { "epoch": 3.38, - "learning_rate": 1.3304111387182974e-05, - "loss": 0.0337, + "learning_rate": 2.3314558532132993e-05, + "loss": 0.0557, "step": 72415 }, { "epoch": 3.38, - "learning_rate": 1.3303642585907834e-05, - "loss": 0.0483, + "learning_rate": 2.3314090462296973e-05, + "loss": 0.0807, "step": 72420 }, { "epoch": 3.38, - "learning_rate": 1.3303173784632695e-05, - "loss": 0.0754, + "learning_rate": 2.3313622392460956e-05, + "loss": 0.0853, "step": 72425 }, { "epoch": 3.38, - "learning_rate": 1.3302704983357555e-05, - "loss": 0.0944, + "learning_rate": 2.3313154322624936e-05, + "loss": 0.048, "step": 72430 }, { "epoch": 3.38, - "learning_rate": 1.3302236182082415e-05, - "loss": 0.1161, + "learning_rate": 2.3312686252788916e-05, + "loss": 0.1031, "step": 72435 }, { "epoch": 3.38, - "learning_rate": 1.3301767380807278e-05, - "loss": 0.1641, + "learning_rate": 2.3312218182952895e-05, + "loss": 0.1936, "step": 72440 }, { "epoch": 3.38, - "learning_rate": 1.3301298579532138e-05, - "loss": 0.2003, + "learning_rate": 2.331175011311688e-05, + "loss": 0.2776, "step": 72445 }, { "epoch": 3.38, - "learning_rate": 1.3300829778256998e-05, - "loss": 0.0955, + "learning_rate": 2.331128204328086e-05, + "loss": 0.0531, "step": 72450 }, { "epoch": 3.38, - "learning_rate": 1.3300360976981858e-05, - "loss": 0.0162, + "learning_rate": 2.3310813973444838e-05, + "loss": 0.0416, "step": 72455 }, { "epoch": 3.38, - "learning_rate": 1.3299892175706718e-05, - "loss": 0.0436, + "learning_rate": 2.331034590360882e-05, + "loss": 0.0799, "step": 72460 }, { "epoch": 3.38, - "learning_rate": 1.329942337443158e-05, - "loss": 0.0443, + "learning_rate": 2.33098778337728e-05, + "loss": 0.1, "step": 72465 }, { "epoch": 3.38, - "learning_rate": 1.329895457315644e-05, - "loss": 0.0388, + "learning_rate": 2.330940976393678e-05, + "loss": 0.0265, "step": 72470 }, { "epoch": 3.38, - "learning_rate": 1.32984857718813e-05, - "loss": 0.0516, + "learning_rate": 2.330894169410076e-05, + "loss": 0.0707, "step": 72475 }, { "epoch": 3.38, - "learning_rate": 1.329801697060616e-05, - "loss": 0.1215, + "learning_rate": 2.330847362426474e-05, + "loss": 0.0955, "step": 72480 }, { "epoch": 3.38, - "learning_rate": 1.3297548169331023e-05, - "loss": 0.1472, + "learning_rate": 2.330800555442872e-05, + "loss": 0.1845, "step": 72485 }, { "epoch": 3.38, - "learning_rate": 1.3297079368055883e-05, - "loss": 0.1324, + "learning_rate": 2.33075374845927e-05, + "loss": 0.0885, "step": 72490 }, { "epoch": 3.38, - "learning_rate": 1.3296610566780743e-05, - "loss": 0.3926, + "learning_rate": 2.330706941475668e-05, + "loss": 0.2061, "step": 72495 }, { "epoch": 3.38, - "learning_rate": 1.3296141765505603e-05, - "loss": 0.0776, + "learning_rate": 2.3306601344920663e-05, + "loss": 0.0608, "step": 72500 }, { "epoch": 3.38, - "learning_rate": 1.3295672964230464e-05, - "loss": 0.0354, + "learning_rate": 2.3306133275084643e-05, + "loss": 0.0217, "step": 72505 }, { "epoch": 3.38, - "learning_rate": 1.3295204162955324e-05, - "loss": 0.0843, + "learning_rate": 2.3305665205248623e-05, + "loss": 0.0332, "step": 72510 }, { "epoch": 3.38, - "learning_rate": 1.3294735361680184e-05, - "loss": 0.0604, + "learning_rate": 2.3305197135412603e-05, + "loss": 0.1302, "step": 72515 }, { "epoch": 3.38, - "learning_rate": 1.3294266560405044e-05, - "loss": 0.0771, + "learning_rate": 2.3304729065576586e-05, + "loss": 0.057, "step": 72520 }, { "epoch": 3.38, - "learning_rate": 1.3293797759129906e-05, - "loss": 0.0755, + "learning_rate": 2.3304260995740566e-05, + "loss": 0.0595, "step": 72525 }, { "epoch": 3.38, - "learning_rate": 1.3293328957854767e-05, - "loss": 0.1144, + "learning_rate": 2.3303792925904546e-05, + "loss": 0.1257, "step": 72530 }, { "epoch": 3.38, - "learning_rate": 1.3292860156579627e-05, - "loss": 0.1494, + "learning_rate": 2.330332485606853e-05, + "loss": 0.1393, "step": 72535 }, { "epoch": 3.38, - "learning_rate": 1.3292391355304489e-05, - "loss": 0.1948, + "learning_rate": 2.330285678623251e-05, + "loss": 0.2849, "step": 72540 }, { "epoch": 3.39, - "learning_rate": 1.3291922554029349e-05, - "loss": 0.3107, + "learning_rate": 2.3302388716396485e-05, + "loss": 0.551, "step": 72545 }, { "epoch": 3.39, - "learning_rate": 1.3291453752754209e-05, - "loss": 0.0527, + "learning_rate": 2.3301920646560465e-05, + "loss": 0.0821, "step": 72550 }, { "epoch": 3.39, - "learning_rate": 1.3290984951479069e-05, - "loss": 0.0309, + "learning_rate": 2.3301452576724448e-05, + "loss": 0.0366, "step": 72555 }, { "epoch": 3.39, - "learning_rate": 1.3290516150203929e-05, - "loss": 0.0505, + "learning_rate": 2.3300984506888428e-05, + "loss": 0.0146, "step": 72560 }, { "epoch": 3.39, - "learning_rate": 1.329004734892879e-05, - "loss": 0.0256, + "learning_rate": 2.3300516437052408e-05, + "loss": 0.1005, "step": 72565 }, { "epoch": 3.39, - "learning_rate": 1.328957854765365e-05, - "loss": 0.0659, + "learning_rate": 2.3300048367216388e-05, + "loss": 0.0894, "step": 72570 }, { "epoch": 3.39, - "learning_rate": 1.328910974637851e-05, - "loss": 0.0804, + "learning_rate": 2.329958029738037e-05, + "loss": 0.1084, "step": 72575 }, { "epoch": 3.39, - "learning_rate": 1.3288640945103373e-05, - "loss": 0.0771, + "learning_rate": 2.329911222754435e-05, + "loss": 0.1328, "step": 72580 }, { "epoch": 3.39, - "learning_rate": 1.3288172143828233e-05, - "loss": 0.21, + "learning_rate": 2.329864415770833e-05, + "loss": 0.1552, "step": 72585 }, { "epoch": 3.39, - "learning_rate": 1.3287703342553093e-05, - "loss": 0.233, + "learning_rate": 2.3298176087872314e-05, + "loss": 0.0983, "step": 72590 }, { "epoch": 3.39, - "learning_rate": 1.3287234541277953e-05, - "loss": 0.4205, + "learning_rate": 2.3297708018036293e-05, + "loss": 0.2346, "step": 72595 }, { "epoch": 3.39, - "learning_rate": 1.3286765740002813e-05, - "loss": 0.0724, + "learning_rate": 2.3297239948200273e-05, + "loss": 0.0941, "step": 72600 }, { "epoch": 3.39, - "learning_rate": 1.3286296938727675e-05, - "loss": 0.0352, + "learning_rate": 2.329677187836425e-05, + "loss": 0.0571, "step": 72605 }, { "epoch": 3.39, - "learning_rate": 1.3285828137452535e-05, - "loss": 0.0254, + "learning_rate": 2.3296303808528233e-05, + "loss": 0.0419, "step": 72610 }, { "epoch": 3.39, - "learning_rate": 1.3285359336177395e-05, - "loss": 0.0759, + "learning_rate": 2.3295835738692213e-05, + "loss": 0.0499, "step": 72615 }, { "epoch": 3.39, - "learning_rate": 1.3284890534902255e-05, - "loss": 0.0816, + "learning_rate": 2.3295367668856193e-05, + "loss": 0.0374, "step": 72620 }, { "epoch": 3.39, - "learning_rate": 1.3284421733627118e-05, - "loss": 0.0902, + "learning_rate": 2.3294899599020172e-05, + "loss": 0.0968, "step": 72625 }, { "epoch": 3.39, - "learning_rate": 1.3283952932351978e-05, - "loss": 0.088, + "learning_rate": 2.3294431529184156e-05, + "loss": 0.0994, "step": 72630 }, { "epoch": 3.39, - "learning_rate": 1.3283484131076838e-05, - "loss": 0.0907, + "learning_rate": 2.3293963459348135e-05, + "loss": 0.0801, "step": 72635 }, { "epoch": 3.39, - "learning_rate": 1.3283015329801698e-05, - "loss": 0.1708, + "learning_rate": 2.3293495389512115e-05, + "loss": 0.1363, "step": 72640 }, { "epoch": 3.39, - "learning_rate": 1.328254652852656e-05, - "loss": 0.2892, + "learning_rate": 2.32930273196761e-05, + "loss": 0.2928, "step": 72645 }, { "epoch": 3.39, - "learning_rate": 1.328207772725142e-05, - "loss": 0.0518, + "learning_rate": 2.3292559249840078e-05, + "loss": 0.0799, "step": 72650 }, { "epoch": 3.39, - "learning_rate": 1.328160892597628e-05, - "loss": 0.0324, + "learning_rate": 2.3292091180004058e-05, + "loss": 0.0129, "step": 72655 }, { "epoch": 3.39, - "learning_rate": 1.328114012470114e-05, - "loss": 0.0135, + "learning_rate": 2.3291623110168038e-05, + "loss": 0.036, "step": 72660 }, { "epoch": 3.39, - "learning_rate": 1.3280671323425999e-05, - "loss": 0.0409, + "learning_rate": 2.329115504033202e-05, + "loss": 0.0647, "step": 72665 }, { "epoch": 3.39, - "learning_rate": 1.3280202522150862e-05, - "loss": 0.0561, + "learning_rate": 2.3290686970495998e-05, + "loss": 0.0833, "step": 72670 }, { "epoch": 3.39, - "learning_rate": 1.3279733720875722e-05, - "loss": 0.0636, + "learning_rate": 2.3290218900659977e-05, + "loss": 0.0351, "step": 72675 }, { "epoch": 3.39, - "learning_rate": 1.3279264919600582e-05, - "loss": 0.0697, + "learning_rate": 2.3289750830823957e-05, + "loss": 0.1412, "step": 72680 }, { "epoch": 3.39, - "learning_rate": 1.3278796118325444e-05, - "loss": 0.1301, + "learning_rate": 2.328928276098794e-05, + "loss": 0.2574, "step": 72685 }, { "epoch": 3.39, - "learning_rate": 1.3278327317050304e-05, - "loss": 0.1251, + "learning_rate": 2.328881469115192e-05, + "loss": 0.2252, "step": 72690 }, { "epoch": 3.39, - "learning_rate": 1.3277858515775164e-05, - "loss": 0.1802, + "learning_rate": 2.32883466213159e-05, + "loss": 0.5252, "step": 72695 }, { "epoch": 3.39, - "learning_rate": 1.3277389714500024e-05, - "loss": 0.0625, + "learning_rate": 2.328787855147988e-05, + "loss": 0.0806, "step": 72700 }, { "epoch": 3.39, - "learning_rate": 1.3276920913224884e-05, - "loss": 0.0129, + "learning_rate": 2.3287410481643863e-05, + "loss": 0.0185, "step": 72705 }, { "epoch": 3.39, - "learning_rate": 1.3276452111949745e-05, - "loss": 0.0352, + "learning_rate": 2.3286942411807843e-05, + "loss": 0.0791, "step": 72710 }, { "epoch": 3.39, - "learning_rate": 1.3275983310674607e-05, - "loss": 0.0247, + "learning_rate": 2.3286474341971823e-05, + "loss": 0.049, "step": 72715 }, { "epoch": 3.39, - "learning_rate": 1.3275514509399467e-05, - "loss": 0.0596, + "learning_rate": 2.3286006272135806e-05, + "loss": 0.0829, "step": 72720 }, { "epoch": 3.39, - "learning_rate": 1.3275045708124329e-05, - "loss": 0.1163, + "learning_rate": 2.3285538202299786e-05, + "loss": 0.0891, "step": 72725 }, { "epoch": 3.39, - "learning_rate": 1.3274576906849188e-05, - "loss": 0.1109, + "learning_rate": 2.3285070132463765e-05, + "loss": 0.1032, "step": 72730 }, { "epoch": 3.39, - "learning_rate": 1.3274108105574048e-05, - "loss": 0.1857, + "learning_rate": 2.3284602062627742e-05, + "loss": 0.2473, "step": 72735 }, { "epoch": 3.39, - "learning_rate": 1.3273639304298908e-05, - "loss": 0.2121, + "learning_rate": 2.3284133992791725e-05, + "loss": 0.3029, "step": 72740 }, { "epoch": 3.39, - "learning_rate": 1.3273170503023768e-05, - "loss": 0.5159, + "learning_rate": 2.3283665922955705e-05, + "loss": 0.4402, "step": 72745 }, { "epoch": 3.39, - "learning_rate": 1.327270170174863e-05, - "loss": 0.0355, + "learning_rate": 2.3283197853119685e-05, + "loss": 0.0467, "step": 72750 }, { "epoch": 3.39, - "learning_rate": 1.327223290047349e-05, - "loss": 0.0755, + "learning_rate": 2.3282729783283665e-05, + "loss": 0.0136, "step": 72755 }, { "epoch": 3.4, - "learning_rate": 1.327176409919835e-05, - "loss": 0.0358, + "learning_rate": 2.3282261713447648e-05, + "loss": 0.048, "step": 72760 }, { "epoch": 3.4, - "learning_rate": 1.3271295297923213e-05, - "loss": 0.0475, + "learning_rate": 2.3281793643611628e-05, + "loss": 0.0591, "step": 72765 }, { "epoch": 3.4, - "learning_rate": 1.3270826496648073e-05, - "loss": 0.0639, + "learning_rate": 2.3281325573775607e-05, + "loss": 0.0747, "step": 72770 }, { "epoch": 3.4, - "learning_rate": 1.3270357695372933e-05, - "loss": 0.0674, + "learning_rate": 2.328085750393959e-05, + "loss": 0.0305, "step": 72775 }, { "epoch": 3.4, - "learning_rate": 1.3269888894097793e-05, - "loss": 0.0946, + "learning_rate": 2.328038943410357e-05, + "loss": 0.0957, "step": 72780 }, { "epoch": 3.4, - "learning_rate": 1.3269420092822653e-05, - "loss": 0.1409, + "learning_rate": 2.327992136426755e-05, + "loss": 0.0751, "step": 72785 }, { "epoch": 3.4, - "learning_rate": 1.3268951291547514e-05, - "loss": 0.1262, + "learning_rate": 2.327945329443153e-05, + "loss": 0.1116, "step": 72790 }, { "epoch": 3.4, - "learning_rate": 1.3268482490272374e-05, - "loss": 0.2623, + "learning_rate": 2.327898522459551e-05, + "loss": 0.1562, "step": 72795 }, { "epoch": 3.4, - "learning_rate": 1.3268013688997234e-05, - "loss": 0.0845, + "learning_rate": 2.327851715475949e-05, + "loss": 0.0623, "step": 72800 }, { "epoch": 3.4, - "learning_rate": 1.3267544887722094e-05, - "loss": 0.0133, + "learning_rate": 2.327804908492347e-05, + "loss": 0.053, "step": 72805 }, { "epoch": 3.4, - "learning_rate": 1.3267076086446958e-05, - "loss": 0.0781, + "learning_rate": 2.327758101508745e-05, + "loss": 0.0186, "step": 72810 }, { "epoch": 3.4, - "learning_rate": 1.3266607285171817e-05, - "loss": 0.021, + "learning_rate": 2.3277112945251433e-05, + "loss": 0.0942, "step": 72815 }, { "epoch": 3.4, - "learning_rate": 1.3266138483896677e-05, - "loss": 0.1001, + "learning_rate": 2.3276644875415412e-05, + "loss": 0.0837, "step": 72820 }, { "epoch": 3.4, - "learning_rate": 1.3265669682621537e-05, - "loss": 0.1239, + "learning_rate": 2.3276176805579392e-05, + "loss": 0.0788, "step": 72825 }, { "epoch": 3.4, - "learning_rate": 1.3265200881346399e-05, - "loss": 0.1063, + "learning_rate": 2.3275708735743375e-05, + "loss": 0.0712, "step": 72830 }, { "epoch": 3.4, - "learning_rate": 1.3264732080071259e-05, - "loss": 0.0912, + "learning_rate": 2.3275240665907355e-05, + "loss": 0.1296, "step": 72835 }, { "epoch": 3.4, - "learning_rate": 1.3264263278796119e-05, - "loss": 0.2155, + "learning_rate": 2.3274772596071335e-05, + "loss": 0.1354, "step": 72840 }, { "epoch": 3.4, - "learning_rate": 1.3263794477520979e-05, - "loss": 0.2851, + "learning_rate": 2.3274304526235315e-05, + "loss": 0.1868, "step": 72845 }, { "epoch": 3.4, - "learning_rate": 1.3263325676245839e-05, - "loss": 0.0518, + "learning_rate": 2.3273836456399298e-05, + "loss": 0.0609, "step": 72850 }, { "epoch": 3.4, - "learning_rate": 1.3262856874970702e-05, - "loss": 0.0152, + "learning_rate": 2.3273368386563278e-05, + "loss": 0.0577, "step": 72855 }, { "epoch": 3.4, - "learning_rate": 1.3262388073695562e-05, - "loss": 0.0193, + "learning_rate": 2.3272900316727254e-05, + "loss": 0.0309, "step": 72860 }, { "epoch": 3.4, - "learning_rate": 1.3261919272420422e-05, - "loss": 0.093, + "learning_rate": 2.3272432246891234e-05, + "loss": 0.0306, "step": 72865 }, { "epoch": 3.4, - "learning_rate": 1.3261450471145284e-05, - "loss": 0.0918, + "learning_rate": 2.3271964177055217e-05, + "loss": 0.0595, "step": 72870 }, { "epoch": 3.4, - "learning_rate": 1.3260981669870143e-05, - "loss": 0.0729, + "learning_rate": 2.3271496107219197e-05, + "loss": 0.0435, "step": 72875 }, { "epoch": 3.4, - "learning_rate": 1.3260512868595003e-05, - "loss": 0.1065, + "learning_rate": 2.3271028037383177e-05, + "loss": 0.0703, "step": 72880 }, { "epoch": 3.4, - "learning_rate": 1.3260044067319863e-05, - "loss": 0.1023, + "learning_rate": 2.3270559967547157e-05, + "loss": 0.2049, "step": 72885 }, { "epoch": 3.4, - "learning_rate": 1.3259575266044725e-05, - "loss": 0.2665, + "learning_rate": 2.327009189771114e-05, + "loss": 0.3358, "step": 72890 }, { "epoch": 3.4, - "learning_rate": 1.3259106464769585e-05, - "loss": 0.1425, + "learning_rate": 2.326962382787512e-05, + "loss": 0.2102, "step": 72895 }, { "epoch": 3.4, - "learning_rate": 1.3258637663494445e-05, - "loss": 0.1003, + "learning_rate": 2.32691557580391e-05, + "loss": 0.0528, "step": 72900 }, { "epoch": 3.4, - "learning_rate": 1.3258168862219306e-05, - "loss": 0.0328, + "learning_rate": 2.3268687688203083e-05, + "loss": 0.0133, "step": 72905 }, { "epoch": 3.4, - "learning_rate": 1.3257700060944168e-05, - "loss": 0.025, + "learning_rate": 2.3268219618367063e-05, + "loss": 0.0244, "step": 72910 }, { "epoch": 3.4, - "learning_rate": 1.3257231259669028e-05, - "loss": 0.0856, + "learning_rate": 2.3267751548531042e-05, + "loss": 0.0863, "step": 72915 }, { "epoch": 3.4, - "learning_rate": 1.3256762458393888e-05, - "loss": 0.0648, + "learning_rate": 2.3267283478695022e-05, + "loss": 0.0791, "step": 72920 }, { "epoch": 3.4, - "learning_rate": 1.3256293657118748e-05, - "loss": 0.1638, + "learning_rate": 2.3266815408859002e-05, + "loss": 0.1283, "step": 72925 }, { "epoch": 3.4, - "learning_rate": 1.325582485584361e-05, - "loss": 0.1195, + "learning_rate": 2.3266347339022982e-05, + "loss": 0.1047, "step": 72930 }, { "epoch": 3.4, - "learning_rate": 1.325535605456847e-05, - "loss": 0.0723, + "learning_rate": 2.3265879269186962e-05, + "loss": 0.1687, "step": 72935 }, { "epoch": 3.4, - "learning_rate": 1.325488725329333e-05, - "loss": 0.193, + "learning_rate": 2.326541119935094e-05, + "loss": 0.1966, "step": 72940 }, { "epoch": 3.4, - "learning_rate": 1.325441845201819e-05, - "loss": 0.3241, + "learning_rate": 2.3264943129514925e-05, + "loss": 0.2487, "step": 72945 }, { "epoch": 3.4, - "learning_rate": 1.3253949650743053e-05, - "loss": 0.0862, + "learning_rate": 2.3264475059678905e-05, + "loss": 0.0329, "step": 72950 }, { "epoch": 3.4, - "learning_rate": 1.3253480849467913e-05, - "loss": 0.0328, + "learning_rate": 2.3264006989842884e-05, + "loss": 0.04, "step": 72955 }, { "epoch": 3.4, - "learning_rate": 1.3253012048192772e-05, - "loss": 0.0547, + "learning_rate": 2.3263538920006868e-05, + "loss": 0.0469, "step": 72960 }, { "epoch": 3.4, - "learning_rate": 1.3252543246917632e-05, - "loss": 0.0331, + "learning_rate": 2.3263070850170847e-05, + "loss": 0.0556, "step": 72965 }, { "epoch": 3.4, - "learning_rate": 1.3252074445642494e-05, - "loss": 0.0603, + "learning_rate": 2.3262602780334827e-05, + "loss": 0.0538, "step": 72970 }, { "epoch": 3.41, - "learning_rate": 1.3251605644367354e-05, - "loss": 0.1321, + "learning_rate": 2.3262134710498807e-05, + "loss": 0.0605, "step": 72975 }, { "epoch": 3.41, - "learning_rate": 1.3251136843092214e-05, - "loss": 0.0986, + "learning_rate": 2.326166664066279e-05, + "loss": 0.0706, "step": 72980 }, { "epoch": 3.41, - "learning_rate": 1.3250668041817074e-05, - "loss": 0.1706, + "learning_rate": 2.3261198570826767e-05, + "loss": 0.1869, "step": 72985 }, { "epoch": 3.41, - "learning_rate": 1.3250199240541934e-05, - "loss": 0.1643, + "learning_rate": 2.3260730500990746e-05, + "loss": 0.1869, "step": 72990 }, { "epoch": 3.41, - "learning_rate": 1.3249730439266797e-05, - "loss": 0.2303, + "learning_rate": 2.3260262431154726e-05, + "loss": 0.2543, "step": 72995 }, { "epoch": 3.41, - "learning_rate": 1.3249261637991657e-05, - "loss": 0.0926, + "learning_rate": 2.325979436131871e-05, + "loss": 0.1172, "step": 73000 }, { "epoch": 3.41, - "learning_rate": 1.3248792836716517e-05, - "loss": 0.0249, + "learning_rate": 2.325932629148269e-05, + "loss": 0.0295, "step": 73005 }, { "epoch": 3.41, - "learning_rate": 1.3248324035441379e-05, - "loss": 0.0443, + "learning_rate": 2.325885822164667e-05, + "loss": 0.043, "step": 73010 }, { "epoch": 3.41, - "learning_rate": 1.3247855234166239e-05, - "loss": 0.0351, + "learning_rate": 2.3258390151810652e-05, + "loss": 0.0497, "step": 73015 }, { "epoch": 3.41, - "learning_rate": 1.3247386432891098e-05, - "loss": 0.1067, + "learning_rate": 2.3257922081974632e-05, + "loss": 0.0555, "step": 73020 }, { "epoch": 3.41, - "learning_rate": 1.3246917631615958e-05, - "loss": 0.0794, + "learning_rate": 2.3257454012138612e-05, + "loss": 0.073, "step": 73025 }, { "epoch": 3.41, - "learning_rate": 1.3246448830340818e-05, - "loss": 0.0929, + "learning_rate": 2.3256985942302592e-05, + "loss": 0.1495, "step": 73030 }, { "epoch": 3.41, - "learning_rate": 1.324598002906568e-05, - "loss": 0.1354, + "learning_rate": 2.3256517872466575e-05, + "loss": 0.1684, "step": 73035 }, { "epoch": 3.41, - "learning_rate": 1.3245511227790542e-05, - "loss": 0.2728, + "learning_rate": 2.3256049802630555e-05, + "loss": 0.1341, "step": 73040 }, { "epoch": 3.41, - "learning_rate": 1.3245042426515402e-05, - "loss": 0.2449, + "learning_rate": 2.3255581732794535e-05, + "loss": 0.2681, "step": 73045 }, { "epoch": 3.41, - "learning_rate": 1.3244573625240263e-05, - "loss": 0.0582, + "learning_rate": 2.325511366295851e-05, + "loss": 0.0624, "step": 73050 }, { "epoch": 3.41, - "learning_rate": 1.3244104823965123e-05, - "loss": 0.0717, + "learning_rate": 2.3254645593122494e-05, + "loss": 0.0226, "step": 73055 }, { "epoch": 3.41, - "learning_rate": 1.3243636022689983e-05, - "loss": 0.037, + "learning_rate": 2.3254177523286474e-05, + "loss": 0.0198, "step": 73060 }, { "epoch": 3.41, - "learning_rate": 1.3243167221414843e-05, - "loss": 0.066, + "learning_rate": 2.3253709453450454e-05, + "loss": 0.0706, "step": 73065 }, { "epoch": 3.41, - "learning_rate": 1.3242698420139703e-05, - "loss": 0.1115, + "learning_rate": 2.3253241383614437e-05, + "loss": 0.0695, "step": 73070 }, { "epoch": 3.41, - "learning_rate": 1.3242229618864565e-05, - "loss": 0.057, + "learning_rate": 2.3252773313778417e-05, + "loss": 0.0587, "step": 73075 }, { "epoch": 3.41, - "learning_rate": 1.3241760817589424e-05, - "loss": 0.0675, + "learning_rate": 2.3252305243942397e-05, + "loss": 0.1056, "step": 73080 }, { "epoch": 3.41, - "learning_rate": 1.3241292016314284e-05, - "loss": 0.1726, + "learning_rate": 2.3251837174106377e-05, + "loss": 0.1559, "step": 73085 }, { "epoch": 3.41, - "learning_rate": 1.3240823215039148e-05, - "loss": 0.1837, + "learning_rate": 2.325136910427036e-05, + "loss": 0.183, "step": 73090 }, { "epoch": 3.41, - "learning_rate": 1.3240354413764008e-05, - "loss": 0.2014, + "learning_rate": 2.325090103443434e-05, + "loss": 0.2473, "step": 73095 }, { "epoch": 3.41, - "learning_rate": 1.3239885612488868e-05, - "loss": 0.0381, + "learning_rate": 2.325043296459832e-05, + "loss": 0.1004, "step": 73100 }, { "epoch": 3.41, - "learning_rate": 1.3239416811213728e-05, - "loss": 0.0316, + "learning_rate": 2.32499648947623e-05, + "loss": 0.0463, "step": 73105 }, { "epoch": 3.41, - "learning_rate": 1.3238948009938587e-05, - "loss": 0.046, + "learning_rate": 2.324949682492628e-05, + "loss": 0.0568, "step": 73110 }, { "epoch": 3.41, - "learning_rate": 1.3238479208663449e-05, - "loss": 0.0846, + "learning_rate": 2.324902875509026e-05, + "loss": 0.0382, "step": 73115 }, { "epoch": 3.41, - "learning_rate": 1.3238010407388309e-05, - "loss": 0.1258, + "learning_rate": 2.324856068525424e-05, + "loss": 0.0595, "step": 73120 }, { "epoch": 3.41, - "learning_rate": 1.3237541606113169e-05, - "loss": 0.0894, + "learning_rate": 2.324809261541822e-05, + "loss": 0.0575, "step": 73125 }, { "epoch": 3.41, - "learning_rate": 1.3237072804838029e-05, - "loss": 0.0433, + "learning_rate": 2.3247624545582202e-05, + "loss": 0.099, "step": 73130 }, { "epoch": 3.41, - "learning_rate": 1.3236604003562892e-05, - "loss": 0.137, + "learning_rate": 2.324715647574618e-05, + "loss": 0.1164, "step": 73135 }, { "epoch": 3.41, - "learning_rate": 1.3236135202287752e-05, - "loss": 0.104, + "learning_rate": 2.324668840591016e-05, + "loss": 0.1247, "step": 73140 }, { "epoch": 3.41, - "learning_rate": 1.3235666401012612e-05, - "loss": 0.2982, + "learning_rate": 2.3246220336074145e-05, + "loss": 0.2284, "step": 73145 }, { "epoch": 3.41, - "learning_rate": 1.3235197599737472e-05, - "loss": 0.0598, + "learning_rate": 2.3245752266238124e-05, + "loss": 0.0493, "step": 73150 }, { "epoch": 3.41, - "learning_rate": 1.3234728798462334e-05, - "loss": 0.0419, + "learning_rate": 2.3245284196402104e-05, + "loss": 0.0657, "step": 73155 }, { "epoch": 3.41, - "learning_rate": 1.3234259997187194e-05, - "loss": 0.0112, + "learning_rate": 2.3244816126566084e-05, + "loss": 0.068, "step": 73160 }, { "epoch": 3.41, - "learning_rate": 1.3233791195912053e-05, - "loss": 0.0433, + "learning_rate": 2.3244348056730067e-05, + "loss": 0.069, "step": 73165 }, { "epoch": 3.41, - "learning_rate": 1.3233322394636913e-05, - "loss": 0.1115, + "learning_rate": 2.3243879986894047e-05, + "loss": 0.1376, "step": 73170 }, { "epoch": 3.41, - "learning_rate": 1.3232853593361773e-05, - "loss": 0.0278, + "learning_rate": 2.3243411917058023e-05, + "loss": 0.113, "step": 73175 }, { "epoch": 3.41, - "learning_rate": 1.3232384792086637e-05, - "loss": 0.1111, + "learning_rate": 2.3242943847222003e-05, + "loss": 0.1838, "step": 73180 }, { "epoch": 3.41, - "learning_rate": 1.3231915990811497e-05, - "loss": 0.1012, + "learning_rate": 2.3242475777385986e-05, + "loss": 0.1441, "step": 73185 }, { "epoch": 3.42, - "learning_rate": 1.3231447189536357e-05, - "loss": 0.1412, + "learning_rate": 2.3242007707549966e-05, + "loss": 0.2381, "step": 73190 }, { "epoch": 3.42, - "learning_rate": 1.3230978388261218e-05, - "loss": 0.1721, + "learning_rate": 2.3241539637713946e-05, + "loss": 0.1409, "step": 73195 }, { "epoch": 3.42, - "learning_rate": 1.3230509586986078e-05, - "loss": 0.093, + "learning_rate": 2.324107156787793e-05, + "loss": 0.1011, "step": 73200 }, { "epoch": 3.42, - "learning_rate": 1.3230040785710938e-05, - "loss": 0.0337, + "learning_rate": 2.324060349804191e-05, + "loss": 0.0168, "step": 73205 }, { "epoch": 3.42, - "learning_rate": 1.3229571984435798e-05, - "loss": 0.0732, + "learning_rate": 2.324013542820589e-05, + "loss": 0.0392, "step": 73210 }, { "epoch": 3.42, - "learning_rate": 1.3229103183160658e-05, - "loss": 0.0859, + "learning_rate": 2.323966735836987e-05, + "loss": 0.0647, "step": 73215 }, { "epoch": 3.42, - "learning_rate": 1.322863438188552e-05, - "loss": 0.0842, + "learning_rate": 2.3239199288533852e-05, + "loss": 0.0598, "step": 73220 }, { "epoch": 3.42, - "learning_rate": 1.322816558061038e-05, - "loss": 0.043, + "learning_rate": 2.3238731218697832e-05, + "loss": 0.0647, "step": 73225 }, { "epoch": 3.42, - "learning_rate": 1.3227696779335241e-05, - "loss": 0.1238, + "learning_rate": 2.323826314886181e-05, + "loss": 0.0804, "step": 73230 }, { "epoch": 3.42, - "learning_rate": 1.3227227978060103e-05, - "loss": 0.1395, + "learning_rate": 2.323779507902579e-05, + "loss": 0.1249, "step": 73235 }, { "epoch": 3.42, - "learning_rate": 1.3226759176784963e-05, - "loss": 0.161, + "learning_rate": 2.323732700918977e-05, + "loss": 0.219, "step": 73240 }, { "epoch": 3.42, - "learning_rate": 1.3226290375509823e-05, - "loss": 0.1628, + "learning_rate": 2.323685893935375e-05, + "loss": 0.247, "step": 73245 }, { "epoch": 3.42, - "learning_rate": 1.3225821574234683e-05, - "loss": 0.0807, + "learning_rate": 2.323639086951773e-05, + "loss": 0.0849, "step": 73250 }, { "epoch": 3.42, - "learning_rate": 1.3225352772959542e-05, - "loss": 0.0274, + "learning_rate": 2.3235922799681714e-05, + "loss": 0.0514, "step": 73255 }, { "epoch": 3.42, - "learning_rate": 1.3224883971684404e-05, - "loss": 0.0187, + "learning_rate": 2.3235454729845694e-05, + "loss": 0.0178, "step": 73260 }, { "epoch": 3.42, - "learning_rate": 1.3224415170409264e-05, - "loss": 0.0621, + "learning_rate": 2.3234986660009674e-05, + "loss": 0.0543, "step": 73265 }, { "epoch": 3.42, - "learning_rate": 1.3223946369134124e-05, - "loss": 0.0558, + "learning_rate": 2.3234518590173654e-05, + "loss": 0.0899, "step": 73270 }, { "epoch": 3.42, - "learning_rate": 1.3223477567858987e-05, - "loss": 0.077, + "learning_rate": 2.3234050520337637e-05, + "loss": 0.1265, "step": 73275 }, { "epoch": 3.42, - "learning_rate": 1.3223008766583847e-05, - "loss": 0.0494, + "learning_rate": 2.3233582450501617e-05, + "loss": 0.0583, "step": 73280 }, { "epoch": 3.42, - "learning_rate": 1.3222539965308707e-05, - "loss": 0.1365, + "learning_rate": 2.3233114380665596e-05, + "loss": 0.0797, "step": 73285 }, { "epoch": 3.42, - "learning_rate": 1.3222071164033567e-05, - "loss": 0.1824, + "learning_rate": 2.3232646310829576e-05, + "loss": 0.2102, "step": 73290 }, { "epoch": 3.42, - "learning_rate": 1.3221602362758427e-05, - "loss": 0.4051, + "learning_rate": 2.323217824099356e-05, + "loss": 0.2334, "step": 73295 }, { "epoch": 3.42, - "learning_rate": 1.3221133561483289e-05, - "loss": 0.065, + "learning_rate": 2.3231710171157536e-05, + "loss": 0.0953, "step": 73300 }, { "epoch": 3.42, - "learning_rate": 1.3220664760208149e-05, - "loss": 0.0081, + "learning_rate": 2.3231242101321516e-05, + "loss": 0.0293, "step": 73305 }, { "epoch": 3.42, - "learning_rate": 1.3220195958933009e-05, - "loss": 0.0359, + "learning_rate": 2.3230774031485495e-05, + "loss": 0.0479, "step": 73310 }, { "epoch": 3.42, - "learning_rate": 1.3219727157657868e-05, - "loss": 0.0216, + "learning_rate": 2.323030596164948e-05, + "loss": 0.0134, "step": 73315 }, { "epoch": 3.42, - "learning_rate": 1.3219258356382732e-05, - "loss": 0.1492, + "learning_rate": 2.322983789181346e-05, + "loss": 0.0689, "step": 73320 }, { "epoch": 3.42, - "learning_rate": 1.3218789555107592e-05, - "loss": 0.1478, + "learning_rate": 2.322936982197744e-05, + "loss": 0.0725, "step": 73325 }, { "epoch": 3.42, - "learning_rate": 1.3218320753832452e-05, - "loss": 0.1755, + "learning_rate": 2.322890175214142e-05, + "loss": 0.0717, "step": 73330 }, { "epoch": 3.42, - "learning_rate": 1.3217851952557312e-05, - "loss": 0.1802, + "learning_rate": 2.32284336823054e-05, + "loss": 0.1037, "step": 73335 }, { "epoch": 3.42, - "learning_rate": 1.3217383151282173e-05, - "loss": 0.2669, + "learning_rate": 2.322796561246938e-05, + "loss": 0.1244, "step": 73340 }, { "epoch": 3.42, - "learning_rate": 1.3216914350007033e-05, - "loss": 0.2471, + "learning_rate": 2.322749754263336e-05, + "loss": 0.2715, "step": 73345 }, { "epoch": 3.42, - "learning_rate": 1.3216445548731893e-05, - "loss": 0.053, + "learning_rate": 2.3227029472797344e-05, + "loss": 0.0844, "step": 73350 }, { "epoch": 3.42, - "learning_rate": 1.3215976747456753e-05, - "loss": 0.0466, + "learning_rate": 2.3226561402961324e-05, + "loss": 0.028, "step": 73355 }, { "epoch": 3.42, - "learning_rate": 1.3215507946181615e-05, - "loss": 0.013, + "learning_rate": 2.3226093333125304e-05, + "loss": 0.0507, "step": 73360 }, { "epoch": 3.42, - "learning_rate": 1.3215039144906476e-05, - "loss": 0.0486, + "learning_rate": 2.322562526328928e-05, + "loss": 0.0567, "step": 73365 }, { "epoch": 3.42, - "learning_rate": 1.3214570343631336e-05, - "loss": 0.0598, + "learning_rate": 2.3225157193453263e-05, + "loss": 0.0989, "step": 73370 }, { "epoch": 3.42, - "learning_rate": 1.3214101542356196e-05, - "loss": 0.0522, + "learning_rate": 2.3224689123617243e-05, + "loss": 0.0634, "step": 73375 }, { "epoch": 3.42, - "learning_rate": 1.3213632741081058e-05, - "loss": 0.1005, + "learning_rate": 2.3224221053781223e-05, + "loss": 0.0555, "step": 73380 }, { "epoch": 3.42, - "learning_rate": 1.3213163939805918e-05, - "loss": 0.075, + "learning_rate": 2.3223752983945206e-05, + "loss": 0.0795, "step": 73385 }, { "epoch": 3.42, - "learning_rate": 1.3212695138530778e-05, - "loss": 0.2631, + "learning_rate": 2.3223284914109186e-05, + "loss": 0.1829, "step": 73390 }, { "epoch": 3.42, - "learning_rate": 1.3212226337255638e-05, - "loss": 0.2347, + "learning_rate": 2.3222816844273166e-05, + "loss": 0.2448, "step": 73395 }, { "epoch": 3.42, - "learning_rate": 1.32117575359805e-05, - "loss": 0.0567, + "learning_rate": 2.3222348774437146e-05, + "loss": 0.0551, "step": 73400 }, { "epoch": 3.43, - "learning_rate": 1.3211288734705359e-05, - "loss": 0.0364, + "learning_rate": 2.322188070460113e-05, + "loss": 0.0731, "step": 73405 }, { "epoch": 3.43, - "learning_rate": 1.3210819933430219e-05, - "loss": 0.023, + "learning_rate": 2.322141263476511e-05, + "loss": 0.0296, "step": 73410 }, { "epoch": 3.43, - "learning_rate": 1.321035113215508e-05, - "loss": 0.0908, + "learning_rate": 2.322094456492909e-05, + "loss": 0.094, "step": 73415 }, { "epoch": 3.43, - "learning_rate": 1.3209882330879942e-05, - "loss": 0.0697, + "learning_rate": 2.322047649509307e-05, + "loss": 0.0708, "step": 73420 }, { "epoch": 3.43, - "learning_rate": 1.3209413529604802e-05, - "loss": 0.0808, + "learning_rate": 2.3220008425257048e-05, + "loss": 0.1264, "step": 73425 }, { "epoch": 3.43, - "learning_rate": 1.3208944728329662e-05, - "loss": 0.0747, + "learning_rate": 2.3219540355421028e-05, + "loss": 0.1358, "step": 73430 }, { "epoch": 3.43, - "learning_rate": 1.3208475927054522e-05, - "loss": 0.1391, + "learning_rate": 2.3219072285585008e-05, + "loss": 0.1448, "step": 73435 }, { "epoch": 3.43, - "learning_rate": 1.3208007125779384e-05, - "loss": 0.3241, + "learning_rate": 2.321860421574899e-05, + "loss": 0.1859, "step": 73440 }, { "epoch": 3.43, - "learning_rate": 1.3207538324504244e-05, - "loss": 0.2686, + "learning_rate": 2.321813614591297e-05, + "loss": 0.5283, "step": 73445 }, { "epoch": 3.43, - "learning_rate": 1.3207069523229104e-05, - "loss": 0.0759, + "learning_rate": 2.321766807607695e-05, + "loss": 0.0412, "step": 73450 }, { "epoch": 3.43, - "learning_rate": 1.3206600721953964e-05, - "loss": 0.026, + "learning_rate": 2.321720000624093e-05, + "loss": 0.0249, "step": 73455 }, { "epoch": 3.43, - "learning_rate": 1.3206131920678827e-05, - "loss": 0.0234, + "learning_rate": 2.3216731936404914e-05, + "loss": 0.0491, "step": 73460 }, { "epoch": 3.43, - "learning_rate": 1.3205663119403687e-05, - "loss": 0.0817, + "learning_rate": 2.3216263866568894e-05, + "loss": 0.0677, "step": 73465 }, { "epoch": 3.43, - "learning_rate": 1.3205194318128547e-05, - "loss": 0.0864, + "learning_rate": 2.3215795796732873e-05, + "loss": 0.0876, "step": 73470 }, { "epoch": 3.43, - "learning_rate": 1.3204725516853407e-05, - "loss": 0.0828, + "learning_rate": 2.3215327726896853e-05, + "loss": 0.0801, "step": 73475 }, { "epoch": 3.43, - "learning_rate": 1.3204256715578268e-05, - "loss": 0.0999, + "learning_rate": 2.3214859657060836e-05, + "loss": 0.0709, "step": 73480 }, { "epoch": 3.43, - "learning_rate": 1.3203787914303128e-05, - "loss": 0.1457, + "learning_rate": 2.3214391587224816e-05, + "loss": 0.1714, "step": 73485 }, { "epoch": 3.43, - "learning_rate": 1.3203319113027988e-05, - "loss": 0.1746, + "learning_rate": 2.3213923517388793e-05, + "loss": 0.1423, "step": 73490 }, { "epoch": 3.43, - "learning_rate": 1.3202850311752848e-05, - "loss": 0.201, + "learning_rate": 2.3213455447552772e-05, + "loss": 0.3333, "step": 73495 }, { "epoch": 3.43, - "learning_rate": 1.3202381510477708e-05, - "loss": 0.0825, + "learning_rate": 2.3212987377716756e-05, + "loss": 0.0774, "step": 73500 }, { "epoch": 3.43, - "learning_rate": 1.3201912709202571e-05, - "loss": 0.017, + "learning_rate": 2.3212519307880735e-05, + "loss": 0.0215, "step": 73505 }, { "epoch": 3.43, - "learning_rate": 1.3201443907927431e-05, - "loss": 0.0643, + "learning_rate": 2.3212051238044715e-05, + "loss": 0.0483, "step": 73510 }, { "epoch": 3.43, - "learning_rate": 1.3200975106652291e-05, - "loss": 0.0708, + "learning_rate": 2.32115831682087e-05, + "loss": 0.1078, "step": 73515 }, { "epoch": 3.43, - "learning_rate": 1.3200506305377153e-05, - "loss": 0.0419, + "learning_rate": 2.321111509837268e-05, + "loss": 0.0599, "step": 73520 }, { "epoch": 3.43, - "learning_rate": 1.3200037504102013e-05, - "loss": 0.1007, + "learning_rate": 2.3210647028536658e-05, + "loss": 0.046, "step": 73525 }, { "epoch": 3.43, - "learning_rate": 1.3199568702826873e-05, - "loss": 0.1826, + "learning_rate": 2.3210178958700638e-05, + "loss": 0.1649, "step": 73530 }, { "epoch": 3.43, - "learning_rate": 1.3199099901551733e-05, - "loss": 0.136, + "learning_rate": 2.320971088886462e-05, + "loss": 0.1296, "step": 73535 }, { "epoch": 3.43, - "learning_rate": 1.3198631100276593e-05, - "loss": 0.1944, + "learning_rate": 2.32092428190286e-05, + "loss": 0.1258, "step": 73540 }, { "epoch": 3.43, - "learning_rate": 1.3198162299001454e-05, - "loss": 0.4173, + "learning_rate": 2.320877474919258e-05, + "loss": 0.2649, "step": 73545 }, { "epoch": 3.43, - "learning_rate": 1.3197693497726314e-05, - "loss": 0.105, + "learning_rate": 2.320830667935656e-05, + "loss": 0.0668, "step": 73550 }, { "epoch": 3.43, - "learning_rate": 1.3197224696451176e-05, - "loss": 0.0587, + "learning_rate": 2.320783860952054e-05, + "loss": 0.0509, "step": 73555 }, { "epoch": 3.43, - "learning_rate": 1.3196755895176037e-05, - "loss": 0.0138, + "learning_rate": 2.320737053968452e-05, + "loss": 0.021, "step": 73560 }, { "epoch": 3.43, - "learning_rate": 1.3196287093900897e-05, - "loss": 0.0695, + "learning_rate": 2.32069024698485e-05, + "loss": 0.0461, "step": 73565 }, { "epoch": 3.43, - "learning_rate": 1.3195818292625757e-05, - "loss": 0.0544, + "learning_rate": 2.3206434400012483e-05, + "loss": 0.0707, "step": 73570 }, { "epoch": 3.43, - "learning_rate": 1.3195349491350617e-05, - "loss": 0.0769, + "learning_rate": 2.3205966330176463e-05, + "loss": 0.0316, "step": 73575 }, { "epoch": 3.43, - "learning_rate": 1.3194880690075477e-05, - "loss": 0.0871, + "learning_rate": 2.3205498260340443e-05, + "loss": 0.1619, "step": 73580 }, { "epoch": 3.43, - "learning_rate": 1.3194411888800339e-05, - "loss": 0.116, + "learning_rate": 2.3205030190504423e-05, + "loss": 0.1712, "step": 73585 }, { "epoch": 3.43, - "learning_rate": 1.3193943087525199e-05, - "loss": 0.2275, + "learning_rate": 2.3204562120668406e-05, + "loss": 0.2858, "step": 73590 }, { "epoch": 3.43, - "learning_rate": 1.3193474286250059e-05, - "loss": 0.207, + "learning_rate": 2.3204094050832386e-05, + "loss": 0.2183, "step": 73595 }, { "epoch": 3.43, - "learning_rate": 1.3193005484974922e-05, - "loss": 0.052, + "learning_rate": 2.3203625980996366e-05, + "loss": 0.0463, "step": 73600 }, { "epoch": 3.43, - "learning_rate": 1.3192536683699782e-05, - "loss": 0.077, + "learning_rate": 2.3203157911160345e-05, + "loss": 0.0308, "step": 73605 }, { "epoch": 3.43, - "learning_rate": 1.3192067882424642e-05, - "loss": 0.0335, + "learning_rate": 2.320268984132433e-05, + "loss": 0.0549, "step": 73610 }, { "epoch": 3.43, - "learning_rate": 1.3191599081149502e-05, - "loss": 0.0224, + "learning_rate": 2.3202221771488305e-05, + "loss": 0.0842, "step": 73615 }, { "epoch": 3.44, - "learning_rate": 1.3191130279874362e-05, - "loss": 0.0545, + "learning_rate": 2.3201753701652285e-05, + "loss": 0.0981, "step": 73620 }, { "epoch": 3.44, - "learning_rate": 1.3190661478599223e-05, - "loss": 0.1288, + "learning_rate": 2.3201285631816268e-05, + "loss": 0.1577, "step": 73625 }, { "epoch": 3.44, - "learning_rate": 1.3190192677324083e-05, - "loss": 0.117, + "learning_rate": 2.3200817561980248e-05, + "loss": 0.0588, "step": 73630 }, { "epoch": 3.44, - "learning_rate": 1.3189723876048943e-05, - "loss": 0.1976, + "learning_rate": 2.3200349492144228e-05, + "loss": 0.0786, "step": 73635 }, { "epoch": 3.44, - "learning_rate": 1.3189255074773803e-05, - "loss": 0.1778, + "learning_rate": 2.3199881422308207e-05, + "loss": 0.1226, "step": 73640 }, { "epoch": 3.44, - "learning_rate": 1.3188786273498666e-05, - "loss": 0.2328, + "learning_rate": 2.319941335247219e-05, + "loss": 0.1741, "step": 73645 }, { "epoch": 3.44, - "learning_rate": 1.3188317472223526e-05, - "loss": 0.0614, + "learning_rate": 2.319894528263617e-05, + "loss": 0.0787, "step": 73650 }, { "epoch": 3.44, - "learning_rate": 1.3187848670948386e-05, - "loss": 0.0183, + "learning_rate": 2.319847721280015e-05, + "loss": 0.0523, "step": 73655 }, { "epoch": 3.44, - "learning_rate": 1.3187379869673246e-05, - "loss": 0.0346, + "learning_rate": 2.319800914296413e-05, + "loss": 0.0787, "step": 73660 }, { "epoch": 3.44, - "learning_rate": 1.3186911068398108e-05, - "loss": 0.0467, + "learning_rate": 2.3197541073128113e-05, + "loss": 0.044, "step": 73665 }, { "epoch": 3.44, - "learning_rate": 1.3186442267122968e-05, - "loss": 0.0368, + "learning_rate": 2.3197073003292093e-05, + "loss": 0.1074, "step": 73670 }, { "epoch": 3.44, - "learning_rate": 1.3185973465847828e-05, - "loss": 0.0949, + "learning_rate": 2.3196604933456073e-05, + "loss": 0.0653, "step": 73675 }, { "epoch": 3.44, - "learning_rate": 1.3185504664572688e-05, - "loss": 0.092, + "learning_rate": 2.319613686362005e-05, + "loss": 0.106, "step": 73680 }, { "epoch": 3.44, - "learning_rate": 1.3185035863297548e-05, - "loss": 0.1353, + "learning_rate": 2.3195668793784033e-05, + "loss": 0.1659, "step": 73685 }, { "epoch": 3.44, - "learning_rate": 1.3184567062022411e-05, - "loss": 0.2217, + "learning_rate": 2.3195200723948012e-05, + "loss": 0.1742, "step": 73690 }, { "epoch": 3.44, - "learning_rate": 1.318409826074727e-05, - "loss": 0.2987, + "learning_rate": 2.3194732654111992e-05, + "loss": 0.3329, "step": 73695 }, { "epoch": 3.44, - "learning_rate": 1.318362945947213e-05, - "loss": 0.0678, + "learning_rate": 2.3194264584275975e-05, + "loss": 0.0793, "step": 73700 }, { "epoch": 3.44, - "learning_rate": 1.3183160658196992e-05, - "loss": 0.0372, + "learning_rate": 2.3193796514439955e-05, + "loss": 0.0326, "step": 73705 }, { "epoch": 3.44, - "learning_rate": 1.3182691856921852e-05, - "loss": 0.0267, + "learning_rate": 2.3193328444603935e-05, + "loss": 0.0314, "step": 73710 }, { "epoch": 3.44, - "learning_rate": 1.3182223055646712e-05, - "loss": 0.0613, + "learning_rate": 2.3192860374767915e-05, + "loss": 0.0974, "step": 73715 }, { "epoch": 3.44, - "learning_rate": 1.3181754254371572e-05, - "loss": 0.0556, + "learning_rate": 2.3192392304931898e-05, + "loss": 0.0196, "step": 73720 }, { "epoch": 3.44, - "learning_rate": 1.3181285453096432e-05, - "loss": 0.111, + "learning_rate": 2.3191924235095878e-05, + "loss": 0.0798, "step": 73725 }, { "epoch": 3.44, - "learning_rate": 1.3180816651821294e-05, - "loss": 0.0856, + "learning_rate": 2.3191456165259858e-05, + "loss": 0.1215, "step": 73730 }, { "epoch": 3.44, - "learning_rate": 1.3180347850546154e-05, - "loss": 0.1449, + "learning_rate": 2.3190988095423838e-05, + "loss": 0.1737, "step": 73735 }, { "epoch": 3.44, - "learning_rate": 1.3179879049271015e-05, - "loss": 0.1621, + "learning_rate": 2.319052002558782e-05, + "loss": 0.1742, "step": 73740 }, { "epoch": 3.44, - "learning_rate": 1.3179410247995877e-05, - "loss": 0.2623, + "learning_rate": 2.3190051955751797e-05, + "loss": 0.2477, "step": 73745 }, { "epoch": 3.44, - "learning_rate": 1.3178941446720737e-05, - "loss": 0.0542, + "learning_rate": 2.3189583885915777e-05, + "loss": 0.081, "step": 73750 }, { "epoch": 3.44, - "learning_rate": 1.3178472645445597e-05, - "loss": 0.0172, + "learning_rate": 2.318911581607976e-05, + "loss": 0.045, "step": 73755 }, { "epoch": 3.44, - "learning_rate": 1.3178003844170457e-05, - "loss": 0.0505, + "learning_rate": 2.318864774624374e-05, + "loss": 0.0734, "step": 73760 }, { "epoch": 3.44, - "learning_rate": 1.3177535042895317e-05, - "loss": 0.088, + "learning_rate": 2.318817967640772e-05, + "loss": 0.0501, "step": 73765 }, { "epoch": 3.44, - "learning_rate": 1.3177066241620178e-05, - "loss": 0.0588, + "learning_rate": 2.31877116065717e-05, + "loss": 0.0918, "step": 73770 }, { "epoch": 3.44, - "learning_rate": 1.3176597440345038e-05, - "loss": 0.063, + "learning_rate": 2.3187243536735683e-05, + "loss": 0.0851, "step": 73775 }, { "epoch": 3.44, - "learning_rate": 1.3176128639069898e-05, - "loss": 0.1176, + "learning_rate": 2.3186775466899663e-05, + "loss": 0.2055, "step": 73780 }, { "epoch": 3.44, - "learning_rate": 1.3175659837794761e-05, - "loss": 0.1186, + "learning_rate": 2.3186307397063643e-05, + "loss": 0.1184, "step": 73785 }, { "epoch": 3.44, - "learning_rate": 1.3175191036519621e-05, - "loss": 0.1963, + "learning_rate": 2.3185839327227622e-05, + "loss": 0.1127, "step": 73790 }, { "epoch": 3.44, - "learning_rate": 1.3174722235244481e-05, - "loss": 0.2739, + "learning_rate": 2.3185371257391606e-05, + "loss": 0.2028, "step": 73795 }, { "epoch": 3.44, - "learning_rate": 1.3174253433969341e-05, - "loss": 0.0369, + "learning_rate": 2.3184903187555585e-05, + "loss": 0.0436, "step": 73800 }, { "epoch": 3.44, - "learning_rate": 1.3173784632694201e-05, - "loss": 0.015, + "learning_rate": 2.3184435117719562e-05, + "loss": 0.0574, "step": 73805 }, { "epoch": 3.44, - "learning_rate": 1.3173315831419063e-05, - "loss": 0.0756, + "learning_rate": 2.3183967047883545e-05, + "loss": 0.0634, "step": 73810 }, { "epoch": 3.44, - "learning_rate": 1.3172847030143923e-05, - "loss": 0.0836, + "learning_rate": 2.3183498978047525e-05, + "loss": 0.065, "step": 73815 }, { "epoch": 3.44, - "learning_rate": 1.3172378228868783e-05, - "loss": 0.0515, + "learning_rate": 2.3183030908211505e-05, + "loss": 0.1361, "step": 73820 }, { "epoch": 3.44, - "learning_rate": 1.3171909427593643e-05, - "loss": 0.0864, + "learning_rate": 2.3182562838375484e-05, + "loss": 0.0487, "step": 73825 }, { "epoch": 3.45, - "learning_rate": 1.3171440626318506e-05, - "loss": 0.107, + "learning_rate": 2.3182094768539468e-05, + "loss": 0.0844, "step": 73830 }, { "epoch": 3.45, - "learning_rate": 1.3170971825043366e-05, - "loss": 0.223, + "learning_rate": 2.3181626698703447e-05, + "loss": 0.0722, "step": 73835 }, { "epoch": 3.45, - "learning_rate": 1.3170503023768226e-05, - "loss": 0.2617, + "learning_rate": 2.3181158628867427e-05, + "loss": 0.2028, "step": 73840 }, { "epoch": 3.45, - "learning_rate": 1.3170034222493086e-05, - "loss": 0.372, + "learning_rate": 2.3180690559031407e-05, + "loss": 0.2387, "step": 73845 }, { "epoch": 3.45, - "learning_rate": 1.3169565421217947e-05, - "loss": 0.0329, + "learning_rate": 2.318022248919539e-05, + "loss": 0.0515, "step": 73850 }, { "epoch": 3.45, - "learning_rate": 1.3169096619942807e-05, - "loss": 0.0405, + "learning_rate": 2.317975441935937e-05, + "loss": 0.0247, "step": 73855 }, { "epoch": 3.45, - "learning_rate": 1.3168627818667667e-05, - "loss": 0.0614, + "learning_rate": 2.317928634952335e-05, + "loss": 0.0441, "step": 73860 }, { "epoch": 3.45, - "learning_rate": 1.3168159017392527e-05, - "loss": 0.0546, + "learning_rate": 2.3178818279687333e-05, + "loss": 0.0451, "step": 73865 }, { "epoch": 3.45, - "learning_rate": 1.3167690216117389e-05, - "loss": 0.0568, + "learning_rate": 2.317835020985131e-05, + "loss": 0.0394, "step": 73870 }, { "epoch": 3.45, - "learning_rate": 1.3167221414842249e-05, - "loss": 0.053, + "learning_rate": 2.317788214001529e-05, + "loss": 0.0809, "step": 73875 }, { "epoch": 3.45, - "learning_rate": 1.316675261356711e-05, - "loss": 0.0676, + "learning_rate": 2.317741407017927e-05, + "loss": 0.2113, "step": 73880 }, { "epoch": 3.45, - "learning_rate": 1.316628381229197e-05, - "loss": 0.0723, + "learning_rate": 2.3176946000343252e-05, + "loss": 0.2103, "step": 73885 }, { "epoch": 3.45, - "learning_rate": 1.3165815011016832e-05, - "loss": 0.1877, + "learning_rate": 2.3176477930507232e-05, + "loss": 0.098, "step": 73890 }, { "epoch": 3.45, - "learning_rate": 1.3165346209741692e-05, - "loss": 0.3457, + "learning_rate": 2.3176009860671212e-05, + "loss": 0.2672, "step": 73895 }, { "epoch": 3.45, - "learning_rate": 1.3164877408466552e-05, - "loss": 0.046, + "learning_rate": 2.3175541790835192e-05, + "loss": 0.0938, "step": 73900 }, { "epoch": 3.45, - "learning_rate": 1.3164408607191412e-05, - "loss": 0.009, + "learning_rate": 2.3175073720999175e-05, + "loss": 0.0129, "step": 73905 }, { "epoch": 3.45, - "learning_rate": 1.3163939805916273e-05, - "loss": 0.0821, + "learning_rate": 2.3174605651163155e-05, + "loss": 0.0178, "step": 73910 }, { "epoch": 3.45, - "learning_rate": 1.3163471004641133e-05, - "loss": 0.0406, + "learning_rate": 2.3174137581327135e-05, + "loss": 0.0273, "step": 73915 }, { "epoch": 3.45, - "learning_rate": 1.3163002203365993e-05, - "loss": 0.0372, + "learning_rate": 2.3173669511491115e-05, + "loss": 0.0666, "step": 73920 }, { "epoch": 3.45, - "learning_rate": 1.3162533402090855e-05, - "loss": 0.1434, + "learning_rate": 2.3173201441655098e-05, + "loss": 0.1303, "step": 73925 }, { "epoch": 3.45, - "learning_rate": 1.3162064600815717e-05, - "loss": 0.1279, + "learning_rate": 2.3172733371819078e-05, + "loss": 0.0829, "step": 73930 }, { "epoch": 3.45, - "learning_rate": 1.3161595799540576e-05, - "loss": 0.1362, + "learning_rate": 2.3172265301983054e-05, + "loss": 0.1623, "step": 73935 }, { "epoch": 3.45, - "learning_rate": 1.3161126998265436e-05, - "loss": 0.177, + "learning_rate": 2.3171797232147037e-05, + "loss": 0.0887, "step": 73940 }, { "epoch": 3.45, - "learning_rate": 1.3160658196990296e-05, - "loss": 0.2619, + "learning_rate": 2.3171329162311017e-05, + "loss": 0.2396, "step": 73945 }, { "epoch": 3.45, - "learning_rate": 1.3160189395715158e-05, - "loss": 0.0307, + "learning_rate": 2.3170861092474997e-05, + "loss": 0.0788, "step": 73950 }, { "epoch": 3.45, - "learning_rate": 1.3159720594440018e-05, - "loss": 0.0362, + "learning_rate": 2.3170393022638977e-05, + "loss": 0.0107, "step": 73955 }, { "epoch": 3.45, - "learning_rate": 1.3159251793164878e-05, - "loss": 0.0463, + "learning_rate": 2.316992495280296e-05, + "loss": 0.0231, "step": 73960 }, { "epoch": 3.45, - "learning_rate": 1.3158782991889738e-05, - "loss": 0.0827, + "learning_rate": 2.316945688296694e-05, + "loss": 0.0875, "step": 73965 }, { "epoch": 3.45, - "learning_rate": 1.3158314190614601e-05, - "loss": 0.0723, + "learning_rate": 2.316898881313092e-05, + "loss": 0.0948, "step": 73970 }, { "epoch": 3.45, - "learning_rate": 1.3157845389339461e-05, - "loss": 0.1391, + "learning_rate": 2.31685207432949e-05, + "loss": 0.1094, "step": 73975 }, { "epoch": 3.45, - "learning_rate": 1.3157376588064321e-05, - "loss": 0.0436, + "learning_rate": 2.3168052673458883e-05, + "loss": 0.1662, "step": 73980 }, { "epoch": 3.45, - "learning_rate": 1.3156907786789181e-05, - "loss": 0.1241, + "learning_rate": 2.3167584603622862e-05, + "loss": 0.1597, "step": 73985 }, { "epoch": 3.45, - "learning_rate": 1.3156438985514042e-05, - "loss": 0.1112, + "learning_rate": 2.3167116533786842e-05, + "loss": 0.1353, "step": 73990 }, { "epoch": 3.45, - "learning_rate": 1.3155970184238902e-05, - "loss": 0.3744, + "learning_rate": 2.3166648463950822e-05, + "loss": 0.3076, "step": 73995 }, { "epoch": 3.45, - "learning_rate": 1.3155501382963762e-05, - "loss": 0.0241, + "learning_rate": 2.3166180394114802e-05, + "loss": 0.0372, "step": 74000 }, { "epoch": 3.45, - "learning_rate": 1.3155032581688622e-05, - "loss": 0.0648, + "learning_rate": 2.316571232427878e-05, + "loss": 0.0185, "step": 74005 }, { "epoch": 3.45, - "learning_rate": 1.3154563780413482e-05, - "loss": 0.0572, + "learning_rate": 2.316524425444276e-05, + "loss": 0.0425, "step": 74010 }, { "epoch": 3.45, - "learning_rate": 1.3154094979138346e-05, - "loss": 0.0271, + "learning_rate": 2.3164776184606745e-05, + "loss": 0.054, "step": 74015 }, { "epoch": 3.45, - "learning_rate": 1.3153626177863205e-05, - "loss": 0.1576, + "learning_rate": 2.3164308114770724e-05, + "loss": 0.0916, "step": 74020 }, { "epoch": 3.45, - "learning_rate": 1.3153157376588065e-05, - "loss": 0.0899, + "learning_rate": 2.3163840044934704e-05, + "loss": 0.0683, "step": 74025 }, { "epoch": 3.45, - "learning_rate": 1.3152688575312927e-05, - "loss": 0.135, + "learning_rate": 2.3163371975098684e-05, + "loss": 0.0806, "step": 74030 }, { "epoch": 3.45, - "learning_rate": 1.3152219774037787e-05, - "loss": 0.0804, + "learning_rate": 2.3162903905262667e-05, + "loss": 0.0734, "step": 74035 }, { "epoch": 3.45, - "learning_rate": 1.3151750972762647e-05, - "loss": 0.3208, + "learning_rate": 2.3162435835426647e-05, + "loss": 0.1534, "step": 74040 }, { "epoch": 3.46, - "learning_rate": 1.3151282171487507e-05, - "loss": 0.2127, + "learning_rate": 2.3161967765590627e-05, + "loss": 0.2959, "step": 74045 }, { "epoch": 3.46, - "learning_rate": 1.3150813370212367e-05, - "loss": 0.0741, + "learning_rate": 2.316149969575461e-05, + "loss": 0.0478, "step": 74050 }, { "epoch": 3.46, - "learning_rate": 1.3150344568937228e-05, - "loss": 0.0224, + "learning_rate": 2.316103162591859e-05, + "loss": 0.0133, "step": 74055 }, { "epoch": 3.46, - "learning_rate": 1.3149875767662088e-05, - "loss": 0.042, + "learning_rate": 2.3160563556082566e-05, + "loss": 0.0386, "step": 74060 }, { "epoch": 3.46, - "learning_rate": 1.314940696638695e-05, - "loss": 0.0661, + "learning_rate": 2.3160095486246546e-05, + "loss": 0.0382, "step": 74065 }, { "epoch": 3.46, - "learning_rate": 1.3148938165111812e-05, - "loss": 0.0695, + "learning_rate": 2.315962741641053e-05, + "loss": 0.0269, "step": 74070 }, { "epoch": 3.46, - "learning_rate": 1.3148469363836672e-05, - "loss": 0.1331, + "learning_rate": 2.315915934657451e-05, + "loss": 0.07, "step": 74075 }, { "epoch": 3.46, - "learning_rate": 1.3148000562561531e-05, - "loss": 0.1019, + "learning_rate": 2.315869127673849e-05, + "loss": 0.0751, "step": 74080 }, { "epoch": 3.46, - "learning_rate": 1.3147531761286391e-05, - "loss": 0.1089, + "learning_rate": 2.315822320690247e-05, + "loss": 0.1172, "step": 74085 }, { "epoch": 3.46, - "learning_rate": 1.3147062960011251e-05, - "loss": 0.1379, + "learning_rate": 2.3157755137066452e-05, + "loss": 0.1617, "step": 74090 }, { "epoch": 3.46, - "learning_rate": 1.3146594158736113e-05, - "loss": 0.2481, + "learning_rate": 2.3157287067230432e-05, + "loss": 0.2722, "step": 74095 }, { "epoch": 3.46, - "learning_rate": 1.3146125357460973e-05, - "loss": 0.1092, + "learning_rate": 2.3156818997394412e-05, + "loss": 0.1426, "step": 74100 }, { "epoch": 3.46, - "learning_rate": 1.3145656556185833e-05, - "loss": 0.0486, + "learning_rate": 2.315635092755839e-05, + "loss": 0.0067, "step": 74105 }, { "epoch": 3.46, - "learning_rate": 1.3145187754910696e-05, - "loss": 0.0154, + "learning_rate": 2.3155882857722375e-05, + "loss": 0.0522, "step": 74110 }, { "epoch": 3.46, - "learning_rate": 1.3144718953635556e-05, - "loss": 0.0222, + "learning_rate": 2.3155414787886355e-05, + "loss": 0.0379, "step": 74115 }, { "epoch": 3.46, - "learning_rate": 1.3144250152360416e-05, - "loss": 0.0986, + "learning_rate": 2.3154946718050334e-05, + "loss": 0.0928, "step": 74120 }, { "epoch": 3.46, - "learning_rate": 1.3143781351085276e-05, - "loss": 0.073, + "learning_rate": 2.3154478648214314e-05, + "loss": 0.0765, "step": 74125 }, { "epoch": 3.46, - "learning_rate": 1.3143312549810136e-05, - "loss": 0.087, + "learning_rate": 2.3154010578378294e-05, + "loss": 0.1744, "step": 74130 }, { "epoch": 3.46, - "learning_rate": 1.3142843748534998e-05, - "loss": 0.0813, + "learning_rate": 2.3153542508542274e-05, + "loss": 0.1342, "step": 74135 }, { "epoch": 3.46, - "learning_rate": 1.3142374947259857e-05, - "loss": 0.1903, + "learning_rate": 2.3153074438706254e-05, + "loss": 0.2268, "step": 74140 }, { "epoch": 3.46, - "learning_rate": 1.3141906145984717e-05, - "loss": 0.259, + "learning_rate": 2.3152606368870237e-05, + "loss": 0.3727, "step": 74145 }, { "epoch": 3.46, - "learning_rate": 1.3141437344709577e-05, - "loss": 0.0568, + "learning_rate": 2.3152138299034217e-05, + "loss": 0.0338, "step": 74150 }, { "epoch": 3.46, - "learning_rate": 1.314096854343444e-05, - "loss": 0.0046, + "learning_rate": 2.3151670229198196e-05, + "loss": 0.0379, "step": 74155 }, { "epoch": 3.46, - "learning_rate": 1.31404997421593e-05, - "loss": 0.0352, + "learning_rate": 2.3151202159362176e-05, + "loss": 0.0297, "step": 74160 }, { "epoch": 3.46, - "learning_rate": 1.314003094088416e-05, - "loss": 0.0488, + "learning_rate": 2.315073408952616e-05, + "loss": 0.0895, "step": 74165 }, { "epoch": 3.46, - "learning_rate": 1.313956213960902e-05, - "loss": 0.0836, + "learning_rate": 2.315026601969014e-05, + "loss": 0.082, "step": 74170 }, { "epoch": 3.46, - "learning_rate": 1.3139093338333882e-05, - "loss": 0.081, + "learning_rate": 2.314979794985412e-05, + "loss": 0.0845, "step": 74175 }, { "epoch": 3.46, - "learning_rate": 1.3138624537058742e-05, - "loss": 0.1126, + "learning_rate": 2.3149329880018102e-05, + "loss": 0.0885, "step": 74180 }, { "epoch": 3.46, - "learning_rate": 1.3138155735783602e-05, - "loss": 0.1061, + "learning_rate": 2.314886181018208e-05, + "loss": 0.132, "step": 74185 }, { "epoch": 3.46, - "learning_rate": 1.3137686934508462e-05, - "loss": 0.2805, + "learning_rate": 2.314839374034606e-05, + "loss": 0.1786, "step": 74190 }, { "epoch": 3.46, - "learning_rate": 1.3137218133233322e-05, - "loss": 0.3446, + "learning_rate": 2.314792567051004e-05, + "loss": 0.3301, "step": 74195 }, { "epoch": 3.46, - "learning_rate": 1.3136749331958183e-05, - "loss": 0.0647, + "learning_rate": 2.314745760067402e-05, + "loss": 0.0915, "step": 74200 }, { "epoch": 3.46, - "learning_rate": 1.3136280530683045e-05, - "loss": 0.0504, + "learning_rate": 2.3146989530838e-05, + "loss": 0.0675, "step": 74205 }, { "epoch": 3.46, - "learning_rate": 1.3135811729407905e-05, - "loss": 0.0499, + "learning_rate": 2.314652146100198e-05, + "loss": 0.1124, "step": 74210 }, { "epoch": 3.46, - "learning_rate": 1.3135342928132767e-05, - "loss": 0.0752, + "learning_rate": 2.314605339116596e-05, + "loss": 0.1158, "step": 74215 }, { "epoch": 3.46, - "learning_rate": 1.3134874126857627e-05, - "loss": 0.0907, + "learning_rate": 2.3145585321329944e-05, + "loss": 0.0865, "step": 74220 }, { "epoch": 3.46, - "learning_rate": 1.3134405325582486e-05, - "loss": 0.0531, + "learning_rate": 2.3145117251493924e-05, + "loss": 0.0591, "step": 74225 }, { "epoch": 3.46, - "learning_rate": 1.3133936524307346e-05, - "loss": 0.2022, + "learning_rate": 2.3144649181657904e-05, + "loss": 0.0715, "step": 74230 }, { "epoch": 3.46, - "learning_rate": 1.3133467723032206e-05, - "loss": 0.1445, + "learning_rate": 2.3144181111821887e-05, + "loss": 0.0885, "step": 74235 }, { "epoch": 3.46, - "learning_rate": 1.3132998921757068e-05, - "loss": 0.2684, + "learning_rate": 2.3143713041985867e-05, + "loss": 0.1795, "step": 74240 }, { "epoch": 3.46, - "learning_rate": 1.3132530120481928e-05, - "loss": 0.2548, + "learning_rate": 2.3143244972149847e-05, + "loss": 0.1727, "step": 74245 }, { "epoch": 3.46, - "learning_rate": 1.313206131920679e-05, - "loss": 0.0683, + "learning_rate": 2.3142776902313823e-05, + "loss": 0.0385, "step": 74250 }, { "epoch": 3.46, - "learning_rate": 1.3131592517931651e-05, - "loss": 0.0156, + "learning_rate": 2.3142308832477806e-05, + "loss": 0.0399, "step": 74255 }, { "epoch": 3.47, - "learning_rate": 1.3131123716656511e-05, - "loss": 0.0489, + "learning_rate": 2.3141840762641786e-05, + "loss": 0.0063, "step": 74260 }, { "epoch": 3.47, - "learning_rate": 1.3130654915381371e-05, - "loss": 0.0534, + "learning_rate": 2.3141372692805766e-05, + "loss": 0.0659, "step": 74265 }, { "epoch": 3.47, - "learning_rate": 1.3130186114106231e-05, - "loss": 0.0794, + "learning_rate": 2.3140904622969746e-05, + "loss": 0.0522, "step": 74270 }, { "epoch": 3.47, - "learning_rate": 1.3129717312831091e-05, - "loss": 0.0462, + "learning_rate": 2.314043655313373e-05, + "loss": 0.0529, "step": 74275 }, { "epoch": 3.47, - "learning_rate": 1.3129248511555953e-05, - "loss": 0.126, + "learning_rate": 2.313996848329771e-05, + "loss": 0.1151, "step": 74280 }, { "epoch": 3.47, - "learning_rate": 1.3128779710280812e-05, - "loss": 0.1405, + "learning_rate": 2.313950041346169e-05, + "loss": 0.1267, "step": 74285 }, { "epoch": 3.47, - "learning_rate": 1.3128310909005672e-05, - "loss": 0.211, + "learning_rate": 2.313903234362567e-05, + "loss": 0.2208, "step": 74290 }, { "epoch": 3.47, - "learning_rate": 1.3127842107730536e-05, - "loss": 0.3788, + "learning_rate": 2.3138564273789652e-05, + "loss": 0.1575, "step": 74295 }, { "epoch": 3.47, - "learning_rate": 1.3127373306455396e-05, - "loss": 0.0817, + "learning_rate": 2.313809620395363e-05, + "loss": 0.0329, "step": 74300 }, { "epoch": 3.47, - "learning_rate": 1.3126904505180256e-05, - "loss": 0.0454, + "learning_rate": 2.313762813411761e-05, + "loss": 0.036, "step": 74305 }, { "epoch": 3.47, - "learning_rate": 1.3126435703905116e-05, - "loss": 0.0408, + "learning_rate": 2.313716006428159e-05, + "loss": 0.0483, "step": 74310 }, { "epoch": 3.47, - "learning_rate": 1.3125966902629977e-05, - "loss": 0.0503, + "learning_rate": 2.313669199444557e-05, + "loss": 0.1022, "step": 74315 }, { "epoch": 3.47, - "learning_rate": 1.3125498101354837e-05, - "loss": 0.0483, + "learning_rate": 2.313622392460955e-05, + "loss": 0.0123, "step": 74320 }, { "epoch": 3.47, - "learning_rate": 1.3125029300079697e-05, - "loss": 0.2694, + "learning_rate": 2.313575585477353e-05, + "loss": 0.0469, "step": 74325 }, { "epoch": 3.47, - "learning_rate": 1.3124560498804557e-05, - "loss": 0.1335, + "learning_rate": 2.3135287784937514e-05, + "loss": 0.0826, "step": 74330 }, { "epoch": 3.47, - "learning_rate": 1.3124091697529417e-05, - "loss": 0.1436, + "learning_rate": 2.3134819715101494e-05, + "loss": 0.1293, "step": 74335 }, { "epoch": 3.47, - "learning_rate": 1.312362289625428e-05, - "loss": 0.2363, + "learning_rate": 2.3134351645265473e-05, + "loss": 0.0571, "step": 74340 }, { "epoch": 3.47, - "learning_rate": 1.312315409497914e-05, - "loss": 0.2065, + "learning_rate": 2.3133883575429453e-05, + "loss": 0.1386, "step": 74345 }, { "epoch": 3.47, - "learning_rate": 1.3122685293704e-05, - "loss": 0.0453, + "learning_rate": 2.3133415505593436e-05, + "loss": 0.0463, "step": 74350 }, { "epoch": 3.47, - "learning_rate": 1.3122216492428862e-05, - "loss": 0.0222, + "learning_rate": 2.3132947435757416e-05, + "loss": 0.0372, "step": 74355 }, { "epoch": 3.47, - "learning_rate": 1.3121747691153722e-05, - "loss": 0.0301, + "learning_rate": 2.3132479365921396e-05, + "loss": 0.0227, "step": 74360 }, { "epoch": 3.47, - "learning_rate": 1.3121278889878582e-05, - "loss": 0.0662, + "learning_rate": 2.313201129608538e-05, + "loss": 0.0474, "step": 74365 }, { "epoch": 3.47, - "learning_rate": 1.3120810088603441e-05, - "loss": 0.0514, + "learning_rate": 2.313154322624936e-05, + "loss": 0.0995, "step": 74370 }, { "epoch": 3.47, - "learning_rate": 1.3120341287328301e-05, - "loss": 0.0178, + "learning_rate": 2.3131075156413336e-05, + "loss": 0.0587, "step": 74375 }, { "epoch": 3.47, - "learning_rate": 1.3119872486053163e-05, - "loss": 0.1477, + "learning_rate": 2.3130607086577315e-05, + "loss": 0.062, "step": 74380 }, { "epoch": 3.47, - "learning_rate": 1.3119403684778023e-05, - "loss": 0.0924, + "learning_rate": 2.31301390167413e-05, + "loss": 0.0419, "step": 74385 }, { "epoch": 3.47, - "learning_rate": 1.3118934883502885e-05, - "loss": 0.1522, + "learning_rate": 2.312967094690528e-05, + "loss": 0.1702, "step": 74390 }, { "epoch": 3.47, - "learning_rate": 1.3118466082227746e-05, - "loss": 0.4497, + "learning_rate": 2.3129202877069258e-05, + "loss": 0.234, "step": 74395 }, { "epoch": 3.47, - "learning_rate": 1.3117997280952606e-05, - "loss": 0.0668, + "learning_rate": 2.3128734807233238e-05, + "loss": 0.0801, "step": 74400 }, { "epoch": 3.47, - "learning_rate": 1.3117528479677466e-05, - "loss": 0.0368, + "learning_rate": 2.312826673739722e-05, + "loss": 0.0227, "step": 74405 }, { "epoch": 3.47, - "learning_rate": 1.3117059678402326e-05, - "loss": 0.0469, + "learning_rate": 2.31277986675612e-05, + "loss": 0.0527, "step": 74410 }, { "epoch": 3.47, - "learning_rate": 1.3116590877127186e-05, - "loss": 0.0359, + "learning_rate": 2.312733059772518e-05, + "loss": 0.0546, "step": 74415 }, { "epoch": 3.47, - "learning_rate": 1.3116122075852048e-05, - "loss": 0.0854, + "learning_rate": 2.3126862527889164e-05, + "loss": 0.0609, "step": 74420 }, { "epoch": 3.47, - "learning_rate": 1.3115653274576908e-05, - "loss": 0.125, + "learning_rate": 2.3126394458053144e-05, + "loss": 0.0612, "step": 74425 }, { "epoch": 3.47, - "learning_rate": 1.3115184473301767e-05, - "loss": 0.09, + "learning_rate": 2.3125926388217124e-05, + "loss": 0.174, "step": 74430 }, { "epoch": 3.47, - "learning_rate": 1.311471567202663e-05, - "loss": 0.1239, + "learning_rate": 2.3125458318381104e-05, + "loss": 0.1448, "step": 74435 }, { "epoch": 3.47, - "learning_rate": 1.311424687075149e-05, - "loss": 0.2436, + "learning_rate": 2.3124990248545083e-05, + "loss": 0.0793, "step": 74440 }, { "epoch": 3.47, - "learning_rate": 1.311377806947635e-05, - "loss": 0.3589, + "learning_rate": 2.3124522178709063e-05, + "loss": 0.3003, "step": 74445 }, { "epoch": 3.47, - "learning_rate": 1.311330926820121e-05, - "loss": 0.0651, + "learning_rate": 2.3124054108873043e-05, + "loss": 0.052, "step": 74450 }, { "epoch": 3.47, - "learning_rate": 1.311284046692607e-05, - "loss": 0.0261, + "learning_rate": 2.3123586039037023e-05, + "loss": 0.0318, "step": 74455 }, { "epoch": 3.47, - "learning_rate": 1.3112371665650932e-05, - "loss": 0.024, + "learning_rate": 2.3123117969201006e-05, + "loss": 0.0435, "step": 74460 }, { "epoch": 3.47, - "learning_rate": 1.3111902864375792e-05, - "loss": 0.0978, + "learning_rate": 2.3122649899364986e-05, + "loss": 0.0413, "step": 74465 }, { "epoch": 3.47, - "learning_rate": 1.3111434063100652e-05, - "loss": 0.112, + "learning_rate": 2.3122181829528966e-05, + "loss": 0.0863, "step": 74470 }, { "epoch": 3.48, - "learning_rate": 1.3110965261825512e-05, - "loss": 0.0872, + "learning_rate": 2.312171375969295e-05, + "loss": 0.1123, "step": 74475 }, { "epoch": 3.48, - "learning_rate": 1.3110496460550375e-05, - "loss": 0.0558, + "learning_rate": 2.312124568985693e-05, + "loss": 0.1145, "step": 74480 }, { "epoch": 3.48, - "learning_rate": 1.3110027659275235e-05, - "loss": 0.1574, + "learning_rate": 2.312077762002091e-05, + "loss": 0.1109, "step": 74485 }, { "epoch": 3.48, - "learning_rate": 1.3109558858000095e-05, - "loss": 0.1574, + "learning_rate": 2.312030955018489e-05, + "loss": 0.1343, "step": 74490 }, { "epoch": 3.48, - "learning_rate": 1.3109090056724955e-05, - "loss": 0.2159, + "learning_rate": 2.311984148034887e-05, + "loss": 0.3328, "step": 74495 }, { "epoch": 3.48, - "learning_rate": 1.3108621255449817e-05, - "loss": 0.06, + "learning_rate": 2.3119373410512848e-05, + "loss": 0.0573, "step": 74500 }, { "epoch": 3.48, - "learning_rate": 1.3108152454174677e-05, - "loss": 0.0309, + "learning_rate": 2.3118905340676828e-05, + "loss": 0.1064, "step": 74505 }, { "epoch": 3.48, - "learning_rate": 1.3107683652899537e-05, - "loss": 0.0342, + "learning_rate": 2.3118437270840808e-05, + "loss": 0.0399, "step": 74510 }, { "epoch": 3.48, - "learning_rate": 1.3107214851624397e-05, - "loss": 0.0972, + "learning_rate": 2.311796920100479e-05, + "loss": 0.0235, "step": 74515 }, { "epoch": 3.48, - "learning_rate": 1.3106746050349256e-05, - "loss": 0.0685, + "learning_rate": 2.311750113116877e-05, + "loss": 0.0846, "step": 74520 }, { "epoch": 3.48, - "learning_rate": 1.3106277249074118e-05, - "loss": 0.0872, + "learning_rate": 2.311703306133275e-05, + "loss": 0.0505, "step": 74525 }, { "epoch": 3.48, - "learning_rate": 1.310580844779898e-05, - "loss": 0.0839, + "learning_rate": 2.311656499149673e-05, + "loss": 0.0991, "step": 74530 }, { "epoch": 3.48, - "learning_rate": 1.310533964652384e-05, - "loss": 0.1234, + "learning_rate": 2.3116096921660713e-05, + "loss": 0.0713, "step": 74535 }, { "epoch": 3.48, - "learning_rate": 1.3104870845248701e-05, - "loss": 0.2244, + "learning_rate": 2.3115628851824693e-05, + "loss": 0.2833, "step": 74540 }, { "epoch": 3.48, - "learning_rate": 1.3104402043973561e-05, - "loss": 0.2721, + "learning_rate": 2.3115160781988673e-05, + "loss": 0.2268, "step": 74545 }, { "epoch": 3.48, - "learning_rate": 1.3103933242698421e-05, - "loss": 0.0539, + "learning_rate": 2.3114692712152656e-05, + "loss": 0.0895, "step": 74550 }, { "epoch": 3.48, - "learning_rate": 1.3103464441423281e-05, - "loss": 0.0219, + "learning_rate": 2.3114224642316636e-05, + "loss": 0.0079, "step": 74555 }, { "epoch": 3.48, - "learning_rate": 1.3102995640148141e-05, - "loss": 0.0461, + "learning_rate": 2.3113756572480616e-05, + "loss": 0.0112, "step": 74560 }, { "epoch": 3.48, - "learning_rate": 1.3102526838873003e-05, - "loss": 0.0784, + "learning_rate": 2.3113288502644592e-05, + "loss": 0.0399, "step": 74565 }, { "epoch": 3.48, - "learning_rate": 1.3102058037597863e-05, - "loss": 0.0725, + "learning_rate": 2.3112820432808576e-05, + "loss": 0.1065, "step": 74570 }, { "epoch": 3.48, - "learning_rate": 1.3101589236322724e-05, - "loss": 0.129, + "learning_rate": 2.3112352362972555e-05, + "loss": 0.0566, "step": 74575 }, { "epoch": 3.48, - "learning_rate": 1.3101120435047586e-05, - "loss": 0.0321, + "learning_rate": 2.3111884293136535e-05, + "loss": 0.0652, "step": 74580 }, { "epoch": 3.48, - "learning_rate": 1.3100651633772446e-05, - "loss": 0.0981, + "learning_rate": 2.3111416223300515e-05, + "loss": 0.1316, "step": 74585 }, { "epoch": 3.48, - "learning_rate": 1.3100182832497306e-05, - "loss": 0.1701, + "learning_rate": 2.3110948153464498e-05, + "loss": 0.2657, "step": 74590 }, { "epoch": 3.48, - "learning_rate": 1.3099714031222166e-05, - "loss": 0.1971, + "learning_rate": 2.3110480083628478e-05, + "loss": 0.2291, "step": 74595 }, { "epoch": 3.48, - "learning_rate": 1.3099245229947026e-05, - "loss": 0.0555, + "learning_rate": 2.3110012013792458e-05, + "loss": 0.0691, "step": 74600 }, { "epoch": 3.48, - "learning_rate": 1.3098776428671887e-05, - "loss": 0.0607, + "learning_rate": 2.310954394395644e-05, + "loss": 0.0868, "step": 74605 }, { "epoch": 3.48, - "learning_rate": 1.3098307627396747e-05, - "loss": 0.0498, + "learning_rate": 2.310907587412042e-05, + "loss": 0.0457, "step": 74610 }, { "epoch": 3.48, - "learning_rate": 1.3097838826121607e-05, - "loss": 0.0246, + "learning_rate": 2.31086078042844e-05, + "loss": 0.0763, "step": 74615 }, { "epoch": 3.48, - "learning_rate": 1.309737002484647e-05, - "loss": 0.0938, + "learning_rate": 2.310813973444838e-05, + "loss": 0.064, "step": 74620 }, { "epoch": 3.48, - "learning_rate": 1.309690122357133e-05, - "loss": 0.0851, + "learning_rate": 2.3107671664612364e-05, + "loss": 0.0981, "step": 74625 }, { "epoch": 3.48, - "learning_rate": 1.309643242229619e-05, - "loss": 0.1235, + "learning_rate": 2.310720359477634e-05, + "loss": 0.0624, "step": 74630 }, { "epoch": 3.48, - "learning_rate": 1.309596362102105e-05, - "loss": 0.1757, + "learning_rate": 2.310673552494032e-05, + "loss": 0.1317, "step": 74635 }, { "epoch": 3.48, - "learning_rate": 1.309549481974591e-05, - "loss": 0.2538, + "learning_rate": 2.31062674551043e-05, + "loss": 0.0916, "step": 74640 }, { "epoch": 3.48, - "learning_rate": 1.3095026018470772e-05, - "loss": 0.2441, + "learning_rate": 2.3105799385268283e-05, + "loss": 0.2597, "step": 74645 }, { "epoch": 3.48, - "learning_rate": 1.3094557217195632e-05, - "loss": 0.0279, + "learning_rate": 2.3105331315432263e-05, + "loss": 0.0702, "step": 74650 }, { "epoch": 3.48, - "learning_rate": 1.3094088415920492e-05, - "loss": 0.0447, + "learning_rate": 2.3104863245596243e-05, + "loss": 0.057, "step": 74655 }, { "epoch": 3.48, - "learning_rate": 1.3093619614645352e-05, - "loss": 0.011, + "learning_rate": 2.3104395175760226e-05, + "loss": 0.0544, "step": 74660 }, { "epoch": 3.48, - "learning_rate": 1.3093150813370215e-05, - "loss": 0.0893, + "learning_rate": 2.3103927105924206e-05, + "loss": 0.0661, "step": 74665 }, { "epoch": 3.48, - "learning_rate": 1.3092682012095075e-05, - "loss": 0.0987, + "learning_rate": 2.3103459036088185e-05, + "loss": 0.0683, "step": 74670 }, { "epoch": 3.48, - "learning_rate": 1.3092213210819935e-05, - "loss": 0.1312, + "learning_rate": 2.3102990966252165e-05, + "loss": 0.0957, "step": 74675 }, { "epoch": 3.48, - "learning_rate": 1.3091744409544795e-05, - "loss": 0.1104, + "learning_rate": 2.310252289641615e-05, + "loss": 0.1201, "step": 74680 }, { "epoch": 3.48, - "learning_rate": 1.3091275608269656e-05, - "loss": 0.1359, + "learning_rate": 2.3102054826580128e-05, + "loss": 0.1465, "step": 74685 }, { "epoch": 3.49, - "learning_rate": 1.3090806806994516e-05, - "loss": 0.2558, + "learning_rate": 2.3101586756744105e-05, + "loss": 0.1544, "step": 74690 }, { "epoch": 3.49, - "learning_rate": 1.3090338005719376e-05, - "loss": 0.2142, + "learning_rate": 2.3101118686908085e-05, + "loss": 0.2411, "step": 74695 }, { "epoch": 3.49, - "learning_rate": 1.3089869204444236e-05, - "loss": 0.0664, + "learning_rate": 2.3100650617072068e-05, + "loss": 0.0567, "step": 74700 }, { "epoch": 3.49, - "learning_rate": 1.3089400403169098e-05, - "loss": 0.0286, + "learning_rate": 2.3100182547236048e-05, + "loss": 0.0105, "step": 74705 }, { "epoch": 3.49, - "learning_rate": 1.3088931601893958e-05, - "loss": 0.0856, + "learning_rate": 2.3099714477400027e-05, + "loss": 0.0217, "step": 74710 }, { "epoch": 3.49, - "learning_rate": 1.308846280061882e-05, - "loss": 0.021, + "learning_rate": 2.3099246407564007e-05, + "loss": 0.0835, "step": 74715 }, { "epoch": 3.49, - "learning_rate": 1.308799399934368e-05, - "loss": 0.0525, + "learning_rate": 2.309877833772799e-05, + "loss": 0.0333, "step": 74720 }, { "epoch": 3.49, - "learning_rate": 1.308752519806854e-05, - "loss": 0.047, + "learning_rate": 2.309831026789197e-05, + "loss": 0.1006, "step": 74725 }, { "epoch": 3.49, - "learning_rate": 1.30870563967934e-05, - "loss": 0.1208, + "learning_rate": 2.309784219805595e-05, + "loss": 0.1171, "step": 74730 }, { "epoch": 3.49, - "learning_rate": 1.308658759551826e-05, - "loss": 0.208, + "learning_rate": 2.3097374128219933e-05, + "loss": 0.1331, "step": 74735 }, { "epoch": 3.49, - "learning_rate": 1.308611879424312e-05, - "loss": 0.182, + "learning_rate": 2.3096906058383913e-05, + "loss": 0.1281, "step": 74740 }, { "epoch": 3.49, - "learning_rate": 1.3085649992967982e-05, - "loss": 0.2804, + "learning_rate": 2.3096437988547893e-05, + "loss": 0.2058, "step": 74745 }, { "epoch": 3.49, - "learning_rate": 1.3085181191692842e-05, - "loss": 0.0816, + "learning_rate": 2.3095969918711873e-05, + "loss": 0.086, "step": 74750 }, { "epoch": 3.49, - "learning_rate": 1.3084712390417702e-05, - "loss": 0.0376, + "learning_rate": 2.3095501848875853e-05, + "loss": 0.0304, "step": 74755 }, { "epoch": 3.49, - "learning_rate": 1.3084243589142564e-05, - "loss": 0.0442, + "learning_rate": 2.3095033779039832e-05, + "loss": 0.0552, "step": 74760 }, { "epoch": 3.49, - "learning_rate": 1.3083774787867425e-05, - "loss": 0.0874, + "learning_rate": 2.3094565709203812e-05, + "loss": 0.03, "step": 74765 }, { "epoch": 3.49, - "learning_rate": 1.3083305986592285e-05, - "loss": 0.0598, + "learning_rate": 2.3094097639367792e-05, + "loss": 0.117, "step": 74770 }, { "epoch": 3.49, - "learning_rate": 1.3082837185317145e-05, - "loss": 0.0949, + "learning_rate": 2.3093629569531775e-05, + "loss": 0.1301, "step": 74775 }, { "epoch": 3.49, - "learning_rate": 1.3082368384042005e-05, - "loss": 0.1243, + "learning_rate": 2.3093161499695755e-05, + "loss": 0.1523, "step": 74780 }, { "epoch": 3.49, - "learning_rate": 1.3081899582766867e-05, - "loss": 0.1393, + "learning_rate": 2.3092693429859735e-05, + "loss": 0.1884, "step": 74785 }, { "epoch": 3.49, - "learning_rate": 1.3081430781491727e-05, - "loss": 0.1766, + "learning_rate": 2.3092225360023718e-05, + "loss": 0.1652, "step": 74790 }, { "epoch": 3.49, - "learning_rate": 1.3080961980216587e-05, - "loss": 0.329, + "learning_rate": 2.3091757290187698e-05, + "loss": 0.306, "step": 74795 }, { "epoch": 3.49, - "learning_rate": 1.3080493178941447e-05, - "loss": 0.0834, + "learning_rate": 2.3091289220351678e-05, + "loss": 0.0452, "step": 74800 }, { "epoch": 3.49, - "learning_rate": 1.308002437766631e-05, - "loss": 0.0429, + "learning_rate": 2.3090821150515657e-05, + "loss": 0.0194, "step": 74805 }, { "epoch": 3.49, - "learning_rate": 1.307955557639117e-05, - "loss": 0.0264, + "learning_rate": 2.309035308067964e-05, + "loss": 0.0631, "step": 74810 }, { "epoch": 3.49, - "learning_rate": 1.307908677511603e-05, - "loss": 0.0512, + "learning_rate": 2.3089885010843617e-05, + "loss": 0.0531, "step": 74815 }, { "epoch": 3.49, - "learning_rate": 1.307861797384089e-05, - "loss": 0.1067, + "learning_rate": 2.3089416941007597e-05, + "loss": 0.0799, "step": 74820 }, { "epoch": 3.49, - "learning_rate": 1.3078149172565751e-05, - "loss": 0.1192, + "learning_rate": 2.3088948871171577e-05, + "loss": 0.091, "step": 74825 }, { "epoch": 3.49, - "learning_rate": 1.3077680371290611e-05, - "loss": 0.0869, + "learning_rate": 2.308848080133556e-05, + "loss": 0.0675, "step": 74830 }, { "epoch": 3.49, - "learning_rate": 1.3077211570015471e-05, - "loss": 0.1571, + "learning_rate": 2.308801273149954e-05, + "loss": 0.0713, "step": 74835 }, { "epoch": 3.49, - "learning_rate": 1.3076742768740331e-05, - "loss": 0.214, + "learning_rate": 2.308754466166352e-05, + "loss": 0.2676, "step": 74840 }, { "epoch": 3.49, - "learning_rate": 1.3076273967465191e-05, - "loss": 0.1172, + "learning_rate": 2.3087076591827503e-05, + "loss": 0.2154, "step": 74845 }, { "epoch": 3.49, - "learning_rate": 1.3075805166190053e-05, - "loss": 0.067, + "learning_rate": 2.3086608521991483e-05, + "loss": 0.0384, "step": 74850 }, { "epoch": 3.49, - "learning_rate": 1.3075336364914914e-05, - "loss": 0.0654, + "learning_rate": 2.3086140452155462e-05, + "loss": 0.0158, "step": 74855 }, { "epoch": 3.49, - "learning_rate": 1.3074867563639774e-05, - "loss": 0.0608, + "learning_rate": 2.3085672382319442e-05, + "loss": 0.0302, "step": 74860 }, { "epoch": 3.49, - "learning_rate": 1.3074398762364636e-05, - "loss": 0.0573, + "learning_rate": 2.3085204312483425e-05, + "loss": 0.0505, "step": 74865 }, { "epoch": 3.49, - "learning_rate": 1.3073929961089496e-05, - "loss": 0.0422, + "learning_rate": 2.3084736242647405e-05, + "loss": 0.0453, "step": 74870 }, { "epoch": 3.49, - "learning_rate": 1.3073461159814356e-05, - "loss": 0.091, + "learning_rate": 2.3084268172811385e-05, + "loss": 0.0347, "step": 74875 }, { "epoch": 3.49, - "learning_rate": 1.3072992358539216e-05, - "loss": 0.1209, + "learning_rate": 2.308380010297536e-05, + "loss": 0.0615, "step": 74880 }, { "epoch": 3.49, - "learning_rate": 1.3072523557264076e-05, - "loss": 0.1703, + "learning_rate": 2.3083332033139345e-05, + "loss": 0.2994, "step": 74885 }, { "epoch": 3.49, - "learning_rate": 1.3072054755988937e-05, - "loss": 0.0818, + "learning_rate": 2.3082863963303325e-05, + "loss": 0.2489, "step": 74890 }, { "epoch": 3.49, - "learning_rate": 1.3071585954713797e-05, - "loss": 0.2644, + "learning_rate": 2.3082395893467304e-05, + "loss": 0.1898, "step": 74895 }, { "epoch": 3.49, - "learning_rate": 1.3071117153438659e-05, - "loss": 0.0688, + "learning_rate": 2.3081927823631284e-05, + "loss": 0.0447, "step": 74900 }, { "epoch": 3.5, - "learning_rate": 1.307064835216352e-05, - "loss": 0.0201, + "learning_rate": 2.3081459753795267e-05, + "loss": 0.0379, "step": 74905 }, { "epoch": 3.5, - "learning_rate": 1.307017955088838e-05, - "loss": 0.0338, + "learning_rate": 2.3080991683959247e-05, + "loss": 0.0837, "step": 74910 }, { "epoch": 3.5, - "learning_rate": 1.306971074961324e-05, - "loss": 0.1382, + "learning_rate": 2.3080523614123227e-05, + "loss": 0.0635, "step": 74915 }, { "epoch": 3.5, - "learning_rate": 1.30692419483381e-05, - "loss": 0.1653, + "learning_rate": 2.308005554428721e-05, + "loss": 0.077, "step": 74920 }, { "epoch": 3.5, - "learning_rate": 1.306877314706296e-05, - "loss": 0.0666, + "learning_rate": 2.307958747445119e-05, + "loss": 0.0817, "step": 74925 }, { "epoch": 3.5, - "learning_rate": 1.3068304345787822e-05, - "loss": 0.0343, + "learning_rate": 2.307911940461517e-05, + "loss": 0.1438, "step": 74930 }, { "epoch": 3.5, - "learning_rate": 1.3067835544512682e-05, - "loss": 0.1865, + "learning_rate": 2.307865133477915e-05, + "loss": 0.1383, "step": 74935 }, { "epoch": 3.5, - "learning_rate": 1.3067366743237542e-05, - "loss": 0.2692, + "learning_rate": 2.3078183264943133e-05, + "loss": 0.2236, "step": 74940 }, { "epoch": 3.5, - "learning_rate": 1.3066897941962405e-05, - "loss": 0.3542, + "learning_rate": 2.307771519510711e-05, + "loss": 0.2707, "step": 74945 }, { "epoch": 3.5, - "learning_rate": 1.3066429140687265e-05, - "loss": 0.0865, + "learning_rate": 2.307724712527109e-05, + "loss": 0.0644, "step": 74950 }, { "epoch": 3.5, - "learning_rate": 1.3065960339412125e-05, - "loss": 0.0346, + "learning_rate": 2.307677905543507e-05, + "loss": 0.0587, "step": 74955 }, { "epoch": 3.5, - "learning_rate": 1.3065491538136985e-05, - "loss": 0.0699, + "learning_rate": 2.3076310985599052e-05, + "loss": 0.0745, "step": 74960 }, { "epoch": 3.5, - "learning_rate": 1.3065022736861845e-05, - "loss": 0.0342, + "learning_rate": 2.3075842915763032e-05, + "loss": 0.0391, "step": 74965 }, { "epoch": 3.5, - "learning_rate": 1.3064553935586706e-05, - "loss": 0.1412, + "learning_rate": 2.3075374845927012e-05, + "loss": 0.0545, "step": 74970 }, { "epoch": 3.5, - "learning_rate": 1.3064085134311566e-05, - "loss": 0.0652, + "learning_rate": 2.3074906776090995e-05, + "loss": 0.1207, "step": 74975 }, { "epoch": 3.5, - "learning_rate": 1.3063616333036426e-05, - "loss": 0.1106, + "learning_rate": 2.3074438706254975e-05, + "loss": 0.1017, "step": 74980 }, { "epoch": 3.5, - "learning_rate": 1.3063147531761286e-05, - "loss": 0.1842, + "learning_rate": 2.3073970636418955e-05, + "loss": 0.1258, "step": 74985 }, { "epoch": 3.5, - "learning_rate": 1.306267873048615e-05, - "loss": 0.2778, + "learning_rate": 2.3073502566582934e-05, + "loss": 0.2388, "step": 74990 }, { "epoch": 3.5, - "learning_rate": 1.306220992921101e-05, - "loss": 0.2805, + "learning_rate": 2.3073034496746918e-05, + "loss": 0.3431, "step": 74995 }, { "epoch": 3.5, - "learning_rate": 1.306174112793587e-05, - "loss": 0.0736, + "learning_rate": 2.3072566426910897e-05, + "loss": 0.0344, "step": 75000 }, { "epoch": 3.5, - "learning_rate": 1.306127232666073e-05, - "loss": 0.0197, + "learning_rate": 2.3072098357074874e-05, + "loss": 0.0293, "step": 75005 }, { "epoch": 3.5, - "learning_rate": 1.3060803525385591e-05, - "loss": 0.0479, + "learning_rate": 2.3071630287238854e-05, + "loss": 0.0333, "step": 75010 }, { "epoch": 3.5, - "learning_rate": 1.3060334724110451e-05, - "loss": 0.0528, + "learning_rate": 2.3071162217402837e-05, + "loss": 0.0547, "step": 75015 }, { "epoch": 3.5, - "learning_rate": 1.305986592283531e-05, - "loss": 0.065, + "learning_rate": 2.3070694147566817e-05, + "loss": 0.0517, "step": 75020 }, { "epoch": 3.5, - "learning_rate": 1.305939712156017e-05, - "loss": 0.1103, + "learning_rate": 2.3070226077730797e-05, + "loss": 0.0775, "step": 75025 }, { "epoch": 3.5, - "learning_rate": 1.305892832028503e-05, - "loss": 0.0833, + "learning_rate": 2.306975800789478e-05, + "loss": 0.1479, "step": 75030 }, { "epoch": 3.5, - "learning_rate": 1.3058459519009892e-05, - "loss": 0.0872, + "learning_rate": 2.306928993805876e-05, + "loss": 0.039, "step": 75035 }, { "epoch": 3.5, - "learning_rate": 1.3057990717734754e-05, - "loss": 0.1221, + "learning_rate": 2.306882186822274e-05, + "loss": 0.3002, "step": 75040 }, { "epoch": 3.5, - "learning_rate": 1.3057521916459614e-05, - "loss": 0.2611, + "learning_rate": 2.306835379838672e-05, + "loss": 0.1615, "step": 75045 }, { "epoch": 3.5, - "learning_rate": 1.3057053115184475e-05, - "loss": 0.0998, + "learning_rate": 2.3067885728550702e-05, + "loss": 0.0652, "step": 75050 }, { "epoch": 3.5, - "learning_rate": 1.3056584313909335e-05, - "loss": 0.031, + "learning_rate": 2.3067417658714682e-05, + "loss": 0.026, "step": 75055 }, { "epoch": 3.5, - "learning_rate": 1.3056115512634195e-05, - "loss": 0.0343, + "learning_rate": 2.3066949588878662e-05, + "loss": 0.0248, "step": 75060 }, { "epoch": 3.5, - "learning_rate": 1.3055646711359055e-05, - "loss": 0.037, + "learning_rate": 2.3066481519042642e-05, + "loss": 0.0711, "step": 75065 }, { "epoch": 3.5, - "learning_rate": 1.3055177910083915e-05, - "loss": 0.186, + "learning_rate": 2.3066013449206622e-05, + "loss": 0.0394, "step": 75070 }, { "epoch": 3.5, - "learning_rate": 1.3054709108808777e-05, - "loss": 0.0453, + "learning_rate": 2.30655453793706e-05, + "loss": 0.0264, "step": 75075 }, { "epoch": 3.5, - "learning_rate": 1.3054240307533637e-05, - "loss": 0.1268, + "learning_rate": 2.306507730953458e-05, + "loss": 0.0369, "step": 75080 }, { "epoch": 3.5, - "learning_rate": 1.3053771506258498e-05, - "loss": 0.1189, + "learning_rate": 2.306460923969856e-05, + "loss": 0.1049, "step": 75085 }, { "epoch": 3.5, - "learning_rate": 1.305330270498336e-05, - "loss": 0.1851, + "learning_rate": 2.3064141169862544e-05, + "loss": 0.131, "step": 75090 }, { "epoch": 3.5, - "learning_rate": 1.305283390370822e-05, - "loss": 0.3663, + "learning_rate": 2.3063673100026524e-05, + "loss": 0.279, "step": 75095 }, { "epoch": 3.5, - "learning_rate": 1.305236510243308e-05, - "loss": 0.087, + "learning_rate": 2.3063205030190504e-05, + "loss": 0.0187, "step": 75100 }, { "epoch": 3.5, - "learning_rate": 1.305189630115794e-05, - "loss": 0.015, + "learning_rate": 2.3062736960354487e-05, + "loss": 0.078, "step": 75105 }, { "epoch": 3.5, - "learning_rate": 1.30514274998828e-05, - "loss": 0.0165, + "learning_rate": 2.3062268890518467e-05, + "loss": 0.0441, "step": 75110 }, { "epoch": 3.5, - "learning_rate": 1.3050958698607661e-05, - "loss": 0.0363, + "learning_rate": 2.3061800820682447e-05, + "loss": 0.0494, "step": 75115 }, { "epoch": 3.51, - "learning_rate": 1.3050489897332521e-05, - "loss": 0.0847, + "learning_rate": 2.3061332750846427e-05, + "loss": 0.1387, "step": 75120 }, { "epoch": 3.51, - "learning_rate": 1.3050021096057381e-05, - "loss": 0.0863, + "learning_rate": 2.306086468101041e-05, + "loss": 0.1008, "step": 75125 }, { "epoch": 3.51, - "learning_rate": 1.3049552294782245e-05, - "loss": 0.0849, + "learning_rate": 2.306039661117439e-05, + "loss": 0.0978, "step": 75130 }, { "epoch": 3.51, - "learning_rate": 1.3049083493507105e-05, - "loss": 0.1342, + "learning_rate": 2.3059928541338366e-05, + "loss": 0.148, "step": 75135 }, { "epoch": 3.51, - "learning_rate": 1.3048614692231964e-05, - "loss": 0.0638, + "learning_rate": 2.3059460471502346e-05, + "loss": 0.1755, "step": 75140 }, { "epoch": 3.51, - "learning_rate": 1.3048145890956824e-05, - "loss": 0.181, + "learning_rate": 2.305899240166633e-05, + "loss": 0.305, "step": 75145 }, { "epoch": 3.51, - "learning_rate": 1.3047677089681684e-05, - "loss": 0.1007, + "learning_rate": 2.305852433183031e-05, + "loss": 0.0685, "step": 75150 }, { "epoch": 3.51, - "learning_rate": 1.3047208288406546e-05, - "loss": 0.0194, + "learning_rate": 2.305805626199429e-05, + "loss": 0.0373, "step": 75155 }, { "epoch": 3.51, - "learning_rate": 1.3046739487131406e-05, - "loss": 0.0726, + "learning_rate": 2.3057588192158272e-05, + "loss": 0.0367, "step": 75160 }, { "epoch": 3.51, - "learning_rate": 1.3046270685856266e-05, - "loss": 0.0531, + "learning_rate": 2.3057120122322252e-05, + "loss": 0.0654, "step": 75165 }, { "epoch": 3.51, - "learning_rate": 1.3045801884581126e-05, - "loss": 0.0701, + "learning_rate": 2.305665205248623e-05, + "loss": 0.0873, "step": 75170 }, { "epoch": 3.51, - "learning_rate": 1.3045333083305987e-05, - "loss": 0.1159, + "learning_rate": 2.305618398265021e-05, + "loss": 0.0814, "step": 75175 }, { "epoch": 3.51, - "learning_rate": 1.3044864282030849e-05, - "loss": 0.0942, + "learning_rate": 2.3055715912814195e-05, + "loss": 0.0669, "step": 75180 }, { "epoch": 3.51, - "learning_rate": 1.3044395480755709e-05, - "loss": 0.1559, + "learning_rate": 2.3055247842978174e-05, + "loss": 0.0969, "step": 75185 }, { "epoch": 3.51, - "learning_rate": 1.3043926679480569e-05, - "loss": 0.1268, + "learning_rate": 2.3054779773142154e-05, + "loss": 0.2527, "step": 75190 }, { "epoch": 3.51, - "learning_rate": 1.304345787820543e-05, - "loss": 0.2342, + "learning_rate": 2.305431170330613e-05, + "loss": 0.2867, "step": 75195 }, { "epoch": 3.51, - "learning_rate": 1.304298907693029e-05, - "loss": 0.1179, + "learning_rate": 2.3053843633470114e-05, + "loss": 0.0992, "step": 75200 }, { "epoch": 3.51, - "learning_rate": 1.304252027565515e-05, - "loss": 0.0162, + "learning_rate": 2.3053375563634094e-05, + "loss": 0.0129, "step": 75205 }, { "epoch": 3.51, - "learning_rate": 1.304205147438001e-05, - "loss": 0.0417, + "learning_rate": 2.3052907493798074e-05, + "loss": 0.0544, "step": 75210 }, { "epoch": 3.51, - "learning_rate": 1.3041582673104872e-05, - "loss": 0.0826, + "learning_rate": 2.3052439423962057e-05, + "loss": 0.0696, "step": 75215 }, { "epoch": 3.51, - "learning_rate": 1.3041113871829732e-05, - "loss": 0.0919, + "learning_rate": 2.3051971354126037e-05, + "loss": 0.0525, "step": 75220 }, { "epoch": 3.51, - "learning_rate": 1.3040645070554593e-05, - "loss": 0.0402, + "learning_rate": 2.3051503284290016e-05, + "loss": 0.0534, "step": 75225 }, { "epoch": 3.51, - "learning_rate": 1.3040176269279453e-05, - "loss": 0.1075, + "learning_rate": 2.3051035214453996e-05, + "loss": 0.0202, "step": 75230 }, { "epoch": 3.51, - "learning_rate": 1.3039707468004315e-05, - "loss": 0.1924, + "learning_rate": 2.305056714461798e-05, + "loss": 0.1791, "step": 75235 }, { "epoch": 3.51, - "learning_rate": 1.3039238666729175e-05, - "loss": 0.2085, + "learning_rate": 2.305009907478196e-05, + "loss": 0.1478, "step": 75240 }, { "epoch": 3.51, - "learning_rate": 1.3038769865454035e-05, - "loss": 0.2829, + "learning_rate": 2.304963100494594e-05, + "loss": 0.2771, "step": 75245 }, { "epoch": 3.51, - "learning_rate": 1.3038301064178895e-05, - "loss": 0.0622, + "learning_rate": 2.304916293510992e-05, + "loss": 0.0581, "step": 75250 }, { "epoch": 3.51, - "learning_rate": 1.3037832262903756e-05, - "loss": 0.0114, + "learning_rate": 2.3048694865273902e-05, + "loss": 0.0434, "step": 75255 }, { "epoch": 3.51, - "learning_rate": 1.3037363461628616e-05, - "loss": 0.0967, + "learning_rate": 2.304822679543788e-05, + "loss": 0.0571, "step": 75260 }, { "epoch": 3.51, - "learning_rate": 1.3036894660353476e-05, - "loss": 0.0325, + "learning_rate": 2.3047758725601858e-05, + "loss": 0.0621, "step": 75265 }, { "epoch": 3.51, - "learning_rate": 1.3036425859078338e-05, - "loss": 0.1324, + "learning_rate": 2.304729065576584e-05, + "loss": 0.0454, "step": 75270 }, { "epoch": 3.51, - "learning_rate": 1.30359570578032e-05, - "loss": 0.0891, + "learning_rate": 2.304682258592982e-05, + "loss": 0.0858, "step": 75275 }, { "epoch": 3.51, - "learning_rate": 1.303548825652806e-05, - "loss": 0.0565, + "learning_rate": 2.30463545160938e-05, + "loss": 0.0494, "step": 75280 }, { "epoch": 3.51, - "learning_rate": 1.303501945525292e-05, - "loss": 0.1619, + "learning_rate": 2.304588644625778e-05, + "loss": 0.1377, "step": 75285 }, { "epoch": 3.51, - "learning_rate": 1.303455065397778e-05, - "loss": 0.2103, + "learning_rate": 2.3045418376421764e-05, + "loss": 0.1654, "step": 75290 }, { "epoch": 3.51, - "learning_rate": 1.3034081852702641e-05, - "loss": 0.2858, + "learning_rate": 2.3044950306585744e-05, + "loss": 0.2686, "step": 75295 }, { "epoch": 3.51, - "learning_rate": 1.3033613051427501e-05, - "loss": 0.0829, + "learning_rate": 2.3044482236749724e-05, + "loss": 0.0497, "step": 75300 }, { "epoch": 3.51, - "learning_rate": 1.3033144250152361e-05, - "loss": 0.0536, + "learning_rate": 2.3044014166913704e-05, + "loss": 0.0119, "step": 75305 }, { "epoch": 3.51, - "learning_rate": 1.303267544887722e-05, - "loss": 0.0318, + "learning_rate": 2.3043546097077687e-05, + "loss": 0.0247, "step": 75310 }, { "epoch": 3.51, - "learning_rate": 1.3032206647602084e-05, - "loss": 0.0717, + "learning_rate": 2.3043078027241667e-05, + "loss": 0.0415, "step": 75315 }, { "epoch": 3.51, - "learning_rate": 1.3031737846326944e-05, - "loss": 0.0579, + "learning_rate": 2.3042609957405646e-05, + "loss": 0.0717, "step": 75320 }, { "epoch": 3.51, - "learning_rate": 1.3031269045051804e-05, - "loss": 0.0892, + "learning_rate": 2.3042141887569623e-05, + "loss": 0.1198, "step": 75325 }, { "epoch": 3.52, - "learning_rate": 1.3030800243776664e-05, - "loss": 0.072, + "learning_rate": 2.3041673817733606e-05, + "loss": 0.1044, "step": 75330 }, { "epoch": 3.52, - "learning_rate": 1.3030331442501526e-05, - "loss": 0.1365, + "learning_rate": 2.3041205747897586e-05, + "loss": 0.1084, "step": 75335 }, { "epoch": 3.52, - "learning_rate": 1.3029862641226386e-05, - "loss": 0.1741, + "learning_rate": 2.3040737678061566e-05, + "loss": 0.1428, "step": 75340 }, { "epoch": 3.52, - "learning_rate": 1.3029393839951245e-05, - "loss": 0.32, + "learning_rate": 2.304026960822555e-05, + "loss": 0.3567, "step": 75345 }, { "epoch": 3.52, - "learning_rate": 1.3028925038676105e-05, - "loss": 0.0796, + "learning_rate": 2.303980153838953e-05, + "loss": 0.0612, "step": 75350 }, { "epoch": 3.52, - "learning_rate": 1.3028456237400965e-05, - "loss": 0.0354, + "learning_rate": 2.303933346855351e-05, + "loss": 0.0189, "step": 75355 }, { "epoch": 3.52, - "learning_rate": 1.3027987436125827e-05, - "loss": 0.042, + "learning_rate": 2.303886539871749e-05, + "loss": 0.0453, "step": 75360 }, { "epoch": 3.52, - "learning_rate": 1.3027518634850689e-05, - "loss": 0.0825, + "learning_rate": 2.303839732888147e-05, + "loss": 0.0415, "step": 75365 }, { "epoch": 3.52, - "learning_rate": 1.3027049833575549e-05, - "loss": 0.1049, + "learning_rate": 2.303792925904545e-05, + "loss": 0.0436, "step": 75370 }, { "epoch": 3.52, - "learning_rate": 1.302658103230041e-05, - "loss": 0.0567, + "learning_rate": 2.303746118920943e-05, + "loss": 0.1367, "step": 75375 }, { "epoch": 3.52, - "learning_rate": 1.302611223102527e-05, - "loss": 0.134, + "learning_rate": 2.303699311937341e-05, + "loss": 0.0813, "step": 75380 }, { "epoch": 3.52, - "learning_rate": 1.302564342975013e-05, - "loss": 0.1164, + "learning_rate": 2.303652504953739e-05, + "loss": 0.1048, "step": 75385 }, { "epoch": 3.52, - "learning_rate": 1.302517462847499e-05, - "loss": 0.1301, + "learning_rate": 2.303605697970137e-05, + "loss": 0.1535, "step": 75390 }, { "epoch": 3.52, - "learning_rate": 1.302470582719985e-05, - "loss": 0.2434, + "learning_rate": 2.303558890986535e-05, + "loss": 0.2507, "step": 75395 }, { "epoch": 3.52, - "learning_rate": 1.3024237025924711e-05, - "loss": 0.0633, + "learning_rate": 2.3035120840029334e-05, + "loss": 0.0765, "step": 75400 }, { "epoch": 3.52, - "learning_rate": 1.3023768224649571e-05, - "loss": 0.0375, + "learning_rate": 2.3034652770193314e-05, + "loss": 0.0275, "step": 75405 }, { "epoch": 3.52, - "learning_rate": 1.3023299423374433e-05, - "loss": 0.1196, + "learning_rate": 2.3034184700357293e-05, + "loss": 0.0237, "step": 75410 }, { "epoch": 3.52, - "learning_rate": 1.3022830622099295e-05, - "loss": 0.0867, + "learning_rate": 2.3033716630521273e-05, + "loss": 0.0986, "step": 75415 }, { "epoch": 3.52, - "learning_rate": 1.3022361820824155e-05, - "loss": 0.057, + "learning_rate": 2.3033248560685256e-05, + "loss": 0.066, "step": 75420 }, { "epoch": 3.52, - "learning_rate": 1.3021893019549015e-05, - "loss": 0.0412, + "learning_rate": 2.3032780490849236e-05, + "loss": 0.0759, "step": 75425 }, { "epoch": 3.52, - "learning_rate": 1.3021424218273874e-05, - "loss": 0.1075, + "learning_rate": 2.3032312421013216e-05, + "loss": 0.1098, "step": 75430 }, { "epoch": 3.52, - "learning_rate": 1.3020955416998734e-05, - "loss": 0.063, + "learning_rate": 2.3031844351177196e-05, + "loss": 0.1121, "step": 75435 }, { "epoch": 3.52, - "learning_rate": 1.3020486615723596e-05, - "loss": 0.1088, + "learning_rate": 2.303137628134118e-05, + "loss": 0.1161, "step": 75440 }, { "epoch": 3.52, - "learning_rate": 1.3020017814448456e-05, - "loss": 0.2778, + "learning_rate": 2.303090821150516e-05, + "loss": 0.2901, "step": 75445 }, { "epoch": 3.52, - "learning_rate": 1.3019549013173316e-05, - "loss": 0.0748, + "learning_rate": 2.3030440141669135e-05, + "loss": 0.1178, "step": 75450 }, { "epoch": 3.52, - "learning_rate": 1.301908021189818e-05, - "loss": 0.0526, + "learning_rate": 2.302997207183312e-05, + "loss": 0.022, "step": 75455 }, { "epoch": 3.52, - "learning_rate": 1.301861141062304e-05, - "loss": 0.0055, + "learning_rate": 2.3029504001997098e-05, + "loss": 0.0297, "step": 75460 }, { "epoch": 3.52, - "learning_rate": 1.3018142609347899e-05, - "loss": 0.028, + "learning_rate": 2.3029035932161078e-05, + "loss": 0.0266, "step": 75465 }, { "epoch": 3.52, - "learning_rate": 1.3017673808072759e-05, - "loss": 0.0265, + "learning_rate": 2.3028567862325058e-05, + "loss": 0.0967, "step": 75470 }, { "epoch": 3.52, - "learning_rate": 1.3017205006797619e-05, - "loss": 0.0494, + "learning_rate": 2.302809979248904e-05, + "loss": 0.1423, "step": 75475 }, { "epoch": 3.52, - "learning_rate": 1.301673620552248e-05, - "loss": 0.0785, + "learning_rate": 2.302763172265302e-05, + "loss": 0.0717, "step": 75480 }, { "epoch": 3.52, - "learning_rate": 1.301626740424734e-05, - "loss": 0.0594, + "learning_rate": 2.3027163652817e-05, + "loss": 0.064, "step": 75485 }, { "epoch": 3.52, - "learning_rate": 1.30157986029722e-05, - "loss": 0.2856, + "learning_rate": 2.302669558298098e-05, + "loss": 0.2371, "step": 75490 }, { "epoch": 3.52, - "learning_rate": 1.301532980169706e-05, - "loss": 0.279, + "learning_rate": 2.3026227513144964e-05, + "loss": 0.3141, "step": 75495 }, { "epoch": 3.52, - "learning_rate": 1.301486100042192e-05, - "loss": 0.0904, + "learning_rate": 2.3025759443308944e-05, + "loss": 0.0542, "step": 75500 }, { "epoch": 3.52, - "learning_rate": 1.3014392199146784e-05, - "loss": 0.0202, + "learning_rate": 2.3025291373472923e-05, + "loss": 0.04, "step": 75505 }, { "epoch": 3.52, - "learning_rate": 1.3013923397871644e-05, - "loss": 0.096, + "learning_rate": 2.3024823303636903e-05, + "loss": 0.0777, "step": 75510 }, { "epoch": 3.52, - "learning_rate": 1.3013454596596504e-05, - "loss": 0.0634, + "learning_rate": 2.3024355233800883e-05, + "loss": 0.0492, "step": 75515 }, { "epoch": 3.52, - "learning_rate": 1.3012985795321365e-05, - "loss": 0.0727, + "learning_rate": 2.3023887163964863e-05, + "loss": 0.1302, "step": 75520 }, { "epoch": 3.52, - "learning_rate": 1.3012516994046225e-05, - "loss": 0.1254, + "learning_rate": 2.3023419094128843e-05, + "loss": 0.1574, "step": 75525 }, { "epoch": 3.52, - "learning_rate": 1.3012048192771085e-05, - "loss": 0.0975, + "learning_rate": 2.3022951024292826e-05, + "loss": 0.1493, "step": 75530 }, { "epoch": 3.52, - "learning_rate": 1.3011579391495945e-05, - "loss": 0.0998, + "learning_rate": 2.3022482954456806e-05, + "loss": 0.188, "step": 75535 }, { "epoch": 3.52, - "learning_rate": 1.3011110590220805e-05, - "loss": 0.1179, + "learning_rate": 2.3022014884620786e-05, + "loss": 0.2345, "step": 75540 }, { "epoch": 3.53, - "learning_rate": 1.3010641788945667e-05, - "loss": 0.2451, + "learning_rate": 2.3021546814784765e-05, + "loss": 0.3475, "step": 75545 }, { "epoch": 3.53, - "learning_rate": 1.3010172987670528e-05, - "loss": 0.0274, + "learning_rate": 2.302107874494875e-05, + "loss": 0.0688, "step": 75550 }, { "epoch": 3.53, - "learning_rate": 1.3009704186395388e-05, - "loss": 0.0173, + "learning_rate": 2.302061067511273e-05, + "loss": 0.1157, "step": 75555 }, { "epoch": 3.53, - "learning_rate": 1.300923538512025e-05, - "loss": 0.0474, + "learning_rate": 2.3020142605276708e-05, + "loss": 0.0604, "step": 75560 }, { "epoch": 3.53, - "learning_rate": 1.300876658384511e-05, - "loss": 0.0392, + "learning_rate": 2.3019674535440688e-05, + "loss": 0.0717, "step": 75565 }, { "epoch": 3.53, - "learning_rate": 1.300829778256997e-05, - "loss": 0.0561, + "learning_rate": 2.301920646560467e-05, + "loss": 0.0483, "step": 75570 }, { "epoch": 3.53, - "learning_rate": 1.300782898129483e-05, - "loss": 0.0775, + "learning_rate": 2.3018738395768648e-05, + "loss": 0.0638, "step": 75575 }, { "epoch": 3.53, - "learning_rate": 1.300736018001969e-05, - "loss": 0.1528, + "learning_rate": 2.3018270325932627e-05, + "loss": 0.1486, "step": 75580 }, { "epoch": 3.53, - "learning_rate": 1.3006891378744551e-05, - "loss": 0.0843, + "learning_rate": 2.301780225609661e-05, + "loss": 0.1066, "step": 75585 }, { "epoch": 3.53, - "learning_rate": 1.3006422577469411e-05, - "loss": 0.2861, + "learning_rate": 2.301733418626059e-05, + "loss": 0.1281, "step": 75590 }, { "epoch": 3.53, - "learning_rate": 1.3005953776194273e-05, - "loss": 0.2198, + "learning_rate": 2.301686611642457e-05, + "loss": 0.1644, "step": 75595 }, { "epoch": 3.53, - "learning_rate": 1.3005484974919134e-05, - "loss": 0.0706, + "learning_rate": 2.301639804658855e-05, + "loss": 0.0979, "step": 75600 }, { "epoch": 3.53, - "learning_rate": 1.3005016173643994e-05, - "loss": 0.0442, + "learning_rate": 2.3015929976752533e-05, + "loss": 0.0167, "step": 75605 }, { "epoch": 3.53, - "learning_rate": 1.3004547372368854e-05, - "loss": 0.0733, + "learning_rate": 2.3015461906916513e-05, + "loss": 0.0042, "step": 75610 }, { "epoch": 3.53, - "learning_rate": 1.3004078571093714e-05, - "loss": 0.0251, + "learning_rate": 2.3014993837080493e-05, + "loss": 0.0673, "step": 75615 }, { "epoch": 3.53, - "learning_rate": 1.3003609769818574e-05, - "loss": 0.0101, + "learning_rate": 2.3014525767244473e-05, + "loss": 0.0481, "step": 75620 }, { "epoch": 3.53, - "learning_rate": 1.3003140968543436e-05, - "loss": 0.0295, + "learning_rate": 2.3014057697408456e-05, + "loss": 0.0615, "step": 75625 }, { "epoch": 3.53, - "learning_rate": 1.3002672167268296e-05, - "loss": 0.0772, + "learning_rate": 2.3013589627572436e-05, + "loss": 0.0607, "step": 75630 }, { "epoch": 3.53, - "learning_rate": 1.3002203365993155e-05, - "loss": 0.1783, + "learning_rate": 2.3013121557736416e-05, + "loss": 0.1156, "step": 75635 }, { "epoch": 3.53, - "learning_rate": 1.3001734564718019e-05, - "loss": 0.296, + "learning_rate": 2.3012653487900395e-05, + "loss": 0.2455, "step": 75640 }, { "epoch": 3.53, - "learning_rate": 1.3001265763442879e-05, - "loss": 0.1765, + "learning_rate": 2.3012185418064375e-05, + "loss": 0.2149, "step": 75645 }, { "epoch": 3.53, - "learning_rate": 1.3000796962167739e-05, - "loss": 0.0566, + "learning_rate": 2.3011717348228355e-05, + "loss": 0.0941, "step": 75650 }, { "epoch": 3.53, - "learning_rate": 1.3000328160892599e-05, - "loss": 0.0553, + "learning_rate": 2.3011249278392335e-05, + "loss": 0.0303, "step": 75655 }, { "epoch": 3.53, - "learning_rate": 1.2999859359617459e-05, - "loss": 0.0437, + "learning_rate": 2.3010781208556318e-05, + "loss": 0.0389, "step": 75660 }, { "epoch": 3.53, - "learning_rate": 1.299939055834232e-05, - "loss": 0.0497, + "learning_rate": 2.3010313138720298e-05, + "loss": 0.0669, "step": 75665 }, { "epoch": 3.53, - "learning_rate": 1.299892175706718e-05, - "loss": 0.0262, + "learning_rate": 2.3009845068884278e-05, + "loss": 0.0708, "step": 75670 }, { "epoch": 3.53, - "learning_rate": 1.299845295579204e-05, - "loss": 0.0857, + "learning_rate": 2.3009376999048258e-05, + "loss": 0.0696, "step": 75675 }, { "epoch": 3.53, - "learning_rate": 1.29979841545169e-05, - "loss": 0.0881, + "learning_rate": 2.300890892921224e-05, + "loss": 0.132, "step": 75680 }, { "epoch": 3.53, - "learning_rate": 1.2997515353241762e-05, - "loss": 0.1327, + "learning_rate": 2.300844085937622e-05, + "loss": 0.1884, "step": 75685 }, { "epoch": 3.53, - "learning_rate": 1.2997046551966623e-05, - "loss": 0.2564, + "learning_rate": 2.30079727895402e-05, + "loss": 0.2582, "step": 75690 }, { "epoch": 3.53, - "learning_rate": 1.2996577750691483e-05, - "loss": 0.2501, + "learning_rate": 2.300750471970418e-05, + "loss": 0.284, "step": 75695 }, { "epoch": 3.53, - "learning_rate": 1.2996108949416343e-05, - "loss": 0.0766, + "learning_rate": 2.300703664986816e-05, + "loss": 0.0842, "step": 75700 }, { "epoch": 3.53, - "learning_rate": 1.2995640148141205e-05, - "loss": 0.034, + "learning_rate": 2.300656858003214e-05, + "loss": 0.0067, "step": 75705 }, { "epoch": 3.53, - "learning_rate": 1.2995171346866065e-05, - "loss": 0.0137, + "learning_rate": 2.300610051019612e-05, + "loss": 0.0796, "step": 75710 }, { "epoch": 3.53, - "learning_rate": 1.2994702545590925e-05, - "loss": 0.0617, + "learning_rate": 2.3005632440360103e-05, + "loss": 0.0736, "step": 75715 }, { "epoch": 3.53, - "learning_rate": 1.2994233744315785e-05, - "loss": 0.1114, + "learning_rate": 2.3005164370524083e-05, + "loss": 0.0478, "step": 75720 }, { "epoch": 3.53, - "learning_rate": 1.2993764943040646e-05, - "loss": 0.0929, + "learning_rate": 2.3004696300688063e-05, + "loss": 0.0745, "step": 75725 }, { "epoch": 3.53, - "learning_rate": 1.2993296141765506e-05, - "loss": 0.1268, + "learning_rate": 2.3004228230852042e-05, + "loss": 0.0892, "step": 75730 }, { "epoch": 3.53, - "learning_rate": 1.2992827340490368e-05, - "loss": 0.1356, + "learning_rate": 2.3003760161016026e-05, + "loss": 0.1481, "step": 75735 }, { "epoch": 3.53, - "learning_rate": 1.299235853921523e-05, - "loss": 0.1289, + "learning_rate": 2.3003292091180005e-05, + "loss": 0.1881, "step": 75740 }, { "epoch": 3.53, - "learning_rate": 1.299188973794009e-05, - "loss": 0.2894, + "learning_rate": 2.3002824021343985e-05, + "loss": 0.2854, "step": 75745 }, { "epoch": 3.53, - "learning_rate": 1.299142093666495e-05, - "loss": 0.1238, + "learning_rate": 2.3002355951507965e-05, + "loss": 0.0674, "step": 75750 }, { "epoch": 3.53, - "learning_rate": 1.2990952135389809e-05, - "loss": 0.0415, + "learning_rate": 2.3001887881671948e-05, + "loss": 0.034, "step": 75755 }, { "epoch": 3.54, - "learning_rate": 1.2990483334114669e-05, - "loss": 0.0233, + "learning_rate": 2.3001419811835928e-05, + "loss": 0.024, "step": 75760 }, { "epoch": 3.54, - "learning_rate": 1.299001453283953e-05, - "loss": 0.0836, + "learning_rate": 2.3000951741999904e-05, + "loss": 0.1565, "step": 75765 }, { "epoch": 3.54, - "learning_rate": 1.298954573156439e-05, - "loss": 0.0405, + "learning_rate": 2.3000483672163888e-05, + "loss": 0.0369, "step": 75770 }, { "epoch": 3.54, - "learning_rate": 1.298907693028925e-05, - "loss": 0.0434, + "learning_rate": 2.3000015602327867e-05, + "loss": 0.1075, "step": 75775 }, { "epoch": 3.54, - "learning_rate": 1.2988608129014114e-05, - "loss": 0.0612, + "learning_rate": 2.2999547532491847e-05, + "loss": 0.0877, "step": 75780 }, { "epoch": 3.54, - "learning_rate": 1.2988139327738974e-05, - "loss": 0.1175, + "learning_rate": 2.2999079462655827e-05, + "loss": 0.1114, "step": 75785 }, { "epoch": 3.54, - "learning_rate": 1.2987670526463834e-05, - "loss": 0.1582, + "learning_rate": 2.299861139281981e-05, + "loss": 0.1925, "step": 75790 }, { "epoch": 3.54, - "learning_rate": 1.2987201725188694e-05, - "loss": 0.2338, + "learning_rate": 2.299814332298379e-05, + "loss": 0.3523, "step": 75795 }, { "epoch": 3.54, - "learning_rate": 1.2986732923913554e-05, - "loss": 0.0745, + "learning_rate": 2.299767525314777e-05, + "loss": 0.0298, "step": 75800 }, { "epoch": 3.54, - "learning_rate": 1.2986264122638415e-05, - "loss": 0.0183, + "learning_rate": 2.299720718331175e-05, + "loss": 0.0211, "step": 75805 }, { "epoch": 3.54, - "learning_rate": 1.2985795321363275e-05, - "loss": 0.0273, + "learning_rate": 2.2996739113475733e-05, + "loss": 0.0157, "step": 75810 }, { "epoch": 3.54, - "learning_rate": 1.2985326520088135e-05, - "loss": 0.0882, + "learning_rate": 2.2996271043639713e-05, + "loss": 0.0843, "step": 75815 }, { "epoch": 3.54, - "learning_rate": 1.2984857718812995e-05, - "loss": 0.1038, + "learning_rate": 2.2995802973803693e-05, + "loss": 0.0408, "step": 75820 }, { "epoch": 3.54, - "learning_rate": 1.2984388917537855e-05, - "loss": 0.077, + "learning_rate": 2.2995334903967676e-05, + "loss": 0.0298, "step": 75825 }, { "epoch": 3.54, - "learning_rate": 1.2983920116262718e-05, - "loss": 0.1234, + "learning_rate": 2.2994866834131652e-05, + "loss": 0.0492, "step": 75830 }, { "epoch": 3.54, - "learning_rate": 1.2983451314987578e-05, - "loss": 0.0779, + "learning_rate": 2.2994398764295632e-05, + "loss": 0.1977, "step": 75835 }, { "epoch": 3.54, - "learning_rate": 1.2982982513712438e-05, - "loss": 0.1526, + "learning_rate": 2.2993930694459612e-05, + "loss": 0.1296, "step": 75840 }, { "epoch": 3.54, - "learning_rate": 1.29825137124373e-05, - "loss": 0.2533, + "learning_rate": 2.2993462624623595e-05, + "loss": 0.3362, "step": 75845 }, { "epoch": 3.54, - "learning_rate": 1.298204491116216e-05, - "loss": 0.033, + "learning_rate": 2.2992994554787575e-05, + "loss": 0.0892, "step": 75850 }, { "epoch": 3.54, - "learning_rate": 1.298157610988702e-05, - "loss": 0.062, + "learning_rate": 2.2992526484951555e-05, + "loss": 0.0421, "step": 75855 }, { "epoch": 3.54, - "learning_rate": 1.298110730861188e-05, - "loss": 0.0556, + "learning_rate": 2.2992058415115535e-05, + "loss": 0.0259, "step": 75860 }, { "epoch": 3.54, - "learning_rate": 1.298063850733674e-05, - "loss": 0.076, + "learning_rate": 2.2991590345279518e-05, + "loss": 0.0719, "step": 75865 }, { "epoch": 3.54, - "learning_rate": 1.2980169706061601e-05, - "loss": 0.0307, + "learning_rate": 2.2991122275443498e-05, + "loss": 0.1003, "step": 75870 }, { "epoch": 3.54, - "learning_rate": 1.2979700904786463e-05, - "loss": 0.094, + "learning_rate": 2.2990654205607477e-05, + "loss": 0.0444, "step": 75875 }, { "epoch": 3.54, - "learning_rate": 1.2979232103511323e-05, - "loss": 0.1644, + "learning_rate": 2.299018613577146e-05, + "loss": 0.1047, "step": 75880 }, { "epoch": 3.54, - "learning_rate": 1.2978763302236184e-05, - "loss": 0.159, + "learning_rate": 2.298971806593544e-05, + "loss": 0.0841, "step": 75885 }, { "epoch": 3.54, - "learning_rate": 1.2978294500961044e-05, - "loss": 0.1273, + "learning_rate": 2.2989249996099417e-05, + "loss": 0.3822, "step": 75890 }, { "epoch": 3.54, - "learning_rate": 1.2977825699685904e-05, - "loss": 0.2573, + "learning_rate": 2.2988781926263397e-05, + "loss": 0.213, "step": 75895 }, { "epoch": 3.54, - "learning_rate": 1.2977356898410764e-05, - "loss": 0.0301, + "learning_rate": 2.298831385642738e-05, + "loss": 0.032, "step": 75900 }, { "epoch": 3.54, - "learning_rate": 1.2976888097135624e-05, - "loss": 0.0274, + "learning_rate": 2.298784578659136e-05, + "loss": 0.0514, "step": 75905 }, { "epoch": 3.54, - "learning_rate": 1.2976419295860486e-05, - "loss": 0.0311, + "learning_rate": 2.298737771675534e-05, + "loss": 0.0097, "step": 75910 }, { "epoch": 3.54, - "learning_rate": 1.2975950494585346e-05, - "loss": 0.062, + "learning_rate": 2.298690964691932e-05, + "loss": 0.1011, "step": 75915 }, { "epoch": 3.54, - "learning_rate": 1.2975481693310207e-05, - "loss": 0.0581, + "learning_rate": 2.2986441577083303e-05, + "loss": 0.0534, "step": 75920 }, { "epoch": 3.54, - "learning_rate": 1.2975012892035069e-05, - "loss": 0.0784, + "learning_rate": 2.2985973507247282e-05, + "loss": 0.0574, "step": 75925 }, { "epoch": 3.54, - "learning_rate": 1.2974544090759929e-05, - "loss": 0.1502, + "learning_rate": 2.2985505437411262e-05, + "loss": 0.0788, "step": 75930 }, { "epoch": 3.54, - "learning_rate": 1.2974075289484789e-05, - "loss": 0.1541, + "learning_rate": 2.2985037367575242e-05, + "loss": 0.1008, "step": 75935 }, { "epoch": 3.54, - "learning_rate": 1.2973606488209649e-05, - "loss": 0.2041, + "learning_rate": 2.2984569297739225e-05, + "loss": 0.0825, "step": 75940 }, { "epoch": 3.54, - "learning_rate": 1.2973137686934509e-05, - "loss": 0.3387, + "learning_rate": 2.2984101227903205e-05, + "loss": 0.2126, "step": 75945 }, { "epoch": 3.54, - "learning_rate": 1.297266888565937e-05, - "loss": 0.0406, + "learning_rate": 2.2983633158067185e-05, + "loss": 0.0414, "step": 75950 }, { "epoch": 3.54, - "learning_rate": 1.297220008438423e-05, - "loss": 0.0361, + "learning_rate": 2.2983165088231165e-05, + "loss": 0.0204, "step": 75955 }, { "epoch": 3.54, - "learning_rate": 1.297173128310909e-05, - "loss": 0.0568, + "learning_rate": 2.2982697018395144e-05, + "loss": 0.0314, "step": 75960 }, { "epoch": 3.54, - "learning_rate": 1.2971262481833953e-05, - "loss": 0.1504, + "learning_rate": 2.2982228948559124e-05, + "loss": 0.024, "step": 75965 }, { "epoch": 3.54, - "learning_rate": 1.2970793680558813e-05, - "loss": 0.1142, + "learning_rate": 2.2981760878723104e-05, + "loss": 0.025, "step": 75970 }, { "epoch": 3.55, - "learning_rate": 1.2970324879283673e-05, - "loss": 0.1309, + "learning_rate": 2.2981292808887087e-05, + "loss": 0.0855, "step": 75975 }, { "epoch": 3.55, - "learning_rate": 1.2969856078008533e-05, - "loss": 0.1008, + "learning_rate": 2.2980824739051067e-05, + "loss": 0.1139, "step": 75980 }, { "epoch": 3.55, - "learning_rate": 1.2969387276733393e-05, - "loss": 0.1449, + "learning_rate": 2.2980356669215047e-05, + "loss": 0.0926, "step": 75985 }, { "epoch": 3.55, - "learning_rate": 1.2968918475458255e-05, - "loss": 0.2658, + "learning_rate": 2.2979888599379027e-05, + "loss": 0.1505, "step": 75990 }, { "epoch": 3.55, - "learning_rate": 1.2968449674183115e-05, - "loss": 0.1993, + "learning_rate": 2.297942052954301e-05, + "loss": 0.3218, "step": 75995 }, { "epoch": 3.55, - "learning_rate": 1.2967980872907975e-05, - "loss": 0.1139, + "learning_rate": 2.297895245970699e-05, + "loss": 0.0624, "step": 76000 }, { "epoch": 3.55, - "learning_rate": 1.2967512071632835e-05, - "loss": 0.0226, + "learning_rate": 2.297848438987097e-05, + "loss": 0.059, "step": 76005 }, { "epoch": 3.55, - "learning_rate": 1.2967043270357695e-05, - "loss": 0.0666, + "learning_rate": 2.2978016320034953e-05, + "loss": 0.0265, "step": 76010 }, { "epoch": 3.55, - "learning_rate": 1.2966574469082558e-05, - "loss": 0.0206, + "learning_rate": 2.2977548250198933e-05, + "loss": 0.0891, "step": 76015 }, { "epoch": 3.55, - "learning_rate": 1.2966105667807418e-05, - "loss": 0.08, + "learning_rate": 2.297708018036291e-05, + "loss": 0.1124, "step": 76020 }, { "epoch": 3.55, - "learning_rate": 1.2965636866532278e-05, - "loss": 0.1427, + "learning_rate": 2.297661211052689e-05, + "loss": 0.0709, "step": 76025 }, { "epoch": 3.55, - "learning_rate": 1.296516806525714e-05, - "loss": 0.1729, + "learning_rate": 2.2976144040690872e-05, + "loss": 0.0708, "step": 76030 }, { "epoch": 3.55, - "learning_rate": 1.2964699263982e-05, - "loss": 0.1243, + "learning_rate": 2.2975675970854852e-05, + "loss": 0.1013, "step": 76035 }, { "epoch": 3.55, - "learning_rate": 1.296423046270686e-05, - "loss": 0.2969, + "learning_rate": 2.297520790101883e-05, + "loss": 0.1331, "step": 76040 }, { "epoch": 3.55, - "learning_rate": 1.296376166143172e-05, - "loss": 0.3342, + "learning_rate": 2.297473983118281e-05, + "loss": 0.2289, "step": 76045 }, { "epoch": 3.55, - "learning_rate": 1.296329286015658e-05, - "loss": 0.0422, + "learning_rate": 2.2974271761346795e-05, + "loss": 0.0751, "step": 76050 }, { "epoch": 3.55, - "learning_rate": 1.296282405888144e-05, - "loss": 0.0714, + "learning_rate": 2.2973803691510775e-05, + "loss": 0.0219, "step": 76055 }, { "epoch": 3.55, - "learning_rate": 1.2962355257606302e-05, - "loss": 0.0715, + "learning_rate": 2.2973335621674754e-05, + "loss": 0.0489, "step": 76060 }, { "epoch": 3.55, - "learning_rate": 1.2961886456331162e-05, - "loss": 0.0332, + "learning_rate": 2.2972867551838738e-05, + "loss": 0.0348, "step": 76065 }, { "epoch": 3.55, - "learning_rate": 1.2961417655056024e-05, - "loss": 0.0604, + "learning_rate": 2.2972399482002717e-05, + "loss": 0.1195, "step": 76070 }, { "epoch": 3.55, - "learning_rate": 1.2960948853780884e-05, - "loss": 0.0826, + "learning_rate": 2.2971931412166697e-05, + "loss": 0.0628, "step": 76075 }, { "epoch": 3.55, - "learning_rate": 1.2960480052505744e-05, - "loss": 0.0887, + "learning_rate": 2.2971463342330674e-05, + "loss": 0.0701, "step": 76080 }, { "epoch": 3.55, - "learning_rate": 1.2960011251230604e-05, - "loss": 0.1112, + "learning_rate": 2.2970995272494657e-05, + "loss": 0.2953, "step": 76085 }, { "epoch": 3.55, - "learning_rate": 1.2959542449955465e-05, - "loss": 0.1512, + "learning_rate": 2.2970527202658637e-05, + "loss": 0.1406, "step": 76090 }, { "epoch": 3.55, - "learning_rate": 1.2959073648680325e-05, - "loss": 0.221, + "learning_rate": 2.2970059132822616e-05, + "loss": 0.2119, "step": 76095 }, { "epoch": 3.55, - "learning_rate": 1.2958604847405185e-05, - "loss": 0.0429, + "learning_rate": 2.2969591062986596e-05, + "loss": 0.0777, "step": 76100 }, { "epoch": 3.55, - "learning_rate": 1.2958136046130047e-05, - "loss": 0.0337, + "learning_rate": 2.296912299315058e-05, + "loss": 0.0034, "step": 76105 }, { "epoch": 3.55, - "learning_rate": 1.2957667244854908e-05, - "loss": 0.0281, + "learning_rate": 2.296865492331456e-05, + "loss": 0.0567, "step": 76110 }, { "epoch": 3.55, - "learning_rate": 1.2957198443579768e-05, - "loss": 0.068, + "learning_rate": 2.296818685347854e-05, + "loss": 0.041, "step": 76115 }, { "epoch": 3.55, - "learning_rate": 1.2956729642304628e-05, - "loss": 0.0651, + "learning_rate": 2.296771878364252e-05, + "loss": 0.0218, "step": 76120 }, { "epoch": 3.55, - "learning_rate": 1.2956260841029488e-05, - "loss": 0.0569, + "learning_rate": 2.2967250713806502e-05, + "loss": 0.0892, "step": 76125 }, { "epoch": 3.55, - "learning_rate": 1.295579203975435e-05, - "loss": 0.1717, + "learning_rate": 2.2966782643970482e-05, + "loss": 0.1246, "step": 76130 }, { "epoch": 3.55, - "learning_rate": 1.295532323847921e-05, - "loss": 0.1179, + "learning_rate": 2.2966314574134462e-05, + "loss": 0.0864, "step": 76135 }, { "epoch": 3.55, - "learning_rate": 1.295485443720407e-05, - "loss": 0.27, + "learning_rate": 2.2965846504298445e-05, + "loss": 0.1869, "step": 76140 }, { "epoch": 3.55, - "learning_rate": 1.295438563592893e-05, - "loss": 0.2105, + "learning_rate": 2.296537843446242e-05, + "loss": 0.2577, "step": 76145 }, { "epoch": 3.55, - "learning_rate": 1.295391683465379e-05, - "loss": 0.0964, + "learning_rate": 2.29649103646264e-05, + "loss": 0.1, "step": 76150 }, { "epoch": 3.55, - "learning_rate": 1.2953448033378653e-05, - "loss": 0.0165, + "learning_rate": 2.296444229479038e-05, + "loss": 0.0219, "step": 76155 }, { "epoch": 3.55, - "learning_rate": 1.2952979232103513e-05, - "loss": 0.0608, + "learning_rate": 2.2963974224954364e-05, + "loss": 0.0653, "step": 76160 }, { "epoch": 3.55, - "learning_rate": 1.2952510430828373e-05, - "loss": 0.0416, + "learning_rate": 2.2963506155118344e-05, + "loss": 0.047, "step": 76165 }, { "epoch": 3.55, - "learning_rate": 1.2952041629553234e-05, - "loss": 0.0805, + "learning_rate": 2.2963038085282324e-05, + "loss": 0.0275, "step": 76170 }, { "epoch": 3.55, - "learning_rate": 1.2951572828278094e-05, - "loss": 0.0718, + "learning_rate": 2.2962570015446304e-05, + "loss": 0.0646, "step": 76175 }, { "epoch": 3.55, - "learning_rate": 1.2951104027002954e-05, - "loss": 0.1264, + "learning_rate": 2.2962101945610287e-05, + "loss": 0.1236, "step": 76180 }, { "epoch": 3.55, - "learning_rate": 1.2950635225727814e-05, - "loss": 0.1554, + "learning_rate": 2.2961633875774267e-05, + "loss": 0.0879, "step": 76185 }, { "epoch": 3.56, - "learning_rate": 1.2950166424452674e-05, - "loss": 0.262, + "learning_rate": 2.2961165805938247e-05, + "loss": 0.1976, "step": 76190 }, { "epoch": 3.56, - "learning_rate": 1.2949697623177536e-05, - "loss": 0.3103, + "learning_rate": 2.296069773610223e-05, + "loss": 0.1869, "step": 76195 }, { "epoch": 3.56, - "learning_rate": 1.2949228821902397e-05, - "loss": 0.0947, + "learning_rate": 2.296022966626621e-05, + "loss": 0.0744, "step": 76200 }, { "epoch": 3.56, - "learning_rate": 1.2948760020627257e-05, - "loss": 0.042, + "learning_rate": 2.2959761596430186e-05, + "loss": 0.0348, "step": 76205 }, { "epoch": 3.56, - "learning_rate": 1.2948291219352119e-05, - "loss": 0.0349, + "learning_rate": 2.2959293526594166e-05, + "loss": 0.0285, "step": 76210 }, { "epoch": 3.56, - "learning_rate": 1.2947822418076979e-05, - "loss": 0.0472, + "learning_rate": 2.295882545675815e-05, + "loss": 0.0129, "step": 76215 }, { "epoch": 3.56, - "learning_rate": 1.2947353616801839e-05, - "loss": 0.0736, + "learning_rate": 2.295835738692213e-05, + "loss": 0.1022, "step": 76220 }, { "epoch": 3.56, - "learning_rate": 1.2946884815526699e-05, - "loss": 0.1034, + "learning_rate": 2.295788931708611e-05, + "loss": 0.0734, "step": 76225 }, { "epoch": 3.56, - "learning_rate": 1.2946416014251559e-05, - "loss": 0.1762, + "learning_rate": 2.295742124725009e-05, + "loss": 0.1081, "step": 76230 }, { "epoch": 3.56, - "learning_rate": 1.294594721297642e-05, - "loss": 0.1403, + "learning_rate": 2.295695317741407e-05, + "loss": 0.171, "step": 76235 }, { "epoch": 3.56, - "learning_rate": 1.294547841170128e-05, - "loss": 0.1301, + "learning_rate": 2.295648510757805e-05, + "loss": 0.1444, "step": 76240 }, { "epoch": 3.56, - "learning_rate": 1.2945009610426142e-05, - "loss": 0.3283, + "learning_rate": 2.295601703774203e-05, + "loss": 0.1584, "step": 76245 }, { "epoch": 3.56, - "learning_rate": 1.2944540809151004e-05, - "loss": 0.0479, + "learning_rate": 2.2955548967906015e-05, + "loss": 0.0951, "step": 76250 }, { "epoch": 3.56, - "learning_rate": 1.2944072007875863e-05, - "loss": 0.014, + "learning_rate": 2.2955080898069994e-05, + "loss": 0.018, "step": 76255 }, { "epoch": 3.56, - "learning_rate": 1.2943603206600723e-05, - "loss": 0.0383, + "learning_rate": 2.2954612828233974e-05, + "loss": 0.0203, "step": 76260 }, { "epoch": 3.56, - "learning_rate": 1.2943134405325583e-05, - "loss": 0.0522, + "learning_rate": 2.2954144758397954e-05, + "loss": 0.027, "step": 76265 }, { "epoch": 3.56, - "learning_rate": 1.2942665604050443e-05, - "loss": 0.1356, + "learning_rate": 2.2953676688561934e-05, + "loss": 0.0879, "step": 76270 }, { "epoch": 3.56, - "learning_rate": 1.2942196802775305e-05, - "loss": 0.0687, + "learning_rate": 2.2953208618725914e-05, + "loss": 0.0914, "step": 76275 }, { "epoch": 3.56, - "learning_rate": 1.2941728001500165e-05, - "loss": 0.0764, + "learning_rate": 2.2952740548889893e-05, + "loss": 0.0525, "step": 76280 }, { "epoch": 3.56, - "learning_rate": 1.2941259200225025e-05, - "loss": 0.1817, + "learning_rate": 2.2952272479053873e-05, + "loss": 0.2049, "step": 76285 }, { "epoch": 3.56, - "learning_rate": 1.2940790398949888e-05, - "loss": 0.2576, + "learning_rate": 2.2951804409217856e-05, + "loss": 0.1856, "step": 76290 }, { "epoch": 3.56, - "learning_rate": 1.2940321597674748e-05, - "loss": 0.2009, + "learning_rate": 2.2951336339381836e-05, + "loss": 0.2291, "step": 76295 }, { "epoch": 3.56, - "learning_rate": 1.2939852796399608e-05, - "loss": 0.0719, + "learning_rate": 2.2950868269545816e-05, + "loss": 0.0713, "step": 76300 }, { "epoch": 3.56, - "learning_rate": 1.2939383995124468e-05, - "loss": 0.0553, + "learning_rate": 2.2950400199709796e-05, + "loss": 0.0263, "step": 76305 }, { "epoch": 3.56, - "learning_rate": 1.2938915193849328e-05, - "loss": 0.0314, + "learning_rate": 2.294993212987378e-05, + "loss": 0.0241, "step": 76310 }, { "epoch": 3.56, - "learning_rate": 1.293844639257419e-05, - "loss": 0.0829, + "learning_rate": 2.294946406003776e-05, + "loss": 0.0202, "step": 76315 }, { "epoch": 3.56, - "learning_rate": 1.293797759129905e-05, - "loss": 0.121, + "learning_rate": 2.294899599020174e-05, + "loss": 0.1128, "step": 76320 }, { "epoch": 3.56, - "learning_rate": 1.293750879002391e-05, - "loss": 0.071, + "learning_rate": 2.2948527920365722e-05, + "loss": 0.0587, "step": 76325 }, { "epoch": 3.56, - "learning_rate": 1.293703998874877e-05, - "loss": 0.0498, + "learning_rate": 2.2948059850529702e-05, + "loss": 0.0849, "step": 76330 }, { "epoch": 3.56, - "learning_rate": 1.293657118747363e-05, - "loss": 0.0843, + "learning_rate": 2.2947591780693678e-05, + "loss": 0.0773, "step": 76335 }, { "epoch": 3.56, - "learning_rate": 1.2936102386198493e-05, - "loss": 0.1836, + "learning_rate": 2.2947123710857658e-05, + "loss": 0.1456, "step": 76340 }, { "epoch": 3.56, - "learning_rate": 1.2935633584923352e-05, - "loss": 0.2321, + "learning_rate": 2.294665564102164e-05, + "loss": 0.1668, "step": 76345 }, { "epoch": 3.56, - "learning_rate": 1.2935164783648212e-05, - "loss": 0.0305, + "learning_rate": 2.294618757118562e-05, + "loss": 0.0526, "step": 76350 }, { "epoch": 3.56, - "learning_rate": 1.2934695982373074e-05, - "loss": 0.0301, + "learning_rate": 2.29457195013496e-05, + "loss": 0.0321, "step": 76355 }, { "epoch": 3.56, - "learning_rate": 1.2934227181097934e-05, - "loss": 0.0454, + "learning_rate": 2.294525143151358e-05, + "loss": 0.0436, "step": 76360 }, { "epoch": 3.56, - "learning_rate": 1.2933758379822794e-05, - "loss": 0.0474, + "learning_rate": 2.2944783361677564e-05, + "loss": 0.009, "step": 76365 }, { "epoch": 3.56, - "learning_rate": 1.2933289578547654e-05, - "loss": 0.0194, + "learning_rate": 2.2944315291841544e-05, + "loss": 0.0496, "step": 76370 }, { "epoch": 3.56, - "learning_rate": 1.2932820777272514e-05, - "loss": 0.0325, + "learning_rate": 2.2943847222005524e-05, + "loss": 0.1081, "step": 76375 }, { "epoch": 3.56, - "learning_rate": 1.2932351975997375e-05, - "loss": 0.074, + "learning_rate": 2.2943379152169507e-05, + "loss": 0.1664, "step": 76380 }, { "epoch": 3.56, - "learning_rate": 1.2931883174722237e-05, - "loss": 0.134, + "learning_rate": 2.2942911082333487e-05, + "loss": 0.0751, "step": 76385 }, { "epoch": 3.56, - "learning_rate": 1.2931414373447097e-05, - "loss": 0.1937, + "learning_rate": 2.2942443012497466e-05, + "loss": 0.1534, "step": 76390 }, { "epoch": 3.56, - "learning_rate": 1.2930945572171959e-05, - "loss": 0.2208, + "learning_rate": 2.2941974942661443e-05, + "loss": 0.375, "step": 76395 }, { "epoch": 3.56, - "learning_rate": 1.2930476770896819e-05, - "loss": 0.073, + "learning_rate": 2.2941506872825426e-05, + "loss": 0.0287, "step": 76400 }, { "epoch": 3.57, - "learning_rate": 1.2930007969621678e-05, - "loss": 0.0244, + "learning_rate": 2.2941038802989406e-05, + "loss": 0.0216, "step": 76405 }, { "epoch": 3.57, - "learning_rate": 1.2929539168346538e-05, - "loss": 0.0703, + "learning_rate": 2.2940570733153386e-05, + "loss": 0.0363, "step": 76410 }, { "epoch": 3.57, - "learning_rate": 1.2929070367071398e-05, - "loss": 0.0698, + "learning_rate": 2.2940102663317365e-05, + "loss": 0.0692, "step": 76415 }, { "epoch": 3.57, - "learning_rate": 1.292860156579626e-05, - "loss": 0.0817, + "learning_rate": 2.293963459348135e-05, + "loss": 0.0744, "step": 76420 }, { "epoch": 3.57, - "learning_rate": 1.292813276452112e-05, - "loss": 0.0615, + "learning_rate": 2.293916652364533e-05, + "loss": 0.0741, "step": 76425 }, { "epoch": 3.57, - "learning_rate": 1.2927663963245981e-05, - "loss": 0.0691, + "learning_rate": 2.2938698453809308e-05, + "loss": 0.1401, "step": 76430 }, { "epoch": 3.57, - "learning_rate": 1.2927195161970843e-05, - "loss": 0.0809, + "learning_rate": 2.293823038397329e-05, + "loss": 0.1518, "step": 76435 }, { "epoch": 3.57, - "learning_rate": 1.2926726360695703e-05, - "loss": 0.2511, + "learning_rate": 2.293776231413727e-05, + "loss": 0.202, "step": 76440 }, { "epoch": 3.57, - "learning_rate": 1.2926257559420563e-05, - "loss": 0.2983, + "learning_rate": 2.293729424430125e-05, + "loss": 0.3493, "step": 76445 }, { "epoch": 3.57, - "learning_rate": 1.2925788758145423e-05, - "loss": 0.073, + "learning_rate": 2.293682617446523e-05, + "loss": 0.0707, "step": 76450 }, { "epoch": 3.57, - "learning_rate": 1.2925319956870283e-05, - "loss": 0.0145, + "learning_rate": 2.2936358104629214e-05, + "loss": 0.0196, "step": 76455 }, { "epoch": 3.57, - "learning_rate": 1.2924851155595144e-05, - "loss": 0.0306, + "learning_rate": 2.293589003479319e-05, + "loss": 0.0618, "step": 76460 }, { "epoch": 3.57, - "learning_rate": 1.2924382354320004e-05, - "loss": 0.0594, + "learning_rate": 2.293542196495717e-05, + "loss": 0.0249, "step": 76465 }, { "epoch": 3.57, - "learning_rate": 1.2923913553044864e-05, - "loss": 0.056, + "learning_rate": 2.293495389512115e-05, + "loss": 0.0592, "step": 76470 }, { "epoch": 3.57, - "learning_rate": 1.2923444751769724e-05, - "loss": 0.0702, + "learning_rate": 2.2934485825285133e-05, + "loss": 0.052, "step": 76475 }, { "epoch": 3.57, - "learning_rate": 1.2922975950494588e-05, - "loss": 0.039, + "learning_rate": 2.2934017755449113e-05, + "loss": 0.0874, "step": 76480 }, { "epoch": 3.57, - "learning_rate": 1.2922507149219448e-05, - "loss": 0.1921, + "learning_rate": 2.2933549685613093e-05, + "loss": 0.108, "step": 76485 }, { "epoch": 3.57, - "learning_rate": 1.2922038347944307e-05, - "loss": 0.1554, + "learning_rate": 2.2933081615777076e-05, + "loss": 0.1093, "step": 76490 }, { "epoch": 3.57, - "learning_rate": 1.2921569546669167e-05, - "loss": 0.2371, + "learning_rate": 2.2932613545941056e-05, + "loss": 0.1926, "step": 76495 }, { "epoch": 3.57, - "learning_rate": 1.2921100745394029e-05, - "loss": 0.0921, + "learning_rate": 2.2932145476105036e-05, + "loss": 0.1299, "step": 76500 }, { "epoch": 3.57, - "learning_rate": 1.2920631944118889e-05, - "loss": 0.0387, + "learning_rate": 2.2931677406269016e-05, + "loss": 0.0526, "step": 76505 }, { "epoch": 3.57, - "learning_rate": 1.2920163142843749e-05, - "loss": 0.0425, + "learning_rate": 2.2931209336433e-05, + "loss": 0.0501, "step": 76510 }, { "epoch": 3.57, - "learning_rate": 1.2919694341568609e-05, - "loss": 0.0556, + "learning_rate": 2.293074126659698e-05, + "loss": 0.0336, "step": 76515 }, { "epoch": 3.57, - "learning_rate": 1.291922554029347e-05, - "loss": 0.0308, + "learning_rate": 2.293027319676096e-05, + "loss": 0.027, "step": 76520 }, { "epoch": 3.57, - "learning_rate": 1.2918756739018332e-05, - "loss": 0.0584, + "learning_rate": 2.2929805126924935e-05, + "loss": 0.0495, "step": 76525 }, { "epoch": 3.57, - "learning_rate": 1.2918287937743192e-05, - "loss": 0.1801, + "learning_rate": 2.2929337057088918e-05, + "loss": 0.1188, "step": 76530 }, { "epoch": 3.57, - "learning_rate": 1.2917819136468052e-05, - "loss": 0.0882, + "learning_rate": 2.2928868987252898e-05, + "loss": 0.1611, "step": 76535 }, { "epoch": 3.57, - "learning_rate": 1.2917350335192914e-05, - "loss": 0.1778, + "learning_rate": 2.2928400917416878e-05, + "loss": 0.2037, "step": 76540 }, { "epoch": 3.57, - "learning_rate": 1.2916881533917774e-05, - "loss": 0.3989, + "learning_rate": 2.2927932847580858e-05, + "loss": 0.2346, "step": 76545 }, { "epoch": 3.57, - "learning_rate": 1.2916412732642633e-05, - "loss": 0.0859, + "learning_rate": 2.292746477774484e-05, + "loss": 0.0153, "step": 76550 }, { "epoch": 3.57, - "learning_rate": 1.2915943931367493e-05, - "loss": 0.0368, + "learning_rate": 2.292699670790882e-05, + "loss": 0.0139, "step": 76555 }, { "epoch": 3.57, - "learning_rate": 1.2915475130092355e-05, - "loss": 0.0566, + "learning_rate": 2.29265286380728e-05, + "loss": 0.0635, "step": 76560 }, { "epoch": 3.57, - "learning_rate": 1.2915006328817215e-05, - "loss": 0.081, + "learning_rate": 2.2926060568236784e-05, + "loss": 0.0439, "step": 76565 }, { "epoch": 3.57, - "learning_rate": 1.2914537527542077e-05, - "loss": 0.0375, + "learning_rate": 2.2925592498400764e-05, + "loss": 0.0382, "step": 76570 }, { "epoch": 3.57, - "learning_rate": 1.2914068726266937e-05, - "loss": 0.0694, + "learning_rate": 2.2925124428564743e-05, + "loss": 0.0692, "step": 76575 }, { "epoch": 3.57, - "learning_rate": 1.2913599924991798e-05, - "loss": 0.1095, + "learning_rate": 2.2924656358728723e-05, + "loss": 0.0943, "step": 76580 }, { "epoch": 3.57, - "learning_rate": 1.2913131123716658e-05, - "loss": 0.1153, + "learning_rate": 2.2924188288892703e-05, + "loss": 0.1173, "step": 76585 }, { "epoch": 3.57, - "learning_rate": 1.2912662322441518e-05, - "loss": 0.1803, + "learning_rate": 2.2923720219056683e-05, + "loss": 0.1675, "step": 76590 }, { "epoch": 3.57, - "learning_rate": 1.2912193521166378e-05, - "loss": 0.2579, + "learning_rate": 2.2923252149220663e-05, + "loss": 0.3236, "step": 76595 }, { "epoch": 3.57, - "learning_rate": 1.291172471989124e-05, - "loss": 0.1152, + "learning_rate": 2.2922784079384642e-05, + "loss": 0.0778, "step": 76600 }, { "epoch": 3.57, - "learning_rate": 1.29112559186161e-05, - "loss": 0.0289, + "learning_rate": 2.2922316009548626e-05, + "loss": 0.0151, "step": 76605 }, { "epoch": 3.57, - "learning_rate": 1.291078711734096e-05, - "loss": 0.0535, + "learning_rate": 2.2921847939712605e-05, + "loss": 0.0665, "step": 76610 }, { "epoch": 3.57, - "learning_rate": 1.291031831606582e-05, - "loss": 0.1614, + "learning_rate": 2.2921379869876585e-05, + "loss": 0.0376, "step": 76615 }, { "epoch": 3.58, - "learning_rate": 1.2909849514790683e-05, - "loss": 0.0627, + "learning_rate": 2.292091180004057e-05, + "loss": 0.0486, "step": 76620 }, { "epoch": 3.58, - "learning_rate": 1.2909380713515543e-05, - "loss": 0.0397, + "learning_rate": 2.2920443730204548e-05, + "loss": 0.0484, "step": 76625 }, { "epoch": 3.58, - "learning_rate": 1.2908911912240403e-05, - "loss": 0.1691, + "learning_rate": 2.2919975660368528e-05, + "loss": 0.15, "step": 76630 }, { "epoch": 3.58, - "learning_rate": 1.2908443110965262e-05, - "loss": 0.0838, + "learning_rate": 2.2919507590532508e-05, + "loss": 0.1707, "step": 76635 }, { "epoch": 3.58, - "learning_rate": 1.2907974309690124e-05, - "loss": 0.1414, + "learning_rate": 2.291903952069649e-05, + "loss": 0.2918, "step": 76640 }, { "epoch": 3.58, - "learning_rate": 1.2907505508414984e-05, - "loss": 0.2367, + "learning_rate": 2.291857145086047e-05, + "loss": 0.2758, "step": 76645 }, { "epoch": 3.58, - "learning_rate": 1.2907036707139844e-05, - "loss": 0.1015, + "learning_rate": 2.2918103381024447e-05, + "loss": 0.0535, "step": 76650 }, { "epoch": 3.58, - "learning_rate": 1.2906567905864704e-05, - "loss": 0.0206, + "learning_rate": 2.2917635311188427e-05, + "loss": 0.0563, "step": 76655 }, { "epoch": 3.58, - "learning_rate": 1.2906099104589564e-05, - "loss": 0.0277, + "learning_rate": 2.291716724135241e-05, + "loss": 0.0557, "step": 76660 }, { "epoch": 3.58, - "learning_rate": 1.2905630303314427e-05, - "loss": 0.059, + "learning_rate": 2.291669917151639e-05, + "loss": 0.0522, "step": 76665 }, { "epoch": 3.58, - "learning_rate": 1.2905161502039287e-05, - "loss": 0.0672, + "learning_rate": 2.291623110168037e-05, + "loss": 0.0445, "step": 76670 }, { "epoch": 3.58, - "learning_rate": 1.2904692700764147e-05, - "loss": 0.0522, + "learning_rate": 2.2915763031844353e-05, + "loss": 0.0677, "step": 76675 }, { "epoch": 3.58, - "learning_rate": 1.2904223899489009e-05, - "loss": 0.1238, + "learning_rate": 2.2915294962008333e-05, + "loss": 0.0632, "step": 76680 }, { "epoch": 3.58, - "learning_rate": 1.2903755098213869e-05, - "loss": 0.105, + "learning_rate": 2.2914826892172313e-05, + "loss": 0.1305, "step": 76685 }, { "epoch": 3.58, - "learning_rate": 1.2903286296938729e-05, - "loss": 0.299, + "learning_rate": 2.2914358822336293e-05, + "loss": 0.0235, "step": 76690 }, { "epoch": 3.58, - "learning_rate": 1.2902817495663588e-05, - "loss": 0.1564, + "learning_rate": 2.2913890752500276e-05, + "loss": 0.2446, "step": 76695 }, { "epoch": 3.58, - "learning_rate": 1.2902348694388448e-05, - "loss": 0.0793, + "learning_rate": 2.2913422682664256e-05, + "loss": 0.0739, "step": 76700 }, { "epoch": 3.58, - "learning_rate": 1.290187989311331e-05, - "loss": 0.0337, + "learning_rate": 2.2912954612828236e-05, + "loss": 0.0492, "step": 76705 }, { "epoch": 3.58, - "learning_rate": 1.2901411091838172e-05, - "loss": 0.0111, + "learning_rate": 2.2912486542992215e-05, + "loss": 0.0438, "step": 76710 }, { "epoch": 3.58, - "learning_rate": 1.2900942290563032e-05, - "loss": 0.0667, + "learning_rate": 2.2912018473156195e-05, + "loss": 0.0563, "step": 76715 }, { "epoch": 3.58, - "learning_rate": 1.2900473489287893e-05, - "loss": 0.0295, + "learning_rate": 2.2911550403320175e-05, + "loss": 0.0855, "step": 76720 }, { "epoch": 3.58, - "learning_rate": 1.2900004688012753e-05, - "loss": 0.0884, + "learning_rate": 2.2911082333484155e-05, + "loss": 0.0906, "step": 76725 }, { "epoch": 3.58, - "learning_rate": 1.2899535886737613e-05, - "loss": 0.0905, + "learning_rate": 2.2910614263648135e-05, + "loss": 0.0823, "step": 76730 }, { "epoch": 3.58, - "learning_rate": 1.2899067085462473e-05, - "loss": 0.0383, + "learning_rate": 2.2910146193812118e-05, + "loss": 0.1404, "step": 76735 }, { "epoch": 3.58, - "learning_rate": 1.2898598284187333e-05, - "loss": 0.1757, + "learning_rate": 2.2909678123976098e-05, + "loss": 0.1766, "step": 76740 }, { "epoch": 3.58, - "learning_rate": 1.2898129482912195e-05, - "loss": 0.1919, + "learning_rate": 2.2909210054140077e-05, + "loss": 0.4092, "step": 76745 }, { "epoch": 3.58, - "learning_rate": 1.2897660681637055e-05, - "loss": 0.0592, + "learning_rate": 2.290874198430406e-05, + "loss": 0.0721, "step": 76750 }, { "epoch": 3.58, - "learning_rate": 1.2897191880361916e-05, - "loss": 0.0298, + "learning_rate": 2.290827391446804e-05, + "loss": 0.0066, "step": 76755 }, { "epoch": 3.58, - "learning_rate": 1.2896723079086778e-05, - "loss": 0.0382, + "learning_rate": 2.290780584463202e-05, + "loss": 0.0315, "step": 76760 }, { "epoch": 3.58, - "learning_rate": 1.2896254277811638e-05, - "loss": 0.028, + "learning_rate": 2.2907337774796e-05, + "loss": 0.051, "step": 76765 }, { "epoch": 3.58, - "learning_rate": 1.2895785476536498e-05, - "loss": 0.0667, + "learning_rate": 2.2906869704959983e-05, + "loss": 0.0717, "step": 76770 }, { "epoch": 3.58, - "learning_rate": 1.2895316675261358e-05, - "loss": 0.101, + "learning_rate": 2.290640163512396e-05, + "loss": 0.1244, "step": 76775 }, { "epoch": 3.58, - "learning_rate": 1.2894847873986218e-05, - "loss": 0.0995, + "learning_rate": 2.290593356528794e-05, + "loss": 0.1347, "step": 76780 }, { "epoch": 3.58, - "learning_rate": 1.2894379072711079e-05, - "loss": 0.1493, + "learning_rate": 2.290546549545192e-05, + "loss": 0.1084, "step": 76785 }, { "epoch": 3.58, - "learning_rate": 1.2893910271435939e-05, - "loss": 0.1684, + "learning_rate": 2.2904997425615903e-05, + "loss": 0.1771, "step": 76790 }, { "epoch": 3.58, - "learning_rate": 1.2893441470160799e-05, - "loss": 0.2884, + "learning_rate": 2.2904529355779882e-05, + "loss": 0.1611, "step": 76795 }, { "epoch": 3.58, - "learning_rate": 1.2892972668885659e-05, - "loss": 0.0547, + "learning_rate": 2.2904061285943862e-05, + "loss": 0.053, "step": 76800 }, { "epoch": 3.58, - "learning_rate": 1.2892503867610522e-05, - "loss": 0.0472, + "learning_rate": 2.2903593216107845e-05, + "loss": 0.0307, "step": 76805 }, { "epoch": 3.58, - "learning_rate": 1.2892035066335382e-05, - "loss": 0.0213, + "learning_rate": 2.2903125146271825e-05, + "loss": 0.0566, "step": 76810 }, { "epoch": 3.58, - "learning_rate": 1.2891566265060242e-05, - "loss": 0.0917, + "learning_rate": 2.2902657076435805e-05, + "loss": 0.107, "step": 76815 }, { "epoch": 3.58, - "learning_rate": 1.2891097463785102e-05, - "loss": 0.0812, + "learning_rate": 2.2902189006599785e-05, + "loss": 0.0571, "step": 76820 }, { "epoch": 3.58, - "learning_rate": 1.2890628662509964e-05, - "loss": 0.1001, + "learning_rate": 2.2901720936763768e-05, + "loss": 0.063, "step": 76825 }, { "epoch": 3.58, - "learning_rate": 1.2890159861234824e-05, - "loss": 0.0974, + "learning_rate": 2.2901252866927748e-05, + "loss": 0.0585, "step": 76830 }, { "epoch": 3.59, - "learning_rate": 1.2889691059959684e-05, - "loss": 0.1771, + "learning_rate": 2.2900784797091728e-05, + "loss": 0.1431, "step": 76835 }, { "epoch": 3.59, - "learning_rate": 1.2889222258684543e-05, - "loss": 0.3194, + "learning_rate": 2.2900316727255704e-05, + "loss": 0.2054, "step": 76840 }, { "epoch": 3.59, - "learning_rate": 1.2888753457409403e-05, - "loss": 0.1879, + "learning_rate": 2.2899848657419687e-05, + "loss": 0.2841, "step": 76845 }, { "epoch": 3.59, - "learning_rate": 1.2888284656134267e-05, - "loss": 0.0425, + "learning_rate": 2.2899380587583667e-05, + "loss": 0.0998, "step": 76850 }, { "epoch": 3.59, - "learning_rate": 1.2887815854859127e-05, - "loss": 0.022, + "learning_rate": 2.2898912517747647e-05, + "loss": 0.0338, "step": 76855 }, { "epoch": 3.59, - "learning_rate": 1.2887347053583987e-05, - "loss": 0.037, + "learning_rate": 2.289844444791163e-05, + "loss": 0.0177, "step": 76860 }, { "epoch": 3.59, - "learning_rate": 1.2886878252308848e-05, - "loss": 0.0621, + "learning_rate": 2.289797637807561e-05, + "loss": 0.0435, "step": 76865 }, { "epoch": 3.59, - "learning_rate": 1.2886409451033708e-05, - "loss": 0.1295, + "learning_rate": 2.289750830823959e-05, + "loss": 0.0564, "step": 76870 }, { "epoch": 3.59, - "learning_rate": 1.2885940649758568e-05, - "loss": 0.0795, + "learning_rate": 2.289704023840357e-05, + "loss": 0.0402, "step": 76875 }, { "epoch": 3.59, - "learning_rate": 1.2885471848483428e-05, - "loss": 0.1052, + "learning_rate": 2.2896572168567553e-05, + "loss": 0.0868, "step": 76880 }, { "epoch": 3.59, - "learning_rate": 1.2885003047208288e-05, - "loss": 0.1687, + "learning_rate": 2.2896104098731533e-05, + "loss": 0.0601, "step": 76885 }, { "epoch": 3.59, - "learning_rate": 1.288453424593315e-05, - "loss": 0.2261, + "learning_rate": 2.2895636028895513e-05, + "loss": 0.2652, "step": 76890 }, { "epoch": 3.59, - "learning_rate": 1.2884065444658011e-05, - "loss": 0.2531, + "learning_rate": 2.2895167959059492e-05, + "loss": 0.2143, "step": 76895 }, { "epoch": 3.59, - "learning_rate": 1.2883596643382871e-05, - "loss": 0.0715, + "learning_rate": 2.2894699889223472e-05, + "loss": 0.1247, "step": 76900 }, { "epoch": 3.59, - "learning_rate": 1.2883127842107733e-05, - "loss": 0.0353, + "learning_rate": 2.2894231819387452e-05, + "loss": 0.0494, "step": 76905 }, { "epoch": 3.59, - "learning_rate": 1.2882659040832593e-05, - "loss": 0.0076, + "learning_rate": 2.2893763749551432e-05, + "loss": 0.0564, "step": 76910 }, { "epoch": 3.59, - "learning_rate": 1.2882190239557453e-05, - "loss": 0.0836, + "learning_rate": 2.289329567971541e-05, + "loss": 0.0446, "step": 76915 }, { "epoch": 3.59, - "learning_rate": 1.2881721438282313e-05, - "loss": 0.0912, + "learning_rate": 2.2892827609879395e-05, + "loss": 0.0488, "step": 76920 }, { "epoch": 3.59, - "learning_rate": 1.2881252637007173e-05, - "loss": 0.1204, + "learning_rate": 2.2892359540043375e-05, + "loss": 0.0634, "step": 76925 }, { "epoch": 3.59, - "learning_rate": 1.2880783835732034e-05, - "loss": 0.1609, + "learning_rate": 2.2891891470207354e-05, + "loss": 0.1471, "step": 76930 }, { "epoch": 3.59, - "learning_rate": 1.2880315034456894e-05, - "loss": 0.0685, + "learning_rate": 2.2891423400371338e-05, + "loss": 0.1803, "step": 76935 }, { "epoch": 3.59, - "learning_rate": 1.2879846233181754e-05, - "loss": 0.0957, + "learning_rate": 2.2890955330535317e-05, + "loss": 0.1917, "step": 76940 }, { "epoch": 3.59, - "learning_rate": 1.2879377431906617e-05, - "loss": 0.3234, + "learning_rate": 2.2890487260699297e-05, + "loss": 0.3945, "step": 76945 }, { "epoch": 3.59, - "learning_rate": 1.2878908630631477e-05, - "loss": 0.0517, + "learning_rate": 2.2890019190863277e-05, + "loss": 0.0597, "step": 76950 }, { "epoch": 3.59, - "learning_rate": 1.2878439829356337e-05, - "loss": 0.0045, + "learning_rate": 2.288955112102726e-05, + "loss": 0.012, "step": 76955 }, { "epoch": 3.59, - "learning_rate": 1.2877971028081197e-05, - "loss": 0.0288, + "learning_rate": 2.288908305119124e-05, + "loss": 0.0276, "step": 76960 }, { "epoch": 3.59, - "learning_rate": 1.2877502226806057e-05, - "loss": 0.0282, + "learning_rate": 2.2888614981355217e-05, + "loss": 0.0758, "step": 76965 }, { "epoch": 3.59, - "learning_rate": 1.2877033425530919e-05, - "loss": 0.0993, + "learning_rate": 2.2888146911519196e-05, + "loss": 0.0597, "step": 76970 }, { "epoch": 3.59, - "learning_rate": 1.2876564624255779e-05, - "loss": 0.0922, + "learning_rate": 2.288767884168318e-05, + "loss": 0.087, "step": 76975 }, { "epoch": 3.59, - "learning_rate": 1.2876095822980639e-05, - "loss": 0.0956, + "learning_rate": 2.288721077184716e-05, + "loss": 0.1382, "step": 76980 }, { "epoch": 3.59, - "learning_rate": 1.2875627021705498e-05, - "loss": 0.17, + "learning_rate": 2.288674270201114e-05, + "loss": 0.1197, "step": 76985 }, { "epoch": 3.59, - "learning_rate": 1.2875158220430362e-05, - "loss": 0.2218, + "learning_rate": 2.2886274632175122e-05, + "loss": 0.1913, "step": 76990 }, { "epoch": 3.59, - "learning_rate": 1.2874689419155222e-05, - "loss": 0.2137, + "learning_rate": 2.2885806562339102e-05, + "loss": 0.278, "step": 76995 }, { "epoch": 3.59, - "learning_rate": 1.2874220617880082e-05, - "loss": 0.0566, + "learning_rate": 2.2885338492503082e-05, + "loss": 0.0846, "step": 77000 }, { "epoch": 3.59, - "learning_rate": 1.2873751816604942e-05, - "loss": 0.052, + "learning_rate": 2.2884870422667062e-05, + "loss": 0.0396, "step": 77005 }, { "epoch": 3.59, - "learning_rate": 1.2873283015329803e-05, - "loss": 0.0441, + "learning_rate": 2.2884402352831045e-05, + "loss": 0.052, "step": 77010 }, { "epoch": 3.59, - "learning_rate": 1.2872814214054663e-05, - "loss": 0.0196, + "learning_rate": 2.2883934282995025e-05, + "loss": 0.0465, "step": 77015 }, { "epoch": 3.59, - "learning_rate": 1.2872345412779523e-05, - "loss": 0.0477, + "learning_rate": 2.2883466213159005e-05, + "loss": 0.1641, "step": 77020 }, { "epoch": 3.59, - "learning_rate": 1.2871876611504383e-05, - "loss": 0.1028, + "learning_rate": 2.2882998143322985e-05, + "loss": 0.1081, "step": 77025 }, { "epoch": 3.59, - "learning_rate": 1.2871407810229245e-05, - "loss": 0.1279, + "learning_rate": 2.2882530073486964e-05, + "loss": 0.0512, "step": 77030 }, { "epoch": 3.59, - "learning_rate": 1.2870939008954106e-05, - "loss": 0.0714, + "learning_rate": 2.2882062003650944e-05, + "loss": 0.0383, "step": 77035 }, { "epoch": 3.59, - "learning_rate": 1.2870470207678966e-05, - "loss": 0.1664, + "learning_rate": 2.2881593933814924e-05, + "loss": 0.1927, "step": 77040 }, { "epoch": 3.6, - "learning_rate": 1.2870001406403826e-05, - "loss": 0.1847, + "learning_rate": 2.2881125863978907e-05, + "loss": 0.2773, "step": 77045 }, { "epoch": 3.6, - "learning_rate": 1.2869532605128688e-05, - "loss": 0.0399, + "learning_rate": 2.2880657794142887e-05, + "loss": 0.0384, "step": 77050 }, { "epoch": 3.6, - "learning_rate": 1.2869063803853548e-05, - "loss": 0.0232, + "learning_rate": 2.2880189724306867e-05, + "loss": 0.0311, "step": 77055 }, { "epoch": 3.6, - "learning_rate": 1.2868595002578408e-05, - "loss": 0.0392, + "learning_rate": 2.2879721654470847e-05, + "loss": 0.0418, "step": 77060 }, { "epoch": 3.6, - "learning_rate": 1.2868126201303268e-05, - "loss": 0.028, + "learning_rate": 2.287925358463483e-05, + "loss": 0.0221, "step": 77065 }, { "epoch": 3.6, - "learning_rate": 1.286765740002813e-05, - "loss": 0.1343, + "learning_rate": 2.287878551479881e-05, + "loss": 0.0916, "step": 77070 }, { "epoch": 3.6, - "learning_rate": 1.286718859875299e-05, - "loss": 0.0538, + "learning_rate": 2.287831744496279e-05, + "loss": 0.1397, "step": 77075 }, { "epoch": 3.6, - "learning_rate": 1.286671979747785e-05, - "loss": 0.1227, + "learning_rate": 2.287784937512677e-05, + "loss": 0.2187, "step": 77080 }, { "epoch": 3.6, - "learning_rate": 1.286625099620271e-05, - "loss": 0.1368, + "learning_rate": 2.2877381305290752e-05, + "loss": 0.2083, "step": 77085 }, { "epoch": 3.6, - "learning_rate": 1.2865782194927572e-05, - "loss": 0.151, + "learning_rate": 2.287691323545473e-05, + "loss": 0.2307, "step": 77090 }, { "epoch": 3.6, - "learning_rate": 1.2865313393652432e-05, - "loss": 0.3466, + "learning_rate": 2.287644516561871e-05, + "loss": 0.2147, "step": 77095 }, { "epoch": 3.6, - "learning_rate": 1.2864844592377292e-05, - "loss": 0.0476, + "learning_rate": 2.287597709578269e-05, + "loss": 0.0973, "step": 77100 }, { "epoch": 3.6, - "learning_rate": 1.2864375791102152e-05, - "loss": 0.0579, + "learning_rate": 2.2875509025946672e-05, + "loss": 0.0833, "step": 77105 }, { "epoch": 3.6, - "learning_rate": 1.2863906989827014e-05, - "loss": 0.0507, + "learning_rate": 2.287504095611065e-05, + "loss": 0.0446, "step": 77110 }, { "epoch": 3.6, - "learning_rate": 1.2863438188551874e-05, - "loss": 0.058, + "learning_rate": 2.287457288627463e-05, + "loss": 0.0726, "step": 77115 }, { "epoch": 3.6, - "learning_rate": 1.2862969387276734e-05, - "loss": 0.0483, + "learning_rate": 2.2874104816438615e-05, + "loss": 0.0651, "step": 77120 }, { "epoch": 3.6, - "learning_rate": 1.2862500586001594e-05, - "loss": 0.0714, + "learning_rate": 2.2873636746602594e-05, + "loss": 0.0505, "step": 77125 }, { "epoch": 3.6, - "learning_rate": 1.2862031784726457e-05, - "loss": 0.133, + "learning_rate": 2.2873168676766574e-05, + "loss": 0.0473, "step": 77130 }, { "epoch": 3.6, - "learning_rate": 1.2861562983451317e-05, - "loss": 0.0765, + "learning_rate": 2.2872700606930554e-05, + "loss": 0.1205, "step": 77135 }, { "epoch": 3.6, - "learning_rate": 1.2861094182176177e-05, - "loss": 0.1949, + "learning_rate": 2.2872232537094537e-05, + "loss": 0.1793, "step": 77140 }, { "epoch": 3.6, - "learning_rate": 1.2860625380901037e-05, - "loss": 0.2952, + "learning_rate": 2.2871764467258517e-05, + "loss": 0.2065, "step": 77145 }, { "epoch": 3.6, - "learning_rate": 1.2860156579625898e-05, - "loss": 0.0776, + "learning_rate": 2.2871296397422497e-05, + "loss": 0.0924, "step": 77150 }, { "epoch": 3.6, - "learning_rate": 1.2859687778350758e-05, - "loss": 0.0083, + "learning_rate": 2.2870828327586473e-05, + "loss": 0.0196, "step": 77155 }, { "epoch": 3.6, - "learning_rate": 1.2859218977075618e-05, - "loss": 0.0133, + "learning_rate": 2.2870360257750457e-05, + "loss": 0.0296, "step": 77160 }, { "epoch": 3.6, - "learning_rate": 1.2858750175800478e-05, - "loss": 0.014, + "learning_rate": 2.2869892187914436e-05, + "loss": 0.0794, "step": 77165 }, { "epoch": 3.6, - "learning_rate": 1.2858281374525338e-05, - "loss": 0.0443, + "learning_rate": 2.2869424118078416e-05, + "loss": 0.0759, "step": 77170 }, { "epoch": 3.6, - "learning_rate": 1.2857812573250201e-05, - "loss": 0.0292, + "learning_rate": 2.28689560482424e-05, + "loss": 0.1252, "step": 77175 }, { "epoch": 3.6, - "learning_rate": 1.2857343771975061e-05, - "loss": 0.0948, + "learning_rate": 2.286848797840638e-05, + "loss": 0.0861, "step": 77180 }, { "epoch": 3.6, - "learning_rate": 1.2856874970699921e-05, - "loss": 0.0503, + "learning_rate": 2.286801990857036e-05, + "loss": 0.1007, "step": 77185 }, { "epoch": 3.6, - "learning_rate": 1.2856406169424783e-05, - "loss": 0.3029, + "learning_rate": 2.286755183873434e-05, + "loss": 0.147, "step": 77190 }, { "epoch": 3.6, - "learning_rate": 1.2855937368149643e-05, - "loss": 0.2604, + "learning_rate": 2.2867083768898322e-05, + "loss": 0.3076, "step": 77195 }, { "epoch": 3.6, - "learning_rate": 1.2855468566874503e-05, - "loss": 0.0711, + "learning_rate": 2.2866615699062302e-05, + "loss": 0.1299, "step": 77200 }, { "epoch": 3.6, - "learning_rate": 1.2854999765599363e-05, - "loss": 0.0367, + "learning_rate": 2.286614762922628e-05, + "loss": 0.0306, "step": 77205 }, { "epoch": 3.6, - "learning_rate": 1.2854530964324223e-05, - "loss": 0.0417, + "learning_rate": 2.286567955939026e-05, + "loss": 0.0764, "step": 77210 }, { "epoch": 3.6, - "learning_rate": 1.2854062163049084e-05, - "loss": 0.0567, + "learning_rate": 2.2865211489554245e-05, + "loss": 0.046, "step": 77215 }, { "epoch": 3.6, - "learning_rate": 1.2853593361773946e-05, - "loss": 0.0547, + "learning_rate": 2.286474341971822e-05, + "loss": 0.099, "step": 77220 }, { "epoch": 3.6, - "learning_rate": 1.2853124560498806e-05, - "loss": 0.0556, + "learning_rate": 2.28642753498822e-05, + "loss": 0.0829, "step": 77225 }, { "epoch": 3.6, - "learning_rate": 1.2852655759223667e-05, - "loss": 0.0986, + "learning_rate": 2.2863807280046184e-05, + "loss": 0.0815, "step": 77230 }, { "epoch": 3.6, - "learning_rate": 1.2852186957948527e-05, - "loss": 0.1588, + "learning_rate": 2.2863339210210164e-05, + "loss": 0.0698, "step": 77235 }, { "epoch": 3.6, - "learning_rate": 1.2851718156673387e-05, - "loss": 0.1353, + "learning_rate": 2.2862871140374144e-05, + "loss": 0.1487, "step": 77240 }, { "epoch": 3.6, - "learning_rate": 1.2851249355398247e-05, - "loss": 0.1648, + "learning_rate": 2.2862403070538124e-05, + "loss": 0.1869, "step": 77245 }, { "epoch": 3.6, - "learning_rate": 1.2850780554123107e-05, - "loss": 0.0773, + "learning_rate": 2.2861935000702107e-05, + "loss": 0.0585, "step": 77250 }, { "epoch": 3.6, - "learning_rate": 1.2850311752847969e-05, - "loss": 0.0213, + "learning_rate": 2.2861466930866087e-05, + "loss": 0.0553, "step": 77255 }, { "epoch": 3.61, - "learning_rate": 1.2849842951572829e-05, - "loss": 0.023, + "learning_rate": 2.2860998861030066e-05, + "loss": 0.0348, "step": 77260 }, { "epoch": 3.61, - "learning_rate": 1.2849374150297689e-05, - "loss": 0.0577, + "learning_rate": 2.2860530791194046e-05, + "loss": 0.0639, "step": 77265 }, { "epoch": 3.61, - "learning_rate": 1.2848905349022552e-05, - "loss": 0.0631, + "learning_rate": 2.286006272135803e-05, + "loss": 0.0364, "step": 77270 }, { "epoch": 3.61, - "learning_rate": 1.2848436547747412e-05, - "loss": 0.0435, + "learning_rate": 2.285959465152201e-05, + "loss": 0.0523, "step": 77275 }, { "epoch": 3.61, - "learning_rate": 1.2847967746472272e-05, - "loss": 0.1016, + "learning_rate": 2.2859126581685986e-05, + "loss": 0.118, "step": 77280 }, { "epoch": 3.61, - "learning_rate": 1.2847498945197132e-05, - "loss": 0.1129, + "learning_rate": 2.285865851184997e-05, + "loss": 0.2266, "step": 77285 }, { "epoch": 3.61, - "learning_rate": 1.2847030143921992e-05, - "loss": 0.1439, + "learning_rate": 2.285819044201395e-05, + "loss": 0.2458, "step": 77290 }, { "epoch": 3.61, - "learning_rate": 1.2846561342646853e-05, - "loss": 0.2883, + "learning_rate": 2.285772237217793e-05, + "loss": 0.1937, "step": 77295 }, { "epoch": 3.61, - "learning_rate": 1.2846092541371713e-05, - "loss": 0.0785, + "learning_rate": 2.285725430234191e-05, + "loss": 0.0734, "step": 77300 }, { "epoch": 3.61, - "learning_rate": 1.2845623740096573e-05, - "loss": 0.0401, + "learning_rate": 2.285678623250589e-05, + "loss": 0.0459, "step": 77305 }, { "epoch": 3.61, - "learning_rate": 1.2845154938821433e-05, - "loss": 0.0234, + "learning_rate": 2.285631816266987e-05, + "loss": 0.0165, "step": 77310 }, { "epoch": 3.61, - "learning_rate": 1.2844686137546296e-05, - "loss": 0.0456, + "learning_rate": 2.285585009283385e-05, + "loss": 0.0499, "step": 77315 }, { "epoch": 3.61, - "learning_rate": 1.2844217336271156e-05, - "loss": 0.0422, + "learning_rate": 2.285538202299783e-05, + "loss": 0.0414, "step": 77320 }, { "epoch": 3.61, - "learning_rate": 1.2843748534996016e-05, - "loss": 0.035, + "learning_rate": 2.2854913953161814e-05, + "loss": 0.0833, "step": 77325 }, { "epoch": 3.61, - "learning_rate": 1.2843279733720876e-05, - "loss": 0.1707, + "learning_rate": 2.2854445883325794e-05, + "loss": 0.1647, "step": 77330 }, { "epoch": 3.61, - "learning_rate": 1.2842810932445738e-05, - "loss": 0.1678, + "learning_rate": 2.2853977813489774e-05, + "loss": 0.1333, "step": 77335 }, { "epoch": 3.61, - "learning_rate": 1.2842342131170598e-05, - "loss": 0.191, + "learning_rate": 2.2853509743653754e-05, + "loss": 0.2404, "step": 77340 }, { "epoch": 3.61, - "learning_rate": 1.2841873329895458e-05, - "loss": 0.1558, + "learning_rate": 2.2853041673817734e-05, + "loss": 0.2577, "step": 77345 }, { "epoch": 3.61, - "learning_rate": 1.2841404528620318e-05, - "loss": 0.0502, + "learning_rate": 2.2852573603981713e-05, + "loss": 0.088, "step": 77350 }, { "epoch": 3.61, - "learning_rate": 1.2840935727345178e-05, - "loss": 0.0446, + "learning_rate": 2.2852105534145693e-05, + "loss": 0.031, "step": 77355 }, { "epoch": 3.61, - "learning_rate": 1.2840466926070041e-05, - "loss": 0.0883, + "learning_rate": 2.2851637464309676e-05, + "loss": 0.0437, "step": 77360 }, { "epoch": 3.61, - "learning_rate": 1.2839998124794901e-05, - "loss": 0.0766, + "learning_rate": 2.2851169394473656e-05, + "loss": 0.0528, "step": 77365 }, { "epoch": 3.61, - "learning_rate": 1.283952932351976e-05, - "loss": 0.0502, + "learning_rate": 2.2850701324637636e-05, + "loss": 0.0338, "step": 77370 }, { "epoch": 3.61, - "learning_rate": 1.2839060522244622e-05, - "loss": 0.0774, + "learning_rate": 2.2850233254801616e-05, + "loss": 0.1397, "step": 77375 }, { "epoch": 3.61, - "learning_rate": 1.2838591720969482e-05, - "loss": 0.1192, + "learning_rate": 2.28497651849656e-05, + "loss": 0.0826, "step": 77380 }, { "epoch": 3.61, - "learning_rate": 1.2838122919694342e-05, - "loss": 0.0739, + "learning_rate": 2.284929711512958e-05, + "loss": 0.0876, "step": 77385 }, { "epoch": 3.61, - "learning_rate": 1.2837654118419202e-05, - "loss": 0.2167, + "learning_rate": 2.284882904529356e-05, + "loss": 0.0956, "step": 77390 }, { "epoch": 3.61, - "learning_rate": 1.2837185317144062e-05, - "loss": 0.2906, + "learning_rate": 2.284836097545754e-05, + "loss": 0.3678, "step": 77395 }, { "epoch": 3.61, - "learning_rate": 1.2836716515868924e-05, - "loss": 0.0427, + "learning_rate": 2.284789290562152e-05, + "loss": 0.0386, "step": 77400 }, { "epoch": 3.61, - "learning_rate": 1.2836247714593785e-05, - "loss": 0.0139, + "learning_rate": 2.2847424835785498e-05, + "loss": 0.0349, "step": 77405 }, { "epoch": 3.61, - "learning_rate": 1.2835778913318645e-05, - "loss": 0.0063, + "learning_rate": 2.2846956765949478e-05, + "loss": 0.0141, "step": 77410 }, { "epoch": 3.61, - "learning_rate": 1.2835310112043507e-05, - "loss": 0.0543, + "learning_rate": 2.284648869611346e-05, + "loss": 0.0441, "step": 77415 }, { "epoch": 3.61, - "learning_rate": 1.2834841310768367e-05, - "loss": 0.0994, + "learning_rate": 2.284602062627744e-05, + "loss": 0.0485, "step": 77420 }, { "epoch": 3.61, - "learning_rate": 1.2834372509493227e-05, - "loss": 0.1773, + "learning_rate": 2.284555255644142e-05, + "loss": 0.0688, "step": 77425 }, { "epoch": 3.61, - "learning_rate": 1.2833903708218087e-05, - "loss": 0.1949, + "learning_rate": 2.28450844866054e-05, + "loss": 0.0419, "step": 77430 }, { "epoch": 3.61, - "learning_rate": 1.2833434906942947e-05, - "loss": 0.1184, + "learning_rate": 2.2844616416769384e-05, + "loss": 0.1234, "step": 77435 }, { "epoch": 3.61, - "learning_rate": 1.2832966105667808e-05, - "loss": 0.2374, + "learning_rate": 2.2844148346933364e-05, + "loss": 0.2272, "step": 77440 }, { "epoch": 3.61, - "learning_rate": 1.2832497304392668e-05, - "loss": 0.2472, + "learning_rate": 2.2843680277097343e-05, + "loss": 0.2801, "step": 77445 }, { "epoch": 3.61, - "learning_rate": 1.2832028503117528e-05, - "loss": 0.0869, + "learning_rate": 2.2843212207261323e-05, + "loss": 0.0584, "step": 77450 }, { "epoch": 3.61, - "learning_rate": 1.2831559701842392e-05, - "loss": 0.018, + "learning_rate": 2.2842744137425306e-05, + "loss": 0.0521, "step": 77455 }, { "epoch": 3.61, - "learning_rate": 1.2831090900567251e-05, - "loss": 0.028, + "learning_rate": 2.2842276067589286e-05, + "loss": 0.0585, "step": 77460 }, { "epoch": 3.61, - "learning_rate": 1.2830622099292111e-05, - "loss": 0.0759, + "learning_rate": 2.2841807997753266e-05, + "loss": 0.0554, "step": 77465 }, { "epoch": 3.61, - "learning_rate": 1.2830153298016971e-05, - "loss": 0.0593, + "learning_rate": 2.2841339927917246e-05, + "loss": 0.0312, "step": 77470 }, { "epoch": 3.62, - "learning_rate": 1.2829684496741833e-05, - "loss": 0.1196, + "learning_rate": 2.2840871858081226e-05, + "loss": 0.0697, "step": 77475 }, { "epoch": 3.62, - "learning_rate": 1.2829215695466693e-05, - "loss": 0.0831, + "learning_rate": 2.2840403788245206e-05, + "loss": 0.0863, "step": 77480 }, { "epoch": 3.62, - "learning_rate": 1.2828746894191553e-05, - "loss": 0.2338, + "learning_rate": 2.2839935718409185e-05, + "loss": 0.1363, "step": 77485 }, { "epoch": 3.62, - "learning_rate": 1.2828278092916413e-05, - "loss": 0.2097, + "learning_rate": 2.283946764857317e-05, + "loss": 0.0614, "step": 77490 }, { "epoch": 3.62, - "learning_rate": 1.2827809291641273e-05, - "loss": 0.2988, + "learning_rate": 2.283899957873715e-05, + "loss": 0.2338, "step": 77495 }, { "epoch": 3.62, - "learning_rate": 1.2827340490366136e-05, - "loss": 0.0616, + "learning_rate": 2.2838531508901128e-05, + "loss": 0.0994, "step": 77500 }, { "epoch": 3.62, - "learning_rate": 1.2826871689090996e-05, - "loss": 0.0468, + "learning_rate": 2.2838063439065108e-05, + "loss": 0.0125, "step": 77505 }, { "epoch": 3.62, - "learning_rate": 1.2826402887815856e-05, - "loss": 0.0371, + "learning_rate": 2.283759536922909e-05, + "loss": 0.0368, "step": 77510 }, { "epoch": 3.62, - "learning_rate": 1.2825934086540718e-05, - "loss": 0.0759, + "learning_rate": 2.283712729939307e-05, + "loss": 0.0695, "step": 77515 }, { "epoch": 3.62, - "learning_rate": 1.2825465285265577e-05, - "loss": 0.0393, + "learning_rate": 2.283665922955705e-05, + "loss": 0.0643, "step": 77520 }, { "epoch": 3.62, - "learning_rate": 1.2824996483990437e-05, - "loss": 0.0323, + "learning_rate": 2.283619115972103e-05, + "loss": 0.0632, "step": 77525 }, { "epoch": 3.62, - "learning_rate": 1.2824527682715297e-05, - "loss": 0.0613, + "learning_rate": 2.2835723089885014e-05, + "loss": 0.0966, "step": 77530 }, { "epoch": 3.62, - "learning_rate": 1.2824058881440157e-05, - "loss": 0.182, + "learning_rate": 2.283525502004899e-05, + "loss": 0.1334, "step": 77535 }, { "epoch": 3.62, - "learning_rate": 1.2823590080165019e-05, - "loss": 0.1234, + "learning_rate": 2.283478695021297e-05, + "loss": 0.1889, "step": 77540 }, { "epoch": 3.62, - "learning_rate": 1.282312127888988e-05, - "loss": 0.2522, + "learning_rate": 2.2834318880376953e-05, + "loss": 0.2676, "step": 77545 }, { "epoch": 3.62, - "learning_rate": 1.282265247761474e-05, - "loss": 0.0885, + "learning_rate": 2.2833850810540933e-05, + "loss": 0.092, "step": 77550 }, { "epoch": 3.62, - "learning_rate": 1.2822183676339602e-05, - "loss": 0.0178, + "learning_rate": 2.2833382740704913e-05, + "loss": 0.0046, "step": 77555 }, { "epoch": 3.62, - "learning_rate": 1.2821714875064462e-05, - "loss": 0.0603, + "learning_rate": 2.2832914670868893e-05, + "loss": 0.0203, "step": 77560 }, { "epoch": 3.62, - "learning_rate": 1.2821246073789322e-05, - "loss": 0.0397, + "learning_rate": 2.2832446601032876e-05, + "loss": 0.0244, "step": 77565 }, { "epoch": 3.62, - "learning_rate": 1.2820777272514182e-05, - "loss": 0.0488, + "learning_rate": 2.2831978531196856e-05, + "loss": 0.0689, "step": 77570 }, { "epoch": 3.62, - "learning_rate": 1.2820308471239042e-05, - "loss": 0.0395, + "learning_rate": 2.2831510461360836e-05, + "loss": 0.1386, "step": 77575 }, { "epoch": 3.62, - "learning_rate": 1.2819839669963903e-05, - "loss": 0.0458, + "learning_rate": 2.2831042391524815e-05, + "loss": 0.1724, "step": 77580 }, { "epoch": 3.62, - "learning_rate": 1.2819370868688763e-05, - "loss": 0.1616, + "learning_rate": 2.28305743216888e-05, + "loss": 0.138, "step": 77585 }, { "epoch": 3.62, - "learning_rate": 1.2818902067413623e-05, - "loss": 0.1039, + "learning_rate": 2.283010625185278e-05, + "loss": 0.1506, "step": 77590 }, { "epoch": 3.62, - "learning_rate": 1.2818433266138487e-05, - "loss": 0.1839, + "learning_rate": 2.2829638182016755e-05, + "loss": 0.2491, "step": 77595 }, { "epoch": 3.62, - "learning_rate": 1.2817964464863347e-05, - "loss": 0.0934, + "learning_rate": 2.2829170112180738e-05, + "loss": 0.0653, "step": 77600 }, { "epoch": 3.62, - "learning_rate": 1.2817495663588207e-05, - "loss": 0.0272, + "learning_rate": 2.2828702042344718e-05, + "loss": 0.0102, "step": 77605 }, { "epoch": 3.62, - "learning_rate": 1.2817026862313066e-05, - "loss": 0.042, + "learning_rate": 2.2828233972508698e-05, + "loss": 0.0228, "step": 77610 }, { "epoch": 3.62, - "learning_rate": 1.2816558061037926e-05, - "loss": 0.0479, + "learning_rate": 2.2827765902672678e-05, + "loss": 0.0794, "step": 77615 }, { "epoch": 3.62, - "learning_rate": 1.2816089259762788e-05, - "loss": 0.0599, + "learning_rate": 2.282729783283666e-05, + "loss": 0.0856, "step": 77620 }, { "epoch": 3.62, - "learning_rate": 1.2815620458487648e-05, - "loss": 0.0756, + "learning_rate": 2.282682976300064e-05, + "loss": 0.1105, "step": 77625 }, { "epoch": 3.62, - "learning_rate": 1.2815151657212508e-05, - "loss": 0.1672, + "learning_rate": 2.282636169316462e-05, + "loss": 0.1489, "step": 77630 }, { "epoch": 3.62, - "learning_rate": 1.2814682855937368e-05, - "loss": 0.0426, + "learning_rate": 2.28258936233286e-05, + "loss": 0.1486, "step": 77635 }, { "epoch": 3.62, - "learning_rate": 1.2814214054662231e-05, - "loss": 0.1304, + "learning_rate": 2.2825425553492583e-05, + "loss": 0.1359, "step": 77640 }, { "epoch": 3.62, - "learning_rate": 1.2813745253387091e-05, - "loss": 0.2211, + "learning_rate": 2.2824957483656563e-05, + "loss": 0.2225, "step": 77645 }, { "epoch": 3.62, - "learning_rate": 1.2813276452111951e-05, - "loss": 0.0842, + "learning_rate": 2.2824489413820543e-05, + "loss": 0.0609, "step": 77650 }, { "epoch": 3.62, - "learning_rate": 1.2812807650836811e-05, - "loss": 0.041, + "learning_rate": 2.2824021343984526e-05, + "loss": 0.0223, "step": 77655 }, { "epoch": 3.62, - "learning_rate": 1.2812338849561673e-05, - "loss": 0.0409, + "learning_rate": 2.2823553274148503e-05, + "loss": 0.0481, "step": 77660 }, { "epoch": 3.62, - "learning_rate": 1.2811870048286532e-05, - "loss": 0.0172, + "learning_rate": 2.2823085204312482e-05, + "loss": 0.0383, "step": 77665 }, { "epoch": 3.62, - "learning_rate": 1.2811401247011392e-05, - "loss": 0.0704, + "learning_rate": 2.2822617134476462e-05, + "loss": 0.0664, "step": 77670 }, { "epoch": 3.62, - "learning_rate": 1.2810932445736252e-05, - "loss": 0.1615, + "learning_rate": 2.2822149064640446e-05, + "loss": 0.0565, "step": 77675 }, { "epoch": 3.62, - "learning_rate": 1.2810463644461112e-05, - "loss": 0.1176, + "learning_rate": 2.2821680994804425e-05, + "loss": 0.0847, "step": 77680 }, { "epoch": 3.62, - "learning_rate": 1.2809994843185976e-05, - "loss": 0.158, + "learning_rate": 2.2821212924968405e-05, + "loss": 0.1482, "step": 77685 }, { "epoch": 3.63, - "learning_rate": 1.2809526041910836e-05, - "loss": 0.2321, + "learning_rate": 2.2820744855132385e-05, + "loss": 0.2332, "step": 77690 }, { "epoch": 3.63, - "learning_rate": 1.2809057240635695e-05, - "loss": 0.2896, + "learning_rate": 2.2820276785296368e-05, + "loss": 0.2595, "step": 77695 }, { "epoch": 3.63, - "learning_rate": 1.2808588439360557e-05, - "loss": 0.0511, + "learning_rate": 2.2819808715460348e-05, + "loss": 0.0204, "step": 77700 }, { "epoch": 3.63, - "learning_rate": 1.2808119638085417e-05, - "loss": 0.0223, + "learning_rate": 2.2819340645624328e-05, + "loss": 0.0176, "step": 77705 }, { "epoch": 3.63, - "learning_rate": 1.2807650836810277e-05, - "loss": 0.0048, + "learning_rate": 2.2818872575788308e-05, + "loss": 0.0269, "step": 77710 }, { "epoch": 3.63, - "learning_rate": 1.2807182035535137e-05, - "loss": 0.0474, + "learning_rate": 2.281840450595229e-05, + "loss": 0.1064, "step": 77715 }, { "epoch": 3.63, - "learning_rate": 1.2806713234259997e-05, - "loss": 0.1519, + "learning_rate": 2.281793643611627e-05, + "loss": 0.144, "step": 77720 }, { "epoch": 3.63, - "learning_rate": 1.2806244432984858e-05, - "loss": 0.0975, + "learning_rate": 2.2817468366280247e-05, + "loss": 0.0875, "step": 77725 }, { "epoch": 3.63, - "learning_rate": 1.280577563170972e-05, - "loss": 0.0945, + "learning_rate": 2.281700029644423e-05, + "loss": 0.1103, "step": 77730 }, { "epoch": 3.63, - "learning_rate": 1.280530683043458e-05, - "loss": 0.1527, + "learning_rate": 2.281653222660821e-05, + "loss": 0.1732, "step": 77735 }, { "epoch": 3.63, - "learning_rate": 1.2804838029159442e-05, - "loss": 0.27, + "learning_rate": 2.281606415677219e-05, + "loss": 0.1617, "step": 77740 }, { "epoch": 3.63, - "learning_rate": 1.2804369227884302e-05, - "loss": 0.3309, + "learning_rate": 2.281559608693617e-05, + "loss": 0.1958, "step": 77745 }, { "epoch": 3.63, - "learning_rate": 1.2803900426609162e-05, - "loss": 0.045, + "learning_rate": 2.2815128017100153e-05, + "loss": 0.0687, "step": 77750 }, { "epoch": 3.63, - "learning_rate": 1.2803431625334021e-05, - "loss": 0.0078, + "learning_rate": 2.2814659947264133e-05, + "loss": 0.0337, "step": 77755 }, { "epoch": 3.63, - "learning_rate": 1.2802962824058881e-05, - "loss": 0.0441, + "learning_rate": 2.2814191877428113e-05, + "loss": 0.0934, "step": 77760 }, { "epoch": 3.63, - "learning_rate": 1.2802494022783743e-05, - "loss": 0.0816, + "learning_rate": 2.2813723807592092e-05, + "loss": 0.0847, "step": 77765 }, { "epoch": 3.63, - "learning_rate": 1.2802025221508603e-05, - "loss": 0.0397, + "learning_rate": 2.2813255737756076e-05, + "loss": 0.0848, "step": 77770 }, { "epoch": 3.63, - "learning_rate": 1.2801556420233463e-05, - "loss": 0.1505, + "learning_rate": 2.2812787667920055e-05, + "loss": 0.1022, "step": 77775 }, { "epoch": 3.63, - "learning_rate": 1.2801087618958326e-05, - "loss": 0.073, + "learning_rate": 2.2812319598084035e-05, + "loss": 0.1506, "step": 77780 }, { "epoch": 3.63, - "learning_rate": 1.2800618817683186e-05, - "loss": 0.1564, + "learning_rate": 2.2811851528248015e-05, + "loss": 0.1482, "step": 77785 }, { "epoch": 3.63, - "learning_rate": 1.2800150016408046e-05, - "loss": 0.176, + "learning_rate": 2.2811383458411995e-05, + "loss": 0.1981, "step": 77790 }, { "epoch": 3.63, - "learning_rate": 1.2799681215132906e-05, - "loss": 0.2646, + "learning_rate": 2.2810915388575975e-05, + "loss": 0.2364, "step": 77795 }, { "epoch": 3.63, - "learning_rate": 1.2799212413857766e-05, - "loss": 0.0586, + "learning_rate": 2.2810447318739955e-05, + "loss": 0.0469, "step": 77800 }, { "epoch": 3.63, - "learning_rate": 1.2798743612582628e-05, - "loss": 0.0285, + "learning_rate": 2.2809979248903938e-05, + "loss": 0.0289, "step": 77805 }, { "epoch": 3.63, - "learning_rate": 1.2798274811307488e-05, - "loss": 0.0784, + "learning_rate": 2.2809511179067918e-05, + "loss": 0.0342, "step": 77810 }, { "epoch": 3.63, - "learning_rate": 1.2797806010032347e-05, - "loss": 0.0579, + "learning_rate": 2.2809043109231897e-05, + "loss": 0.0505, "step": 77815 }, { "epoch": 3.63, - "learning_rate": 1.2797337208757207e-05, - "loss": 0.057, + "learning_rate": 2.2808575039395877e-05, + "loss": 0.0458, "step": 77820 }, { "epoch": 3.63, - "learning_rate": 1.279686840748207e-05, - "loss": 0.0754, + "learning_rate": 2.280810696955986e-05, + "loss": 0.1324, "step": 77825 }, { "epoch": 3.63, - "learning_rate": 1.279639960620693e-05, - "loss": 0.0948, + "learning_rate": 2.280763889972384e-05, + "loss": 0.1034, "step": 77830 }, { "epoch": 3.63, - "learning_rate": 1.279593080493179e-05, - "loss": 0.1263, + "learning_rate": 2.280717082988782e-05, + "loss": 0.0796, "step": 77835 }, { "epoch": 3.63, - "learning_rate": 1.279546200365665e-05, - "loss": 0.1962, + "learning_rate": 2.2806702760051803e-05, + "loss": 0.2047, "step": 77840 }, { "epoch": 3.63, - "learning_rate": 1.2794993202381512e-05, - "loss": 0.3922, + "learning_rate": 2.2806234690215783e-05, + "loss": 0.2449, "step": 77845 }, { "epoch": 3.63, - "learning_rate": 1.2794524401106372e-05, - "loss": 0.0564, + "learning_rate": 2.280576662037976e-05, + "loss": 0.0833, "step": 77850 }, { "epoch": 3.63, - "learning_rate": 1.2794055599831232e-05, - "loss": 0.0262, + "learning_rate": 2.280529855054374e-05, + "loss": 0.0289, "step": 77855 }, { "epoch": 3.63, - "learning_rate": 1.2793586798556092e-05, - "loss": 0.0637, + "learning_rate": 2.2804830480707722e-05, + "loss": 0.036, "step": 77860 }, { "epoch": 3.63, - "learning_rate": 1.2793117997280954e-05, - "loss": 0.0372, + "learning_rate": 2.2804362410871702e-05, + "loss": 0.0689, "step": 77865 }, { "epoch": 3.63, - "learning_rate": 1.2792649196005815e-05, - "loss": 0.0452, + "learning_rate": 2.2803894341035682e-05, + "loss": 0.0421, "step": 77870 }, { "epoch": 3.63, - "learning_rate": 1.2792180394730675e-05, - "loss": 0.0574, + "learning_rate": 2.2803426271199662e-05, + "loss": 0.0849, "step": 77875 }, { "epoch": 3.63, - "learning_rate": 1.2791711593455535e-05, - "loss": 0.0336, + "learning_rate": 2.2802958201363645e-05, + "loss": 0.1115, "step": 77880 }, { "epoch": 3.63, - "learning_rate": 1.2791242792180397e-05, - "loss": 0.2095, + "learning_rate": 2.2802490131527625e-05, + "loss": 0.1498, "step": 77885 }, { "epoch": 3.63, - "learning_rate": 1.2790773990905257e-05, - "loss": 0.2141, + "learning_rate": 2.2802022061691605e-05, + "loss": 0.1997, "step": 77890 }, { "epoch": 3.63, - "learning_rate": 1.2790305189630117e-05, - "loss": 0.2808, + "learning_rate": 2.2801553991855588e-05, + "loss": 0.2139, "step": 77895 }, { "epoch": 3.63, - "learning_rate": 1.2789836388354976e-05, - "loss": 0.0335, + "learning_rate": 2.2801085922019568e-05, + "loss": 0.0072, "step": 77900 }, { "epoch": 3.64, - "learning_rate": 1.2789367587079838e-05, - "loss": 0.0293, + "learning_rate": 2.2800617852183548e-05, + "loss": 0.0554, "step": 77905 }, { "epoch": 3.64, - "learning_rate": 1.2788898785804698e-05, - "loss": 0.0677, + "learning_rate": 2.2800149782347527e-05, + "loss": 0.0537, "step": 77910 }, { "epoch": 3.64, - "learning_rate": 1.2788429984529558e-05, - "loss": 0.0894, + "learning_rate": 2.2799681712511507e-05, + "loss": 0.0565, "step": 77915 }, { "epoch": 3.64, - "learning_rate": 1.278796118325442e-05, - "loss": 0.1004, + "learning_rate": 2.2799213642675487e-05, + "loss": 0.0793, "step": 77920 }, { "epoch": 3.64, - "learning_rate": 1.2787492381979281e-05, - "loss": 0.1227, + "learning_rate": 2.2798745572839467e-05, + "loss": 0.0993, "step": 77925 }, { "epoch": 3.64, - "learning_rate": 1.2787023580704141e-05, - "loss": 0.077, + "learning_rate": 2.2798277503003447e-05, + "loss": 0.0928, "step": 77930 }, { "epoch": 3.64, - "learning_rate": 1.2786554779429001e-05, - "loss": 0.1198, + "learning_rate": 2.279780943316743e-05, + "loss": 0.2201, "step": 77935 }, { "epoch": 3.64, - "learning_rate": 1.2786085978153861e-05, - "loss": 0.166, + "learning_rate": 2.279734136333141e-05, + "loss": 0.1546, "step": 77940 }, { "epoch": 3.64, - "learning_rate": 1.2785617176878723e-05, - "loss": 0.2562, + "learning_rate": 2.279687329349539e-05, + "loss": 0.4255, "step": 77945 }, { "epoch": 3.64, - "learning_rate": 1.2785148375603583e-05, - "loss": 0.0703, + "learning_rate": 2.279640522365937e-05, + "loss": 0.0771, "step": 77950 }, { "epoch": 3.64, - "learning_rate": 1.2784679574328443e-05, - "loss": 0.0304, + "learning_rate": 2.2795937153823353e-05, + "loss": 0.0034, "step": 77955 }, { "epoch": 3.64, - "learning_rate": 1.2784210773053302e-05, - "loss": 0.0579, + "learning_rate": 2.2795469083987332e-05, + "loss": 0.0493, "step": 77960 }, { "epoch": 3.64, - "learning_rate": 1.2783741971778166e-05, - "loss": 0.0867, + "learning_rate": 2.2795001014151312e-05, + "loss": 0.0712, "step": 77965 }, { "epoch": 3.64, - "learning_rate": 1.2783273170503026e-05, - "loss": 0.0752, + "learning_rate": 2.2794532944315295e-05, + "loss": 0.0538, "step": 77970 }, { "epoch": 3.64, - "learning_rate": 1.2782804369227886e-05, - "loss": 0.089, + "learning_rate": 2.2794064874479272e-05, + "loss": 0.0484, "step": 77975 }, { "epoch": 3.64, - "learning_rate": 1.2782335567952746e-05, - "loss": 0.1287, + "learning_rate": 2.279359680464325e-05, + "loss": 0.0954, "step": 77980 }, { "epoch": 3.64, - "learning_rate": 1.2781866766677607e-05, - "loss": 0.1227, + "learning_rate": 2.279312873480723e-05, + "loss": 0.0521, "step": 77985 }, { "epoch": 3.64, - "learning_rate": 1.2781397965402467e-05, - "loss": 0.2063, + "learning_rate": 2.2792660664971215e-05, + "loss": 0.1643, "step": 77990 }, { "epoch": 3.64, - "learning_rate": 1.2780929164127327e-05, - "loss": 0.2305, + "learning_rate": 2.2792192595135195e-05, + "loss": 0.2737, "step": 77995 }, { "epoch": 3.64, - "learning_rate": 1.2780460362852187e-05, - "loss": 0.0413, + "learning_rate": 2.2791724525299174e-05, + "loss": 0.0151, "step": 78000 }, { "epoch": 3.64, - "learning_rate": 1.2779991561577047e-05, - "loss": 0.0298, + "learning_rate": 2.2791256455463154e-05, + "loss": 0.0712, "step": 78005 }, { "epoch": 3.64, - "learning_rate": 1.277952276030191e-05, - "loss": 0.0333, + "learning_rate": 2.2790788385627137e-05, + "loss": 0.01, "step": 78010 }, { "epoch": 3.64, - "learning_rate": 1.277905395902677e-05, - "loss": 0.0585, + "learning_rate": 2.2790320315791117e-05, + "loss": 0.0224, "step": 78015 }, { "epoch": 3.64, - "learning_rate": 1.277858515775163e-05, - "loss": 0.1186, + "learning_rate": 2.2789852245955097e-05, + "loss": 0.0429, "step": 78020 }, { "epoch": 3.64, - "learning_rate": 1.2778116356476492e-05, - "loss": 0.1, + "learning_rate": 2.278938417611908e-05, + "loss": 0.1012, "step": 78025 }, { "epoch": 3.64, - "learning_rate": 1.2777647555201352e-05, - "loss": 0.1195, + "learning_rate": 2.278891610628306e-05, + "loss": 0.115, "step": 78030 }, { "epoch": 3.64, - "learning_rate": 1.2777178753926212e-05, - "loss": 0.1112, + "learning_rate": 2.278844803644704e-05, + "loss": 0.0922, "step": 78035 }, { "epoch": 3.64, - "learning_rate": 1.2776709952651072e-05, - "loss": 0.2569, + "learning_rate": 2.2787979966611016e-05, + "loss": 0.2033, "step": 78040 }, { "epoch": 3.64, - "learning_rate": 1.2776241151375931e-05, - "loss": 0.2934, + "learning_rate": 2.2787511896775e-05, + "loss": 0.1685, "step": 78045 }, { "epoch": 3.64, - "learning_rate": 1.2775772350100793e-05, - "loss": 0.1156, + "learning_rate": 2.278704382693898e-05, + "loss": 0.0497, "step": 78050 }, { "epoch": 3.64, - "learning_rate": 1.2775303548825655e-05, - "loss": 0.0148, + "learning_rate": 2.278657575710296e-05, + "loss": 0.0431, "step": 78055 }, { "epoch": 3.64, - "learning_rate": 1.2774834747550515e-05, - "loss": 0.0416, + "learning_rate": 2.278610768726694e-05, + "loss": 0.0583, "step": 78060 }, { "epoch": 3.64, - "learning_rate": 1.2774365946275376e-05, - "loss": 0.0416, + "learning_rate": 2.2785639617430922e-05, + "loss": 0.0869, "step": 78065 }, { "epoch": 3.64, - "learning_rate": 1.2773897145000236e-05, - "loss": 0.1044, + "learning_rate": 2.2785171547594902e-05, + "loss": 0.0701, "step": 78070 }, { "epoch": 3.64, - "learning_rate": 1.2773428343725096e-05, - "loss": 0.041, + "learning_rate": 2.2784703477758882e-05, + "loss": 0.0754, "step": 78075 }, { "epoch": 3.64, - "learning_rate": 1.2772959542449956e-05, - "loss": 0.1944, + "learning_rate": 2.2784235407922865e-05, + "loss": 0.0703, "step": 78080 }, { "epoch": 3.64, - "learning_rate": 1.2772490741174816e-05, - "loss": 0.1567, + "learning_rate": 2.2783767338086845e-05, + "loss": 0.1249, "step": 78085 }, { "epoch": 3.64, - "learning_rate": 1.2772021939899678e-05, - "loss": 0.1879, + "learning_rate": 2.2783299268250825e-05, + "loss": 0.1624, "step": 78090 }, { "epoch": 3.64, - "learning_rate": 1.2771553138624538e-05, - "loss": 0.2258, + "learning_rate": 2.2782831198414804e-05, + "loss": 0.2986, "step": 78095 }, { "epoch": 3.64, - "learning_rate": 1.2771084337349398e-05, - "loss": 0.067, + "learning_rate": 2.2782363128578784e-05, + "loss": 0.0434, "step": 78100 }, { "epoch": 3.64, - "learning_rate": 1.2770615536074261e-05, - "loss": 0.0147, + "learning_rate": 2.2781895058742764e-05, + "loss": 0.0179, "step": 78105 }, { "epoch": 3.64, - "learning_rate": 1.277014673479912e-05, - "loss": 0.0195, + "learning_rate": 2.2781426988906744e-05, + "loss": 0.0966, "step": 78110 }, { "epoch": 3.64, - "learning_rate": 1.276967793352398e-05, - "loss": 0.0328, + "learning_rate": 2.2780958919070724e-05, + "loss": 0.0463, "step": 78115 }, { "epoch": 3.65, - "learning_rate": 1.276920913224884e-05, - "loss": 0.0619, + "learning_rate": 2.2780490849234707e-05, + "loss": 0.0872, "step": 78120 }, { "epoch": 3.65, - "learning_rate": 1.27687403309737e-05, - "loss": 0.0703, + "learning_rate": 2.2780022779398687e-05, + "loss": 0.0567, "step": 78125 }, { "epoch": 3.65, - "learning_rate": 1.2768271529698562e-05, - "loss": 0.1786, + "learning_rate": 2.2779554709562667e-05, + "loss": 0.1094, "step": 78130 }, { "epoch": 3.65, - "learning_rate": 1.2767802728423422e-05, - "loss": 0.0753, + "learning_rate": 2.2779086639726646e-05, + "loss": 0.1237, "step": 78135 }, { "epoch": 3.65, - "learning_rate": 1.2767333927148282e-05, - "loss": 0.1554, + "learning_rate": 2.277861856989063e-05, + "loss": 0.2986, "step": 78140 }, { "epoch": 3.65, - "learning_rate": 1.2766865125873142e-05, - "loss": 0.2485, + "learning_rate": 2.277815050005461e-05, + "loss": 0.2538, "step": 78145 }, { "epoch": 3.65, - "learning_rate": 1.2766396324598005e-05, - "loss": 0.0698, + "learning_rate": 2.277768243021859e-05, + "loss": 0.0622, "step": 78150 }, { "epoch": 3.65, - "learning_rate": 1.2765927523322865e-05, - "loss": 0.0372, + "learning_rate": 2.2777214360382572e-05, + "loss": 0.0259, "step": 78155 }, { "epoch": 3.65, - "learning_rate": 1.2765458722047725e-05, - "loss": 0.0206, + "learning_rate": 2.2776746290546552e-05, + "loss": 0.0195, "step": 78160 }, { "epoch": 3.65, - "learning_rate": 1.2764989920772585e-05, - "loss": 0.082, + "learning_rate": 2.277627822071053e-05, + "loss": 0.0116, "step": 78165 }, { "epoch": 3.65, - "learning_rate": 1.2764521119497447e-05, - "loss": 0.0817, + "learning_rate": 2.277581015087451e-05, + "loss": 0.0269, "step": 78170 }, { "epoch": 3.65, - "learning_rate": 1.2764052318222307e-05, - "loss": 0.1483, + "learning_rate": 2.277534208103849e-05, + "loss": 0.0656, "step": 78175 }, { "epoch": 3.65, - "learning_rate": 1.2763583516947167e-05, - "loss": 0.1579, + "learning_rate": 2.277487401120247e-05, + "loss": 0.1205, "step": 78180 }, { "epoch": 3.65, - "learning_rate": 1.2763114715672027e-05, - "loss": 0.1345, + "learning_rate": 2.277440594136645e-05, + "loss": 0.097, "step": 78185 }, { "epoch": 3.65, - "learning_rate": 1.2762645914396887e-05, - "loss": 0.1934, + "learning_rate": 2.277393787153043e-05, + "loss": 0.1085, "step": 78190 }, { "epoch": 3.65, - "learning_rate": 1.276217711312175e-05, - "loss": 0.2748, + "learning_rate": 2.2773469801694414e-05, + "loss": 0.2845, "step": 78195 }, { "epoch": 3.65, - "learning_rate": 1.276170831184661e-05, - "loss": 0.0228, + "learning_rate": 2.2773001731858394e-05, + "loss": 0.084, "step": 78200 }, { "epoch": 3.65, - "learning_rate": 1.276123951057147e-05, - "loss": 0.0207, + "learning_rate": 2.2772533662022374e-05, + "loss": 0.0258, "step": 78205 }, { "epoch": 3.65, - "learning_rate": 1.2760770709296331e-05, - "loss": 0.0314, + "learning_rate": 2.2772065592186357e-05, + "loss": 0.0121, "step": 78210 }, { "epoch": 3.65, - "learning_rate": 1.2760301908021191e-05, - "loss": 0.0844, + "learning_rate": 2.2771597522350337e-05, + "loss": 0.0375, "step": 78215 }, { "epoch": 3.65, - "learning_rate": 1.2759833106746051e-05, - "loss": 0.1211, + "learning_rate": 2.2771129452514317e-05, + "loss": 0.0756, "step": 78220 }, { "epoch": 3.65, - "learning_rate": 1.2759364305470911e-05, - "loss": 0.0499, + "learning_rate": 2.2770661382678297e-05, + "loss": 0.0604, "step": 78225 }, { "epoch": 3.65, - "learning_rate": 1.2758895504195771e-05, - "loss": 0.127, + "learning_rate": 2.2770193312842276e-05, + "loss": 0.0673, "step": 78230 }, { "epoch": 3.65, - "learning_rate": 1.2758426702920633e-05, - "loss": 0.1436, + "learning_rate": 2.2769725243006256e-05, + "loss": 0.1226, "step": 78235 }, { "epoch": 3.65, - "learning_rate": 1.2757957901645493e-05, - "loss": 0.3458, + "learning_rate": 2.2769257173170236e-05, + "loss": 0.3088, "step": 78240 }, { "epoch": 3.65, - "learning_rate": 1.2757489100370354e-05, - "loss": 0.2878, + "learning_rate": 2.2768789103334216e-05, + "loss": 0.2616, "step": 78245 }, { "epoch": 3.65, - "learning_rate": 1.2757020299095216e-05, - "loss": 0.063, + "learning_rate": 2.27683210334982e-05, + "loss": 0.0547, "step": 78250 }, { "epoch": 3.65, - "learning_rate": 1.2756551497820076e-05, - "loss": 0.0437, + "learning_rate": 2.276785296366218e-05, + "loss": 0.0142, "step": 78255 }, { "epoch": 3.65, - "learning_rate": 1.2756082696544936e-05, - "loss": 0.0231, + "learning_rate": 2.276738489382616e-05, + "loss": 0.0242, "step": 78260 }, { "epoch": 3.65, - "learning_rate": 1.2755613895269796e-05, - "loss": 0.0648, + "learning_rate": 2.2766916823990142e-05, + "loss": 0.0644, "step": 78265 }, { "epoch": 3.65, - "learning_rate": 1.2755145093994656e-05, - "loss": 0.0842, + "learning_rate": 2.2766448754154122e-05, + "loss": 0.0732, "step": 78270 }, { "epoch": 3.65, - "learning_rate": 1.2754676292719517e-05, - "loss": 0.0898, + "learning_rate": 2.27659806843181e-05, + "loss": 0.1355, "step": 78275 }, { "epoch": 3.65, - "learning_rate": 1.2754207491444377e-05, - "loss": 0.079, + "learning_rate": 2.276551261448208e-05, + "loss": 0.1148, "step": 78280 }, { "epoch": 3.65, - "learning_rate": 1.2753738690169237e-05, - "loss": 0.1467, + "learning_rate": 2.2765044544646065e-05, + "loss": 0.1983, "step": 78285 }, { "epoch": 3.65, - "learning_rate": 1.27532698888941e-05, - "loss": 0.163, + "learning_rate": 2.276457647481004e-05, + "loss": 0.1194, "step": 78290 }, { "epoch": 3.65, - "learning_rate": 1.275280108761896e-05, - "loss": 0.357, + "learning_rate": 2.276410840497402e-05, + "loss": 0.2329, "step": 78295 }, { "epoch": 3.65, - "learning_rate": 1.275233228634382e-05, - "loss": 0.0504, + "learning_rate": 2.2763640335138e-05, + "loss": 0.0873, "step": 78300 }, { "epoch": 3.65, - "learning_rate": 1.275186348506868e-05, - "loss": 0.0187, + "learning_rate": 2.2763172265301984e-05, + "loss": 0.0103, "step": 78305 }, { "epoch": 3.65, - "learning_rate": 1.275139468379354e-05, - "loss": 0.0454, + "learning_rate": 2.2762704195465964e-05, + "loss": 0.0657, "step": 78310 }, { "epoch": 3.65, - "learning_rate": 1.2750925882518402e-05, - "loss": 0.0805, + "learning_rate": 2.2762236125629943e-05, + "loss": 0.0699, "step": 78315 }, { "epoch": 3.65, - "learning_rate": 1.2750457081243262e-05, - "loss": 0.1055, + "learning_rate": 2.2761768055793923e-05, + "loss": 0.018, "step": 78320 }, { "epoch": 3.65, - "learning_rate": 1.2749988279968122e-05, - "loss": 0.1327, + "learning_rate": 2.2761299985957907e-05, + "loss": 0.1219, "step": 78325 }, { "epoch": 3.65, - "learning_rate": 1.2749519478692982e-05, - "loss": 0.0689, + "learning_rate": 2.2760831916121886e-05, + "loss": 0.1351, "step": 78330 }, { "epoch": 3.66, - "learning_rate": 1.2749050677417845e-05, - "loss": 0.2196, + "learning_rate": 2.2760363846285866e-05, + "loss": 0.0882, "step": 78335 }, { "epoch": 3.66, - "learning_rate": 1.2748581876142705e-05, - "loss": 0.1498, + "learning_rate": 2.275989577644985e-05, + "loss": 0.2069, "step": 78340 }, { "epoch": 3.66, - "learning_rate": 1.2748113074867565e-05, - "loss": 0.1981, + "learning_rate": 2.275942770661383e-05, + "loss": 0.3191, "step": 78345 }, { "epoch": 3.66, - "learning_rate": 1.2747644273592425e-05, - "loss": 0.0679, + "learning_rate": 2.275895963677781e-05, + "loss": 0.0543, "step": 78350 }, { "epoch": 3.66, - "learning_rate": 1.2747175472317286e-05, - "loss": 0.0213, + "learning_rate": 2.2758491566941785e-05, + "loss": 0.0372, "step": 78355 }, { "epoch": 3.66, - "learning_rate": 1.2746706671042146e-05, - "loss": 0.0227, + "learning_rate": 2.275802349710577e-05, + "loss": 0.0597, "step": 78360 }, { "epoch": 3.66, - "learning_rate": 1.2746237869767006e-05, - "loss": 0.0756, + "learning_rate": 2.275755542726975e-05, + "loss": 0.0567, "step": 78365 }, { "epoch": 3.66, - "learning_rate": 1.2745769068491866e-05, - "loss": 0.072, + "learning_rate": 2.2757087357433728e-05, + "loss": 0.0915, "step": 78370 }, { "epoch": 3.66, - "learning_rate": 1.2745300267216728e-05, - "loss": 0.059, + "learning_rate": 2.2756619287597708e-05, + "loss": 0.0892, "step": 78375 }, { "epoch": 3.66, - "learning_rate": 1.274483146594159e-05, - "loss": 0.1238, + "learning_rate": 2.275615121776169e-05, + "loss": 0.0721, "step": 78380 }, { "epoch": 3.66, - "learning_rate": 1.274436266466645e-05, - "loss": 0.1507, + "learning_rate": 2.275568314792567e-05, + "loss": 0.0889, "step": 78385 }, { "epoch": 3.66, - "learning_rate": 1.274389386339131e-05, - "loss": 0.1631, + "learning_rate": 2.275521507808965e-05, + "loss": 0.2066, "step": 78390 }, { "epoch": 3.66, - "learning_rate": 1.2743425062116171e-05, - "loss": 0.2719, + "learning_rate": 2.2754747008253634e-05, + "loss": 0.1744, "step": 78395 }, { "epoch": 3.66, - "learning_rate": 1.274295626084103e-05, - "loss": 0.1075, + "learning_rate": 2.2754278938417614e-05, + "loss": 0.0508, "step": 78400 }, { "epoch": 3.66, - "learning_rate": 1.274248745956589e-05, - "loss": 0.0285, + "learning_rate": 2.2753810868581594e-05, + "loss": 0.0217, "step": 78405 }, { "epoch": 3.66, - "learning_rate": 1.274201865829075e-05, - "loss": 0.0663, + "learning_rate": 2.2753342798745574e-05, + "loss": 0.021, "step": 78410 }, { "epoch": 3.66, - "learning_rate": 1.2741549857015612e-05, - "loss": 0.041, + "learning_rate": 2.2752874728909557e-05, + "loss": 0.035, "step": 78415 }, { "epoch": 3.66, - "learning_rate": 1.2741081055740472e-05, - "loss": 0.0262, + "learning_rate": 2.2752406659073533e-05, + "loss": 0.048, "step": 78420 }, { "epoch": 3.66, - "learning_rate": 1.2740612254465332e-05, - "loss": 0.077, + "learning_rate": 2.2751938589237513e-05, + "loss": 0.1137, "step": 78425 }, { "epoch": 3.66, - "learning_rate": 1.2740143453190194e-05, - "loss": 0.0287, + "learning_rate": 2.2751470519401493e-05, + "loss": 0.0874, "step": 78430 }, { "epoch": 3.66, - "learning_rate": 1.2739674651915055e-05, - "loss": 0.1625, + "learning_rate": 2.2751002449565476e-05, + "loss": 0.1429, "step": 78435 }, { "epoch": 3.66, - "learning_rate": 1.2739205850639915e-05, - "loss": 0.1884, + "learning_rate": 2.2750534379729456e-05, + "loss": 0.2074, "step": 78440 }, { "epoch": 3.66, - "learning_rate": 1.2738737049364775e-05, - "loss": 0.2646, + "learning_rate": 2.2750066309893436e-05, + "loss": 0.1557, "step": 78445 }, { "epoch": 3.66, - "learning_rate": 1.2738268248089635e-05, - "loss": 0.0529, + "learning_rate": 2.274959824005742e-05, + "loss": 0.0892, "step": 78450 }, { "epoch": 3.66, - "learning_rate": 1.2737799446814497e-05, - "loss": 0.0175, + "learning_rate": 2.27491301702214e-05, + "loss": 0.0351, "step": 78455 }, { "epoch": 3.66, - "learning_rate": 1.2737330645539357e-05, - "loss": 0.0781, + "learning_rate": 2.274866210038538e-05, + "loss": 0.0501, "step": 78460 }, { "epoch": 3.66, - "learning_rate": 1.2736861844264217e-05, - "loss": 0.0381, + "learning_rate": 2.274819403054936e-05, + "loss": 0.0639, "step": 78465 }, { "epoch": 3.66, - "learning_rate": 1.2736393042989077e-05, - "loss": 0.0914, + "learning_rate": 2.274772596071334e-05, + "loss": 0.0596, "step": 78470 }, { "epoch": 3.66, - "learning_rate": 1.273592424171394e-05, - "loss": 0.0668, + "learning_rate": 2.274725789087732e-05, + "loss": 0.1114, "step": 78475 }, { "epoch": 3.66, - "learning_rate": 1.27354554404388e-05, - "loss": 0.0598, + "learning_rate": 2.2746789821041298e-05, + "loss": 0.0984, "step": 78480 }, { "epoch": 3.66, - "learning_rate": 1.273498663916366e-05, - "loss": 0.0972, + "learning_rate": 2.2746321751205278e-05, + "loss": 0.2512, "step": 78485 }, { "epoch": 3.66, - "learning_rate": 1.273451783788852e-05, - "loss": 0.154, + "learning_rate": 2.274585368136926e-05, + "loss": 0.175, "step": 78490 }, { "epoch": 3.66, - "learning_rate": 1.2734049036613381e-05, - "loss": 0.1806, + "learning_rate": 2.274538561153324e-05, + "loss": 0.2608, "step": 78495 }, { "epoch": 3.66, - "learning_rate": 1.2733580235338241e-05, - "loss": 0.083, + "learning_rate": 2.274491754169722e-05, + "loss": 0.0441, "step": 78500 }, { "epoch": 3.66, - "learning_rate": 1.2733111434063101e-05, - "loss": 0.0532, + "learning_rate": 2.27444494718612e-05, + "loss": 0.036, "step": 78505 }, { "epoch": 3.66, - "learning_rate": 1.2732642632787961e-05, - "loss": 0.0337, + "learning_rate": 2.2743981402025183e-05, + "loss": 0.0365, "step": 78510 }, { "epoch": 3.66, - "learning_rate": 1.2732173831512821e-05, - "loss": 0.1008, + "learning_rate": 2.2743513332189163e-05, + "loss": 0.0342, "step": 78515 }, { "epoch": 3.66, - "learning_rate": 1.2731705030237684e-05, - "loss": 0.0331, + "learning_rate": 2.2743045262353143e-05, + "loss": 0.0571, "step": 78520 }, { "epoch": 3.66, - "learning_rate": 1.2731236228962544e-05, - "loss": 0.0478, + "learning_rate": 2.2742577192517126e-05, + "loss": 0.0975, "step": 78525 }, { "epoch": 3.66, - "learning_rate": 1.2730767427687404e-05, - "loss": 0.1999, + "learning_rate": 2.2742109122681106e-05, + "loss": 0.0898, "step": 78530 }, { "epoch": 3.66, - "learning_rate": 1.2730298626412266e-05, - "loss": 0.0914, + "learning_rate": 2.2741641052845086e-05, + "loss": 0.095, "step": 78535 }, { "epoch": 3.66, - "learning_rate": 1.2729829825137126e-05, - "loss": 0.2158, + "learning_rate": 2.2741172983009066e-05, + "loss": 0.1435, "step": 78540 }, { "epoch": 3.67, - "learning_rate": 1.2729361023861986e-05, - "loss": 0.2418, + "learning_rate": 2.2740704913173046e-05, + "loss": 0.1806, "step": 78545 }, { "epoch": 3.67, - "learning_rate": 1.2728892222586846e-05, - "loss": 0.0747, + "learning_rate": 2.2740236843337025e-05, + "loss": 0.0376, "step": 78550 }, { "epoch": 3.67, - "learning_rate": 1.2728423421311706e-05, - "loss": 0.0427, + "learning_rate": 2.2739768773501005e-05, + "loss": 0.0331, "step": 78555 }, { "epoch": 3.67, - "learning_rate": 1.2727954620036567e-05, - "loss": 0.0648, + "learning_rate": 2.2739300703664985e-05, + "loss": 0.0285, "step": 78560 }, { "epoch": 3.67, - "learning_rate": 1.2727485818761427e-05, - "loss": 0.0626, + "learning_rate": 2.2738832633828968e-05, + "loss": 0.0491, "step": 78565 }, { "epoch": 3.67, - "learning_rate": 1.2727017017486289e-05, - "loss": 0.0518, + "learning_rate": 2.2738364563992948e-05, + "loss": 0.079, "step": 78570 }, { "epoch": 3.67, - "learning_rate": 1.272654821621115e-05, - "loss": 0.0454, + "learning_rate": 2.2737896494156928e-05, + "loss": 0.2297, "step": 78575 }, { "epoch": 3.67, - "learning_rate": 1.272607941493601e-05, - "loss": 0.0809, + "learning_rate": 2.273742842432091e-05, + "loss": 0.0501, "step": 78580 }, { "epoch": 3.67, - "learning_rate": 1.272561061366087e-05, - "loss": 0.0751, + "learning_rate": 2.273696035448489e-05, + "loss": 0.151, "step": 78585 }, { "epoch": 3.67, - "learning_rate": 1.272514181238573e-05, - "loss": 0.2592, + "learning_rate": 2.273649228464887e-05, + "loss": 0.0897, "step": 78590 }, { "epoch": 3.67, - "learning_rate": 1.272467301111059e-05, - "loss": 0.2784, + "learning_rate": 2.273602421481285e-05, + "loss": 0.2062, "step": 78595 }, { "epoch": 3.67, - "learning_rate": 1.2724204209835452e-05, - "loss": 0.0902, + "learning_rate": 2.2735556144976834e-05, + "loss": 0.0793, "step": 78600 }, { "epoch": 3.67, - "learning_rate": 1.2723735408560312e-05, - "loss": 0.0245, + "learning_rate": 2.2735088075140814e-05, + "loss": 0.0325, "step": 78605 }, { "epoch": 3.67, - "learning_rate": 1.2723266607285172e-05, - "loss": 0.0941, + "learning_rate": 2.273462000530479e-05, + "loss": 0.0626, "step": 78610 }, { "epoch": 3.67, - "learning_rate": 1.2722797806010035e-05, - "loss": 0.0411, + "learning_rate": 2.273415193546877e-05, + "loss": 0.0506, "step": 78615 }, { "epoch": 3.67, - "learning_rate": 1.2722329004734895e-05, - "loss": 0.0267, + "learning_rate": 2.2733683865632753e-05, + "loss": 0.0706, "step": 78620 }, { "epoch": 3.67, - "learning_rate": 1.2721860203459755e-05, - "loss": 0.0835, + "learning_rate": 2.2733215795796733e-05, + "loss": 0.1215, "step": 78625 }, { "epoch": 3.67, - "learning_rate": 1.2721391402184615e-05, - "loss": 0.0921, + "learning_rate": 2.2732747725960713e-05, + "loss": 0.1097, "step": 78630 }, { "epoch": 3.67, - "learning_rate": 1.2720922600909475e-05, - "loss": 0.1474, + "learning_rate": 2.2732279656124696e-05, + "loss": 0.1271, "step": 78635 }, { "epoch": 3.67, - "learning_rate": 1.2720453799634336e-05, - "loss": 0.2466, + "learning_rate": 2.2731811586288676e-05, + "loss": 0.2125, "step": 78640 }, { "epoch": 3.67, - "learning_rate": 1.2719984998359196e-05, - "loss": 0.2737, + "learning_rate": 2.2731343516452656e-05, + "loss": 0.1813, "step": 78645 }, { "epoch": 3.67, - "learning_rate": 1.2719516197084056e-05, - "loss": 0.0547, + "learning_rate": 2.2730875446616635e-05, + "loss": 0.0305, "step": 78650 }, { "epoch": 3.67, - "learning_rate": 1.2719047395808916e-05, - "loss": 0.0126, + "learning_rate": 2.273040737678062e-05, + "loss": 0.0227, "step": 78655 }, { "epoch": 3.67, - "learning_rate": 1.271857859453378e-05, - "loss": 0.0942, + "learning_rate": 2.27299393069446e-05, + "loss": 0.0679, "step": 78660 }, { "epoch": 3.67, - "learning_rate": 1.271810979325864e-05, - "loss": 0.0636, + "learning_rate": 2.2729471237108578e-05, + "loss": 0.075, "step": 78665 }, { "epoch": 3.67, - "learning_rate": 1.27176409919835e-05, - "loss": 0.0685, + "learning_rate": 2.2729003167272555e-05, + "loss": 0.0268, "step": 78670 }, { "epoch": 3.67, - "learning_rate": 1.271717219070836e-05, - "loss": 0.1465, + "learning_rate": 2.2728535097436538e-05, + "loss": 0.0712, "step": 78675 }, { "epoch": 3.67, - "learning_rate": 1.2716703389433221e-05, - "loss": 0.1381, + "learning_rate": 2.2728067027600518e-05, + "loss": 0.0878, "step": 78680 }, { "epoch": 3.67, - "learning_rate": 1.2716234588158081e-05, - "loss": 0.1399, + "learning_rate": 2.2727598957764497e-05, + "loss": 0.0967, "step": 78685 }, { "epoch": 3.67, - "learning_rate": 1.2715765786882941e-05, - "loss": 0.1988, + "learning_rate": 2.272713088792848e-05, + "loss": 0.0973, "step": 78690 }, { "epoch": 3.67, - "learning_rate": 1.27152969856078e-05, - "loss": 0.3297, + "learning_rate": 2.272666281809246e-05, + "loss": 0.1805, "step": 78695 }, { "epoch": 3.67, - "learning_rate": 1.271482818433266e-05, - "loss": 0.0692, + "learning_rate": 2.272619474825644e-05, + "loss": 0.0557, "step": 78700 }, { "epoch": 3.67, - "learning_rate": 1.2714359383057524e-05, - "loss": 0.0334, + "learning_rate": 2.272572667842042e-05, + "loss": 0.0249, "step": 78705 }, { "epoch": 3.67, - "learning_rate": 1.2713890581782384e-05, - "loss": 0.0716, + "learning_rate": 2.2725258608584403e-05, + "loss": 0.0443, "step": 78710 }, { "epoch": 3.67, - "learning_rate": 1.2713421780507244e-05, - "loss": 0.0738, + "learning_rate": 2.2724790538748383e-05, + "loss": 0.032, "step": 78715 }, { "epoch": 3.67, - "learning_rate": 1.2712952979232106e-05, - "loss": 0.0247, + "learning_rate": 2.2724322468912363e-05, + "loss": 0.0564, "step": 78720 }, { "epoch": 3.67, - "learning_rate": 1.2712484177956965e-05, - "loss": 0.0789, + "learning_rate": 2.2723854399076343e-05, + "loss": 0.1471, "step": 78725 }, { "epoch": 3.67, - "learning_rate": 1.2712015376681825e-05, - "loss": 0.0535, + "learning_rate": 2.2723386329240326e-05, + "loss": 0.0741, "step": 78730 }, { "epoch": 3.67, - "learning_rate": 1.2711546575406685e-05, - "loss": 0.1207, + "learning_rate": 2.2722918259404302e-05, + "loss": 0.1166, "step": 78735 }, { "epoch": 3.67, - "learning_rate": 1.2711077774131545e-05, - "loss": 0.084, + "learning_rate": 2.2722450189568282e-05, + "loss": 0.1757, "step": 78740 }, { "epoch": 3.67, - "learning_rate": 1.2710608972856407e-05, - "loss": 0.2068, + "learning_rate": 2.2721982119732262e-05, + "loss": 0.2749, "step": 78745 }, { "epoch": 3.67, - "learning_rate": 1.2710140171581267e-05, - "loss": 0.0803, + "learning_rate": 2.2721514049896245e-05, + "loss": 0.0821, "step": 78750 }, { "epoch": 3.67, - "learning_rate": 1.2709671370306128e-05, - "loss": 0.0285, + "learning_rate": 2.2721045980060225e-05, + "loss": 0.0092, "step": 78755 }, { "epoch": 3.68, - "learning_rate": 1.270920256903099e-05, - "loss": 0.0382, + "learning_rate": 2.2720577910224205e-05, + "loss": 0.0445, "step": 78760 }, { "epoch": 3.68, - "learning_rate": 1.270873376775585e-05, - "loss": 0.0453, + "learning_rate": 2.2720109840388188e-05, + "loss": 0.0159, "step": 78765 }, { "epoch": 3.68, - "learning_rate": 1.270826496648071e-05, - "loss": 0.0484, + "learning_rate": 2.2719641770552168e-05, + "loss": 0.053, "step": 78770 }, { "epoch": 3.68, - "learning_rate": 1.270779616520557e-05, - "loss": 0.0617, + "learning_rate": 2.2719173700716148e-05, + "loss": 0.0807, "step": 78775 }, { "epoch": 3.68, - "learning_rate": 1.270732736393043e-05, - "loss": 0.0609, + "learning_rate": 2.2718705630880128e-05, + "loss": 0.0264, "step": 78780 }, { "epoch": 3.68, - "learning_rate": 1.2706858562655291e-05, - "loss": 0.1081, + "learning_rate": 2.271823756104411e-05, + "loss": 0.1584, "step": 78785 }, { "epoch": 3.68, - "learning_rate": 1.2706389761380151e-05, - "loss": 0.2445, + "learning_rate": 2.271776949120809e-05, + "loss": 0.2195, "step": 78790 }, { "epoch": 3.68, - "learning_rate": 1.2705920960105011e-05, - "loss": 0.1503, + "learning_rate": 2.2717301421372067e-05, + "loss": 0.2432, "step": 78795 }, { "epoch": 3.68, - "learning_rate": 1.2705452158829875e-05, - "loss": 0.1369, + "learning_rate": 2.2716833351536047e-05, + "loss": 0.0867, "step": 78800 }, { "epoch": 3.68, - "learning_rate": 1.2704983357554735e-05, - "loss": 0.0178, + "learning_rate": 2.271636528170003e-05, + "loss": 0.0638, "step": 78805 }, { "epoch": 3.68, - "learning_rate": 1.2704514556279595e-05, - "loss": 0.0554, + "learning_rate": 2.271589721186401e-05, + "loss": 0.0194, "step": 78810 }, { "epoch": 3.68, - "learning_rate": 1.2704045755004454e-05, - "loss": 0.051, + "learning_rate": 2.271542914202799e-05, + "loss": 0.0421, "step": 78815 }, { "epoch": 3.68, - "learning_rate": 1.2703576953729314e-05, - "loss": 0.0841, + "learning_rate": 2.2714961072191973e-05, + "loss": 0.1609, "step": 78820 }, { "epoch": 3.68, - "learning_rate": 1.2703108152454176e-05, - "loss": 0.0833, + "learning_rate": 2.2714493002355953e-05, + "loss": 0.0861, "step": 78825 }, { "epoch": 3.68, - "learning_rate": 1.2702639351179036e-05, - "loss": 0.0539, + "learning_rate": 2.2714024932519932e-05, + "loss": 0.0946, "step": 78830 }, { "epoch": 3.68, - "learning_rate": 1.2702170549903896e-05, - "loss": 0.1065, + "learning_rate": 2.2713556862683912e-05, + "loss": 0.1825, "step": 78835 }, { "epoch": 3.68, - "learning_rate": 1.2701701748628756e-05, - "loss": 0.1182, + "learning_rate": 2.2713088792847896e-05, + "loss": 0.2532, "step": 78840 }, { "epoch": 3.68, - "learning_rate": 1.2701232947353619e-05, - "loss": 0.1948, + "learning_rate": 2.2712620723011875e-05, + "loss": 0.2818, "step": 78845 }, { "epoch": 3.68, - "learning_rate": 1.2700764146078479e-05, - "loss": 0.0758, + "learning_rate": 2.2712152653175855e-05, + "loss": 0.0684, "step": 78850 }, { "epoch": 3.68, - "learning_rate": 1.2700295344803339e-05, - "loss": 0.0617, + "learning_rate": 2.2711684583339835e-05, + "loss": 0.0264, "step": 78855 }, { "epoch": 3.68, - "learning_rate": 1.2699826543528199e-05, - "loss": 0.0756, + "learning_rate": 2.2711216513503815e-05, + "loss": 0.12, "step": 78860 }, { "epoch": 3.68, - "learning_rate": 1.269935774225306e-05, - "loss": 0.0447, + "learning_rate": 2.2710748443667795e-05, + "loss": 0.0769, "step": 78865 }, { "epoch": 3.68, - "learning_rate": 1.269888894097792e-05, - "loss": 0.1191, + "learning_rate": 2.2710280373831774e-05, + "loss": 0.0295, "step": 78870 }, { "epoch": 3.68, - "learning_rate": 1.269842013970278e-05, - "loss": 0.0886, + "learning_rate": 2.2709812303995758e-05, + "loss": 0.0775, "step": 78875 }, { "epoch": 3.68, - "learning_rate": 1.269795133842764e-05, - "loss": 0.1098, + "learning_rate": 2.2709344234159737e-05, + "loss": 0.1726, "step": 78880 }, { "epoch": 3.68, - "learning_rate": 1.2697482537152502e-05, - "loss": 0.0687, + "learning_rate": 2.2708876164323717e-05, + "loss": 0.0916, "step": 78885 }, { "epoch": 3.68, - "learning_rate": 1.2697013735877362e-05, - "loss": 0.1674, + "learning_rate": 2.2708408094487697e-05, + "loss": 0.1304, "step": 78890 }, { "epoch": 3.68, - "learning_rate": 1.2696544934602224e-05, - "loss": 0.1744, + "learning_rate": 2.270794002465168e-05, + "loss": 0.2285, "step": 78895 }, { "epoch": 3.68, - "learning_rate": 1.2696076133327085e-05, - "loss": 0.0752, + "learning_rate": 2.270747195481566e-05, + "loss": 0.069, "step": 78900 }, { "epoch": 3.68, - "learning_rate": 1.2695607332051945e-05, - "loss": 0.0548, + "learning_rate": 2.270700388497964e-05, + "loss": 0.0423, "step": 78905 }, { "epoch": 3.68, - "learning_rate": 1.2695138530776805e-05, - "loss": 0.0126, + "learning_rate": 2.270653581514362e-05, + "loss": 0.0192, "step": 78910 }, { "epoch": 3.68, - "learning_rate": 1.2694669729501665e-05, - "loss": 0.0724, + "learning_rate": 2.2706067745307603e-05, + "loss": 0.0478, "step": 78915 }, { "epoch": 3.68, - "learning_rate": 1.2694200928226525e-05, - "loss": 0.0485, + "learning_rate": 2.2705599675471583e-05, + "loss": 0.0643, "step": 78920 }, { "epoch": 3.68, - "learning_rate": 1.2693732126951387e-05, - "loss": 0.1092, + "learning_rate": 2.270513160563556e-05, + "loss": 0.0955, "step": 78925 }, { "epoch": 3.68, - "learning_rate": 1.2693263325676246e-05, - "loss": 0.0903, + "learning_rate": 2.270466353579954e-05, + "loss": 0.0941, "step": 78930 }, { "epoch": 3.68, - "learning_rate": 1.2692794524401106e-05, - "loss": 0.1176, + "learning_rate": 2.2704195465963522e-05, + "loss": 0.154, "step": 78935 }, { "epoch": 3.68, - "learning_rate": 1.269232572312597e-05, - "loss": 0.1223, + "learning_rate": 2.2703727396127502e-05, + "loss": 0.2171, "step": 78940 }, { "epoch": 3.68, - "learning_rate": 1.269185692185083e-05, - "loss": 0.2449, + "learning_rate": 2.2703259326291482e-05, + "loss": 0.1763, "step": 78945 }, { "epoch": 3.68, - "learning_rate": 1.269138812057569e-05, - "loss": 0.0657, + "learning_rate": 2.2702791256455465e-05, + "loss": 0.0848, "step": 78950 }, { "epoch": 3.68, - "learning_rate": 1.269091931930055e-05, - "loss": 0.0164, + "learning_rate": 2.2702323186619445e-05, + "loss": 0.0145, "step": 78955 }, { "epoch": 3.68, - "learning_rate": 1.269045051802541e-05, - "loss": 0.0325, + "learning_rate": 2.2701855116783425e-05, + "loss": 0.0321, "step": 78960 }, { "epoch": 3.68, - "learning_rate": 1.2689981716750271e-05, - "loss": 0.0615, + "learning_rate": 2.2701387046947404e-05, + "loss": 0.0196, "step": 78965 }, { "epoch": 3.68, - "learning_rate": 1.2689512915475131e-05, - "loss": 0.0515, + "learning_rate": 2.2700918977111388e-05, + "loss": 0.0757, "step": 78970 }, { "epoch": 3.69, - "learning_rate": 1.2689044114199991e-05, - "loss": 0.0701, + "learning_rate": 2.2700450907275368e-05, + "loss": 0.0615, "step": 78975 }, { "epoch": 3.69, - "learning_rate": 1.2688575312924851e-05, - "loss": 0.1229, + "learning_rate": 2.2699982837439347e-05, + "loss": 0.0737, "step": 78980 }, { "epoch": 3.69, - "learning_rate": 1.2688106511649714e-05, - "loss": 0.187, + "learning_rate": 2.2699514767603324e-05, + "loss": 0.1314, "step": 78985 }, { "epoch": 3.69, - "learning_rate": 1.2687637710374574e-05, - "loss": 0.1972, + "learning_rate": 2.2699046697767307e-05, + "loss": 0.2125, "step": 78990 }, { "epoch": 3.69, - "learning_rate": 1.2687168909099434e-05, - "loss": 0.3156, + "learning_rate": 2.2698578627931287e-05, + "loss": 0.3037, "step": 78995 }, { "epoch": 3.69, - "learning_rate": 1.2686700107824294e-05, - "loss": 0.0696, + "learning_rate": 2.2698110558095267e-05, + "loss": 0.0774, "step": 79000 }, { "epoch": 3.69, - "learning_rate": 1.2686231306549156e-05, - "loss": 0.0243, + "learning_rate": 2.269764248825925e-05, + "loss": 0.0239, "step": 79005 }, { "epoch": 3.69, - "learning_rate": 1.2685762505274016e-05, - "loss": 0.0274, + "learning_rate": 2.269717441842323e-05, + "loss": 0.0133, "step": 79010 }, { "epoch": 3.69, - "learning_rate": 1.2685293703998876e-05, - "loss": 0.0367, + "learning_rate": 2.269670634858721e-05, + "loss": 0.0368, "step": 79015 }, { "epoch": 3.69, - "learning_rate": 1.2684824902723735e-05, - "loss": 0.0755, + "learning_rate": 2.269623827875119e-05, + "loss": 0.0632, "step": 79020 }, { "epoch": 3.69, - "learning_rate": 1.2684356101448595e-05, - "loss": 0.0655, + "learning_rate": 2.2695770208915172e-05, + "loss": 0.1588, "step": 79025 }, { "epoch": 3.69, - "learning_rate": 1.2683887300173459e-05, - "loss": 0.0875, + "learning_rate": 2.2695302139079152e-05, + "loss": 0.1011, "step": 79030 }, { "epoch": 3.69, - "learning_rate": 1.2683418498898319e-05, - "loss": 0.0886, + "learning_rate": 2.2694834069243132e-05, + "loss": 0.2306, "step": 79035 }, { "epoch": 3.69, - "learning_rate": 1.2682949697623179e-05, - "loss": 0.2241, + "learning_rate": 2.2694365999407112e-05, + "loss": 0.1923, "step": 79040 }, { "epoch": 3.69, - "learning_rate": 1.268248089634804e-05, - "loss": 0.2467, + "learning_rate": 2.2693897929571095e-05, + "loss": 0.3183, "step": 79045 }, { "epoch": 3.69, - "learning_rate": 1.26820120950729e-05, - "loss": 0.0494, + "learning_rate": 2.269342985973507e-05, + "loss": 0.0428, "step": 79050 }, { "epoch": 3.69, - "learning_rate": 1.268154329379776e-05, - "loss": 0.0358, + "learning_rate": 2.269296178989905e-05, + "loss": 0.0216, "step": 79055 }, { "epoch": 3.69, - "learning_rate": 1.268107449252262e-05, - "loss": 0.0871, + "learning_rate": 2.2692493720063035e-05, + "loss": 0.0303, "step": 79060 }, { "epoch": 3.69, - "learning_rate": 1.268060569124748e-05, - "loss": 0.0837, + "learning_rate": 2.2692025650227014e-05, + "loss": 0.0508, "step": 79065 }, { "epoch": 3.69, - "learning_rate": 1.2680136889972342e-05, - "loss": 0.137, + "learning_rate": 2.2691557580390994e-05, + "loss": 0.0529, "step": 79070 }, { "epoch": 3.69, - "learning_rate": 1.2679668088697201e-05, - "loss": 0.1637, + "learning_rate": 2.2691089510554974e-05, + "loss": 0.0927, "step": 79075 }, { "epoch": 3.69, - "learning_rate": 1.2679199287422063e-05, - "loss": 0.0536, + "learning_rate": 2.2690621440718957e-05, + "loss": 0.1128, "step": 79080 }, { "epoch": 3.69, - "learning_rate": 1.2678730486146925e-05, - "loss": 0.1471, + "learning_rate": 2.2690153370882937e-05, + "loss": 0.0713, "step": 79085 }, { "epoch": 3.69, - "learning_rate": 1.2678261684871785e-05, - "loss": 0.1969, + "learning_rate": 2.2689685301046917e-05, + "loss": 0.2878, "step": 79090 }, { "epoch": 3.69, - "learning_rate": 1.2677792883596645e-05, - "loss": 0.266, + "learning_rate": 2.2689217231210897e-05, + "loss": 0.2255, "step": 79095 }, { "epoch": 3.69, - "learning_rate": 1.2677324082321505e-05, - "loss": 0.1092, + "learning_rate": 2.268874916137488e-05, + "loss": 0.0683, "step": 79100 }, { "epoch": 3.69, - "learning_rate": 1.2676855281046364e-05, - "loss": 0.0263, + "learning_rate": 2.268828109153886e-05, + "loss": 0.0308, "step": 79105 }, { "epoch": 3.69, - "learning_rate": 1.2676386479771226e-05, - "loss": 0.0262, + "learning_rate": 2.268781302170284e-05, + "loss": 0.0227, "step": 79110 }, { "epoch": 3.69, - "learning_rate": 1.2675917678496086e-05, - "loss": 0.0766, + "learning_rate": 2.2687344951866816e-05, + "loss": 0.043, "step": 79115 }, { "epoch": 3.69, - "learning_rate": 1.2675448877220946e-05, - "loss": 0.0506, + "learning_rate": 2.26868768820308e-05, + "loss": 0.096, "step": 79120 }, { "epoch": 3.69, - "learning_rate": 1.267498007594581e-05, - "loss": 0.0422, + "learning_rate": 2.268640881219478e-05, + "loss": 0.1171, "step": 79125 }, { "epoch": 3.69, - "learning_rate": 1.267451127467067e-05, - "loss": 0.0788, + "learning_rate": 2.268594074235876e-05, + "loss": 0.1054, "step": 79130 }, { "epoch": 3.69, - "learning_rate": 1.267404247339553e-05, - "loss": 0.1153, + "learning_rate": 2.2685472672522742e-05, + "loss": 0.1268, "step": 79135 }, { "epoch": 3.69, - "learning_rate": 1.2673573672120389e-05, - "loss": 0.2412, + "learning_rate": 2.2685004602686722e-05, + "loss": 0.3333, "step": 79140 }, { "epoch": 3.69, - "learning_rate": 1.2673104870845249e-05, - "loss": 0.289, + "learning_rate": 2.26845365328507e-05, + "loss": 0.2143, "step": 79145 }, { "epoch": 3.69, - "learning_rate": 1.267263606957011e-05, - "loss": 0.086, + "learning_rate": 2.268406846301468e-05, + "loss": 0.1017, "step": 79150 }, { "epoch": 3.69, - "learning_rate": 1.267216726829497e-05, - "loss": 0.0452, + "learning_rate": 2.2683600393178665e-05, + "loss": 0.0287, "step": 79155 }, { "epoch": 3.69, - "learning_rate": 1.267169846701983e-05, - "loss": 0.0435, + "learning_rate": 2.2683132323342644e-05, + "loss": 0.013, "step": 79160 }, { "epoch": 3.69, - "learning_rate": 1.267122966574469e-05, - "loss": 0.0802, + "learning_rate": 2.2682664253506624e-05, + "loss": 0.0205, "step": 79165 }, { "epoch": 3.69, - "learning_rate": 1.2670760864469554e-05, - "loss": 0.0239, + "learning_rate": 2.2682196183670604e-05, + "loss": 0.0471, "step": 79170 }, { "epoch": 3.69, - "learning_rate": 1.2670292063194414e-05, - "loss": 0.1055, + "learning_rate": 2.2681728113834584e-05, + "loss": 0.0776, "step": 79175 }, { "epoch": 3.69, - "learning_rate": 1.2669823261919274e-05, - "loss": 0.0681, + "learning_rate": 2.2681260043998564e-05, + "loss": 0.0945, "step": 79180 }, { "epoch": 3.69, - "learning_rate": 1.2669354460644134e-05, - "loss": 0.1015, + "learning_rate": 2.2680791974162544e-05, + "loss": 0.0566, "step": 79185 }, { "epoch": 3.7, - "learning_rate": 1.2668885659368995e-05, - "loss": 0.1638, + "learning_rate": 2.2680323904326527e-05, + "loss": 0.2853, "step": 79190 }, { "epoch": 3.7, - "learning_rate": 1.2668416858093855e-05, - "loss": 0.2779, + "learning_rate": 2.2679855834490507e-05, + "loss": 0.2321, "step": 79195 }, { "epoch": 3.7, - "learning_rate": 1.2667948056818715e-05, - "loss": 0.077, + "learning_rate": 2.2679387764654486e-05, + "loss": 0.0353, "step": 79200 }, { "epoch": 3.7, - "learning_rate": 1.2667479255543575e-05, - "loss": 0.036, + "learning_rate": 2.2678919694818466e-05, + "loss": 0.0148, "step": 79205 }, { "epoch": 3.7, - "learning_rate": 1.2667010454268435e-05, - "loss": 0.0236, + "learning_rate": 2.267845162498245e-05, + "loss": 0.0711, "step": 79210 }, { "epoch": 3.7, - "learning_rate": 1.2666541652993297e-05, - "loss": 0.0436, + "learning_rate": 2.267798355514643e-05, + "loss": 0.0325, "step": 79215 }, { "epoch": 3.7, - "learning_rate": 1.2666072851718158e-05, - "loss": 0.0387, + "learning_rate": 2.267751548531041e-05, + "loss": 0.0919, "step": 79220 }, { "epoch": 3.7, - "learning_rate": 1.2665604050443018e-05, - "loss": 0.0819, + "learning_rate": 2.267704741547439e-05, + "loss": 0.0877, "step": 79225 }, { "epoch": 3.7, - "learning_rate": 1.266513524916788e-05, - "loss": 0.0543, + "learning_rate": 2.2676579345638372e-05, + "loss": 0.1162, "step": 79230 }, { "epoch": 3.7, - "learning_rate": 1.266466644789274e-05, - "loss": 0.1324, + "learning_rate": 2.2676111275802352e-05, + "loss": 0.1279, "step": 79235 }, { "epoch": 3.7, - "learning_rate": 1.26641976466176e-05, - "loss": 0.1139, + "learning_rate": 2.267564320596633e-05, + "loss": 0.1699, "step": 79240 }, { "epoch": 3.7, - "learning_rate": 1.266372884534246e-05, - "loss": 0.2049, + "learning_rate": 2.267517513613031e-05, + "loss": 0.2584, "step": 79245 }, { "epoch": 3.7, - "learning_rate": 1.2663260044067321e-05, - "loss": 0.0223, + "learning_rate": 2.267470706629429e-05, + "loss": 0.0977, "step": 79250 }, { "epoch": 3.7, - "learning_rate": 1.2662791242792181e-05, - "loss": 0.0313, + "learning_rate": 2.267423899645827e-05, + "loss": 0.0411, "step": 79255 }, { "epoch": 3.7, - "learning_rate": 1.2662322441517041e-05, - "loss": 0.0249, + "learning_rate": 2.267377092662225e-05, + "loss": 0.0511, "step": 79260 }, { "epoch": 3.7, - "learning_rate": 1.2661853640241903e-05, - "loss": 0.0203, + "learning_rate": 2.2673302856786234e-05, + "loss": 0.0917, "step": 79265 }, { "epoch": 3.7, - "learning_rate": 1.2661384838966764e-05, - "loss": 0.0849, + "learning_rate": 2.2672834786950214e-05, + "loss": 0.0989, "step": 79270 }, { "epoch": 3.7, - "learning_rate": 1.2660916037691624e-05, - "loss": 0.1066, + "learning_rate": 2.2672366717114194e-05, + "loss": 0.0767, "step": 79275 }, { "epoch": 3.7, - "learning_rate": 1.2660447236416484e-05, - "loss": 0.1808, + "learning_rate": 2.2671898647278174e-05, + "loss": 0.0889, "step": 79280 }, { "epoch": 3.7, - "learning_rate": 1.2659978435141344e-05, - "loss": 0.1255, + "learning_rate": 2.2671430577442157e-05, + "loss": 0.1315, "step": 79285 }, { "epoch": 3.7, - "learning_rate": 1.2659509633866206e-05, - "loss": 0.1438, + "learning_rate": 2.2670962507606137e-05, + "loss": 0.1564, "step": 79290 }, { "epoch": 3.7, - "learning_rate": 1.2659040832591066e-05, - "loss": 0.1604, + "learning_rate": 2.2670494437770117e-05, + "loss": 0.2035, "step": 79295 }, { "epoch": 3.7, - "learning_rate": 1.2658572031315926e-05, - "loss": 0.0171, + "learning_rate": 2.2670026367934096e-05, + "loss": 0.0661, "step": 79300 }, { "epoch": 3.7, - "learning_rate": 1.2658103230040786e-05, - "loss": 0.0412, + "learning_rate": 2.2669558298098076e-05, + "loss": 0.0164, "step": 79305 }, { "epoch": 3.7, - "learning_rate": 1.2657634428765649e-05, - "loss": 0.0508, + "learning_rate": 2.2669090228262056e-05, + "loss": 0.077, "step": 79310 }, { "epoch": 3.7, - "learning_rate": 1.2657165627490509e-05, - "loss": 0.086, + "learning_rate": 2.2668622158426036e-05, + "loss": 0.0476, "step": 79315 }, { "epoch": 3.7, - "learning_rate": 1.2656696826215369e-05, - "loss": 0.0974, + "learning_rate": 2.266815408859002e-05, + "loss": 0.0349, "step": 79320 }, { "epoch": 3.7, - "learning_rate": 1.2656228024940229e-05, - "loss": 0.0887, + "learning_rate": 2.2667686018754e-05, + "loss": 0.0753, "step": 79325 }, { "epoch": 3.7, - "learning_rate": 1.265575922366509e-05, - "loss": 0.1053, + "learning_rate": 2.266721794891798e-05, + "loss": 0.0613, "step": 79330 }, { "epoch": 3.7, - "learning_rate": 1.265529042238995e-05, - "loss": 0.1274, + "learning_rate": 2.266674987908196e-05, + "loss": 0.0562, "step": 79335 }, { "epoch": 3.7, - "learning_rate": 1.265482162111481e-05, - "loss": 0.1013, + "learning_rate": 2.266628180924594e-05, + "loss": 0.2706, "step": 79340 }, { "epoch": 3.7, - "learning_rate": 1.265435281983967e-05, - "loss": 0.2872, + "learning_rate": 2.266581373940992e-05, + "loss": 0.2792, "step": 79345 }, { "epoch": 3.7, - "learning_rate": 1.265388401856453e-05, - "loss": 0.043, + "learning_rate": 2.26653456695739e-05, + "loss": 0.0754, "step": 79350 }, { "epoch": 3.7, - "learning_rate": 1.2653415217289393e-05, - "loss": 0.0644, + "learning_rate": 2.266487759973788e-05, + "loss": 0.0363, "step": 79355 }, { "epoch": 3.7, - "learning_rate": 1.2652946416014253e-05, - "loss": 0.0232, + "learning_rate": 2.2664409529901864e-05, + "loss": 0.0173, "step": 79360 }, { "epoch": 3.7, - "learning_rate": 1.2652477614739113e-05, - "loss": 0.0363, + "learning_rate": 2.266394146006584e-05, + "loss": 0.0265, "step": 79365 }, { "epoch": 3.7, - "learning_rate": 1.2652008813463975e-05, - "loss": 0.0281, + "learning_rate": 2.266347339022982e-05, + "loss": 0.048, "step": 79370 }, { "epoch": 3.7, - "learning_rate": 1.2651540012188835e-05, - "loss": 0.0818, + "learning_rate": 2.2663005320393804e-05, + "loss": 0.0956, "step": 79375 }, { "epoch": 3.7, - "learning_rate": 1.2651071210913695e-05, - "loss": 0.0831, + "learning_rate": 2.2662537250557784e-05, + "loss": 0.0964, "step": 79380 }, { "epoch": 3.7, - "learning_rate": 1.2650602409638555e-05, - "loss": 0.2475, + "learning_rate": 2.2662069180721763e-05, + "loss": 0.0834, "step": 79385 }, { "epoch": 3.7, - "learning_rate": 1.2650133608363415e-05, - "loss": 0.2168, + "learning_rate": 2.2661601110885743e-05, + "loss": 0.2136, "step": 79390 }, { "epoch": 3.7, - "learning_rate": 1.2649664807088276e-05, - "loss": 0.2529, + "learning_rate": 2.2661133041049726e-05, + "loss": 0.2474, "step": 79395 }, { "epoch": 3.7, - "learning_rate": 1.2649196005813136e-05, - "loss": 0.0632, + "learning_rate": 2.2660664971213706e-05, + "loss": 0.0305, "step": 79400 }, { "epoch": 3.71, - "learning_rate": 1.2648727204537998e-05, - "loss": 0.0312, + "learning_rate": 2.2660196901377686e-05, + "loss": 0.036, "step": 79405 }, { "epoch": 3.71, - "learning_rate": 1.264825840326286e-05, - "loss": 0.046, + "learning_rate": 2.2659728831541666e-05, + "loss": 0.0659, "step": 79410 }, { "epoch": 3.71, - "learning_rate": 1.264778960198772e-05, - "loss": 0.0752, + "learning_rate": 2.265926076170565e-05, + "loss": 0.0651, "step": 79415 }, { "epoch": 3.71, - "learning_rate": 1.264732080071258e-05, - "loss": 0.0744, + "learning_rate": 2.265879269186963e-05, + "loss": 0.0399, "step": 79420 }, { "epoch": 3.71, - "learning_rate": 1.264685199943744e-05, - "loss": 0.0209, + "learning_rate": 2.265832462203361e-05, + "loss": 0.0574, "step": 79425 }, { "epoch": 3.71, - "learning_rate": 1.2646383198162299e-05, - "loss": 0.1069, + "learning_rate": 2.265785655219759e-05, + "loss": 0.2155, "step": 79430 }, { "epoch": 3.71, - "learning_rate": 1.264591439688716e-05, - "loss": 0.1733, + "learning_rate": 2.265738848236157e-05, + "loss": 0.1631, "step": 79435 }, { "epoch": 3.71, - "learning_rate": 1.264544559561202e-05, - "loss": 0.2361, + "learning_rate": 2.2656920412525548e-05, + "loss": 0.0919, "step": 79440 }, { "epoch": 3.71, - "learning_rate": 1.264497679433688e-05, - "loss": 0.2983, + "learning_rate": 2.2656452342689528e-05, + "loss": 0.1644, "step": 79445 }, { "epoch": 3.71, - "learning_rate": 1.2644507993061744e-05, - "loss": 0.0935, + "learning_rate": 2.265598427285351e-05, + "loss": 0.1061, "step": 79450 }, { "epoch": 3.71, - "learning_rate": 1.2644039191786604e-05, - "loss": 0.0397, + "learning_rate": 2.265551620301749e-05, + "loss": 0.0443, "step": 79455 }, { "epoch": 3.71, - "learning_rate": 1.2643570390511464e-05, - "loss": 0.0362, + "learning_rate": 2.265504813318147e-05, + "loss": 0.0458, "step": 79460 }, { "epoch": 3.71, - "learning_rate": 1.2643101589236324e-05, - "loss": 0.0528, + "learning_rate": 2.265458006334545e-05, + "loss": 0.0981, "step": 79465 }, { "epoch": 3.71, - "learning_rate": 1.2642632787961184e-05, - "loss": 0.0264, + "learning_rate": 2.2654111993509434e-05, + "loss": 0.025, "step": 79470 }, { "epoch": 3.71, - "learning_rate": 1.2642163986686045e-05, - "loss": 0.0674, + "learning_rate": 2.2653643923673414e-05, + "loss": 0.1162, "step": 79475 }, { "epoch": 3.71, - "learning_rate": 1.2641695185410905e-05, - "loss": 0.0648, + "learning_rate": 2.2653175853837393e-05, + "loss": 0.0565, "step": 79480 }, { "epoch": 3.71, - "learning_rate": 1.2641226384135765e-05, - "loss": 0.1282, + "learning_rate": 2.2652707784001377e-05, + "loss": 0.153, "step": 79485 }, { "epoch": 3.71, - "learning_rate": 1.2640757582860625e-05, - "loss": 0.1517, + "learning_rate": 2.2652239714165353e-05, + "loss": 0.2194, "step": 79490 }, { "epoch": 3.71, - "learning_rate": 1.2640288781585488e-05, - "loss": 0.2751, + "learning_rate": 2.2651771644329333e-05, + "loss": 0.3534, "step": 79495 }, { "epoch": 3.71, - "learning_rate": 1.2639819980310348e-05, - "loss": 0.0242, + "learning_rate": 2.2651303574493313e-05, + "loss": 0.0887, "step": 79500 }, { "epoch": 3.71, - "learning_rate": 1.2639351179035208e-05, - "loss": 0.0099, + "learning_rate": 2.2650835504657296e-05, + "loss": 0.0403, "step": 79505 }, { "epoch": 3.71, - "learning_rate": 1.2638882377760068e-05, - "loss": 0.0235, + "learning_rate": 2.2650367434821276e-05, + "loss": 0.0194, "step": 79510 }, { "epoch": 3.71, - "learning_rate": 1.263841357648493e-05, - "loss": 0.0483, + "learning_rate": 2.2649899364985256e-05, + "loss": 0.0337, "step": 79515 }, { "epoch": 3.71, - "learning_rate": 1.263794477520979e-05, - "loss": 0.015, + "learning_rate": 2.2649431295149235e-05, + "loss": 0.0204, "step": 79520 }, { "epoch": 3.71, - "learning_rate": 1.263747597393465e-05, - "loss": 0.0684, + "learning_rate": 2.264896322531322e-05, + "loss": 0.0595, "step": 79525 }, { "epoch": 3.71, - "learning_rate": 1.263700717265951e-05, - "loss": 0.1226, + "learning_rate": 2.26484951554772e-05, + "loss": 0.096, "step": 79530 }, { "epoch": 3.71, - "learning_rate": 1.263653837138437e-05, - "loss": 0.1239, + "learning_rate": 2.2648027085641178e-05, + "loss": 0.2024, "step": 79535 }, { "epoch": 3.71, - "learning_rate": 1.2636069570109231e-05, - "loss": 0.0897, + "learning_rate": 2.2647559015805158e-05, + "loss": 0.1221, "step": 79540 }, { "epoch": 3.71, - "learning_rate": 1.2635600768834093e-05, - "loss": 0.3738, + "learning_rate": 2.264709094596914e-05, + "loss": 0.3267, "step": 79545 }, { "epoch": 3.71, - "learning_rate": 1.2635131967558953e-05, - "loss": 0.0834, + "learning_rate": 2.264662287613312e-05, + "loss": 0.0485, "step": 79550 }, { "epoch": 3.71, - "learning_rate": 1.2634663166283814e-05, - "loss": 0.0141, + "learning_rate": 2.2646154806297098e-05, + "loss": 0.0645, "step": 79555 }, { "epoch": 3.71, - "learning_rate": 1.2634194365008674e-05, - "loss": 0.0216, + "learning_rate": 2.264568673646108e-05, + "loss": 0.0132, "step": 79560 }, { "epoch": 3.71, - "learning_rate": 1.2633725563733534e-05, - "loss": 0.0372, + "learning_rate": 2.264521866662506e-05, + "loss": 0.0424, "step": 79565 }, { "epoch": 3.71, - "learning_rate": 1.2633256762458394e-05, - "loss": 0.0566, + "learning_rate": 2.264475059678904e-05, + "loss": 0.0741, "step": 79570 }, { "epoch": 3.71, - "learning_rate": 1.2632787961183254e-05, - "loss": 0.0272, + "learning_rate": 2.264428252695302e-05, + "loss": 0.1111, "step": 79575 }, { "epoch": 3.71, - "learning_rate": 1.2632319159908116e-05, - "loss": 0.0713, + "learning_rate": 2.2643814457117003e-05, + "loss": 0.1886, "step": 79580 }, { "epoch": 3.71, - "learning_rate": 1.2631850358632976e-05, - "loss": 0.1718, + "learning_rate": 2.2643346387280983e-05, + "loss": 0.1498, "step": 79585 }, { "epoch": 3.71, - "learning_rate": 1.2631381557357837e-05, - "loss": 0.1198, + "learning_rate": 2.2642878317444963e-05, + "loss": 0.2644, "step": 79590 }, { "epoch": 3.71, - "learning_rate": 1.2630912756082699e-05, - "loss": 0.2892, + "learning_rate": 2.2642410247608943e-05, + "loss": 0.2278, "step": 79595 }, { "epoch": 3.71, - "learning_rate": 1.2630443954807559e-05, - "loss": 0.0324, + "learning_rate": 2.2641942177772926e-05, + "loss": 0.0585, "step": 79600 }, { "epoch": 3.71, - "learning_rate": 1.2629975153532419e-05, - "loss": 0.0319, + "learning_rate": 2.2641474107936906e-05, + "loss": 0.0373, "step": 79605 }, { "epoch": 3.71, - "learning_rate": 1.2629506352257279e-05, - "loss": 0.0529, + "learning_rate": 2.2641006038100886e-05, + "loss": 0.0879, "step": 79610 }, { "epoch": 3.71, - "learning_rate": 1.2629037550982139e-05, - "loss": 0.015, + "learning_rate": 2.264053796826487e-05, + "loss": 0.0609, "step": 79615 }, { "epoch": 3.72, - "learning_rate": 1.2628568749707e-05, - "loss": 0.0752, + "learning_rate": 2.2640069898428845e-05, + "loss": 0.0718, "step": 79620 }, { "epoch": 3.72, - "learning_rate": 1.262809994843186e-05, - "loss": 0.062, + "learning_rate": 2.2639601828592825e-05, + "loss": 0.0486, "step": 79625 }, { "epoch": 3.72, - "learning_rate": 1.262763114715672e-05, - "loss": 0.1813, + "learning_rate": 2.2639133758756805e-05, + "loss": 0.1504, "step": 79630 }, { "epoch": 3.72, - "learning_rate": 1.2627162345881584e-05, - "loss": 0.1121, + "learning_rate": 2.2638665688920788e-05, + "loss": 0.072, "step": 79635 }, { "epoch": 3.72, - "learning_rate": 1.2626693544606443e-05, - "loss": 0.1794, + "learning_rate": 2.2638197619084768e-05, + "loss": 0.2988, "step": 79640 }, { "epoch": 3.72, - "learning_rate": 1.2626224743331303e-05, - "loss": 0.2176, + "learning_rate": 2.2637729549248748e-05, + "loss": 0.2334, "step": 79645 }, { "epoch": 3.72, - "learning_rate": 1.2625755942056163e-05, - "loss": 0.0534, + "learning_rate": 2.2637261479412728e-05, + "loss": 0.086, "step": 79650 }, { "epoch": 3.72, - "learning_rate": 1.2625287140781023e-05, - "loss": 0.0285, + "learning_rate": 2.263679340957671e-05, + "loss": 0.0212, "step": 79655 }, { "epoch": 3.72, - "learning_rate": 1.2624818339505885e-05, - "loss": 0.0811, + "learning_rate": 2.263632533974069e-05, + "loss": 0.0386, "step": 79660 }, { "epoch": 3.72, - "learning_rate": 1.2624349538230745e-05, - "loss": 0.0518, + "learning_rate": 2.263585726990467e-05, + "loss": 0.1297, "step": 79665 }, { "epoch": 3.72, - "learning_rate": 1.2623880736955605e-05, - "loss": 0.0688, + "learning_rate": 2.2635389200068654e-05, + "loss": 0.0501, "step": 79670 }, { "epoch": 3.72, - "learning_rate": 1.2623411935680465e-05, - "loss": 0.0886, + "learning_rate": 2.2634921130232633e-05, + "loss": 0.1009, "step": 79675 }, { "epoch": 3.72, - "learning_rate": 1.2622943134405328e-05, - "loss": 0.0969, + "learning_rate": 2.263445306039661e-05, + "loss": 0.1486, "step": 79680 }, { "epoch": 3.72, - "learning_rate": 1.2622474333130188e-05, - "loss": 0.053, + "learning_rate": 2.263398499056059e-05, + "loss": 0.0626, "step": 79685 }, { "epoch": 3.72, - "learning_rate": 1.2622005531855048e-05, - "loss": 0.3041, + "learning_rate": 2.2633516920724573e-05, + "loss": 0.1048, "step": 79690 }, { "epoch": 3.72, - "learning_rate": 1.2621536730579908e-05, - "loss": 0.2311, + "learning_rate": 2.2633048850888553e-05, + "loss": 0.4594, "step": 79695 }, { "epoch": 3.72, - "learning_rate": 1.262106792930477e-05, - "loss": 0.0817, + "learning_rate": 2.2632580781052533e-05, + "loss": 0.0732, "step": 79700 }, { "epoch": 3.72, - "learning_rate": 1.262059912802963e-05, - "loss": 0.0394, + "learning_rate": 2.2632112711216512e-05, + "loss": 0.037, "step": 79705 }, { "epoch": 3.72, - "learning_rate": 1.262013032675449e-05, - "loss": 0.0818, + "learning_rate": 2.2631644641380496e-05, + "loss": 0.0133, "step": 79710 }, { "epoch": 3.72, - "learning_rate": 1.261966152547935e-05, - "loss": 0.0913, + "learning_rate": 2.2631176571544475e-05, + "loss": 0.0526, "step": 79715 }, { "epoch": 3.72, - "learning_rate": 1.2619192724204211e-05, - "loss": 0.0738, + "learning_rate": 2.2630708501708455e-05, + "loss": 0.0687, "step": 79720 }, { "epoch": 3.72, - "learning_rate": 1.261872392292907e-05, - "loss": 0.0892, + "learning_rate": 2.2630240431872435e-05, + "loss": 0.0913, "step": 79725 }, { "epoch": 3.72, - "learning_rate": 1.2618255121653932e-05, - "loss": 0.1014, + "learning_rate": 2.2629772362036418e-05, + "loss": 0.1861, "step": 79730 }, { "epoch": 3.72, - "learning_rate": 1.2617786320378792e-05, - "loss": 0.1061, + "learning_rate": 2.2629304292200398e-05, + "loss": 0.1512, "step": 79735 }, { "epoch": 3.72, - "learning_rate": 1.2617317519103654e-05, - "loss": 0.2978, + "learning_rate": 2.2628836222364378e-05, + "loss": 0.1053, "step": 79740 }, { "epoch": 3.72, - "learning_rate": 1.2616848717828514e-05, - "loss": 0.2318, + "learning_rate": 2.2628368152528358e-05, + "loss": 0.2336, "step": 79745 }, { "epoch": 3.72, - "learning_rate": 1.2616379916553374e-05, - "loss": 0.0564, + "learning_rate": 2.2627900082692338e-05, + "loss": 0.0441, "step": 79750 }, { "epoch": 3.72, - "learning_rate": 1.2615911115278234e-05, - "loss": 0.0441, + "learning_rate": 2.2627432012856317e-05, + "loss": 0.0453, "step": 79755 }, { "epoch": 3.72, - "learning_rate": 1.2615442314003095e-05, - "loss": 0.0578, + "learning_rate": 2.2626963943020297e-05, + "loss": 0.0467, "step": 79760 }, { "epoch": 3.72, - "learning_rate": 1.2614973512727955e-05, - "loss": 0.0798, + "learning_rate": 2.262649587318428e-05, + "loss": 0.0819, "step": 79765 }, { "epoch": 3.72, - "learning_rate": 1.2614504711452815e-05, - "loss": 0.0579, + "learning_rate": 2.262602780334826e-05, + "loss": 0.0453, "step": 79770 }, { "epoch": 3.72, - "learning_rate": 1.2614035910177677e-05, - "loss": 0.0825, + "learning_rate": 2.262555973351224e-05, + "loss": 0.0414, "step": 79775 }, { "epoch": 3.72, - "learning_rate": 1.2613567108902539e-05, - "loss": 0.0441, + "learning_rate": 2.262509166367622e-05, + "loss": 0.0632, "step": 79780 }, { "epoch": 3.72, - "learning_rate": 1.2613098307627398e-05, - "loss": 0.2085, + "learning_rate": 2.2624623593840203e-05, + "loss": 0.1206, "step": 79785 }, { "epoch": 3.72, - "learning_rate": 1.2612629506352258e-05, - "loss": 0.2451, + "learning_rate": 2.2624155524004183e-05, + "loss": 0.213, "step": 79790 }, { "epoch": 3.72, - "learning_rate": 1.2612160705077118e-05, - "loss": 0.1445, + "learning_rate": 2.2623687454168163e-05, + "loss": 0.3363, "step": 79795 }, { "epoch": 3.72, - "learning_rate": 1.261169190380198e-05, - "loss": 0.0607, + "learning_rate": 2.2623219384332146e-05, + "loss": 0.1028, "step": 79800 }, { "epoch": 3.72, - "learning_rate": 1.261122310252684e-05, - "loss": 0.0031, + "learning_rate": 2.2622751314496126e-05, + "loss": 0.0424, "step": 79805 }, { "epoch": 3.72, - "learning_rate": 1.26107543012517e-05, - "loss": 0.0603, + "learning_rate": 2.2622283244660102e-05, + "loss": 0.0205, "step": 79810 }, { "epoch": 3.72, - "learning_rate": 1.261028549997656e-05, - "loss": 0.0963, + "learning_rate": 2.2621815174824082e-05, + "loss": 0.0367, "step": 79815 }, { "epoch": 3.72, - "learning_rate": 1.2609816698701423e-05, - "loss": 0.1376, + "learning_rate": 2.2621347104988065e-05, + "loss": 0.0536, "step": 79820 }, { "epoch": 3.72, - "learning_rate": 1.2609347897426283e-05, - "loss": 0.0389, + "learning_rate": 2.2620879035152045e-05, + "loss": 0.0535, "step": 79825 }, { "epoch": 3.72, - "learning_rate": 1.2608879096151143e-05, - "loss": 0.0209, + "learning_rate": 2.2620410965316025e-05, + "loss": 0.0339, "step": 79830 }, { "epoch": 3.73, - "learning_rate": 1.2608410294876003e-05, - "loss": 0.057, + "learning_rate": 2.2619942895480005e-05, + "loss": 0.1479, "step": 79835 }, { "epoch": 3.73, - "learning_rate": 1.2607941493600865e-05, - "loss": 0.2354, + "learning_rate": 2.2619474825643988e-05, + "loss": 0.134, "step": 79840 }, { "epoch": 3.73, - "learning_rate": 1.2607472692325724e-05, - "loss": 0.2524, + "learning_rate": 2.2619006755807968e-05, + "loss": 0.2473, "step": 79845 }, { "epoch": 3.73, - "learning_rate": 1.2607003891050584e-05, - "loss": 0.0598, + "learning_rate": 2.2618538685971947e-05, + "loss": 0.0835, "step": 79850 }, { "epoch": 3.73, - "learning_rate": 1.2606535089775444e-05, - "loss": 0.0671, + "learning_rate": 2.261807061613593e-05, + "loss": 0.0419, "step": 79855 }, { "epoch": 3.73, - "learning_rate": 1.2606066288500304e-05, - "loss": 0.0201, + "learning_rate": 2.261760254629991e-05, + "loss": 0.0465, "step": 79860 }, { "epoch": 3.73, - "learning_rate": 1.2605597487225166e-05, - "loss": 0.0904, + "learning_rate": 2.261713447646389e-05, + "loss": 0.0357, "step": 79865 }, { "epoch": 3.73, - "learning_rate": 1.2605128685950027e-05, - "loss": 0.0632, + "learning_rate": 2.2616666406627867e-05, + "loss": 0.0719, "step": 79870 }, { "epoch": 3.73, - "learning_rate": 1.2604659884674887e-05, - "loss": 0.062, + "learning_rate": 2.261619833679185e-05, + "loss": 0.0855, "step": 79875 }, { "epoch": 3.73, - "learning_rate": 1.2604191083399749e-05, - "loss": 0.0882, + "learning_rate": 2.261573026695583e-05, + "loss": 0.1084, "step": 79880 }, { "epoch": 3.73, - "learning_rate": 1.2603722282124609e-05, - "loss": 0.1525, + "learning_rate": 2.261526219711981e-05, + "loss": 0.1106, "step": 79885 }, { "epoch": 3.73, - "learning_rate": 1.2603253480849469e-05, - "loss": 0.1475, + "learning_rate": 2.261479412728379e-05, + "loss": 0.2183, "step": 79890 }, { "epoch": 3.73, - "learning_rate": 1.2602784679574329e-05, - "loss": 0.3101, + "learning_rate": 2.2614326057447773e-05, + "loss": 0.1852, "step": 79895 }, { "epoch": 3.73, - "learning_rate": 1.2602315878299189e-05, - "loss": 0.0707, + "learning_rate": 2.2613857987611752e-05, + "loss": 0.0776, "step": 79900 }, { "epoch": 3.73, - "learning_rate": 1.260184707702405e-05, - "loss": 0.0436, + "learning_rate": 2.2613389917775732e-05, + "loss": 0.0276, "step": 79905 }, { "epoch": 3.73, - "learning_rate": 1.260137827574891e-05, - "loss": 0.0728, + "learning_rate": 2.2612921847939712e-05, + "loss": 0.0276, "step": 79910 }, { "epoch": 3.73, - "learning_rate": 1.2600909474473772e-05, - "loss": 0.0497, + "learning_rate": 2.2612453778103695e-05, + "loss": 0.025, "step": 79915 }, { "epoch": 3.73, - "learning_rate": 1.2600440673198634e-05, - "loss": 0.0326, + "learning_rate": 2.2611985708267675e-05, + "loss": 0.0466, "step": 79920 }, { "epoch": 3.73, - "learning_rate": 1.2599971871923494e-05, - "loss": 0.0354, + "learning_rate": 2.2611517638431655e-05, + "loss": 0.0604, "step": 79925 }, { "epoch": 3.73, - "learning_rate": 1.2599503070648353e-05, + "learning_rate": 2.2611049568595638e-05, "loss": 0.1185, "step": 79930 }, { "epoch": 3.73, - "learning_rate": 1.2599034269373213e-05, - "loss": 0.1294, + "learning_rate": 2.2610581498759614e-05, + "loss": 0.0482, "step": 79935 }, { "epoch": 3.73, - "learning_rate": 1.2598565468098073e-05, - "loss": 0.1781, + "learning_rate": 2.2610113428923594e-05, + "loss": 0.1429, "step": 79940 }, { "epoch": 3.73, - "learning_rate": 1.2598096666822935e-05, - "loss": 0.1797, + "learning_rate": 2.2609645359087574e-05, + "loss": 0.261, "step": 79945 }, { "epoch": 3.73, - "learning_rate": 1.2597627865547795e-05, - "loss": 0.0274, + "learning_rate": 2.2609177289251557e-05, + "loss": 0.1163, "step": 79950 }, { "epoch": 3.73, - "learning_rate": 1.2597159064272655e-05, - "loss": 0.0551, + "learning_rate": 2.2608709219415537e-05, + "loss": 0.033, "step": 79955 }, { "epoch": 3.73, - "learning_rate": 1.2596690262997518e-05, - "loss": 0.0142, + "learning_rate": 2.2608241149579517e-05, + "loss": 0.0655, "step": 79960 }, { "epoch": 3.73, - "learning_rate": 1.2596221461722378e-05, - "loss": 0.045, + "learning_rate": 2.2607773079743497e-05, + "loss": 0.0841, "step": 79965 }, { "epoch": 3.73, - "learning_rate": 1.2595752660447238e-05, - "loss": 0.0673, + "learning_rate": 2.260730500990748e-05, + "loss": 0.0549, "step": 79970 }, { "epoch": 3.73, - "learning_rate": 1.2595283859172098e-05, - "loss": 0.1018, + "learning_rate": 2.260683694007146e-05, + "loss": 0.0508, "step": 79975 }, { "epoch": 3.73, - "learning_rate": 1.2594815057896958e-05, - "loss": 0.1354, + "learning_rate": 2.260636887023544e-05, + "loss": 0.1232, "step": 79980 }, { "epoch": 3.73, - "learning_rate": 1.259434625662182e-05, - "loss": 0.134, + "learning_rate": 2.2605900800399423e-05, + "loss": 0.0702, "step": 79985 }, { "epoch": 3.73, - "learning_rate": 1.259387745534668e-05, - "loss": 0.2244, + "learning_rate": 2.2605432730563403e-05, + "loss": 0.1341, "step": 79990 }, { "epoch": 3.73, - "learning_rate": 1.259340865407154e-05, - "loss": 0.2815, + "learning_rate": 2.2604964660727382e-05, + "loss": 0.2371, "step": 79995 }, { "epoch": 3.73, - "learning_rate": 1.25929398527964e-05, - "loss": 0.0444, + "learning_rate": 2.260449659089136e-05, + "loss": 0.043, "step": 80000 }, { "epoch": 3.73, - "learning_rate": 1.2592471051521263e-05, - "loss": 0.0804, + "learning_rate": 2.2604028521055342e-05, + "loss": 0.0298, "step": 80005 }, { "epoch": 3.73, - "learning_rate": 1.2592002250246123e-05, - "loss": 0.0218, + "learning_rate": 2.2603560451219322e-05, + "loss": 0.0874, "step": 80010 }, { "epoch": 3.73, - "learning_rate": 1.2591533448970983e-05, - "loss": 0.0604, + "learning_rate": 2.2603092381383302e-05, + "loss": 0.0331, "step": 80015 }, { "epoch": 3.73, - "learning_rate": 1.2591064647695842e-05, - "loss": 0.0753, + "learning_rate": 2.260262431154728e-05, + "loss": 0.1456, "step": 80020 }, { "epoch": 3.73, - "learning_rate": 1.2590595846420704e-05, - "loss": 0.0844, + "learning_rate": 2.2602156241711265e-05, + "loss": 0.1048, "step": 80025 }, { "epoch": 3.73, - "learning_rate": 1.2590127045145564e-05, - "loss": 0.0364, + "learning_rate": 2.2601688171875245e-05, + "loss": 0.1152, "step": 80030 }, { "epoch": 3.73, - "learning_rate": 1.2589658243870424e-05, - "loss": 0.1538, + "learning_rate": 2.2601220102039224e-05, + "loss": 0.1155, "step": 80035 }, { "epoch": 3.73, - "learning_rate": 1.2589189442595284e-05, - "loss": 0.1413, + "learning_rate": 2.2600752032203208e-05, + "loss": 0.1624, "step": 80040 }, { "epoch": 3.74, - "learning_rate": 1.2588720641320144e-05, - "loss": 0.2433, + "learning_rate": 2.2600283962367187e-05, + "loss": 0.1401, "step": 80045 }, { "epoch": 3.74, - "learning_rate": 1.2588251840045005e-05, - "loss": 0.0731, + "learning_rate": 2.2599815892531167e-05, + "loss": 0.0625, "step": 80050 }, { "epoch": 3.74, - "learning_rate": 1.2587783038769867e-05, - "loss": 0.0485, + "learning_rate": 2.2599347822695147e-05, + "loss": 0.0514, "step": 80055 }, { "epoch": 3.74, - "learning_rate": 1.2587314237494727e-05, - "loss": 0.0428, + "learning_rate": 2.2598879752859127e-05, + "loss": 0.0568, "step": 80060 }, { "epoch": 3.74, - "learning_rate": 1.2586845436219589e-05, - "loss": 0.0224, + "learning_rate": 2.2598411683023107e-05, + "loss": 0.047, "step": 80065 }, { "epoch": 3.74, - "learning_rate": 1.2586376634944449e-05, - "loss": 0.0614, + "learning_rate": 2.2597943613187087e-05, + "loss": 0.0727, "step": 80070 }, { "epoch": 3.74, - "learning_rate": 1.2585907833669308e-05, - "loss": 0.0539, + "learning_rate": 2.2597475543351066e-05, + "loss": 0.0935, "step": 80075 }, { "epoch": 3.74, - "learning_rate": 1.2585439032394168e-05, - "loss": 0.1579, + "learning_rate": 2.259700747351505e-05, + "loss": 0.0783, "step": 80080 }, { "epoch": 3.74, - "learning_rate": 1.2584970231119028e-05, - "loss": 0.1344, + "learning_rate": 2.259653940367903e-05, + "loss": 0.0935, "step": 80085 }, { "epoch": 3.74, - "learning_rate": 1.258450142984389e-05, - "loss": 0.1079, + "learning_rate": 2.259607133384301e-05, + "loss": 0.142, "step": 80090 }, { "epoch": 3.74, - "learning_rate": 1.258403262856875e-05, - "loss": 0.3042, + "learning_rate": 2.2595603264006992e-05, + "loss": 0.2139, "step": 80095 }, { "epoch": 3.74, - "learning_rate": 1.2583563827293612e-05, - "loss": 0.0669, + "learning_rate": 2.2595135194170972e-05, + "loss": 0.0762, "step": 80100 }, { "epoch": 3.74, - "learning_rate": 1.2583095026018473e-05, - "loss": 0.0288, + "learning_rate": 2.2594667124334952e-05, + "loss": 0.0388, "step": 80105 }, { "epoch": 3.74, - "learning_rate": 1.2582626224743333e-05, - "loss": 0.0293, + "learning_rate": 2.2594199054498932e-05, + "loss": 0.0429, "step": 80110 }, { "epoch": 3.74, - "learning_rate": 1.2582157423468193e-05, - "loss": 0.0376, + "learning_rate": 2.2593730984662915e-05, + "loss": 0.0741, "step": 80115 }, { "epoch": 3.74, - "learning_rate": 1.2581688622193053e-05, - "loss": 0.0837, + "learning_rate": 2.2593262914826895e-05, + "loss": 0.0392, "step": 80120 }, { "epoch": 3.74, - "learning_rate": 1.2581219820917913e-05, - "loss": 0.0305, + "learning_rate": 2.259279484499087e-05, + "loss": 0.1095, "step": 80125 }, { "epoch": 3.74, - "learning_rate": 1.2580751019642775e-05, - "loss": 0.0277, + "learning_rate": 2.259232677515485e-05, + "loss": 0.0864, "step": 80130 }, { "epoch": 3.74, - "learning_rate": 1.2580282218367634e-05, - "loss": 0.1386, + "learning_rate": 2.2591858705318834e-05, + "loss": 0.1118, "step": 80135 }, { "epoch": 3.74, - "learning_rate": 1.2579813417092494e-05, - "loss": 0.2618, + "learning_rate": 2.2591390635482814e-05, + "loss": 0.3407, "step": 80140 }, { "epoch": 3.74, - "learning_rate": 1.2579344615817358e-05, - "loss": 0.3455, + "learning_rate": 2.2590922565646794e-05, + "loss": 0.2283, "step": 80145 }, { "epoch": 3.74, - "learning_rate": 1.2578875814542218e-05, - "loss": 0.0272, + "learning_rate": 2.2590454495810774e-05, + "loss": 0.0639, "step": 80150 }, { "epoch": 3.74, - "learning_rate": 1.2578407013267078e-05, - "loss": 0.0101, + "learning_rate": 2.2589986425974757e-05, + "loss": 0.006, "step": 80155 }, { "epoch": 3.74, - "learning_rate": 1.2577938211991938e-05, - "loss": 0.0286, + "learning_rate": 2.2589518356138737e-05, + "loss": 0.0439, "step": 80160 }, { "epoch": 3.74, - "learning_rate": 1.2577469410716797e-05, - "loss": 0.0154, + "learning_rate": 2.2589050286302717e-05, + "loss": 0.0567, "step": 80165 }, { "epoch": 3.74, - "learning_rate": 1.2577000609441659e-05, - "loss": 0.0527, + "learning_rate": 2.25885822164667e-05, + "loss": 0.0482, "step": 80170 }, { "epoch": 3.74, - "learning_rate": 1.2576531808166519e-05, - "loss": 0.054, + "learning_rate": 2.258811414663068e-05, + "loss": 0.0302, "step": 80175 }, { "epoch": 3.74, - "learning_rate": 1.2576063006891379e-05, - "loss": 0.108, + "learning_rate": 2.258764607679466e-05, + "loss": 0.0515, "step": 80180 }, { "epoch": 3.74, - "learning_rate": 1.2575594205616239e-05, - "loss": 0.1607, + "learning_rate": 2.2587178006958636e-05, + "loss": 0.1951, "step": 80185 }, { "epoch": 3.74, - "learning_rate": 1.25751254043411e-05, - "loss": 0.2015, + "learning_rate": 2.258670993712262e-05, + "loss": 0.1239, "step": 80190 }, { "epoch": 3.74, - "learning_rate": 1.2574656603065962e-05, - "loss": 0.2262, + "learning_rate": 2.25862418672866e-05, + "loss": 0.1687, "step": 80195 }, { "epoch": 3.74, - "learning_rate": 1.2574187801790822e-05, - "loss": 0.0363, + "learning_rate": 2.258577379745058e-05, + "loss": 0.0508, "step": 80200 }, { "epoch": 3.74, - "learning_rate": 1.2573719000515682e-05, - "loss": 0.0288, + "learning_rate": 2.258530572761456e-05, + "loss": 0.0232, "step": 80205 }, { "epoch": 3.74, - "learning_rate": 1.2573250199240544e-05, - "loss": 0.0167, + "learning_rate": 2.2584837657778542e-05, + "loss": 0.0319, "step": 80210 }, { "epoch": 3.74, - "learning_rate": 1.2572781397965404e-05, - "loss": 0.0268, + "learning_rate": 2.258436958794252e-05, + "loss": 0.0648, "step": 80215 }, { "epoch": 3.74, - "learning_rate": 1.2572312596690264e-05, - "loss": 0.0623, + "learning_rate": 2.25839015181065e-05, + "loss": 0.1184, "step": 80220 }, { "epoch": 3.74, - "learning_rate": 1.2571843795415123e-05, - "loss": 0.097, + "learning_rate": 2.2583433448270485e-05, + "loss": 0.0776, "step": 80225 }, { "epoch": 3.74, - "learning_rate": 1.2571374994139985e-05, - "loss": 0.0578, + "learning_rate": 2.2582965378434464e-05, + "loss": 0.2517, "step": 80230 }, { "epoch": 3.74, - "learning_rate": 1.2570906192864845e-05, - "loss": 0.0878, + "learning_rate": 2.2582497308598444e-05, + "loss": 0.1033, "step": 80235 }, { "epoch": 3.74, - "learning_rate": 1.2570437391589707e-05, - "loss": 0.2988, + "learning_rate": 2.2582029238762424e-05, + "loss": 0.0705, "step": 80240 }, { "epoch": 3.74, - "learning_rate": 1.2569968590314567e-05, - "loss": 0.3496, + "learning_rate": 2.2581561168926407e-05, + "loss": 0.2098, "step": 80245 }, { "epoch": 3.74, - "learning_rate": 1.2569499789039428e-05, - "loss": 0.0512, + "learning_rate": 2.2581093099090384e-05, + "loss": 0.0677, "step": 80250 }, { "epoch": 3.74, - "learning_rate": 1.2569030987764288e-05, - "loss": 0.035, + "learning_rate": 2.2580625029254363e-05, + "loss": 0.0431, "step": 80255 }, { "epoch": 3.75, - "learning_rate": 1.2568562186489148e-05, - "loss": 0.0614, + "learning_rate": 2.2580156959418343e-05, + "loss": 0.0346, "step": 80260 }, { "epoch": 3.75, - "learning_rate": 1.2568093385214008e-05, - "loss": 0.0579, + "learning_rate": 2.2579688889582326e-05, + "loss": 0.02, "step": 80265 }, { "epoch": 3.75, - "learning_rate": 1.256762458393887e-05, - "loss": 0.0558, + "learning_rate": 2.2579220819746306e-05, + "loss": 0.0543, "step": 80270 }, { "epoch": 3.75, - "learning_rate": 1.256715578266373e-05, - "loss": 0.1048, + "learning_rate": 2.2578752749910286e-05, + "loss": 0.0514, "step": 80275 }, { "epoch": 3.75, - "learning_rate": 1.256668698138859e-05, - "loss": 0.0946, + "learning_rate": 2.257828468007427e-05, + "loss": 0.1295, "step": 80280 }, { "epoch": 3.75, - "learning_rate": 1.2566218180113451e-05, - "loss": 0.1173, + "learning_rate": 2.257781661023825e-05, + "loss": 0.1067, "step": 80285 }, { "epoch": 3.75, - "learning_rate": 1.2565749378838313e-05, - "loss": 0.192, + "learning_rate": 2.257734854040223e-05, + "loss": 0.2234, "step": 80290 }, { "epoch": 3.75, - "learning_rate": 1.2565280577563173e-05, - "loss": 0.2514, + "learning_rate": 2.257688047056621e-05, + "loss": 0.2255, "step": 80295 }, { "epoch": 3.75, - "learning_rate": 1.2564811776288033e-05, - "loss": 0.0597, + "learning_rate": 2.2576412400730192e-05, + "loss": 0.0724, "step": 80300 }, { "epoch": 3.75, - "learning_rate": 1.2564342975012893e-05, - "loss": 0.031, + "learning_rate": 2.2575944330894172e-05, + "loss": 0.0269, "step": 80305 }, { "epoch": 3.75, - "learning_rate": 1.2563874173737754e-05, - "loss": 0.0351, + "learning_rate": 2.257547626105815e-05, + "loss": 0.0123, "step": 80310 }, { "epoch": 3.75, - "learning_rate": 1.2563405372462614e-05, - "loss": 0.0605, + "learning_rate": 2.2575008191222128e-05, + "loss": 0.0894, "step": 80315 }, { "epoch": 3.75, - "learning_rate": 1.2562936571187474e-05, - "loss": 0.0533, + "learning_rate": 2.257454012138611e-05, + "loss": 0.0692, "step": 80320 }, { "epoch": 3.75, - "learning_rate": 1.2562467769912334e-05, - "loss": 0.096, + "learning_rate": 2.257407205155009e-05, + "loss": 0.0268, "step": 80325 }, { "epoch": 3.75, - "learning_rate": 1.2561998968637197e-05, - "loss": 0.0566, + "learning_rate": 2.257360398171407e-05, + "loss": 0.1046, "step": 80330 }, { "epoch": 3.75, - "learning_rate": 1.2561530167362057e-05, - "loss": 0.1548, + "learning_rate": 2.257313591187805e-05, + "loss": 0.0841, "step": 80335 }, { "epoch": 3.75, - "learning_rate": 1.2561061366086917e-05, - "loss": 0.2723, + "learning_rate": 2.2572667842042034e-05, + "loss": 0.1631, "step": 80340 }, { "epoch": 3.75, - "learning_rate": 1.2560592564811777e-05, - "loss": 0.2603, + "learning_rate": 2.2572199772206014e-05, + "loss": 0.2071, "step": 80345 }, { "epoch": 3.75, - "learning_rate": 1.2560123763536639e-05, - "loss": 0.1099, + "learning_rate": 2.2571731702369994e-05, + "loss": 0.0443, "step": 80350 }, { "epoch": 3.75, - "learning_rate": 1.2559654962261499e-05, - "loss": 0.021, + "learning_rate": 2.2571263632533977e-05, + "loss": 0.0246, "step": 80355 }, { "epoch": 3.75, - "learning_rate": 1.2559186160986359e-05, - "loss": 0.0277, + "learning_rate": 2.2570795562697957e-05, + "loss": 0.0841, "step": 80360 }, { "epoch": 3.75, - "learning_rate": 1.2558717359711219e-05, - "loss": 0.0486, + "learning_rate": 2.2570327492861936e-05, + "loss": 0.0412, "step": 80365 }, { "epoch": 3.75, - "learning_rate": 1.2558248558436078e-05, - "loss": 0.0699, + "learning_rate": 2.2569859423025916e-05, + "loss": 0.1004, "step": 80370 }, { "epoch": 3.75, - "learning_rate": 1.255777975716094e-05, - "loss": 0.0733, + "learning_rate": 2.2569391353189896e-05, + "loss": 0.0679, "step": 80375 }, { "epoch": 3.75, - "learning_rate": 1.2557310955885802e-05, - "loss": 0.0568, + "learning_rate": 2.2568923283353876e-05, + "loss": 0.1392, "step": 80380 }, { "epoch": 3.75, - "learning_rate": 1.2556842154610662e-05, - "loss": 0.0946, + "learning_rate": 2.2568455213517856e-05, + "loss": 0.2198, "step": 80385 }, { "epoch": 3.75, - "learning_rate": 1.2556373353335523e-05, - "loss": 0.1323, + "learning_rate": 2.2567987143681835e-05, + "loss": 0.1181, "step": 80390 }, { "epoch": 3.75, - "learning_rate": 1.2555904552060383e-05, - "loss": 0.2184, + "learning_rate": 2.256751907384582e-05, + "loss": 0.1875, "step": 80395 }, { "epoch": 3.75, - "learning_rate": 1.2555435750785243e-05, - "loss": 0.0836, + "learning_rate": 2.25670510040098e-05, + "loss": 0.0611, "step": 80400 }, { "epoch": 3.75, - "learning_rate": 1.2554966949510103e-05, - "loss": 0.0477, + "learning_rate": 2.256658293417378e-05, + "loss": 0.0176, "step": 80405 }, { "epoch": 3.75, - "learning_rate": 1.2554498148234963e-05, - "loss": 0.0217, + "learning_rate": 2.256611486433776e-05, + "loss": 0.0308, "step": 80410 }, { "epoch": 3.75, - "learning_rate": 1.2554029346959825e-05, - "loss": 0.0079, + "learning_rate": 2.256564679450174e-05, + "loss": 0.0636, "step": 80415 }, { "epoch": 3.75, - "learning_rate": 1.2553560545684685e-05, - "loss": 0.0874, + "learning_rate": 2.256517872466572e-05, + "loss": 0.042, "step": 80420 }, { "epoch": 3.75, - "learning_rate": 1.2553091744409546e-05, - "loss": 0.0398, + "learning_rate": 2.25647106548297e-05, + "loss": 0.0549, "step": 80425 }, { "epoch": 3.75, - "learning_rate": 1.2552622943134408e-05, - "loss": 0.0725, + "learning_rate": 2.2564242584993684e-05, + "loss": 0.1107, "step": 80430 }, { "epoch": 3.75, - "learning_rate": 1.2552154141859268e-05, - "loss": 0.1184, + "learning_rate": 2.2563774515157664e-05, + "loss": 0.1023, "step": 80435 }, { "epoch": 3.75, - "learning_rate": 1.2551685340584128e-05, - "loss": 0.1652, + "learning_rate": 2.256330644532164e-05, + "loss": 0.2071, "step": 80440 }, { "epoch": 3.75, - "learning_rate": 1.2551216539308988e-05, - "loss": 0.3357, + "learning_rate": 2.256283837548562e-05, + "loss": 0.3878, "step": 80445 }, { "epoch": 3.75, - "learning_rate": 1.2550747738033848e-05, - "loss": 0.0782, + "learning_rate": 2.2562370305649603e-05, + "loss": 0.0872, "step": 80450 }, { "epoch": 3.75, - "learning_rate": 1.255027893675871e-05, - "loss": 0.0426, + "learning_rate": 2.2561902235813583e-05, + "loss": 0.0428, "step": 80455 }, { "epoch": 3.75, - "learning_rate": 1.2549810135483569e-05, - "loss": 0.015, + "learning_rate": 2.2561434165977563e-05, + "loss": 0.0501, "step": 80460 }, { "epoch": 3.75, - "learning_rate": 1.2549341334208429e-05, - "loss": 0.0465, + "learning_rate": 2.2560966096141546e-05, + "loss": 0.0578, "step": 80465 }, { "epoch": 3.75, - "learning_rate": 1.2548872532933292e-05, - "loss": 0.0607, + "learning_rate": 2.2560498026305526e-05, + "loss": 0.0381, "step": 80470 }, { "epoch": 3.76, - "learning_rate": 1.2548403731658152e-05, - "loss": 0.0239, + "learning_rate": 2.2560029956469506e-05, + "loss": 0.0518, "step": 80475 }, { "epoch": 3.76, - "learning_rate": 1.2547934930383012e-05, - "loss": 0.1087, + "learning_rate": 2.2559561886633486e-05, + "loss": 0.1014, "step": 80480 }, { "epoch": 3.76, - "learning_rate": 1.2547466129107872e-05, - "loss": 0.1056, + "learning_rate": 2.255909381679747e-05, + "loss": 0.0429, "step": 80485 }, { "epoch": 3.76, - "learning_rate": 1.2546997327832732e-05, - "loss": 0.2304, + "learning_rate": 2.255862574696145e-05, + "loss": 0.1489, "step": 80490 }, { "epoch": 3.76, - "learning_rate": 1.2546528526557594e-05, - "loss": 0.3079, + "learning_rate": 2.255815767712543e-05, + "loss": 0.2951, "step": 80495 }, { "epoch": 3.76, - "learning_rate": 1.2546059725282454e-05, - "loss": 0.0593, + "learning_rate": 2.255768960728941e-05, + "loss": 0.0595, "step": 80500 }, { "epoch": 3.76, - "learning_rate": 1.2545590924007314e-05, - "loss": 0.038, + "learning_rate": 2.2557221537453388e-05, + "loss": 0.0602, "step": 80505 }, { "epoch": 3.76, - "learning_rate": 1.2545122122732174e-05, - "loss": 0.0221, + "learning_rate": 2.2556753467617368e-05, + "loss": 0.0192, "step": 80510 }, { "epoch": 3.76, - "learning_rate": 1.2544653321457033e-05, - "loss": 0.0695, + "learning_rate": 2.2556285397781348e-05, + "loss": 0.0321, "step": 80515 }, { "epoch": 3.76, - "learning_rate": 1.2544184520181897e-05, - "loss": 0.0898, + "learning_rate": 2.2555817327945328e-05, + "loss": 0.1003, "step": 80520 }, { "epoch": 3.76, - "learning_rate": 1.2543715718906757e-05, - "loss": 0.0655, + "learning_rate": 2.255534925810931e-05, + "loss": 0.0375, "step": 80525 }, { "epoch": 3.76, - "learning_rate": 1.2543246917631617e-05, - "loss": 0.0924, + "learning_rate": 2.255488118827329e-05, + "loss": 0.0776, "step": 80530 }, { "epoch": 3.76, - "learning_rate": 1.2542778116356478e-05, - "loss": 0.1634, + "learning_rate": 2.255441311843727e-05, + "loss": 0.0711, "step": 80535 }, { "epoch": 3.76, - "learning_rate": 1.2542309315081338e-05, - "loss": 0.2233, + "learning_rate": 2.2553945048601254e-05, + "loss": 0.2079, "step": 80540 }, { "epoch": 3.76, - "learning_rate": 1.2541840513806198e-05, - "loss": 0.2174, + "learning_rate": 2.2553476978765234e-05, + "loss": 0.3264, "step": 80545 }, { "epoch": 3.76, - "learning_rate": 1.2541371712531058e-05, - "loss": 0.0375, + "learning_rate": 2.2553008908929213e-05, + "loss": 0.0575, "step": 80550 }, { "epoch": 3.76, - "learning_rate": 1.2540902911255918e-05, - "loss": 0.0261, + "learning_rate": 2.2552540839093193e-05, + "loss": 0.0142, "step": 80555 }, { "epoch": 3.76, - "learning_rate": 1.254043410998078e-05, - "loss": 0.03, + "learning_rate": 2.2552072769257176e-05, + "loss": 0.0275, "step": 80560 }, { "epoch": 3.76, - "learning_rate": 1.2539965308705641e-05, - "loss": 0.0588, + "learning_rate": 2.2551604699421153e-05, + "loss": 0.0728, "step": 80565 }, { "epoch": 3.76, - "learning_rate": 1.2539496507430501e-05, - "loss": 0.0313, + "learning_rate": 2.2551136629585133e-05, + "loss": 0.055, "step": 80570 }, { "epoch": 3.76, - "learning_rate": 1.2539027706155363e-05, - "loss": 0.0533, + "learning_rate": 2.2550668559749112e-05, + "loss": 0.1545, "step": 80575 }, { "epoch": 3.76, - "learning_rate": 1.2538558904880223e-05, - "loss": 0.0913, + "learning_rate": 2.2550200489913096e-05, + "loss": 0.0666, "step": 80580 }, { "epoch": 3.76, - "learning_rate": 1.2538090103605083e-05, - "loss": 0.1477, + "learning_rate": 2.2549732420077075e-05, + "loss": 0.0761, "step": 80585 }, { "epoch": 3.76, - "learning_rate": 1.2537621302329943e-05, - "loss": 0.1816, + "learning_rate": 2.2549264350241055e-05, + "loss": 0.2299, "step": 80590 }, { "epoch": 3.76, - "learning_rate": 1.2537152501054803e-05, - "loss": 0.2674, + "learning_rate": 2.254879628040504e-05, + "loss": 0.3614, "step": 80595 }, { "epoch": 3.76, - "learning_rate": 1.2536683699779664e-05, - "loss": 0.046, + "learning_rate": 2.254832821056902e-05, + "loss": 0.0478, "step": 80600 }, { "epoch": 3.76, - "learning_rate": 1.2536214898504524e-05, - "loss": 0.0391, + "learning_rate": 2.2547860140732998e-05, + "loss": 0.0204, "step": 80605 }, { "epoch": 3.76, - "learning_rate": 1.2535746097229386e-05, - "loss": 0.0888, + "learning_rate": 2.2547392070896978e-05, + "loss": 0.0442, "step": 80610 }, { "epoch": 3.76, - "learning_rate": 1.2535277295954247e-05, - "loss": 0.0344, + "learning_rate": 2.254692400106096e-05, + "loss": 0.03, "step": 80615 }, { "epoch": 3.76, - "learning_rate": 1.2534808494679107e-05, - "loss": 0.0741, + "learning_rate": 2.254645593122494e-05, + "loss": 0.0753, "step": 80620 }, { "epoch": 3.76, - "learning_rate": 1.2534339693403967e-05, - "loss": 0.0925, + "learning_rate": 2.254598786138892e-05, + "loss": 0.0641, "step": 80625 }, { "epoch": 3.76, - "learning_rate": 1.2533870892128827e-05, - "loss": 0.1463, + "learning_rate": 2.2545519791552897e-05, + "loss": 0.1385, "step": 80630 }, { "epoch": 3.76, - "learning_rate": 1.2533402090853687e-05, - "loss": 0.1298, + "learning_rate": 2.254505172171688e-05, + "loss": 0.0742, "step": 80635 }, { "epoch": 3.76, - "learning_rate": 1.2532933289578549e-05, - "loss": 0.1728, + "learning_rate": 2.254458365188086e-05, + "loss": 0.2227, "step": 80640 }, { "epoch": 3.76, - "learning_rate": 1.2532464488303409e-05, - "loss": 0.4064, + "learning_rate": 2.254411558204484e-05, + "loss": 0.2698, "step": 80645 }, { "epoch": 3.76, - "learning_rate": 1.2531995687028269e-05, - "loss": 0.0877, + "learning_rate": 2.2543647512208823e-05, + "loss": 0.0313, "step": 80650 }, { "epoch": 3.76, - "learning_rate": 1.2531526885753132e-05, - "loss": 0.039, + "learning_rate": 2.2543179442372803e-05, + "loss": 0.0164, "step": 80655 }, { "epoch": 3.76, - "learning_rate": 1.2531058084477992e-05, - "loss": 0.0609, + "learning_rate": 2.2542711372536783e-05, + "loss": 0.0576, "step": 80660 }, { "epoch": 3.76, - "learning_rate": 1.2530589283202852e-05, - "loss": 0.058, + "learning_rate": 2.2542243302700763e-05, + "loss": 0.033, "step": 80665 }, { "epoch": 3.76, - "learning_rate": 1.2530120481927712e-05, - "loss": 0.0619, + "learning_rate": 2.2541775232864746e-05, + "loss": 0.0889, "step": 80670 }, { "epoch": 3.76, - "learning_rate": 1.2529651680652573e-05, - "loss": 0.1318, + "learning_rate": 2.2541307163028726e-05, + "loss": 0.0973, "step": 80675 }, { "epoch": 3.76, - "learning_rate": 1.2529182879377433e-05, - "loss": 0.0365, + "learning_rate": 2.2540839093192706e-05, + "loss": 0.0626, "step": 80680 }, { "epoch": 3.76, - "learning_rate": 1.2528714078102293e-05, - "loss": 0.1273, + "learning_rate": 2.2540371023356685e-05, + "loss": 0.1014, "step": 80685 }, { "epoch": 3.77, - "learning_rate": 1.2528245276827153e-05, - "loss": 0.0779, + "learning_rate": 2.2539902953520665e-05, + "loss": 0.1736, "step": 80690 }, { "epoch": 3.77, - "learning_rate": 1.2527776475552013e-05, - "loss": 0.2552, + "learning_rate": 2.2539434883684645e-05, + "loss": 0.2768, "step": 80695 }, { "epoch": 3.77, - "learning_rate": 1.2527307674276875e-05, - "loss": 0.0446, + "learning_rate": 2.2538966813848625e-05, + "loss": 0.0603, "step": 80700 }, { "epoch": 3.77, - "learning_rate": 1.2526838873001736e-05, - "loss": 0.014, + "learning_rate": 2.2538498744012608e-05, + "loss": 0.0397, "step": 80705 }, { "epoch": 3.77, - "learning_rate": 1.2526370071726596e-05, - "loss": 0.029, + "learning_rate": 2.2538030674176588e-05, + "loss": 0.0189, "step": 80710 }, { "epoch": 3.77, - "learning_rate": 1.2525901270451458e-05, - "loss": 0.0484, + "learning_rate": 2.2537562604340568e-05, + "loss": 0.0572, "step": 80715 }, { "epoch": 3.77, - "learning_rate": 1.2525432469176318e-05, - "loss": 0.0712, + "learning_rate": 2.2537094534504548e-05, + "loss": 0.121, "step": 80720 }, { "epoch": 3.77, - "learning_rate": 1.2524963667901178e-05, - "loss": 0.1041, + "learning_rate": 2.253662646466853e-05, + "loss": 0.098, "step": 80725 }, { "epoch": 3.77, - "learning_rate": 1.2524494866626038e-05, - "loss": 0.1207, + "learning_rate": 2.253615839483251e-05, + "loss": 0.1361, "step": 80730 }, { "epoch": 3.77, - "learning_rate": 1.2524026065350898e-05, - "loss": 0.1015, + "learning_rate": 2.253569032499649e-05, + "loss": 0.2474, "step": 80735 }, { "epoch": 3.77, - "learning_rate": 1.252355726407576e-05, - "loss": 0.161, + "learning_rate": 2.253522225516047e-05, + "loss": 0.2938, "step": 80740 }, { "epoch": 3.77, - "learning_rate": 1.252308846280062e-05, - "loss": 0.2087, + "learning_rate": 2.2534754185324453e-05, + "loss": 0.2006, "step": 80745 }, { "epoch": 3.77, - "learning_rate": 1.2522619661525481e-05, - "loss": 0.0644, + "learning_rate": 2.2534286115488433e-05, + "loss": 0.0962, "step": 80750 }, { "epoch": 3.77, - "learning_rate": 1.2522150860250342e-05, - "loss": 0.027, + "learning_rate": 2.253381804565241e-05, + "loss": 0.0336, "step": 80755 }, { "epoch": 3.77, - "learning_rate": 1.2521682058975202e-05, - "loss": 0.0427, + "learning_rate": 2.253334997581639e-05, + "loss": 0.0434, "step": 80760 }, { "epoch": 3.77, - "learning_rate": 1.2521213257700062e-05, - "loss": 0.0347, + "learning_rate": 2.2532881905980373e-05, + "loss": 0.044, "step": 80765 }, { "epoch": 3.77, - "learning_rate": 1.2520744456424922e-05, - "loss": 0.0643, + "learning_rate": 2.2532413836144352e-05, + "loss": 0.0301, "step": 80770 }, { "epoch": 3.77, - "learning_rate": 1.2520275655149782e-05, - "loss": 0.078, + "learning_rate": 2.2531945766308332e-05, + "loss": 0.0592, "step": 80775 }, { "epoch": 3.77, - "learning_rate": 1.2519806853874644e-05, - "loss": 0.1366, + "learning_rate": 2.2531477696472315e-05, + "loss": 0.0837, "step": 80780 }, { "epoch": 3.77, - "learning_rate": 1.2519338052599504e-05, - "loss": 0.0901, + "learning_rate": 2.2531009626636295e-05, + "loss": 0.099, "step": 80785 }, { "epoch": 3.77, - "learning_rate": 1.2518869251324364e-05, - "loss": 0.1419, + "learning_rate": 2.2530541556800275e-05, + "loss": 0.1758, "step": 80790 }, { "epoch": 3.77, - "learning_rate": 1.2518400450049227e-05, - "loss": 0.158, + "learning_rate": 2.2530073486964255e-05, + "loss": 0.2774, "step": 80795 }, { "epoch": 3.77, - "learning_rate": 1.2517931648774087e-05, - "loss": 0.0393, + "learning_rate": 2.2529605417128238e-05, + "loss": 0.0735, "step": 80800 }, { "epoch": 3.77, - "learning_rate": 1.2517462847498947e-05, - "loss": 0.0096, + "learning_rate": 2.2529137347292218e-05, + "loss": 0.0183, "step": 80805 }, { "epoch": 3.77, - "learning_rate": 1.2516994046223807e-05, - "loss": 0.0595, + "learning_rate": 2.2528669277456198e-05, + "loss": 0.1163, "step": 80810 }, { "epoch": 3.77, - "learning_rate": 1.2516525244948667e-05, - "loss": 0.0377, + "learning_rate": 2.2528201207620178e-05, + "loss": 0.0951, "step": 80815 }, { "epoch": 3.77, - "learning_rate": 1.2516056443673528e-05, - "loss": 0.1042, + "learning_rate": 2.2527733137784157e-05, + "loss": 0.0531, "step": 80820 }, { "epoch": 3.77, - "learning_rate": 1.2515587642398388e-05, - "loss": 0.0524, + "learning_rate": 2.2527265067948137e-05, + "loss": 0.0786, "step": 80825 }, { "epoch": 3.77, - "learning_rate": 1.2515118841123248e-05, - "loss": 0.1414, + "learning_rate": 2.2526796998112117e-05, + "loss": 0.073, "step": 80830 }, { "epoch": 3.77, - "learning_rate": 1.2514650039848108e-05, - "loss": 0.1149, + "learning_rate": 2.25263289282761e-05, + "loss": 0.1282, "step": 80835 }, { "epoch": 3.77, - "learning_rate": 1.2514181238572968e-05, - "loss": 0.2525, + "learning_rate": 2.252586085844008e-05, + "loss": 0.1156, "step": 80840 }, { "epoch": 3.77, - "learning_rate": 1.2513712437297831e-05, - "loss": 0.3062, + "learning_rate": 2.252539278860406e-05, + "loss": 0.3498, "step": 80845 }, { "epoch": 3.77, - "learning_rate": 1.2513243636022691e-05, - "loss": 0.0789, + "learning_rate": 2.252492471876804e-05, + "loss": 0.0512, "step": 80850 }, { "epoch": 3.77, - "learning_rate": 1.2512774834747551e-05, - "loss": 0.0635, + "learning_rate": 2.2524456648932023e-05, + "loss": 0.0255, "step": 80855 }, { "epoch": 3.77, - "learning_rate": 1.2512306033472413e-05, - "loss": 0.0316, + "learning_rate": 2.2523988579096003e-05, + "loss": 0.0845, "step": 80860 }, { "epoch": 3.77, - "learning_rate": 1.2511837232197273e-05, - "loss": 0.0441, + "learning_rate": 2.2523520509259983e-05, + "loss": 0.0425, "step": 80865 }, { "epoch": 3.77, - "learning_rate": 1.2511368430922133e-05, - "loss": 0.0474, + "learning_rate": 2.2523052439423962e-05, + "loss": 0.0664, "step": 80870 }, { "epoch": 3.77, - "learning_rate": 1.2510899629646993e-05, - "loss": 0.0926, + "learning_rate": 2.2522584369587946e-05, + "loss": 0.0492, "step": 80875 }, { "epoch": 3.77, - "learning_rate": 1.2510430828371853e-05, - "loss": 0.1219, + "learning_rate": 2.2522116299751922e-05, + "loss": 0.0466, "step": 80880 }, { "epoch": 3.77, - "learning_rate": 1.2509962027096714e-05, - "loss": 0.105, + "learning_rate": 2.2521648229915902e-05, + "loss": 0.1142, "step": 80885 }, { "epoch": 3.77, - "learning_rate": 1.2509493225821576e-05, - "loss": 0.1523, + "learning_rate": 2.2521180160079885e-05, + "loss": 0.1399, "step": 80890 }, { "epoch": 3.77, - "learning_rate": 1.2509024424546436e-05, - "loss": 0.2556, + "learning_rate": 2.2520712090243865e-05, + "loss": 0.1709, "step": 80895 }, { "epoch": 3.77, - "learning_rate": 1.2508555623271297e-05, - "loss": 0.0617, + "learning_rate": 2.2520244020407845e-05, + "loss": 0.0702, "step": 80900 }, { "epoch": 3.78, - "learning_rate": 1.2508086821996157e-05, - "loss": 0.0293, + "learning_rate": 2.2519775950571824e-05, + "loss": 0.0197, "step": 80905 }, { "epoch": 3.78, - "learning_rate": 1.2507618020721017e-05, - "loss": 0.0318, + "learning_rate": 2.2519307880735808e-05, + "loss": 0.0569, "step": 80910 }, { "epoch": 3.78, - "learning_rate": 1.2507149219445877e-05, - "loss": 0.066, + "learning_rate": 2.2518839810899787e-05, + "loss": 0.0318, "step": 80915 }, { "epoch": 3.78, - "learning_rate": 1.2506680418170737e-05, - "loss": 0.0665, + "learning_rate": 2.2518371741063767e-05, + "loss": 0.0337, "step": 80920 }, { "epoch": 3.78, - "learning_rate": 1.2506211616895599e-05, - "loss": 0.0471, + "learning_rate": 2.2517903671227747e-05, + "loss": 0.0676, "step": 80925 }, { "epoch": 3.78, - "learning_rate": 1.2505742815620459e-05, - "loss": 0.0831, + "learning_rate": 2.251743560139173e-05, + "loss": 0.0673, "step": 80930 }, { "epoch": 3.78, - "learning_rate": 1.250527401434532e-05, - "loss": 0.1305, + "learning_rate": 2.251696753155571e-05, + "loss": 0.0888, "step": 80935 }, { "epoch": 3.78, - "learning_rate": 1.2504805213070182e-05, - "loss": 0.2657, + "learning_rate": 2.251649946171969e-05, + "loss": 0.1632, "step": 80940 }, { "epoch": 3.78, - "learning_rate": 1.2504336411795042e-05, - "loss": 0.2224, + "learning_rate": 2.2516031391883666e-05, + "loss": 0.4098, "step": 80945 }, { "epoch": 3.78, - "learning_rate": 1.2503867610519902e-05, - "loss": 0.0763, + "learning_rate": 2.251556332204765e-05, + "loss": 0.0991, "step": 80950 }, { "epoch": 3.78, - "learning_rate": 1.2503398809244762e-05, - "loss": 0.0322, + "learning_rate": 2.251509525221163e-05, + "loss": 0.0373, "step": 80955 }, { "epoch": 3.78, - "learning_rate": 1.2502930007969622e-05, - "loss": 0.0113, + "learning_rate": 2.251462718237561e-05, + "loss": 0.0376, "step": 80960 }, { "epoch": 3.78, - "learning_rate": 1.2502461206694483e-05, - "loss": 0.0685, + "learning_rate": 2.2514159112539592e-05, + "loss": 0.027, "step": 80965 }, { "epoch": 3.78, - "learning_rate": 1.2501992405419343e-05, - "loss": 0.0726, + "learning_rate": 2.2513691042703572e-05, + "loss": 0.0416, "step": 80970 }, { "epoch": 3.78, - "learning_rate": 1.2501523604144203e-05, - "loss": 0.0621, + "learning_rate": 2.2513222972867552e-05, + "loss": 0.0666, "step": 80975 }, { "epoch": 3.78, - "learning_rate": 1.2501054802869067e-05, - "loss": 0.1519, + "learning_rate": 2.2512754903031532e-05, + "loss": 0.1088, "step": 80980 }, { "epoch": 3.78, - "learning_rate": 1.2500586001593927e-05, - "loss": 0.1479, + "learning_rate": 2.2512286833195515e-05, + "loss": 0.1426, "step": 80985 }, { "epoch": 3.78, - "learning_rate": 1.2500117200318786e-05, - "loss": 0.177, + "learning_rate": 2.2511818763359495e-05, + "loss": 0.114, "step": 80990 }, { "epoch": 3.78, - "learning_rate": 1.2499648399043646e-05, - "loss": 0.1956, + "learning_rate": 2.2511350693523475e-05, + "loss": 0.1963, "step": 80995 }, { "epoch": 3.78, - "learning_rate": 1.2499179597768506e-05, - "loss": 0.1411, + "learning_rate": 2.2510882623687455e-05, + "loss": 0.0725, "step": 81000 }, { "epoch": 3.78, - "learning_rate": 1.2498710796493368e-05, - "loss": 0.0162, + "learning_rate": 2.2510414553851438e-05, + "loss": 0.0245, "step": 81005 }, { "epoch": 3.78, - "learning_rate": 1.2498241995218228e-05, - "loss": 0.0299, + "learning_rate": 2.2509946484015414e-05, + "loss": 0.0936, "step": 81010 }, { "epoch": 3.78, - "learning_rate": 1.2497773193943088e-05, - "loss": 0.0735, + "learning_rate": 2.2509478414179394e-05, + "loss": 0.0409, "step": 81015 }, { "epoch": 3.78, - "learning_rate": 1.2497304392667948e-05, - "loss": 0.0619, + "learning_rate": 2.2509010344343377e-05, + "loss": 0.0283, "step": 81020 }, { "epoch": 3.78, - "learning_rate": 1.249683559139281e-05, - "loss": 0.1804, + "learning_rate": 2.2508542274507357e-05, + "loss": 0.0897, "step": 81025 }, { "epoch": 3.78, - "learning_rate": 1.2496366790117671e-05, - "loss": 0.0614, + "learning_rate": 2.2508074204671337e-05, + "loss": 0.1347, "step": 81030 }, { "epoch": 3.78, - "learning_rate": 1.2495897988842531e-05, - "loss": 0.1022, + "learning_rate": 2.2507606134835317e-05, + "loss": 0.0707, "step": 81035 }, { "epoch": 3.78, - "learning_rate": 1.2495429187567391e-05, - "loss": 0.2208, + "learning_rate": 2.25071380649993e-05, + "loss": 0.1798, "step": 81040 }, { "epoch": 3.78, - "learning_rate": 1.2494960386292253e-05, - "loss": 0.2193, + "learning_rate": 2.250666999516328e-05, + "loss": 0.1663, "step": 81045 }, { "epoch": 3.78, - "learning_rate": 1.2494491585017112e-05, - "loss": 0.0922, + "learning_rate": 2.250620192532726e-05, + "loss": 0.0467, "step": 81050 }, { "epoch": 3.78, - "learning_rate": 1.2494022783741972e-05, - "loss": 0.0116, + "learning_rate": 2.250573385549124e-05, + "loss": 0.016, "step": 81055 }, { "epoch": 3.78, - "learning_rate": 1.2493553982466832e-05, - "loss": 0.0212, + "learning_rate": 2.2505265785655223e-05, + "loss": 0.0801, "step": 81060 }, { "epoch": 3.78, - "learning_rate": 1.2493085181191694e-05, - "loss": 0.0263, + "learning_rate": 2.2504797715819202e-05, + "loss": 0.0898, "step": 81065 }, { "epoch": 3.78, - "learning_rate": 1.2492616379916554e-05, - "loss": 0.0611, + "learning_rate": 2.250432964598318e-05, + "loss": 0.0525, "step": 81070 }, { "epoch": 3.78, - "learning_rate": 1.2492147578641415e-05, - "loss": 0.2149, + "learning_rate": 2.2503861576147162e-05, + "loss": 0.1312, "step": 81075 }, { "epoch": 3.78, - "learning_rate": 1.2491678777366275e-05, - "loss": 0.0776, + "learning_rate": 2.2503393506311142e-05, + "loss": 0.1717, "step": 81080 }, { "epoch": 3.78, - "learning_rate": 1.2491209976091137e-05, - "loss": 0.0936, + "learning_rate": 2.250292543647512e-05, + "loss": 0.1517, "step": 81085 }, { "epoch": 3.78, - "learning_rate": 1.2490741174815997e-05, - "loss": 0.2098, + "learning_rate": 2.25024573666391e-05, + "loss": 0.133, "step": 81090 }, { "epoch": 3.78, - "learning_rate": 1.2490272373540857e-05, - "loss": 0.2427, + "learning_rate": 2.2501989296803085e-05, + "loss": 0.336, "step": 81095 }, { "epoch": 3.78, - "learning_rate": 1.2489803572265717e-05, - "loss": 0.1095, + "learning_rate": 2.2501521226967064e-05, + "loss": 0.0281, "step": 81100 }, { "epoch": 3.78, - "learning_rate": 1.2489334770990578e-05, - "loss": 0.0136, + "learning_rate": 2.2501053157131044e-05, + "loss": 0.0526, "step": 81105 }, { "epoch": 3.78, - "learning_rate": 1.2488865969715438e-05, - "loss": 0.0428, + "learning_rate": 2.2500585087295024e-05, + "loss": 0.0224, "step": 81110 }, { "epoch": 3.78, - "learning_rate": 1.2488397168440298e-05, - "loss": 0.0439, + "learning_rate": 2.2500117017459007e-05, + "loss": 0.0521, "step": 81115 }, { "epoch": 3.79, - "learning_rate": 1.248792836716516e-05, - "loss": 0.07, + "learning_rate": 2.2499648947622987e-05, + "loss": 0.0459, "step": 81120 }, { "epoch": 3.79, - "learning_rate": 1.2487459565890022e-05, - "loss": 0.0502, + "learning_rate": 2.2499180877786967e-05, + "loss": 0.0571, "step": 81125 }, { "epoch": 3.79, - "learning_rate": 1.2486990764614882e-05, - "loss": 0.1179, + "learning_rate": 2.2498712807950947e-05, + "loss": 0.0932, "step": 81130 }, { "epoch": 3.79, - "learning_rate": 1.2486521963339741e-05, - "loss": 0.1173, + "learning_rate": 2.2498244738114927e-05, + "loss": 0.1587, "step": 81135 }, { "epoch": 3.79, - "learning_rate": 1.2486053162064601e-05, - "loss": 0.1832, + "learning_rate": 2.2497776668278906e-05, + "loss": 0.1433, "step": 81140 }, { "epoch": 3.79, - "learning_rate": 1.2485584360789463e-05, - "loss": 0.3722, + "learning_rate": 2.2497308598442886e-05, + "loss": 0.3636, "step": 81145 }, { "epoch": 3.79, - "learning_rate": 1.2485115559514323e-05, - "loss": 0.0988, + "learning_rate": 2.249684052860687e-05, + "loss": 0.0411, "step": 81150 }, { "epoch": 3.79, - "learning_rate": 1.2484646758239183e-05, - "loss": 0.0145, + "learning_rate": 2.249637245877085e-05, + "loss": 0.0706, "step": 81155 }, { "epoch": 3.79, - "learning_rate": 1.2484177956964043e-05, - "loss": 0.0427, + "learning_rate": 2.249590438893483e-05, + "loss": 0.0576, "step": 81160 }, { "epoch": 3.79, - "learning_rate": 1.2483709155688903e-05, - "loss": 0.0635, + "learning_rate": 2.249543631909881e-05, + "loss": 0.0571, "step": 81165 }, { "epoch": 3.79, - "learning_rate": 1.2483240354413766e-05, - "loss": 0.0609, + "learning_rate": 2.2494968249262792e-05, + "loss": 0.0646, "step": 81170 }, { "epoch": 3.79, - "learning_rate": 1.2482771553138626e-05, - "loss": 0.0846, + "learning_rate": 2.2494500179426772e-05, + "loss": 0.1142, "step": 81175 }, { "epoch": 3.79, - "learning_rate": 1.2482302751863486e-05, - "loss": 0.1059, + "learning_rate": 2.2494032109590752e-05, + "loss": 0.1132, "step": 81180 }, { "epoch": 3.79, - "learning_rate": 1.2481833950588348e-05, - "loss": 0.1432, + "learning_rate": 2.249356403975473e-05, + "loss": 0.1318, "step": 81185 }, { "epoch": 3.79, - "learning_rate": 1.2481365149313208e-05, - "loss": 0.1982, + "learning_rate": 2.2493095969918715e-05, + "loss": 0.1909, "step": 81190 }, { "epoch": 3.79, - "learning_rate": 1.2480896348038067e-05, - "loss": 0.3358, + "learning_rate": 2.2492627900082695e-05, + "loss": 0.3889, "step": 81195 }, { "epoch": 3.79, - "learning_rate": 1.2480427546762927e-05, - "loss": 0.0494, + "learning_rate": 2.249215983024667e-05, + "loss": 0.0416, "step": 81200 }, { "epoch": 3.79, - "learning_rate": 1.2479958745487787e-05, - "loss": 0.0277, + "learning_rate": 2.2491691760410654e-05, + "loss": 0.0206, "step": 81205 }, { "epoch": 3.79, - "learning_rate": 1.2479489944212649e-05, - "loss": 0.0717, + "learning_rate": 2.2491223690574634e-05, + "loss": 0.0315, "step": 81210 }, { "epoch": 3.79, - "learning_rate": 1.247902114293751e-05, - "loss": 0.0455, + "learning_rate": 2.2490755620738614e-05, + "loss": 0.068, "step": 81215 }, { "epoch": 3.79, - "learning_rate": 1.247855234166237e-05, - "loss": 0.1189, + "learning_rate": 2.2490287550902594e-05, + "loss": 0.07, "step": 81220 }, { "epoch": 3.79, - "learning_rate": 1.2478083540387232e-05, - "loss": 0.0544, + "learning_rate": 2.2489819481066577e-05, + "loss": 0.0877, "step": 81225 }, { "epoch": 3.79, - "learning_rate": 1.2477614739112092e-05, - "loss": 0.1064, + "learning_rate": 2.2489351411230557e-05, + "loss": 0.1307, "step": 81230 }, { "epoch": 3.79, - "learning_rate": 1.2477145937836952e-05, - "loss": 0.1111, + "learning_rate": 2.2488883341394536e-05, + "loss": 0.1394, "step": 81235 }, { "epoch": 3.79, - "learning_rate": 1.2476677136561812e-05, - "loss": 0.1693, + "learning_rate": 2.2488415271558516e-05, + "loss": 0.1812, "step": 81240 }, { "epoch": 3.79, - "learning_rate": 1.2476208335286672e-05, - "loss": 0.278, + "learning_rate": 2.24879472017225e-05, + "loss": 0.1965, "step": 81245 }, { "epoch": 3.79, - "learning_rate": 1.2475739534011534e-05, - "loss": 0.0397, + "learning_rate": 2.248747913188648e-05, + "loss": 0.0368, "step": 81250 }, { "epoch": 3.79, - "learning_rate": 1.2475270732736393e-05, - "loss": 0.0198, + "learning_rate": 2.248701106205046e-05, + "loss": 0.0307, "step": 81255 }, { "epoch": 3.79, - "learning_rate": 1.2474801931461255e-05, - "loss": 0.029, + "learning_rate": 2.248654299221444e-05, + "loss": 0.021, "step": 81260 }, { "epoch": 3.79, - "learning_rate": 1.2474333130186117e-05, - "loss": 0.035, + "learning_rate": 2.248607492237842e-05, + "loss": 0.0562, "step": 81265 }, { "epoch": 3.79, - "learning_rate": 1.2473864328910977e-05, - "loss": 0.0945, + "learning_rate": 2.24856068525424e-05, + "loss": 0.0352, "step": 81270 }, { "epoch": 3.79, - "learning_rate": 1.2473395527635837e-05, - "loss": 0.0477, + "learning_rate": 2.248513878270638e-05, + "loss": 0.1048, "step": 81275 }, { "epoch": 3.79, - "learning_rate": 1.2472926726360696e-05, - "loss": 0.069, + "learning_rate": 2.248467071287036e-05, + "loss": 0.1112, "step": 81280 }, { "epoch": 3.79, - "learning_rate": 1.2472457925085556e-05, - "loss": 0.1089, + "learning_rate": 2.248420264303434e-05, + "loss": 0.1209, "step": 81285 }, { "epoch": 3.79, - "learning_rate": 1.2471989123810418e-05, - "loss": 0.1984, + "learning_rate": 2.248373457319832e-05, + "loss": 0.1283, "step": 81290 }, { "epoch": 3.79, - "learning_rate": 1.2471520322535278e-05, - "loss": 0.3018, + "learning_rate": 2.24832665033623e-05, + "loss": 0.2, "step": 81295 }, { "epoch": 3.79, - "learning_rate": 1.2471051521260138e-05, - "loss": 0.0599, + "learning_rate": 2.2482798433526284e-05, + "loss": 0.0424, "step": 81300 }, { "epoch": 3.79, - "learning_rate": 1.2470582719985001e-05, - "loss": 0.0302, + "learning_rate": 2.2482330363690264e-05, + "loss": 0.0873, "step": 81305 }, { "epoch": 3.79, - "learning_rate": 1.2470113918709861e-05, - "loss": 0.0116, + "learning_rate": 2.2481862293854244e-05, + "loss": 0.0707, "step": 81310 }, { "epoch": 3.79, - "learning_rate": 1.2469645117434721e-05, - "loss": 0.0685, + "learning_rate": 2.2481394224018227e-05, + "loss": 0.0522, "step": 81315 }, { "epoch": 3.79, - "learning_rate": 1.2469176316159581e-05, - "loss": 0.1053, + "learning_rate": 2.2480926154182207e-05, + "loss": 0.0507, "step": 81320 }, { "epoch": 3.79, - "learning_rate": 1.2468707514884441e-05, - "loss": 0.1866, + "learning_rate": 2.2480458084346183e-05, + "loss": 0.0149, "step": 81325 }, { "epoch": 3.79, - "learning_rate": 1.2468238713609303e-05, - "loss": 0.0863, + "learning_rate": 2.2479990014510163e-05, + "loss": 0.0966, "step": 81330 }, { "epoch": 3.8, - "learning_rate": 1.2467769912334163e-05, - "loss": 0.0869, + "learning_rate": 2.2479521944674146e-05, + "loss": 0.0936, "step": 81335 }, { "epoch": 3.8, - "learning_rate": 1.2467301111059022e-05, - "loss": 0.1803, + "learning_rate": 2.2479053874838126e-05, + "loss": 0.1273, "step": 81340 }, { "epoch": 3.8, - "learning_rate": 1.2466832309783882e-05, - "loss": 0.4354, + "learning_rate": 2.2478585805002106e-05, + "loss": 0.3431, "step": 81345 }, { "epoch": 3.8, - "learning_rate": 1.2466363508508742e-05, - "loss": 0.0594, + "learning_rate": 2.2478117735166086e-05, + "loss": 0.0376, "step": 81350 }, { "epoch": 3.8, - "learning_rate": 1.2465894707233606e-05, - "loss": 0.0423, + "learning_rate": 2.247764966533007e-05, + "loss": 0.0103, "step": 81355 }, { "epoch": 3.8, - "learning_rate": 1.2465425905958466e-05, - "loss": 0.0198, + "learning_rate": 2.247718159549405e-05, + "loss": 0.0372, "step": 81360 }, { "epoch": 3.8, - "learning_rate": 1.2464957104683326e-05, - "loss": 0.0344, + "learning_rate": 2.247671352565803e-05, + "loss": 0.1112, "step": 81365 }, { "epoch": 3.8, - "learning_rate": 1.2464488303408187e-05, - "loss": 0.0732, + "learning_rate": 2.247624545582201e-05, + "loss": 0.1296, "step": 81370 }, { "epoch": 3.8, - "learning_rate": 1.2464019502133047e-05, - "loss": 0.0447, + "learning_rate": 2.2475777385985992e-05, + "loss": 0.0747, "step": 81375 }, { "epoch": 3.8, - "learning_rate": 1.2463550700857907e-05, - "loss": 0.1097, + "learning_rate": 2.247530931614997e-05, + "loss": 0.0647, "step": 81380 }, { "epoch": 3.8, - "learning_rate": 1.2463081899582767e-05, - "loss": 0.1406, + "learning_rate": 2.247484124631395e-05, + "loss": 0.1253, "step": 81385 }, { "epoch": 3.8, - "learning_rate": 1.2462613098307627e-05, - "loss": 0.1767, + "learning_rate": 2.247437317647793e-05, + "loss": 0.1373, "step": 81390 }, { "epoch": 3.8, - "learning_rate": 1.2462144297032489e-05, - "loss": 0.3201, + "learning_rate": 2.247390510664191e-05, + "loss": 0.2142, "step": 81395 }, { "epoch": 3.8, - "learning_rate": 1.246167549575735e-05, - "loss": 0.0523, + "learning_rate": 2.247343703680589e-05, + "loss": 0.0727, "step": 81400 }, { "epoch": 3.8, - "learning_rate": 1.246120669448221e-05, - "loss": 0.012, + "learning_rate": 2.247296896696987e-05, + "loss": 0.0524, "step": 81405 }, { "epoch": 3.8, - "learning_rate": 1.2460737893207072e-05, - "loss": 0.0234, + "learning_rate": 2.2472500897133854e-05, + "loss": 0.0285, "step": 81410 }, { "epoch": 3.8, - "learning_rate": 1.2460269091931932e-05, - "loss": 0.0656, + "learning_rate": 2.2472032827297834e-05, + "loss": 0.0682, "step": 81415 }, { "epoch": 3.8, - "learning_rate": 1.2459800290656792e-05, - "loss": 0.065, + "learning_rate": 2.2471564757461813e-05, + "loss": 0.0669, "step": 81420 }, { "epoch": 3.8, - "learning_rate": 1.2459331489381652e-05, - "loss": 0.1022, + "learning_rate": 2.2471096687625793e-05, + "loss": 0.1274, "step": 81425 }, { "epoch": 3.8, - "learning_rate": 1.2458862688106511e-05, - "loss": 0.0835, + "learning_rate": 2.2470628617789776e-05, + "loss": 0.1272, "step": 81430 }, { "epoch": 3.8, - "learning_rate": 1.2458393886831373e-05, - "loss": 0.0948, + "learning_rate": 2.2470160547953756e-05, + "loss": 0.1299, "step": 81435 }, { "epoch": 3.8, - "learning_rate": 1.2457925085556233e-05, - "loss": 0.1996, + "learning_rate": 2.2469692478117736e-05, + "loss": 0.1829, "step": 81440 }, { "epoch": 3.8, - "learning_rate": 1.2457456284281095e-05, - "loss": 0.2463, + "learning_rate": 2.246922440828172e-05, + "loss": 0.2676, "step": 81445 }, { "epoch": 3.8, - "learning_rate": 1.2456987483005956e-05, - "loss": 0.0458, + "learning_rate": 2.2468756338445696e-05, + "loss": 0.0923, "step": 81450 }, { "epoch": 3.8, - "learning_rate": 1.2456518681730816e-05, - "loss": 0.0485, + "learning_rate": 2.2468288268609676e-05, + "loss": 0.0429, "step": 81455 }, { "epoch": 3.8, - "learning_rate": 1.2456049880455676e-05, - "loss": 0.0489, + "learning_rate": 2.2467820198773655e-05, + "loss": 0.0145, "step": 81460 }, { "epoch": 3.8, - "learning_rate": 1.2455581079180536e-05, - "loss": 0.051, + "learning_rate": 2.246735212893764e-05, + "loss": 0.0123, "step": 81465 }, { "epoch": 3.8, - "learning_rate": 1.2455112277905396e-05, - "loss": 0.0888, + "learning_rate": 2.246688405910162e-05, + "loss": 0.0652, "step": 81470 }, { "epoch": 3.8, - "learning_rate": 1.2454643476630258e-05, - "loss": 0.0656, + "learning_rate": 2.2466415989265598e-05, + "loss": 0.0607, "step": 81475 }, { "epoch": 3.8, - "learning_rate": 1.2454174675355118e-05, - "loss": 0.1618, + "learning_rate": 2.2465947919429578e-05, + "loss": 0.0604, "step": 81480 }, { "epoch": 3.8, - "learning_rate": 1.2453705874079977e-05, - "loss": 0.1987, + "learning_rate": 2.246547984959356e-05, + "loss": 0.0375, "step": 81485 }, { "epoch": 3.8, - "learning_rate": 1.2453237072804837e-05, - "loss": 0.1733, + "learning_rate": 2.246501177975754e-05, + "loss": 0.1782, "step": 81490 }, { "epoch": 3.8, - "learning_rate": 1.24527682715297e-05, - "loss": 0.4266, + "learning_rate": 2.246454370992152e-05, + "loss": 0.1906, "step": 81495 }, { "epoch": 3.8, - "learning_rate": 1.245229947025456e-05, - "loss": 0.0668, + "learning_rate": 2.2464075640085504e-05, + "loss": 0.0396, "step": 81500 }, { "epoch": 3.8, - "learning_rate": 1.245183066897942e-05, - "loss": 0.0266, + "learning_rate": 2.2463607570249484e-05, + "loss": 0.0174, "step": 81505 }, { "epoch": 3.8, - "learning_rate": 1.245136186770428e-05, - "loss": 0.0385, + "learning_rate": 2.2463139500413464e-05, + "loss": 0.0392, "step": 81510 }, { "epoch": 3.8, - "learning_rate": 1.2450893066429142e-05, - "loss": 0.0189, + "learning_rate": 2.246267143057744e-05, + "loss": 0.0174, "step": 81515 }, { "epoch": 3.8, - "learning_rate": 1.2450424265154002e-05, - "loss": 0.0433, + "learning_rate": 2.2462203360741423e-05, + "loss": 0.0572, "step": 81520 }, { "epoch": 3.8, - "learning_rate": 1.2449955463878862e-05, - "loss": 0.0716, + "learning_rate": 2.2461735290905403e-05, + "loss": 0.0997, "step": 81525 }, { "epoch": 3.8, - "learning_rate": 1.2449486662603722e-05, - "loss": 0.0516, + "learning_rate": 2.2461267221069383e-05, + "loss": 0.0912, "step": 81530 }, { "epoch": 3.8, - "learning_rate": 1.2449017861328584e-05, - "loss": 0.0534, + "learning_rate": 2.2460799151233363e-05, + "loss": 0.5396, "step": 81535 }, { "epoch": 3.8, - "learning_rate": 1.2448549060053445e-05, - "loss": 0.174, + "learning_rate": 2.2460331081397346e-05, + "loss": 0.1379, "step": 81540 }, { "epoch": 3.81, - "learning_rate": 1.2448080258778305e-05, - "loss": 0.3213, + "learning_rate": 2.2459863011561326e-05, + "loss": 0.2993, "step": 81545 }, { "epoch": 3.81, - "learning_rate": 1.2447611457503165e-05, - "loss": 0.2926, + "learning_rate": 2.2459394941725306e-05, + "loss": 0.0644, "step": 81550 }, { "epoch": 3.81, - "learning_rate": 1.2447142656228027e-05, - "loss": 0.0328, + "learning_rate": 2.2458926871889285e-05, + "loss": 0.0069, "step": 81555 }, { "epoch": 3.81, - "learning_rate": 1.2446673854952887e-05, - "loss": 0.042, + "learning_rate": 2.245845880205327e-05, + "loss": 0.0241, "step": 81560 }, { "epoch": 3.81, - "learning_rate": 1.2446205053677747e-05, - "loss": 0.0228, + "learning_rate": 2.245799073221725e-05, + "loss": 0.0367, "step": 81565 }, { "epoch": 3.81, - "learning_rate": 1.2445736252402607e-05, - "loss": 0.0259, + "learning_rate": 2.245752266238123e-05, + "loss": 0.116, "step": 81570 }, { "epoch": 3.81, - "learning_rate": 1.2445267451127468e-05, - "loss": 0.0905, + "learning_rate": 2.2457054592545208e-05, + "loss": 0.0727, "step": 81575 }, { "epoch": 3.81, - "learning_rate": 1.2444798649852328e-05, - "loss": 0.1253, + "learning_rate": 2.2456586522709188e-05, + "loss": 0.0712, "step": 81580 }, { "epoch": 3.81, - "learning_rate": 1.244432984857719e-05, - "loss": 0.1169, + "learning_rate": 2.2456118452873168e-05, + "loss": 0.0916, "step": 81585 }, { "epoch": 3.81, - "learning_rate": 1.244386104730205e-05, - "loss": 0.1853, + "learning_rate": 2.2455650383037148e-05, + "loss": 0.1928, "step": 81590 }, { "epoch": 3.81, - "learning_rate": 1.2443392246026911e-05, - "loss": 0.2413, + "learning_rate": 2.245518231320113e-05, + "loss": 0.2557, "step": 81595 }, { "epoch": 3.81, - "learning_rate": 1.2442923444751771e-05, - "loss": 0.0897, + "learning_rate": 2.245471424336511e-05, + "loss": 0.046, "step": 81600 }, { "epoch": 3.81, - "learning_rate": 1.2442454643476631e-05, - "loss": 0.0224, + "learning_rate": 2.245424617352909e-05, + "loss": 0.0365, "step": 81605 }, { "epoch": 3.81, - "learning_rate": 1.2441985842201491e-05, - "loss": 0.057, + "learning_rate": 2.245377810369307e-05, + "loss": 0.0095, "step": 81610 }, { "epoch": 3.81, - "learning_rate": 1.2441517040926353e-05, - "loss": 0.0515, + "learning_rate": 2.2453310033857053e-05, + "loss": 0.0749, "step": 81615 }, { "epoch": 3.81, - "learning_rate": 1.2441048239651213e-05, - "loss": 0.073, + "learning_rate": 2.2452841964021033e-05, + "loss": 0.029, "step": 81620 }, { "epoch": 3.81, - "learning_rate": 1.2440579438376073e-05, - "loss": 0.0742, + "learning_rate": 2.2452373894185013e-05, + "loss": 0.0351, "step": 81625 }, { "epoch": 3.81, - "learning_rate": 1.2440110637100934e-05, - "loss": 0.1327, + "learning_rate": 2.2451905824348996e-05, + "loss": 0.0478, "step": 81630 }, { "epoch": 3.81, - "learning_rate": 1.2439641835825796e-05, - "loss": 0.0831, + "learning_rate": 2.2451437754512976e-05, + "loss": 0.1325, "step": 81635 }, { "epoch": 3.81, - "learning_rate": 1.2439173034550656e-05, - "loss": 0.1077, + "learning_rate": 2.2450969684676953e-05, + "loss": 0.0673, "step": 81640 }, { "epoch": 3.81, - "learning_rate": 1.2438704233275516e-05, - "loss": 0.2796, + "learning_rate": 2.2450501614840932e-05, + "loss": 0.1749, "step": 81645 }, { "epoch": 3.81, - "learning_rate": 1.2438235432000376e-05, - "loss": 0.0781, + "learning_rate": 2.2450033545004916e-05, + "loss": 0.0212, "step": 81650 }, { "epoch": 3.81, - "learning_rate": 1.2437766630725237e-05, - "loss": 0.0172, + "learning_rate": 2.2449565475168895e-05, + "loss": 0.0171, "step": 81655 }, { "epoch": 3.81, - "learning_rate": 1.2437297829450097e-05, - "loss": 0.0175, + "learning_rate": 2.2449097405332875e-05, + "loss": 0.0798, "step": 81660 }, { "epoch": 3.81, - "learning_rate": 1.2436829028174957e-05, - "loss": 0.0653, + "learning_rate": 2.2448629335496855e-05, + "loss": 0.0385, "step": 81665 }, { "epoch": 3.81, - "learning_rate": 1.2436360226899817e-05, - "loss": 0.1016, + "learning_rate": 2.2448161265660838e-05, + "loss": 0.075, "step": 81670 }, { "epoch": 3.81, - "learning_rate": 1.2435891425624677e-05, - "loss": 0.046, + "learning_rate": 2.2447693195824818e-05, + "loss": 0.1046, "step": 81675 }, { "epoch": 3.81, - "learning_rate": 1.243542262434954e-05, - "loss": 0.1179, + "learning_rate": 2.2447225125988798e-05, + "loss": 0.0673, "step": 81680 }, { "epoch": 3.81, - "learning_rate": 1.24349538230744e-05, - "loss": 0.039, + "learning_rate": 2.244675705615278e-05, + "loss": 0.1852, "step": 81685 }, { "epoch": 3.81, - "learning_rate": 1.243448502179926e-05, - "loss": 0.1893, + "learning_rate": 2.244628898631676e-05, + "loss": 0.177, "step": 81690 }, { "epoch": 3.81, - "learning_rate": 1.2434016220524122e-05, - "loss": 0.2309, + "learning_rate": 2.244582091648074e-05, + "loss": 0.1431, "step": 81695 }, { "epoch": 3.81, - "learning_rate": 1.2433547419248982e-05, - "loss": 0.0438, + "learning_rate": 2.244535284664472e-05, + "loss": 0.0671, "step": 81700 }, { "epoch": 3.81, - "learning_rate": 1.2433078617973842e-05, - "loss": 0.0377, + "learning_rate": 2.24448847768087e-05, + "loss": 0.0329, "step": 81705 }, { "epoch": 3.81, - "learning_rate": 1.2432609816698702e-05, - "loss": 0.0477, + "learning_rate": 2.244441670697268e-05, + "loss": 0.0291, "step": 81710 }, { "epoch": 3.81, - "learning_rate": 1.2432141015423562e-05, - "loss": 0.0524, + "learning_rate": 2.244394863713666e-05, + "loss": 0.0185, "step": 81715 }, { "epoch": 3.81, - "learning_rate": 1.2431672214148423e-05, - "loss": 0.0866, + "learning_rate": 2.244348056730064e-05, + "loss": 0.0994, "step": 81720 }, { "epoch": 3.81, - "learning_rate": 1.2431203412873285e-05, - "loss": 0.081, + "learning_rate": 2.2443012497464623e-05, + "loss": 0.0861, "step": 81725 }, { "epoch": 3.81, - "learning_rate": 1.2430734611598145e-05, - "loss": 0.1324, + "learning_rate": 2.2442544427628603e-05, + "loss": 0.0886, "step": 81730 }, { "epoch": 3.81, - "learning_rate": 1.2430265810323006e-05, - "loss": 0.147, + "learning_rate": 2.2442076357792583e-05, + "loss": 0.1412, "step": 81735 }, { "epoch": 3.81, - "learning_rate": 1.2429797009047866e-05, - "loss": 0.1628, + "learning_rate": 2.2441608287956562e-05, + "loss": 0.1733, "step": 81740 }, { "epoch": 3.81, - "learning_rate": 1.2429328207772726e-05, - "loss": 0.2222, + "learning_rate": 2.2441140218120546e-05, + "loss": 0.3332, "step": 81745 }, { "epoch": 3.81, - "learning_rate": 1.2428859406497586e-05, - "loss": 0.118, + "learning_rate": 2.2440672148284525e-05, + "loss": 0.0683, "step": 81750 }, { "epoch": 3.81, - "learning_rate": 1.2428390605222446e-05, - "loss": 0.0129, + "learning_rate": 2.2440204078448505e-05, + "loss": 0.0697, "step": 81755 }, { "epoch": 3.82, - "learning_rate": 1.2427921803947308e-05, - "loss": 0.0356, + "learning_rate": 2.243973600861249e-05, + "loss": 0.0283, "step": 81760 }, { "epoch": 3.82, - "learning_rate": 1.2427453002672168e-05, - "loss": 0.0453, + "learning_rate": 2.2439267938776465e-05, + "loss": 0.0344, "step": 81765 }, { "epoch": 3.82, - "learning_rate": 1.242698420139703e-05, - "loss": 0.0307, + "learning_rate": 2.2438799868940445e-05, + "loss": 0.0455, "step": 81770 }, { "epoch": 3.82, - "learning_rate": 1.2426515400121891e-05, - "loss": 0.0505, + "learning_rate": 2.2438331799104425e-05, + "loss": 0.0457, "step": 81775 }, { "epoch": 3.82, - "learning_rate": 1.242604659884675e-05, - "loss": 0.0904, + "learning_rate": 2.2437863729268408e-05, + "loss": 0.0571, "step": 81780 }, { "epoch": 3.82, - "learning_rate": 1.242557779757161e-05, - "loss": 0.1525, + "learning_rate": 2.2437395659432388e-05, + "loss": 0.0783, "step": 81785 }, { "epoch": 3.82, - "learning_rate": 1.242510899629647e-05, - "loss": 0.202, + "learning_rate": 2.2436927589596367e-05, + "loss": 0.2395, "step": 81790 }, { "epoch": 3.82, - "learning_rate": 1.242464019502133e-05, - "loss": 0.2554, + "learning_rate": 2.2436459519760347e-05, + "loss": 0.223, "step": 81795 }, { "epoch": 3.82, - "learning_rate": 1.2424171393746192e-05, - "loss": 0.0533, + "learning_rate": 2.243599144992433e-05, + "loss": 0.034, "step": 81800 }, { "epoch": 3.82, - "learning_rate": 1.2423702592471052e-05, - "loss": 0.0764, + "learning_rate": 2.243552338008831e-05, + "loss": 0.0307, "step": 81805 }, { "epoch": 3.82, - "learning_rate": 1.2423233791195912e-05, - "loss": 0.0551, + "learning_rate": 2.243505531025229e-05, + "loss": 0.0172, "step": 81810 }, { "epoch": 3.82, - "learning_rate": 1.2422764989920772e-05, - "loss": 0.035, + "learning_rate": 2.2434587240416273e-05, + "loss": 0.0143, "step": 81815 }, { "epoch": 3.82, - "learning_rate": 1.2422296188645635e-05, - "loss": 0.0921, + "learning_rate": 2.2434119170580253e-05, + "loss": 0.0243, "step": 81820 }, { "epoch": 3.82, - "learning_rate": 1.2421827387370495e-05, - "loss": 0.0407, + "learning_rate": 2.2433651100744233e-05, + "loss": 0.0117, "step": 81825 }, { "epoch": 3.82, - "learning_rate": 1.2421358586095355e-05, - "loss": 0.1333, + "learning_rate": 2.243318303090821e-05, + "loss": 0.0678, "step": 81830 }, { "epoch": 3.82, - "learning_rate": 1.2420889784820215e-05, - "loss": 0.1425, + "learning_rate": 2.2432714961072193e-05, + "loss": 0.0965, "step": 81835 }, { "epoch": 3.82, - "learning_rate": 1.2420420983545077e-05, - "loss": 0.2069, + "learning_rate": 2.2432246891236172e-05, + "loss": 0.2754, "step": 81840 }, { "epoch": 3.82, - "learning_rate": 1.2419952182269937e-05, - "loss": 0.2449, + "learning_rate": 2.2431778821400152e-05, + "loss": 0.2316, "step": 81845 }, { "epoch": 3.82, - "learning_rate": 1.2419483380994797e-05, - "loss": 0.0492, + "learning_rate": 2.2431310751564132e-05, + "loss": 0.0668, "step": 81850 }, { "epoch": 3.82, - "learning_rate": 1.2419014579719657e-05, - "loss": 0.0101, + "learning_rate": 2.2430842681728115e-05, + "loss": 0.0172, "step": 81855 }, { "epoch": 3.82, - "learning_rate": 1.2418545778444517e-05, - "loss": 0.0477, + "learning_rate": 2.2430374611892095e-05, + "loss": 0.0551, "step": 81860 }, { "epoch": 3.82, - "learning_rate": 1.241807697716938e-05, - "loss": 0.0073, + "learning_rate": 2.2429906542056075e-05, + "loss": 0.0279, "step": 81865 }, { "epoch": 3.82, - "learning_rate": 1.241760817589424e-05, - "loss": 0.0802, + "learning_rate": 2.2429438472220058e-05, + "loss": 0.0733, "step": 81870 }, { "epoch": 3.82, - "learning_rate": 1.24171393746191e-05, - "loss": 0.0941, + "learning_rate": 2.2428970402384038e-05, + "loss": 0.0442, "step": 81875 }, { "epoch": 3.82, - "learning_rate": 1.2416670573343961e-05, - "loss": 0.0735, + "learning_rate": 2.2428502332548018e-05, + "loss": 0.1123, "step": 81880 }, { "epoch": 3.82, - "learning_rate": 1.2416201772068821e-05, - "loss": 0.0528, + "learning_rate": 2.2428034262711997e-05, + "loss": 0.054, "step": 81885 }, { "epoch": 3.82, - "learning_rate": 1.2415732970793681e-05, - "loss": 0.1818, + "learning_rate": 2.2427566192875977e-05, + "loss": 0.2083, "step": 81890 }, { "epoch": 3.82, - "learning_rate": 1.2415264169518541e-05, - "loss": 0.2368, + "learning_rate": 2.2427098123039957e-05, + "loss": 0.2592, "step": 81895 }, { "epoch": 3.82, - "learning_rate": 1.2414795368243401e-05, - "loss": 0.0771, + "learning_rate": 2.2426630053203937e-05, + "loss": 0.0729, "step": 81900 }, { "epoch": 3.82, - "learning_rate": 1.2414326566968263e-05, - "loss": 0.0333, + "learning_rate": 2.2426161983367917e-05, + "loss": 0.074, "step": 81905 }, { "epoch": 3.82, - "learning_rate": 1.2413857765693124e-05, - "loss": 0.0334, + "learning_rate": 2.24256939135319e-05, + "loss": 0.0459, "step": 81910 }, { "epoch": 3.82, - "learning_rate": 1.2413388964417984e-05, - "loss": 0.0567, + "learning_rate": 2.242522584369588e-05, + "loss": 0.0349, "step": 81915 }, { "epoch": 3.82, - "learning_rate": 1.2412920163142846e-05, - "loss": 0.0596, + "learning_rate": 2.242475777385986e-05, + "loss": 0.0741, "step": 81920 }, { "epoch": 3.82, - "learning_rate": 1.2412451361867706e-05, - "loss": 0.0773, + "learning_rate": 2.242428970402384e-05, + "loss": 0.0518, "step": 81925 }, { "epoch": 3.82, - "learning_rate": 1.2411982560592566e-05, - "loss": 0.0531, + "learning_rate": 2.2423821634187823e-05, + "loss": 0.1162, "step": 81930 }, { "epoch": 3.82, - "learning_rate": 1.2411513759317426e-05, - "loss": 0.0679, + "learning_rate": 2.2423353564351802e-05, + "loss": 0.1522, "step": 81935 }, { "epoch": 3.82, - "learning_rate": 1.2411044958042286e-05, - "loss": 0.2293, + "learning_rate": 2.2422885494515782e-05, + "loss": 0.1398, "step": 81940 }, { "epoch": 3.82, - "learning_rate": 1.2410576156767147e-05, - "loss": 0.1537, + "learning_rate": 2.2422417424679765e-05, + "loss": 0.2632, "step": 81945 }, { "epoch": 3.82, - "learning_rate": 1.2410107355492007e-05, - "loss": 0.0234, + "learning_rate": 2.2421949354843745e-05, + "loss": 0.0368, "step": 81950 }, { "epoch": 3.82, - "learning_rate": 1.2409638554216869e-05, - "loss": 0.0393, + "learning_rate": 2.2421481285007722e-05, + "loss": 0.0325, "step": 81955 }, { "epoch": 3.82, - "learning_rate": 1.240916975294173e-05, - "loss": 0.0569, + "learning_rate": 2.24210132151717e-05, + "loss": 0.0372, "step": 81960 }, { "epoch": 3.82, - "learning_rate": 1.240870095166659e-05, - "loss": 0.0688, + "learning_rate": 2.2420545145335685e-05, + "loss": 0.0622, "step": 81965 }, { "epoch": 3.82, - "learning_rate": 1.240823215039145e-05, - "loss": 0.033, + "learning_rate": 2.2420077075499665e-05, + "loss": 0.045, "step": 81970 }, { "epoch": 3.83, - "learning_rate": 1.240776334911631e-05, - "loss": 0.1131, + "learning_rate": 2.2419609005663644e-05, + "loss": 0.1199, "step": 81975 }, { "epoch": 3.83, - "learning_rate": 1.240729454784117e-05, - "loss": 0.0823, + "learning_rate": 2.2419140935827624e-05, + "loss": 0.0433, "step": 81980 }, { "epoch": 3.83, - "learning_rate": 1.2406825746566032e-05, - "loss": 0.0743, + "learning_rate": 2.2418672865991607e-05, + "loss": 0.2047, "step": 81985 }, { "epoch": 3.83, - "learning_rate": 1.2406356945290892e-05, - "loss": 0.1694, + "learning_rate": 2.2418204796155587e-05, + "loss": 0.1019, "step": 81990 }, { "epoch": 3.83, - "learning_rate": 1.2405888144015752e-05, - "loss": 0.3186, + "learning_rate": 2.2417736726319567e-05, + "loss": 0.3998, "step": 81995 }, { "epoch": 3.83, - "learning_rate": 1.2405419342740612e-05, - "loss": 0.0837, + "learning_rate": 2.241726865648355e-05, + "loss": 0.0638, "step": 82000 }, { "epoch": 3.83, - "learning_rate": 1.2404950541465475e-05, - "loss": 0.0543, + "learning_rate": 2.241680058664753e-05, + "loss": 0.0228, "step": 82005 }, { "epoch": 3.83, - "learning_rate": 1.2404481740190335e-05, - "loss": 0.0624, + "learning_rate": 2.241633251681151e-05, + "loss": 0.0525, "step": 82010 }, { "epoch": 3.83, - "learning_rate": 1.2404012938915195e-05, - "loss": 0.0256, + "learning_rate": 2.241586444697549e-05, + "loss": 0.0161, "step": 82015 }, { "epoch": 3.83, - "learning_rate": 1.2403544137640055e-05, - "loss": 0.089, + "learning_rate": 2.241539637713947e-05, + "loss": 0.1072, "step": 82020 }, { "epoch": 3.83, - "learning_rate": 1.2403075336364916e-05, - "loss": 0.0715, + "learning_rate": 2.241492830730345e-05, + "loss": 0.0221, "step": 82025 }, { "epoch": 3.83, - "learning_rate": 1.2402606535089776e-05, - "loss": 0.166, + "learning_rate": 2.241446023746743e-05, + "loss": 0.0828, "step": 82030 }, { "epoch": 3.83, - "learning_rate": 1.2402137733814636e-05, - "loss": 0.1849, + "learning_rate": 2.241399216763141e-05, + "loss": 0.121, "step": 82035 }, { "epoch": 3.83, - "learning_rate": 1.2401668932539496e-05, - "loss": 0.2395, + "learning_rate": 2.2413524097795392e-05, + "loss": 0.0965, "step": 82040 }, { "epoch": 3.83, - "learning_rate": 1.2401200131264358e-05, - "loss": 0.2466, + "learning_rate": 2.2413056027959372e-05, + "loss": 0.1978, "step": 82045 }, { "epoch": 3.83, - "learning_rate": 1.240073132998922e-05, - "loss": 0.0628, + "learning_rate": 2.2412587958123352e-05, + "loss": 0.0952, "step": 82050 }, { "epoch": 3.83, - "learning_rate": 1.240026252871408e-05, - "loss": 0.0546, + "learning_rate": 2.2412119888287335e-05, + "loss": 0.0388, "step": 82055 }, { "epoch": 3.83, - "learning_rate": 1.2399793727438941e-05, - "loss": 0.0379, + "learning_rate": 2.2411651818451315e-05, + "loss": 0.0492, "step": 82060 }, { "epoch": 3.83, - "learning_rate": 1.2399324926163801e-05, - "loss": 0.0424, + "learning_rate": 2.2411183748615295e-05, + "loss": 0.1366, "step": 82065 }, { "epoch": 3.83, - "learning_rate": 1.2398856124888661e-05, - "loss": 0.0487, + "learning_rate": 2.2410715678779274e-05, + "loss": 0.0235, "step": 82070 }, { "epoch": 3.83, - "learning_rate": 1.239838732361352e-05, - "loss": 0.1114, + "learning_rate": 2.2410247608943258e-05, + "loss": 0.0634, "step": 82075 }, { "epoch": 3.83, - "learning_rate": 1.239791852233838e-05, - "loss": 0.06, + "learning_rate": 2.2409779539107234e-05, + "loss": 0.112, "step": 82080 }, { "epoch": 3.83, - "learning_rate": 1.2397449721063242e-05, - "loss": 0.1358, + "learning_rate": 2.2409311469271214e-05, + "loss": 0.0931, "step": 82085 }, { "epoch": 3.83, - "learning_rate": 1.2396980919788102e-05, - "loss": 0.2307, + "learning_rate": 2.2408843399435194e-05, + "loss": 0.1681, "step": 82090 }, { "epoch": 3.83, - "learning_rate": 1.2396512118512964e-05, - "loss": 0.3083, + "learning_rate": 2.2408375329599177e-05, + "loss": 0.2242, "step": 82095 }, { "epoch": 3.83, - "learning_rate": 1.2396043317237826e-05, - "loss": 0.0341, + "learning_rate": 2.2407907259763157e-05, + "loss": 0.0684, "step": 82100 }, { "epoch": 3.83, - "learning_rate": 1.2395574515962685e-05, - "loss": 0.0243, + "learning_rate": 2.2407439189927137e-05, + "loss": 0.028, "step": 82105 }, { "epoch": 3.83, - "learning_rate": 1.2395105714687545e-05, - "loss": 0.0231, + "learning_rate": 2.240697112009112e-05, + "loss": 0.0208, "step": 82110 }, { "epoch": 3.83, - "learning_rate": 1.2394636913412405e-05, - "loss": 0.0442, + "learning_rate": 2.24065030502551e-05, + "loss": 0.0376, "step": 82115 }, { "epoch": 3.83, - "learning_rate": 1.2394168112137265e-05, - "loss": 0.1169, + "learning_rate": 2.240603498041908e-05, + "loss": 0.0273, "step": 82120 }, { "epoch": 3.83, - "learning_rate": 1.2393699310862127e-05, - "loss": 0.1093, + "learning_rate": 2.240556691058306e-05, + "loss": 0.0543, "step": 82125 }, { "epoch": 3.83, - "learning_rate": 1.2393230509586987e-05, - "loss": 0.1125, + "learning_rate": 2.2405098840747042e-05, + "loss": 0.0738, "step": 82130 }, { "epoch": 3.83, - "learning_rate": 1.2392761708311847e-05, - "loss": 0.1146, + "learning_rate": 2.2404630770911022e-05, + "loss": 0.1504, "step": 82135 }, { "epoch": 3.83, - "learning_rate": 1.2392292907036707e-05, - "loss": 0.2026, + "learning_rate": 2.2404162701075002e-05, + "loss": 0.1901, "step": 82140 }, { "epoch": 3.83, - "learning_rate": 1.239182410576157e-05, - "loss": 0.3897, + "learning_rate": 2.240369463123898e-05, + "loss": 0.5032, "step": 82145 }, { "epoch": 3.83, - "learning_rate": 1.239135530448643e-05, + "learning_rate": 2.2403226561402962e-05, "loss": 0.0805, "step": 82150 }, { "epoch": 3.83, - "learning_rate": 1.239088650321129e-05, - "loss": 0.037, + "learning_rate": 2.240275849156694e-05, + "loss": 0.018, "step": 82155 }, { "epoch": 3.83, - "learning_rate": 1.239041770193615e-05, - "loss": 0.0358, + "learning_rate": 2.240229042173092e-05, + "loss": 0.0484, "step": 82160 }, { "epoch": 3.83, - "learning_rate": 1.2389948900661011e-05, - "loss": 0.019, + "learning_rate": 2.24018223518949e-05, + "loss": 0.0252, "step": 82165 }, { "epoch": 3.83, - "learning_rate": 1.2389480099385871e-05, - "loss": 0.0678, + "learning_rate": 2.2401354282058884e-05, + "loss": 0.0464, "step": 82170 }, { "epoch": 3.83, - "learning_rate": 1.2389011298110731e-05, - "loss": 0.1076, + "learning_rate": 2.2400886212222864e-05, + "loss": 0.0862, "step": 82175 }, { "epoch": 3.83, - "learning_rate": 1.2388542496835591e-05, - "loss": 0.1069, + "learning_rate": 2.2400418142386844e-05, + "loss": 0.187, "step": 82180 }, { "epoch": 3.83, - "learning_rate": 1.2388073695560451e-05, - "loss": 0.1264, + "learning_rate": 2.2399950072550827e-05, + "loss": 0.1144, "step": 82185 }, { "epoch": 3.84, - "learning_rate": 1.2387604894285315e-05, - "loss": 0.1963, + "learning_rate": 2.2399482002714807e-05, + "loss": 0.1864, "step": 82190 }, { "epoch": 3.84, - "learning_rate": 1.2387136093010174e-05, - "loss": 0.3654, + "learning_rate": 2.2399013932878787e-05, + "loss": 0.2725, "step": 82195 }, { "epoch": 3.84, - "learning_rate": 1.2386667291735034e-05, - "loss": 0.0684, + "learning_rate": 2.2398545863042767e-05, + "loss": 0.062, "step": 82200 }, { "epoch": 3.84, - "learning_rate": 1.2386198490459896e-05, - "loss": 0.0057, + "learning_rate": 2.239807779320675e-05, + "loss": 0.0449, "step": 82205 }, { "epoch": 3.84, - "learning_rate": 1.2385729689184756e-05, - "loss": 0.0854, + "learning_rate": 2.2397609723370726e-05, + "loss": 0.0248, "step": 82210 }, { "epoch": 3.84, - "learning_rate": 1.2385260887909616e-05, - "loss": 0.069, + "learning_rate": 2.2397141653534706e-05, + "loss": 0.0412, "step": 82215 }, { "epoch": 3.84, - "learning_rate": 1.2384792086634476e-05, - "loss": 0.0872, + "learning_rate": 2.2396673583698686e-05, + "loss": 0.0482, "step": 82220 }, { "epoch": 3.84, - "learning_rate": 1.2384323285359336e-05, - "loss": 0.0495, + "learning_rate": 2.239620551386267e-05, + "loss": 0.0866, "step": 82225 }, { "epoch": 3.84, - "learning_rate": 1.2383854484084197e-05, - "loss": 0.1551, + "learning_rate": 2.239573744402665e-05, + "loss": 0.102, "step": 82230 }, { "epoch": 3.84, - "learning_rate": 1.2383385682809059e-05, - "loss": 0.1237, + "learning_rate": 2.239526937419063e-05, + "loss": 0.2175, "step": 82235 }, { "epoch": 3.84, - "learning_rate": 1.2382916881533919e-05, - "loss": 0.327, + "learning_rate": 2.2394801304354612e-05, + "loss": 0.1815, "step": 82240 }, { "epoch": 3.84, - "learning_rate": 1.238244808025878e-05, - "loss": 0.292, + "learning_rate": 2.2394333234518592e-05, + "loss": 0.2345, "step": 82245 }, { "epoch": 3.84, - "learning_rate": 1.238197927898364e-05, - "loss": 0.0647, + "learning_rate": 2.239386516468257e-05, + "loss": 0.0523, "step": 82250 }, { "epoch": 3.84, - "learning_rate": 1.23815104777085e-05, - "loss": 0.0344, + "learning_rate": 2.239339709484655e-05, + "loss": 0.0251, "step": 82255 }, { "epoch": 3.84, - "learning_rate": 1.238104167643336e-05, - "loss": 0.0264, + "learning_rate": 2.2392929025010535e-05, + "loss": 0.0158, "step": 82260 }, { "epoch": 3.84, - "learning_rate": 1.238057287515822e-05, - "loss": 0.0361, + "learning_rate": 2.2392460955174514e-05, + "loss": 0.0979, "step": 82265 }, { "epoch": 3.84, - "learning_rate": 1.2380104073883082e-05, - "loss": 0.0335, + "learning_rate": 2.239199288533849e-05, + "loss": 0.0564, "step": 82270 }, { "epoch": 3.84, - "learning_rate": 1.2379635272607942e-05, - "loss": 0.0492, + "learning_rate": 2.239152481550247e-05, + "loss": 0.0944, "step": 82275 }, { "epoch": 3.84, - "learning_rate": 1.2379166471332804e-05, - "loss": 0.1007, + "learning_rate": 2.2391056745666454e-05, + "loss": 0.0365, "step": 82280 }, { "epoch": 3.84, - "learning_rate": 1.2378697670057665e-05, - "loss": 0.106, + "learning_rate": 2.2390588675830434e-05, + "loss": 0.1771, "step": 82285 }, { "epoch": 3.84, - "learning_rate": 1.2378228868782525e-05, - "loss": 0.235, + "learning_rate": 2.2390120605994414e-05, + "loss": 0.0726, "step": 82290 }, { "epoch": 3.84, - "learning_rate": 1.2377760067507385e-05, - "loss": 0.2, + "learning_rate": 2.2389652536158397e-05, + "loss": 0.1684, "step": 82295 }, { "epoch": 3.84, - "learning_rate": 1.2377291266232245e-05, - "loss": 0.0597, + "learning_rate": 2.2389184466322377e-05, + "loss": 0.0681, "step": 82300 }, { "epoch": 3.84, - "learning_rate": 1.2376822464957105e-05, - "loss": 0.0244, + "learning_rate": 2.2388716396486356e-05, + "loss": 0.0435, "step": 82305 }, { "epoch": 3.84, - "learning_rate": 1.2376353663681966e-05, - "loss": 0.0544, + "learning_rate": 2.2388248326650336e-05, + "loss": 0.0655, "step": 82310 }, { "epoch": 3.84, - "learning_rate": 1.2375884862406826e-05, - "loss": 0.0364, + "learning_rate": 2.238778025681432e-05, + "loss": 0.0567, "step": 82315 }, { "epoch": 3.84, - "learning_rate": 1.2375416061131686e-05, - "loss": 0.1265, + "learning_rate": 2.23873121869783e-05, + "loss": 0.0861, "step": 82320 }, { "epoch": 3.84, - "learning_rate": 1.2374947259856546e-05, - "loss": 0.1188, + "learning_rate": 2.238684411714228e-05, + "loss": 0.0521, "step": 82325 }, { "epoch": 3.84, - "learning_rate": 1.237447845858141e-05, - "loss": 0.1075, + "learning_rate": 2.238637604730626e-05, + "loss": 0.0765, "step": 82330 }, { "epoch": 3.84, - "learning_rate": 1.237400965730627e-05, - "loss": 0.0742, + "learning_rate": 2.238590797747024e-05, + "loss": 0.1953, "step": 82335 }, { "epoch": 3.84, - "learning_rate": 1.237354085603113e-05, - "loss": 0.3229, + "learning_rate": 2.238543990763422e-05, + "loss": 0.1195, "step": 82340 }, { "epoch": 3.84, - "learning_rate": 1.237307205475599e-05, - "loss": 0.3322, + "learning_rate": 2.23849718377982e-05, + "loss": 0.3025, "step": 82345 }, { "epoch": 3.84, - "learning_rate": 1.2372603253480851e-05, - "loss": 0.0564, + "learning_rate": 2.2384503767962178e-05, + "loss": 0.0505, "step": 82350 }, { "epoch": 3.84, - "learning_rate": 1.2372134452205711e-05, - "loss": 0.0833, + "learning_rate": 2.238403569812616e-05, + "loss": 0.0512, "step": 82355 }, { "epoch": 3.84, - "learning_rate": 1.2371665650930571e-05, - "loss": 0.0352, + "learning_rate": 2.238356762829014e-05, + "loss": 0.012, "step": 82360 }, { "epoch": 3.84, - "learning_rate": 1.237119684965543e-05, - "loss": 0.0388, + "learning_rate": 2.238309955845412e-05, + "loss": 0.0928, "step": 82365 }, { "epoch": 3.84, - "learning_rate": 1.237072804838029e-05, - "loss": 0.0282, + "learning_rate": 2.2382631488618104e-05, + "loss": 0.0475, "step": 82370 }, { "epoch": 3.84, - "learning_rate": 1.2370259247105154e-05, - "loss": 0.0479, + "learning_rate": 2.2382163418782084e-05, + "loss": 0.1051, "step": 82375 }, { "epoch": 3.84, - "learning_rate": 1.2369790445830014e-05, - "loss": 0.1388, + "learning_rate": 2.2381695348946064e-05, + "loss": 0.0843, "step": 82380 }, { "epoch": 3.84, - "learning_rate": 1.2369321644554874e-05, - "loss": 0.0462, + "learning_rate": 2.2381227279110044e-05, + "loss": 0.1098, "step": 82385 }, { "epoch": 3.84, - "learning_rate": 1.2368852843279736e-05, - "loss": 0.1379, + "learning_rate": 2.2380759209274027e-05, + "loss": 0.0967, "step": 82390 }, { "epoch": 3.84, - "learning_rate": 1.2368384042004596e-05, - "loss": 0.2816, + "learning_rate": 2.2380291139438007e-05, + "loss": 0.2581, "step": 82395 }, { "epoch": 3.84, - "learning_rate": 1.2367915240729455e-05, - "loss": 0.072, + "learning_rate": 2.2379823069601983e-05, + "loss": 0.0778, "step": 82400 }, { "epoch": 3.85, - "learning_rate": 1.2367446439454315e-05, - "loss": 0.0364, + "learning_rate": 2.2379354999765963e-05, + "loss": 0.0046, "step": 82405 }, { "epoch": 3.85, - "learning_rate": 1.2366977638179177e-05, - "loss": 0.05, + "learning_rate": 2.2378886929929946e-05, + "loss": 0.0354, "step": 82410 }, { "epoch": 3.85, - "learning_rate": 1.2366508836904037e-05, - "loss": 0.0285, + "learning_rate": 2.2378418860093926e-05, + "loss": 0.0479, "step": 82415 }, { "epoch": 3.85, - "learning_rate": 1.2366040035628899e-05, - "loss": 0.058, + "learning_rate": 2.2377950790257906e-05, + "loss": 0.0225, "step": 82420 }, { "epoch": 3.85, - "learning_rate": 1.2365571234353759e-05, - "loss": 0.0935, + "learning_rate": 2.237748272042189e-05, + "loss": 0.1116, "step": 82425 }, { "epoch": 3.85, - "learning_rate": 1.236510243307862e-05, - "loss": 0.0923, + "learning_rate": 2.237701465058587e-05, + "loss": 0.0269, "step": 82430 }, { "epoch": 3.85, - "learning_rate": 1.236463363180348e-05, - "loss": 0.1319, + "learning_rate": 2.237654658074985e-05, + "loss": 0.0923, "step": 82435 }, { "epoch": 3.85, - "learning_rate": 1.236416483052834e-05, - "loss": 0.081, + "learning_rate": 2.237607851091383e-05, + "loss": 0.1733, "step": 82440 }, { "epoch": 3.85, - "learning_rate": 1.23636960292532e-05, - "loss": 0.2415, + "learning_rate": 2.237561044107781e-05, + "loss": 0.1763, "step": 82445 }, { "epoch": 3.85, - "learning_rate": 1.2363227227978062e-05, - "loss": 0.0919, + "learning_rate": 2.237514237124179e-05, + "loss": 0.0401, "step": 82450 }, { "epoch": 3.85, - "learning_rate": 1.2362758426702922e-05, - "loss": 0.0045, + "learning_rate": 2.237467430140577e-05, + "loss": 0.0214, "step": 82455 }, { "epoch": 3.85, - "learning_rate": 1.2362289625427781e-05, - "loss": 0.0395, + "learning_rate": 2.2374206231569748e-05, + "loss": 0.0284, "step": 82460 }, { "epoch": 3.85, - "learning_rate": 1.2361820824152641e-05, - "loss": 0.0473, + "learning_rate": 2.237373816173373e-05, + "loss": 0.0377, "step": 82465 }, { "epoch": 3.85, - "learning_rate": 1.2361352022877505e-05, - "loss": 0.0098, + "learning_rate": 2.237327009189771e-05, + "loss": 0.044, "step": 82470 }, { "epoch": 3.85, - "learning_rate": 1.2360883221602365e-05, - "loss": 0.1075, + "learning_rate": 2.237280202206169e-05, + "loss": 0.0713, "step": 82475 }, { "epoch": 3.85, - "learning_rate": 1.2360414420327225e-05, - "loss": 0.0496, + "learning_rate": 2.2372333952225674e-05, + "loss": 0.1342, "step": 82480 }, { "epoch": 3.85, - "learning_rate": 1.2359945619052084e-05, - "loss": 0.135, + "learning_rate": 2.2371865882389654e-05, + "loss": 0.1273, "step": 82485 }, { "epoch": 3.85, - "learning_rate": 1.2359476817776946e-05, - "loss": 0.1991, + "learning_rate": 2.2371397812553633e-05, + "loss": 0.1875, "step": 82490 }, { "epoch": 3.85, - "learning_rate": 1.2359008016501806e-05, - "loss": 0.1782, + "learning_rate": 2.2370929742717613e-05, + "loss": 0.1975, "step": 82495 }, { "epoch": 3.85, - "learning_rate": 1.2358539215226666e-05, - "loss": 0.0896, + "learning_rate": 2.2370461672881596e-05, + "loss": 0.0662, "step": 82500 }, { "epoch": 3.85, - "learning_rate": 1.2358070413951526e-05, - "loss": 0.0338, + "learning_rate": 2.2369993603045576e-05, + "loss": 0.0392, "step": 82505 }, { "epoch": 3.85, - "learning_rate": 1.2357601612676386e-05, - "loss": 0.033, + "learning_rate": 2.2369525533209556e-05, + "loss": 0.0345, "step": 82510 }, { "epoch": 3.85, - "learning_rate": 1.235713281140125e-05, - "loss": 0.0812, + "learning_rate": 2.2369057463373536e-05, + "loss": 0.044, "step": 82515 }, { "epoch": 3.85, - "learning_rate": 1.2356664010126109e-05, - "loss": 0.0989, + "learning_rate": 2.236858939353752e-05, + "loss": 0.0383, "step": 82520 }, { "epoch": 3.85, - "learning_rate": 1.2356195208850969e-05, - "loss": 0.0882, + "learning_rate": 2.2368121323701495e-05, + "loss": 0.123, "step": 82525 }, { "epoch": 3.85, - "learning_rate": 1.235572640757583e-05, - "loss": 0.0474, + "learning_rate": 2.2367653253865475e-05, + "loss": 0.1136, "step": 82530 }, { "epoch": 3.85, - "learning_rate": 1.235525760630069e-05, - "loss": 0.0846, + "learning_rate": 2.2367185184029455e-05, + "loss": 0.1396, "step": 82535 }, { "epoch": 3.85, - "learning_rate": 1.235478880502555e-05, - "loss": 0.1838, + "learning_rate": 2.2366717114193438e-05, + "loss": 0.1958, "step": 82540 }, { "epoch": 3.85, - "learning_rate": 1.235432000375041e-05, - "loss": 0.1798, + "learning_rate": 2.2366249044357418e-05, + "loss": 0.2909, "step": 82545 }, { "epoch": 3.85, - "learning_rate": 1.235385120247527e-05, - "loss": 0.0859, + "learning_rate": 2.2365780974521398e-05, + "loss": 0.0533, "step": 82550 }, { "epoch": 3.85, - "learning_rate": 1.2353382401200132e-05, - "loss": 0.0166, + "learning_rate": 2.236531290468538e-05, + "loss": 0.0628, "step": 82555 }, { "epoch": 3.85, - "learning_rate": 1.2352913599924994e-05, - "loss": 0.0137, + "learning_rate": 2.236484483484936e-05, + "loss": 0.0421, "step": 82560 }, { "epoch": 3.85, - "learning_rate": 1.2352444798649854e-05, - "loss": 0.013, + "learning_rate": 2.236437676501334e-05, + "loss": 0.0568, "step": 82565 }, { "epoch": 3.85, - "learning_rate": 1.2351975997374715e-05, - "loss": 0.046, + "learning_rate": 2.236390869517732e-05, + "loss": 0.0627, "step": 82570 }, { "epoch": 3.85, - "learning_rate": 1.2351507196099575e-05, - "loss": 0.0529, + "learning_rate": 2.2363440625341304e-05, + "loss": 0.0672, "step": 82575 }, { "epoch": 3.85, - "learning_rate": 1.2351038394824435e-05, - "loss": 0.0893, + "learning_rate": 2.2362972555505284e-05, + "loss": 0.0907, "step": 82580 }, { "epoch": 3.85, - "learning_rate": 1.2350569593549295e-05, - "loss": 0.1083, + "learning_rate": 2.2362504485669263e-05, + "loss": 0.191, "step": 82585 }, { "epoch": 3.85, - "learning_rate": 1.2350100792274155e-05, - "loss": 0.1114, + "learning_rate": 2.236203641583324e-05, + "loss": 0.2798, "step": 82590 }, { "epoch": 3.85, - "learning_rate": 1.2349631990999017e-05, - "loss": 0.3898, + "learning_rate": 2.2361568345997223e-05, + "loss": 0.1866, "step": 82595 }, { "epoch": 3.85, - "learning_rate": 1.2349163189723877e-05, - "loss": 0.0617, + "learning_rate": 2.2361100276161203e-05, + "loss": 0.0876, "step": 82600 }, { "epoch": 3.85, - "learning_rate": 1.2348694388448738e-05, - "loss": 0.025, + "learning_rate": 2.2360632206325183e-05, + "loss": 0.039, "step": 82605 }, { "epoch": 3.85, - "learning_rate": 1.23482255871736e-05, - "loss": 0.0496, + "learning_rate": 2.2360164136489166e-05, + "loss": 0.0732, "step": 82610 }, { "epoch": 3.85, - "learning_rate": 1.234775678589846e-05, - "loss": 0.0378, + "learning_rate": 2.2359696066653146e-05, + "loss": 0.0437, "step": 82615 }, { "epoch": 3.86, - "learning_rate": 1.234728798462332e-05, - "loss": 0.1224, + "learning_rate": 2.2359227996817126e-05, + "loss": 0.0161, "step": 82620 }, { "epoch": 3.86, - "learning_rate": 1.234681918334818e-05, - "loss": 0.1153, + "learning_rate": 2.2358759926981105e-05, + "loss": 0.0969, "step": 82625 }, { "epoch": 3.86, - "learning_rate": 1.234635038207304e-05, - "loss": 0.0989, + "learning_rate": 2.235829185714509e-05, + "loss": 0.0505, "step": 82630 }, { "epoch": 3.86, - "learning_rate": 1.2345881580797901e-05, - "loss": 0.1872, + "learning_rate": 2.235782378730907e-05, + "loss": 0.2088, "step": 82635 }, { "epoch": 3.86, - "learning_rate": 1.2345412779522761e-05, - "loss": 0.2531, + "learning_rate": 2.2357355717473048e-05, + "loss": 0.1124, "step": 82640 }, { "epoch": 3.86, - "learning_rate": 1.2344943978247621e-05, - "loss": 0.2522, + "learning_rate": 2.2356887647637028e-05, + "loss": 0.1963, "step": 82645 }, { "epoch": 3.86, - "learning_rate": 1.2344475176972481e-05, - "loss": 0.0415, + "learning_rate": 2.2356419577801008e-05, + "loss": 0.0701, "step": 82650 }, { "epoch": 3.86, - "learning_rate": 1.2344006375697344e-05, - "loss": 0.0609, + "learning_rate": 2.2355951507964988e-05, + "loss": 0.0513, "step": 82655 }, { "epoch": 3.86, - "learning_rate": 1.2343537574422204e-05, - "loss": 0.0157, + "learning_rate": 2.2355483438128967e-05, + "loss": 0.0517, "step": 82660 }, { "epoch": 3.86, - "learning_rate": 1.2343068773147064e-05, - "loss": 0.1067, + "learning_rate": 2.235501536829295e-05, + "loss": 0.1029, "step": 82665 }, { "epoch": 3.86, - "learning_rate": 1.2342599971871924e-05, - "loss": 0.1152, + "learning_rate": 2.235454729845693e-05, + "loss": 0.0107, "step": 82670 }, { "epoch": 3.86, - "learning_rate": 1.2342131170596786e-05, - "loss": 0.0615, + "learning_rate": 2.235407922862091e-05, + "loss": 0.0779, "step": 82675 }, { "epoch": 3.86, - "learning_rate": 1.2341662369321646e-05, - "loss": 0.1448, + "learning_rate": 2.235361115878489e-05, + "loss": 0.1431, "step": 82680 }, { "epoch": 3.86, - "learning_rate": 1.2341193568046506e-05, - "loss": 0.1699, + "learning_rate": 2.2353143088948873e-05, + "loss": 0.0825, "step": 82685 }, { "epoch": 3.86, - "learning_rate": 1.2340724766771365e-05, - "loss": 0.2683, + "learning_rate": 2.2352675019112853e-05, + "loss": 0.2075, "step": 82690 }, { "epoch": 3.86, - "learning_rate": 1.2340255965496225e-05, - "loss": 0.2239, + "learning_rate": 2.2352206949276833e-05, + "loss": 0.1674, "step": 82695 }, { "epoch": 3.86, - "learning_rate": 1.2339787164221089e-05, - "loss": 0.0372, + "learning_rate": 2.2351738879440813e-05, + "loss": 0.0656, "step": 82700 }, { "epoch": 3.86, - "learning_rate": 1.2339318362945949e-05, - "loss": 0.02, + "learning_rate": 2.2351270809604796e-05, + "loss": 0.0379, "step": 82705 }, { "epoch": 3.86, - "learning_rate": 1.2338849561670809e-05, - "loss": 0.0412, + "learning_rate": 2.2350802739768776e-05, + "loss": 0.0323, "step": 82710 }, { "epoch": 3.86, - "learning_rate": 1.233838076039567e-05, - "loss": 0.0322, + "learning_rate": 2.2350334669932752e-05, + "loss": 0.0988, "step": 82715 }, { "epoch": 3.86, - "learning_rate": 1.233791195912053e-05, - "loss": 0.0874, + "learning_rate": 2.2349866600096735e-05, + "loss": 0.0599, "step": 82720 }, { "epoch": 3.86, - "learning_rate": 1.233744315784539e-05, - "loss": 0.0607, + "learning_rate": 2.2349398530260715e-05, + "loss": 0.0207, "step": 82725 }, { "epoch": 3.86, - "learning_rate": 1.233697435657025e-05, - "loss": 0.13, + "learning_rate": 2.2348930460424695e-05, + "loss": 0.121, "step": 82730 }, { "epoch": 3.86, - "learning_rate": 1.233650555529511e-05, - "loss": 0.1419, + "learning_rate": 2.2348462390588675e-05, + "loss": 0.1216, "step": 82735 }, { "epoch": 3.86, - "learning_rate": 1.2336036754019972e-05, - "loss": 0.1799, + "learning_rate": 2.2347994320752658e-05, + "loss": 0.1477, "step": 82740 }, { "epoch": 3.86, - "learning_rate": 1.2335567952744833e-05, - "loss": 0.1997, + "learning_rate": 2.2347526250916638e-05, + "loss": 0.2474, "step": 82745 }, { "epoch": 3.86, - "learning_rate": 1.2335099151469693e-05, - "loss": 0.054, + "learning_rate": 2.2347058181080618e-05, + "loss": 0.0578, "step": 82750 }, { "epoch": 3.86, - "learning_rate": 1.2334630350194555e-05, - "loss": 0.0319, + "learning_rate": 2.2346590111244598e-05, + "loss": 0.0195, "step": 82755 }, { "epoch": 3.86, - "learning_rate": 1.2334161548919415e-05, - "loss": 0.0091, + "learning_rate": 2.234612204140858e-05, + "loss": 0.0348, "step": 82760 }, { "epoch": 3.86, - "learning_rate": 1.2333692747644275e-05, - "loss": 0.0929, + "learning_rate": 2.234565397157256e-05, + "loss": 0.0948, "step": 82765 }, { "epoch": 3.86, - "learning_rate": 1.2333223946369135e-05, - "loss": 0.052, + "learning_rate": 2.234518590173654e-05, + "loss": 0.0502, "step": 82770 }, { "epoch": 3.86, - "learning_rate": 1.2332755145093995e-05, - "loss": 0.0458, + "learning_rate": 2.2344717831900517e-05, + "loss": 0.073, "step": 82775 }, { "epoch": 3.86, - "learning_rate": 1.2332286343818856e-05, - "loss": 0.06, + "learning_rate": 2.23442497620645e-05, + "loss": 0.112, "step": 82780 }, { "epoch": 3.86, - "learning_rate": 1.2331817542543716e-05, - "loss": 0.2001, + "learning_rate": 2.234378169222848e-05, + "loss": 0.1584, "step": 82785 }, { "epoch": 3.86, - "learning_rate": 1.2331348741268576e-05, - "loss": 0.198, + "learning_rate": 2.234331362239246e-05, + "loss": 0.1909, "step": 82790 }, { "epoch": 3.86, - "learning_rate": 1.233087993999344e-05, - "loss": 0.249, + "learning_rate": 2.2342845552556443e-05, + "loss": 0.3992, "step": 82795 }, { "epoch": 3.86, - "learning_rate": 1.23304111387183e-05, - "loss": 0.0389, + "learning_rate": 2.2342377482720423e-05, + "loss": 0.0497, "step": 82800 }, { "epoch": 3.86, - "learning_rate": 1.232994233744316e-05, - "loss": 0.03, + "learning_rate": 2.2341909412884403e-05, + "loss": 0.0295, "step": 82805 }, { "epoch": 3.86, - "learning_rate": 1.2329473536168019e-05, - "loss": 0.0357, + "learning_rate": 2.2341441343048382e-05, + "loss": 0.0463, "step": 82810 }, { "epoch": 3.86, - "learning_rate": 1.2329004734892879e-05, - "loss": 0.0694, + "learning_rate": 2.2340973273212366e-05, + "loss": 0.0431, "step": 82815 }, { "epoch": 3.86, - "learning_rate": 1.232853593361774e-05, - "loss": 0.0476, + "learning_rate": 2.2340505203376345e-05, + "loss": 0.0397, "step": 82820 }, { "epoch": 3.86, - "learning_rate": 1.23280671323426e-05, - "loss": 0.1148, + "learning_rate": 2.2340037133540325e-05, + "loss": 0.0647, "step": 82825 }, { "epoch": 3.86, - "learning_rate": 1.232759833106746e-05, - "loss": 0.1288, + "learning_rate": 2.2339569063704305e-05, + "loss": 0.0882, "step": 82830 }, { "epoch": 3.87, - "learning_rate": 1.232712952979232e-05, - "loss": 0.0922, + "learning_rate": 2.2339100993868288e-05, + "loss": 0.1095, "step": 82835 }, { "epoch": 3.87, - "learning_rate": 1.2326660728517184e-05, - "loss": 0.1951, + "learning_rate": 2.2338632924032265e-05, + "loss": 0.1615, "step": 82840 }, { "epoch": 3.87, - "learning_rate": 1.2326191927242044e-05, - "loss": 0.2239, + "learning_rate": 2.2338164854196244e-05, + "loss": 0.2288, "step": 82845 }, { "epoch": 3.87, - "learning_rate": 1.2325723125966904e-05, - "loss": 0.0755, + "learning_rate": 2.2337696784360228e-05, + "loss": 0.0408, "step": 82850 }, { "epoch": 3.87, - "learning_rate": 1.2325254324691764e-05, - "loss": 0.033, + "learning_rate": 2.2337228714524207e-05, + "loss": 0.0132, "step": 82855 }, { "epoch": 3.87, - "learning_rate": 1.2324785523416625e-05, - "loss": 0.055, + "learning_rate": 2.2336760644688187e-05, + "loss": 0.0315, "step": 82860 }, { "epoch": 3.87, - "learning_rate": 1.2324316722141485e-05, - "loss": 0.0388, + "learning_rate": 2.2336292574852167e-05, + "loss": 0.0342, "step": 82865 }, { "epoch": 3.87, - "learning_rate": 1.2323847920866345e-05, - "loss": 0.114, + "learning_rate": 2.233582450501615e-05, + "loss": 0.11, "step": 82870 }, { "epoch": 3.87, - "learning_rate": 1.2323379119591205e-05, - "loss": 0.0769, + "learning_rate": 2.233535643518013e-05, + "loss": 0.0778, "step": 82875 }, { "epoch": 3.87, - "learning_rate": 1.2322910318316067e-05, - "loss": 0.112, + "learning_rate": 2.233488836534411e-05, + "loss": 0.082, "step": 82880 }, { "epoch": 3.87, - "learning_rate": 1.2322441517040928e-05, - "loss": 0.164, + "learning_rate": 2.233442029550809e-05, + "loss": 0.1658, "step": 82885 }, { "epoch": 3.87, - "learning_rate": 1.2321972715765788e-05, - "loss": 0.3269, + "learning_rate": 2.2333952225672073e-05, + "loss": 0.1372, "step": 82890 }, { "epoch": 3.87, - "learning_rate": 1.2321503914490648e-05, - "loss": 0.4316, + "learning_rate": 2.2333484155836053e-05, + "loss": 0.3032, "step": 82895 }, { "epoch": 3.87, - "learning_rate": 1.232103511321551e-05, - "loss": 0.0552, + "learning_rate": 2.2333016086000033e-05, + "loss": 0.0296, "step": 82900 }, { "epoch": 3.87, - "learning_rate": 1.232056631194037e-05, - "loss": 0.0282, + "learning_rate": 2.2332548016164012e-05, + "loss": 0.0138, "step": 82905 }, { "epoch": 3.87, - "learning_rate": 1.232009751066523e-05, - "loss": 0.0203, + "learning_rate": 2.2332079946327992e-05, + "loss": 0.0458, "step": 82910 }, { "epoch": 3.87, - "learning_rate": 1.231962870939009e-05, - "loss": 0.0557, + "learning_rate": 2.2331611876491972e-05, + "loss": 0.0344, "step": 82915 }, { "epoch": 3.87, - "learning_rate": 1.2319159908114951e-05, - "loss": 0.0393, + "learning_rate": 2.2331143806655952e-05, + "loss": 0.0212, "step": 82920 }, { "epoch": 3.87, - "learning_rate": 1.2318691106839811e-05, - "loss": 0.0684, + "learning_rate": 2.2330675736819935e-05, + "loss": 0.1003, "step": 82925 }, { "epoch": 3.87, - "learning_rate": 1.2318222305564673e-05, - "loss": 0.0716, + "learning_rate": 2.2330207666983915e-05, + "loss": 0.154, "step": 82930 }, { "epoch": 3.87, - "learning_rate": 1.2317753504289533e-05, - "loss": 0.1523, + "learning_rate": 2.2329739597147895e-05, + "loss": 0.0966, "step": 82935 }, { "epoch": 3.87, - "learning_rate": 1.2317284703014394e-05, - "loss": 0.1399, + "learning_rate": 2.2329271527311875e-05, + "loss": 0.2038, "step": 82940 }, { "epoch": 3.87, - "learning_rate": 1.2316815901739254e-05, - "loss": 0.1503, + "learning_rate": 2.2328803457475858e-05, + "loss": 0.2229, "step": 82945 }, { "epoch": 3.87, - "learning_rate": 1.2316347100464114e-05, - "loss": 0.0584, + "learning_rate": 2.2328335387639838e-05, + "loss": 0.0689, "step": 82950 }, { "epoch": 3.87, - "learning_rate": 1.2315878299188974e-05, - "loss": 0.0096, + "learning_rate": 2.2327867317803817e-05, + "loss": 0.0163, "step": 82955 }, { "epoch": 3.87, - "learning_rate": 1.2315409497913836e-05, - "loss": 0.0223, + "learning_rate": 2.2327399247967797e-05, + "loss": 0.0262, "step": 82960 }, { "epoch": 3.87, - "learning_rate": 1.2314940696638696e-05, - "loss": 0.0296, + "learning_rate": 2.2326931178131777e-05, + "loss": 0.0337, "step": 82965 }, { "epoch": 3.87, - "learning_rate": 1.2314471895363556e-05, - "loss": 0.0519, + "learning_rate": 2.2326463108295757e-05, + "loss": 0.0474, "step": 82970 }, { "epoch": 3.87, - "learning_rate": 1.2314003094088416e-05, - "loss": 0.0427, + "learning_rate": 2.2325995038459737e-05, + "loss": 0.1172, "step": 82975 }, { "epoch": 3.87, - "learning_rate": 1.2313534292813279e-05, - "loss": 0.0689, + "learning_rate": 2.232552696862372e-05, + "loss": 0.0752, "step": 82980 }, { "epoch": 3.87, - "learning_rate": 1.2313065491538139e-05, - "loss": 0.1305, + "learning_rate": 2.23250588987877e-05, + "loss": 0.1473, "step": 82985 }, { "epoch": 3.87, - "learning_rate": 1.2312596690262999e-05, - "loss": 0.1061, + "learning_rate": 2.232459082895168e-05, + "loss": 0.1789, "step": 82990 }, { "epoch": 3.87, - "learning_rate": 1.2312127888987859e-05, - "loss": 0.2652, + "learning_rate": 2.232412275911566e-05, + "loss": 0.2509, "step": 82995 }, { "epoch": 3.87, - "learning_rate": 1.231165908771272e-05, - "loss": 0.0835, + "learning_rate": 2.2323654689279643e-05, + "loss": 0.0294, "step": 83000 }, { "epoch": 3.87, - "learning_rate": 1.231119028643758e-05, - "loss": 0.0156, + "learning_rate": 2.2323186619443622e-05, + "loss": 0.0233, "step": 83005 }, { "epoch": 3.87, - "learning_rate": 1.231072148516244e-05, - "loss": 0.0279, + "learning_rate": 2.2322718549607602e-05, + "loss": 0.0889, "step": 83010 }, { "epoch": 3.87, - "learning_rate": 1.23102526838873e-05, - "loss": 0.0313, + "learning_rate": 2.2322250479771582e-05, + "loss": 0.0056, "step": 83015 }, { "epoch": 3.87, - "learning_rate": 1.230978388261216e-05, - "loss": 0.0731, + "learning_rate": 2.2321782409935565e-05, + "loss": 0.0554, "step": 83020 }, { "epoch": 3.87, - "learning_rate": 1.2309315081337023e-05, - "loss": 0.1185, + "learning_rate": 2.2321314340099545e-05, + "loss": 0.0835, "step": 83025 }, { "epoch": 3.87, - "learning_rate": 1.2308846280061883e-05, - "loss": 0.089, + "learning_rate": 2.232084627026352e-05, + "loss": 0.1505, "step": 83030 }, { "epoch": 3.87, - "learning_rate": 1.2308377478786743e-05, - "loss": 0.1278, + "learning_rate": 2.2320378200427505e-05, + "loss": 0.1368, "step": 83035 }, { "epoch": 3.87, - "learning_rate": 1.2307908677511605e-05, - "loss": 0.1421, + "learning_rate": 2.2319910130591484e-05, + "loss": 0.2236, "step": 83040 }, { "epoch": 3.87, - "learning_rate": 1.2307439876236465e-05, - "loss": 0.2779, + "learning_rate": 2.2319442060755464e-05, + "loss": 0.3199, "step": 83045 }, { "epoch": 3.88, - "learning_rate": 1.2306971074961325e-05, - "loss": 0.0081, + "learning_rate": 2.2318973990919444e-05, + "loss": 0.0805, "step": 83050 }, { "epoch": 3.88, - "learning_rate": 1.2306502273686185e-05, - "loss": 0.0025, + "learning_rate": 2.2318505921083427e-05, + "loss": 0.0241, "step": 83055 }, { "epoch": 3.88, - "learning_rate": 1.2306033472411045e-05, - "loss": 0.0285, + "learning_rate": 2.2318037851247407e-05, + "loss": 0.0312, "step": 83060 }, { "epoch": 3.88, - "learning_rate": 1.2305564671135906e-05, - "loss": 0.0414, + "learning_rate": 2.2317569781411387e-05, + "loss": 0.0618, "step": 83065 }, { "epoch": 3.88, - "learning_rate": 1.2305095869860768e-05, - "loss": 0.0701, + "learning_rate": 2.2317101711575367e-05, + "loss": 0.1242, "step": 83070 }, { "epoch": 3.88, - "learning_rate": 1.2304627068585628e-05, - "loss": 0.069, + "learning_rate": 2.231663364173935e-05, + "loss": 0.0429, "step": 83075 }, { "epoch": 3.88, - "learning_rate": 1.230415826731049e-05, - "loss": 0.0509, + "learning_rate": 2.231616557190333e-05, + "loss": 0.0799, "step": 83080 }, { "epoch": 3.88, - "learning_rate": 1.230368946603535e-05, - "loss": 0.1334, + "learning_rate": 2.231569750206731e-05, + "loss": 0.141, "step": 83085 }, { "epoch": 3.88, - "learning_rate": 1.230322066476021e-05, - "loss": 0.1531, + "learning_rate": 2.231522943223129e-05, + "loss": 0.1201, "step": 83090 }, { "epoch": 3.88, - "learning_rate": 1.230275186348507e-05, - "loss": 0.227, + "learning_rate": 2.231476136239527e-05, + "loss": 0.2535, "step": 83095 }, { "epoch": 3.88, - "learning_rate": 1.230228306220993e-05, - "loss": 0.0466, + "learning_rate": 2.231429329255925e-05, + "loss": 0.06, "step": 83100 }, { "epoch": 3.88, - "learning_rate": 1.230181426093479e-05, - "loss": 0.0232, + "learning_rate": 2.231382522272323e-05, + "loss": 0.0474, "step": 83105 }, { "epoch": 3.88, - "learning_rate": 1.230134545965965e-05, - "loss": 0.0455, + "learning_rate": 2.2313357152887212e-05, + "loss": 0.0128, "step": 83110 }, { "epoch": 3.88, - "learning_rate": 1.230087665838451e-05, - "loss": 0.015, + "learning_rate": 2.2312889083051192e-05, + "loss": 0.0148, "step": 83115 }, { "epoch": 3.88, - "learning_rate": 1.2300407857109374e-05, - "loss": 0.0395, + "learning_rate": 2.231242101321517e-05, + "loss": 0.0678, "step": 83120 }, { "epoch": 3.88, - "learning_rate": 1.2299939055834234e-05, - "loss": 0.1822, + "learning_rate": 2.231195294337915e-05, + "loss": 0.0679, "step": 83125 }, { "epoch": 3.88, - "learning_rate": 1.2299470254559094e-05, - "loss": 0.0729, + "learning_rate": 2.2311484873543135e-05, + "loss": 0.0781, "step": 83130 }, { "epoch": 3.88, - "learning_rate": 1.2299001453283954e-05, - "loss": 0.0764, + "learning_rate": 2.2311016803707115e-05, + "loss": 0.1216, "step": 83135 }, { "epoch": 3.88, - "learning_rate": 1.2298532652008814e-05, - "loss": 0.1371, + "learning_rate": 2.2310548733871094e-05, + "loss": 0.1405, "step": 83140 }, { "epoch": 3.88, - "learning_rate": 1.2298063850733675e-05, - "loss": 0.3928, + "learning_rate": 2.2310080664035074e-05, + "loss": 0.2729, "step": 83145 }, { "epoch": 3.88, - "learning_rate": 1.2297595049458535e-05, - "loss": 0.0693, + "learning_rate": 2.2309612594199057e-05, + "loss": 0.0347, "step": 83150 }, { "epoch": 3.88, - "learning_rate": 1.2297126248183395e-05, - "loss": 0.0335, + "learning_rate": 2.2309144524363034e-05, + "loss": 0.0122, "step": 83155 }, { "epoch": 3.88, - "learning_rate": 1.2296657446908255e-05, - "loss": 0.007, + "learning_rate": 2.2308676454527014e-05, + "loss": 0.063, "step": 83160 }, { "epoch": 3.88, - "learning_rate": 1.2296188645633118e-05, - "loss": 0.027, + "learning_rate": 2.2308208384690997e-05, + "loss": 0.0246, "step": 83165 }, { "epoch": 3.88, - "learning_rate": 1.2295719844357978e-05, - "loss": 0.0918, + "learning_rate": 2.2307740314854977e-05, + "loss": 0.0602, "step": 83170 }, { "epoch": 3.88, - "learning_rate": 1.2295251043082838e-05, - "loss": 0.0589, + "learning_rate": 2.2307272245018956e-05, + "loss": 0.0402, "step": 83175 }, { "epoch": 3.88, - "learning_rate": 1.2294782241807698e-05, - "loss": 0.1043, + "learning_rate": 2.2306804175182936e-05, + "loss": 0.0778, "step": 83180 }, { "epoch": 3.88, - "learning_rate": 1.229431344053256e-05, - "loss": 0.0707, + "learning_rate": 2.230633610534692e-05, + "loss": 0.0535, "step": 83185 }, { "epoch": 3.88, - "learning_rate": 1.229384463925742e-05, - "loss": 0.2569, + "learning_rate": 2.23058680355109e-05, + "loss": 0.1887, "step": 83190 }, { "epoch": 3.88, - "learning_rate": 1.229337583798228e-05, - "loss": 0.2281, + "learning_rate": 2.230539996567488e-05, + "loss": 0.1845, "step": 83195 }, { "epoch": 3.88, - "learning_rate": 1.229290703670714e-05, - "loss": 0.0738, + "learning_rate": 2.230493189583886e-05, + "loss": 0.0541, "step": 83200 }, { "epoch": 3.88, - "learning_rate": 1.2292438235432e-05, - "loss": 0.0241, + "learning_rate": 2.2304463826002842e-05, + "loss": 0.0616, "step": 83205 }, { "epoch": 3.88, - "learning_rate": 1.2291969434156863e-05, - "loss": 0.0525, + "learning_rate": 2.2303995756166822e-05, + "loss": 0.0257, "step": 83210 }, { "epoch": 3.88, - "learning_rate": 1.2291500632881723e-05, - "loss": 0.0872, + "learning_rate": 2.2303527686330802e-05, + "loss": 0.0799, "step": 83215 }, { "epoch": 3.88, - "learning_rate": 1.2291031831606583e-05, - "loss": 0.0468, + "learning_rate": 2.230305961649478e-05, + "loss": 0.0825, "step": 83220 }, { "epoch": 3.88, - "learning_rate": 1.2290563030331444e-05, - "loss": 0.0272, + "learning_rate": 2.230259154665876e-05, + "loss": 0.0745, "step": 83225 }, { "epoch": 3.88, - "learning_rate": 1.2290094229056304e-05, - "loss": 0.085, + "learning_rate": 2.230212347682274e-05, + "loss": 0.1404, "step": 83230 }, { "epoch": 3.88, - "learning_rate": 1.2289625427781164e-05, - "loss": 0.1253, + "learning_rate": 2.230165540698672e-05, + "loss": 0.1548, "step": 83235 }, { "epoch": 3.88, - "learning_rate": 1.2289156626506024e-05, - "loss": 0.235, + "learning_rate": 2.2301187337150704e-05, + "loss": 0.2785, "step": 83240 }, { "epoch": 3.88, - "learning_rate": 1.2288687825230884e-05, - "loss": 0.2809, + "learning_rate": 2.2300719267314684e-05, + "loss": 0.205, "step": 83245 }, { "epoch": 3.88, - "learning_rate": 1.2288219023955746e-05, - "loss": 0.0401, + "learning_rate": 2.2300251197478664e-05, + "loss": 0.0923, "step": 83250 }, { "epoch": 3.88, - "learning_rate": 1.2287750222680607e-05, - "loss": 0.0347, + "learning_rate": 2.2299783127642644e-05, + "loss": 0.0431, "step": 83255 }, { "epoch": 3.89, - "learning_rate": 1.2287281421405467e-05, - "loss": 0.056, + "learning_rate": 2.2299315057806627e-05, + "loss": 0.0308, "step": 83260 }, { "epoch": 3.89, - "learning_rate": 1.2286812620130329e-05, - "loss": 0.0216, + "learning_rate": 2.2298846987970607e-05, + "loss": 0.0779, "step": 83265 }, { "epoch": 3.89, - "learning_rate": 1.2286343818855189e-05, - "loss": 0.0674, + "learning_rate": 2.2298378918134587e-05, + "loss": 0.0827, "step": 83270 }, { "epoch": 3.89, - "learning_rate": 1.2285875017580049e-05, - "loss": 0.0733, + "learning_rate": 2.229791084829857e-05, + "loss": 0.1022, "step": 83275 }, { "epoch": 3.89, - "learning_rate": 1.2285406216304909e-05, - "loss": 0.0262, + "learning_rate": 2.2297442778462546e-05, + "loss": 0.0744, "step": 83280 }, { "epoch": 3.89, - "learning_rate": 1.2284937415029769e-05, - "loss": 0.1099, + "learning_rate": 2.2296974708626526e-05, + "loss": 0.1199, "step": 83285 }, { "epoch": 3.89, - "learning_rate": 1.228446861375463e-05, - "loss": 0.1443, + "learning_rate": 2.2296506638790506e-05, + "loss": 0.1262, "step": 83290 }, { "epoch": 3.89, - "learning_rate": 1.228399981247949e-05, - "loss": 0.1946, + "learning_rate": 2.229603856895449e-05, + "loss": 0.2308, "step": 83295 }, { "epoch": 3.89, - "learning_rate": 1.228353101120435e-05, - "loss": 0.0785, + "learning_rate": 2.229557049911847e-05, + "loss": 0.0524, "step": 83300 }, { "epoch": 3.89, - "learning_rate": 1.2283062209929214e-05, - "loss": 0.033, + "learning_rate": 2.229510242928245e-05, + "loss": 0.0239, "step": 83305 }, { "epoch": 3.89, - "learning_rate": 1.2282593408654074e-05, - "loss": 0.0262, + "learning_rate": 2.229463435944643e-05, + "loss": 0.0177, "step": 83310 }, { "epoch": 3.89, - "learning_rate": 1.2282124607378933e-05, - "loss": 0.0774, + "learning_rate": 2.229416628961041e-05, + "loss": 0.0378, "step": 83315 }, { "epoch": 3.89, - "learning_rate": 1.2281655806103793e-05, - "loss": 0.0314, + "learning_rate": 2.229369821977439e-05, + "loss": 0.0553, "step": 83320 }, { "epoch": 3.89, - "learning_rate": 1.2281187004828653e-05, - "loss": 0.0695, + "learning_rate": 2.229323014993837e-05, + "loss": 0.0758, "step": 83325 }, { "epoch": 3.89, - "learning_rate": 1.2280718203553515e-05, - "loss": 0.0794, + "learning_rate": 2.229276208010235e-05, + "loss": 0.1487, "step": 83330 }, { "epoch": 3.89, - "learning_rate": 1.2280249402278375e-05, - "loss": 0.0691, + "learning_rate": 2.2292294010266334e-05, + "loss": 0.2569, "step": 83335 }, { "epoch": 3.89, - "learning_rate": 1.2279780601003235e-05, - "loss": 0.0894, + "learning_rate": 2.2291825940430314e-05, + "loss": 0.218, "step": 83340 }, { "epoch": 3.89, - "learning_rate": 1.2279311799728095e-05, - "loss": 0.2373, + "learning_rate": 2.229135787059429e-05, + "loss": 0.3052, "step": 83345 }, { "epoch": 3.89, - "learning_rate": 1.2278842998452958e-05, - "loss": 0.0555, + "learning_rate": 2.2290889800758274e-05, + "loss": 0.0763, "step": 83350 }, { "epoch": 3.89, - "learning_rate": 1.2278374197177818e-05, - "loss": 0.0623, + "learning_rate": 2.2290421730922254e-05, + "loss": 0.0093, "step": 83355 }, { "epoch": 3.89, - "learning_rate": 1.2277905395902678e-05, - "loss": 0.1059, + "learning_rate": 2.2289953661086233e-05, + "loss": 0.028, "step": 83360 }, { "epoch": 3.89, - "learning_rate": 1.2277436594627538e-05, - "loss": 0.1103, + "learning_rate": 2.2289485591250213e-05, + "loss": 0.0534, "step": 83365 }, { "epoch": 3.89, - "learning_rate": 1.22769677933524e-05, - "loss": 0.0721, + "learning_rate": 2.2289017521414196e-05, + "loss": 0.0184, "step": 83370 }, { "epoch": 3.89, - "learning_rate": 1.227649899207726e-05, - "loss": 0.0832, + "learning_rate": 2.2288549451578176e-05, + "loss": 0.0577, "step": 83375 }, { "epoch": 3.89, - "learning_rate": 1.227603019080212e-05, - "loss": 0.0922, + "learning_rate": 2.2288081381742156e-05, + "loss": 0.1888, "step": 83380 }, { "epoch": 3.89, - "learning_rate": 1.227556138952698e-05, - "loss": 0.1668, + "learning_rate": 2.2287613311906136e-05, + "loss": 0.1971, "step": 83385 }, { "epoch": 3.89, - "learning_rate": 1.2275092588251841e-05, - "loss": 0.1856, + "learning_rate": 2.228714524207012e-05, + "loss": 0.1997, "step": 83390 }, { "epoch": 3.89, - "learning_rate": 1.2274623786976703e-05, - "loss": 0.2563, + "learning_rate": 2.22866771722341e-05, + "loss": 0.4214, "step": 83395 }, { "epoch": 3.89, - "learning_rate": 1.2274154985701562e-05, - "loss": 0.0489, + "learning_rate": 2.228620910239808e-05, + "loss": 0.0694, "step": 83400 }, { "epoch": 3.89, - "learning_rate": 1.2273686184426422e-05, - "loss": 0.0425, + "learning_rate": 2.2285741032562062e-05, + "loss": 0.0517, "step": 83405 }, { "epoch": 3.89, - "learning_rate": 1.2273217383151284e-05, - "loss": 0.0183, + "learning_rate": 2.228527296272604e-05, + "loss": 0.0223, "step": 83410 }, { "epoch": 3.89, - "learning_rate": 1.2272748581876144e-05, - "loss": 0.028, + "learning_rate": 2.2284804892890018e-05, + "loss": 0.1046, "step": 83415 }, { "epoch": 3.89, - "learning_rate": 1.2272279780601004e-05, - "loss": 0.074, + "learning_rate": 2.2284336823053998e-05, + "loss": 0.0898, "step": 83420 }, { "epoch": 3.89, - "learning_rate": 1.2271810979325864e-05, - "loss": 0.1143, + "learning_rate": 2.228386875321798e-05, + "loss": 0.0998, "step": 83425 }, { "epoch": 3.89, - "learning_rate": 1.2271342178050725e-05, - "loss": 0.063, + "learning_rate": 2.228340068338196e-05, + "loss": 0.0361, "step": 83430 }, { "epoch": 3.89, - "learning_rate": 1.2270873376775585e-05, - "loss": 0.1443, + "learning_rate": 2.228293261354594e-05, + "loss": 0.0941, "step": 83435 }, { "epoch": 3.89, - "learning_rate": 1.2270404575500445e-05, - "loss": 0.1416, + "learning_rate": 2.228246454370992e-05, + "loss": 0.0772, "step": 83440 }, { "epoch": 3.89, - "learning_rate": 1.2269935774225307e-05, - "loss": 0.3292, + "learning_rate": 2.2281996473873904e-05, + "loss": 0.1887, "step": 83445 }, { "epoch": 3.89, - "learning_rate": 1.2269466972950169e-05, - "loss": 0.0906, + "learning_rate": 2.2281528404037884e-05, + "loss": 0.0736, "step": 83450 }, { "epoch": 3.89, - "learning_rate": 1.2268998171675029e-05, - "loss": 0.025, + "learning_rate": 2.2281060334201864e-05, + "loss": 0.0136, "step": 83455 }, { "epoch": 3.89, - "learning_rate": 1.2268529370399888e-05, - "loss": 0.0695, + "learning_rate": 2.2280592264365847e-05, + "loss": 0.0311, "step": 83460 }, { "epoch": 3.89, - "learning_rate": 1.2268060569124748e-05, - "loss": 0.0739, + "learning_rate": 2.2280124194529827e-05, + "loss": 0.12, "step": 83465 }, { "epoch": 3.89, - "learning_rate": 1.226759176784961e-05, - "loss": 0.0687, + "learning_rate": 2.2279656124693803e-05, + "loss": 0.0913, "step": 83470 }, { "epoch": 3.9, - "learning_rate": 1.226712296657447e-05, - "loss": 0.1008, + "learning_rate": 2.2279188054857783e-05, + "loss": 0.0881, "step": 83475 }, { "epoch": 3.9, - "learning_rate": 1.226665416529933e-05, - "loss": 0.1099, + "learning_rate": 2.2278719985021766e-05, + "loss": 0.1281, "step": 83480 }, { "epoch": 3.9, - "learning_rate": 1.226618536402419e-05, - "loss": 0.1334, + "learning_rate": 2.2278251915185746e-05, + "loss": 0.1474, "step": 83485 }, { "epoch": 3.9, - "learning_rate": 1.2265716562749053e-05, - "loss": 0.2016, + "learning_rate": 2.2277783845349726e-05, + "loss": 0.1841, "step": 83490 }, { "epoch": 3.9, - "learning_rate": 1.2265247761473913e-05, - "loss": 0.3243, + "learning_rate": 2.2277315775513705e-05, + "loss": 0.1951, "step": 83495 }, { "epoch": 3.9, - "learning_rate": 1.2264778960198773e-05, - "loss": 0.0194, + "learning_rate": 2.227684770567769e-05, + "loss": 0.0649, "step": 83500 }, { "epoch": 3.9, - "learning_rate": 1.2264310158923633e-05, - "loss": 0.0295, + "learning_rate": 2.227637963584167e-05, + "loss": 0.0204, "step": 83505 }, { "epoch": 3.9, - "learning_rate": 1.2263841357648495e-05, - "loss": 0.0918, + "learning_rate": 2.2275911566005648e-05, + "loss": 0.0676, "step": 83510 }, { "epoch": 3.9, - "learning_rate": 1.2263372556373354e-05, - "loss": 0.1133, + "learning_rate": 2.227544349616963e-05, + "loss": 0.0747, "step": 83515 }, { "epoch": 3.9, - "learning_rate": 1.2262903755098214e-05, - "loss": 0.039, + "learning_rate": 2.227497542633361e-05, + "loss": 0.0659, "step": 83520 }, { "epoch": 3.9, - "learning_rate": 1.2262434953823074e-05, - "loss": 0.0482, + "learning_rate": 2.227450735649759e-05, + "loss": 0.0439, "step": 83525 }, { "epoch": 3.9, - "learning_rate": 1.2261966152547934e-05, - "loss": 0.0764, + "learning_rate": 2.227403928666157e-05, + "loss": 0.0886, "step": 83530 }, { "epoch": 3.9, - "learning_rate": 1.2261497351272798e-05, - "loss": 0.1765, + "learning_rate": 2.227357121682555e-05, + "loss": 0.162, "step": 83535 }, { "epoch": 3.9, - "learning_rate": 1.2261028549997658e-05, - "loss": 0.152, + "learning_rate": 2.227310314698953e-05, + "loss": 0.1586, "step": 83540 }, { "epoch": 3.9, - "learning_rate": 1.2260559748722517e-05, - "loss": 0.294, + "learning_rate": 2.227263507715351e-05, + "loss": 0.2899, "step": 83545 }, { "epoch": 3.9, - "learning_rate": 1.2260090947447379e-05, - "loss": 0.0708, + "learning_rate": 2.227216700731749e-05, + "loss": 0.0803, "step": 83550 }, { "epoch": 3.9, - "learning_rate": 1.2259622146172239e-05, - "loss": 0.0091, + "learning_rate": 2.2271698937481473e-05, + "loss": 0.0282, "step": 83555 }, { "epoch": 3.9, - "learning_rate": 1.2259153344897099e-05, - "loss": 0.0575, + "learning_rate": 2.2271230867645453e-05, + "loss": 0.0596, "step": 83560 }, { "epoch": 3.9, - "learning_rate": 1.2258684543621959e-05, - "loss": 0.0657, + "learning_rate": 2.2270762797809433e-05, + "loss": 0.0265, "step": 83565 }, { "epoch": 3.9, - "learning_rate": 1.2258215742346819e-05, - "loss": 0.0315, + "learning_rate": 2.2270294727973413e-05, + "loss": 0.0587, "step": 83570 }, { "epoch": 3.9, - "learning_rate": 1.225774694107168e-05, - "loss": 0.2276, + "learning_rate": 2.2269826658137396e-05, + "loss": 0.1659, "step": 83575 }, { "epoch": 3.9, - "learning_rate": 1.2257278139796542e-05, - "loss": 0.0769, + "learning_rate": 2.2269358588301376e-05, + "loss": 0.1037, "step": 83580 }, { "epoch": 3.9, - "learning_rate": 1.2256809338521402e-05, - "loss": 0.2337, + "learning_rate": 2.2268890518465356e-05, + "loss": 0.1101, "step": 83585 }, { "epoch": 3.9, - "learning_rate": 1.2256340537246264e-05, - "loss": 0.1424, + "learning_rate": 2.226842244862934e-05, + "loss": 0.1713, "step": 83590 }, { "epoch": 3.9, - "learning_rate": 1.2255871735971124e-05, - "loss": 0.2379, + "learning_rate": 2.226795437879332e-05, + "loss": 0.1707, "step": 83595 }, { "epoch": 3.9, - "learning_rate": 1.2255402934695984e-05, - "loss": 0.0489, + "learning_rate": 2.2267486308957295e-05, + "loss": 0.074, "step": 83600 }, { "epoch": 3.9, - "learning_rate": 1.2254934133420843e-05, - "loss": 0.0474, + "learning_rate": 2.2267018239121275e-05, + "loss": 0.0166, "step": 83605 }, { "epoch": 3.9, - "learning_rate": 1.2254465332145703e-05, - "loss": 0.0358, + "learning_rate": 2.2266550169285258e-05, + "loss": 0.0708, "step": 83610 }, { "epoch": 3.9, - "learning_rate": 1.2253996530870565e-05, - "loss": 0.044, + "learning_rate": 2.2266082099449238e-05, + "loss": 0.0831, "step": 83615 }, { "epoch": 3.9, - "learning_rate": 1.2253527729595425e-05, - "loss": 0.0684, + "learning_rate": 2.2265614029613218e-05, + "loss": 0.0426, "step": 83620 }, { "epoch": 3.9, - "learning_rate": 1.2253058928320285e-05, - "loss": 0.0767, + "learning_rate": 2.2265145959777198e-05, + "loss": 0.0809, "step": 83625 }, { "epoch": 3.9, - "learning_rate": 1.2252590127045148e-05, - "loss": 0.1371, + "learning_rate": 2.226467788994118e-05, + "loss": 0.1217, "step": 83630 }, { "epoch": 3.9, - "learning_rate": 1.2252121325770008e-05, - "loss": 0.1474, + "learning_rate": 2.226420982010516e-05, + "loss": 0.1375, "step": 83635 }, { "epoch": 3.9, - "learning_rate": 1.2251652524494868e-05, - "loss": 0.1975, + "learning_rate": 2.226374175026914e-05, + "loss": 0.108, "step": 83640 }, { "epoch": 3.9, - "learning_rate": 1.2251183723219728e-05, - "loss": 0.1976, + "learning_rate": 2.2263273680433124e-05, + "loss": 0.1466, "step": 83645 }, { "epoch": 3.9, - "learning_rate": 1.2250714921944588e-05, - "loss": 0.0165, + "learning_rate": 2.2262805610597104e-05, + "loss": 0.0792, "step": 83650 }, { "epoch": 3.9, - "learning_rate": 1.225024612066945e-05, - "loss": 0.0297, + "learning_rate": 2.2262337540761083e-05, + "loss": 0.0201, "step": 83655 }, { "epoch": 3.9, - "learning_rate": 1.224977731939431e-05, - "loss": 0.1577, + "learning_rate": 2.226186947092506e-05, + "loss": 0.0226, "step": 83660 }, { "epoch": 3.9, - "learning_rate": 1.224930851811917e-05, - "loss": 0.0528, + "learning_rate": 2.2261401401089043e-05, + "loss": 0.0573, "step": 83665 }, { "epoch": 3.9, - "learning_rate": 1.224883971684403e-05, - "loss": 0.0448, + "learning_rate": 2.2260933331253023e-05, + "loss": 0.0778, "step": 83670 }, { "epoch": 3.9, - "learning_rate": 1.2248370915568893e-05, - "loss": 0.0638, + "learning_rate": 2.2260465261417003e-05, + "loss": 0.111, "step": 83675 }, { "epoch": 3.9, - "learning_rate": 1.2247902114293753e-05, - "loss": 0.1076, + "learning_rate": 2.2259997191580982e-05, + "loss": 0.1422, "step": 83680 }, { "epoch": 3.9, - "learning_rate": 1.2247433313018613e-05, - "loss": 0.1727, + "learning_rate": 2.2259529121744966e-05, + "loss": 0.0619, "step": 83685 }, { "epoch": 3.91, - "learning_rate": 1.2246964511743473e-05, - "loss": 0.1929, + "learning_rate": 2.2259061051908945e-05, + "loss": 0.1448, "step": 83690 }, { "epoch": 3.91, - "learning_rate": 1.2246495710468334e-05, - "loss": 0.2969, + "learning_rate": 2.2258592982072925e-05, + "loss": 0.1589, "step": 83695 }, { "epoch": 3.91, - "learning_rate": 1.2246026909193194e-05, - "loss": 0.0466, + "learning_rate": 2.225812491223691e-05, + "loss": 0.0376, "step": 83700 }, { "epoch": 3.91, - "learning_rate": 1.2245558107918054e-05, - "loss": 0.048, + "learning_rate": 2.2257656842400888e-05, + "loss": 0.0266, "step": 83705 }, { "epoch": 3.91, - "learning_rate": 1.2245089306642914e-05, - "loss": 0.0381, + "learning_rate": 2.2257188772564868e-05, + "loss": 0.0209, "step": 83710 }, { "epoch": 3.91, - "learning_rate": 1.2244620505367774e-05, - "loss": 0.0691, + "learning_rate": 2.2256720702728848e-05, + "loss": 0.0936, "step": 83715 }, { "epoch": 3.91, - "learning_rate": 1.2244151704092637e-05, - "loss": 0.1224, + "learning_rate": 2.225625263289283e-05, + "loss": 0.0665, "step": 83720 }, { "epoch": 3.91, - "learning_rate": 1.2243682902817497e-05, - "loss": 0.048, + "learning_rate": 2.2255784563056808e-05, + "loss": 0.0458, "step": 83725 }, { "epoch": 3.91, - "learning_rate": 1.2243214101542357e-05, - "loss": 0.1117, + "learning_rate": 2.2255316493220787e-05, + "loss": 0.0968, "step": 83730 }, { "epoch": 3.91, - "learning_rate": 1.2242745300267219e-05, - "loss": 0.1337, + "learning_rate": 2.2254848423384767e-05, + "loss": 0.1263, "step": 83735 }, { "epoch": 3.91, - "learning_rate": 1.2242276498992079e-05, - "loss": 0.3771, + "learning_rate": 2.225438035354875e-05, + "loss": 0.1648, "step": 83740 }, { "epoch": 3.91, - "learning_rate": 1.2241807697716939e-05, - "loss": 0.3642, + "learning_rate": 2.225391228371273e-05, + "loss": 0.2361, "step": 83745 }, { "epoch": 3.91, - "learning_rate": 1.2241338896441798e-05, - "loss": 0.0426, + "learning_rate": 2.225344421387671e-05, + "loss": 0.058, "step": 83750 }, { "epoch": 3.91, - "learning_rate": 1.2240870095166658e-05, - "loss": 0.0917, + "learning_rate": 2.225297614404069e-05, + "loss": 0.034, "step": 83755 }, { "epoch": 3.91, - "learning_rate": 1.224040129389152e-05, - "loss": 0.0423, + "learning_rate": 2.2252508074204673e-05, + "loss": 0.035, "step": 83760 }, { "epoch": 3.91, - "learning_rate": 1.223993249261638e-05, - "loss": 0.0592, + "learning_rate": 2.2252040004368653e-05, + "loss": 0.0422, "step": 83765 }, { "epoch": 3.91, - "learning_rate": 1.2239463691341242e-05, - "loss": 0.0689, + "learning_rate": 2.2251571934532633e-05, + "loss": 0.0747, "step": 83770 }, { "epoch": 3.91, - "learning_rate": 1.2238994890066103e-05, - "loss": 0.0804, + "learning_rate": 2.2251103864696616e-05, + "loss": 0.0581, "step": 83775 }, { "epoch": 3.91, - "learning_rate": 1.2238526088790963e-05, - "loss": 0.1061, + "learning_rate": 2.2250635794860596e-05, + "loss": 0.0622, "step": 83780 }, { "epoch": 3.91, - "learning_rate": 1.2238057287515823e-05, - "loss": 0.1218, + "learning_rate": 2.2250167725024576e-05, + "loss": 0.1086, "step": 83785 }, { "epoch": 3.91, - "learning_rate": 1.2237588486240683e-05, - "loss": 0.1276, + "learning_rate": 2.2249699655188552e-05, + "loss": 0.1031, "step": 83790 }, { "epoch": 3.91, - "learning_rate": 1.2237119684965543e-05, - "loss": 0.2179, + "learning_rate": 2.2249231585352535e-05, + "loss": 0.2319, "step": 83795 }, { "epoch": 3.91, - "learning_rate": 1.2236650883690405e-05, - "loss": 0.0722, + "learning_rate": 2.2248763515516515e-05, + "loss": 0.0675, "step": 83800 }, { "epoch": 3.91, - "learning_rate": 1.2236182082415265e-05, - "loss": 0.0804, + "learning_rate": 2.2248295445680495e-05, + "loss": 0.0241, "step": 83805 }, { "epoch": 3.91, - "learning_rate": 1.2235713281140124e-05, - "loss": 0.0458, + "learning_rate": 2.2247827375844475e-05, + "loss": 0.0184, "step": 83810 }, { "epoch": 3.91, - "learning_rate": 1.2235244479864988e-05, - "loss": 0.067, + "learning_rate": 2.2247359306008458e-05, + "loss": 0.0729, "step": 83815 }, { "epoch": 3.91, - "learning_rate": 1.2234775678589848e-05, - "loss": 0.0403, + "learning_rate": 2.2246891236172438e-05, + "loss": 0.1511, "step": 83820 }, { "epoch": 3.91, - "learning_rate": 1.2234306877314708e-05, - "loss": 0.0922, + "learning_rate": 2.2246423166336417e-05, + "loss": 0.0332, "step": 83825 }, { "epoch": 3.91, - "learning_rate": 1.2233838076039568e-05, - "loss": 0.1323, + "learning_rate": 2.22459550965004e-05, + "loss": 0.0815, "step": 83830 }, { "epoch": 3.91, - "learning_rate": 1.223336927476443e-05, - "loss": 0.1212, + "learning_rate": 2.224548702666438e-05, + "loss": 0.1635, "step": 83835 }, { "epoch": 3.91, - "learning_rate": 1.2232900473489289e-05, - "loss": 0.2292, + "learning_rate": 2.224501895682836e-05, + "loss": 0.1263, "step": 83840 }, { "epoch": 3.91, - "learning_rate": 1.2232431672214149e-05, - "loss": 0.4067, + "learning_rate": 2.224455088699234e-05, + "loss": 0.2396, "step": 83845 }, { "epoch": 3.91, - "learning_rate": 1.2231962870939009e-05, - "loss": 0.0513, + "learning_rate": 2.224408281715632e-05, + "loss": 0.0552, "step": 83850 }, { "epoch": 3.91, - "learning_rate": 1.2231494069663869e-05, - "loss": 0.0135, + "learning_rate": 2.22436147473203e-05, + "loss": 0.0338, "step": 83855 }, { "epoch": 3.91, - "learning_rate": 1.2231025268388732e-05, - "loss": 0.0269, + "learning_rate": 2.224314667748428e-05, + "loss": 0.0408, "step": 83860 }, { "epoch": 3.91, - "learning_rate": 1.2230556467113592e-05, - "loss": 0.0542, + "learning_rate": 2.224267860764826e-05, + "loss": 0.1028, "step": 83865 }, { "epoch": 3.91, - "learning_rate": 1.2230087665838452e-05, - "loss": 0.0683, + "learning_rate": 2.2242210537812243e-05, + "loss": 0.0387, "step": 83870 }, { "epoch": 3.91, - "learning_rate": 1.2229618864563314e-05, - "loss": 0.0926, + "learning_rate": 2.2241742467976222e-05, + "loss": 0.1188, "step": 83875 }, { "epoch": 3.91, - "learning_rate": 1.2229150063288174e-05, - "loss": 0.1678, + "learning_rate": 2.2241274398140202e-05, + "loss": 0.0756, "step": 83880 }, { "epoch": 3.91, - "learning_rate": 1.2228681262013034e-05, - "loss": 0.0925, + "learning_rate": 2.2240806328304185e-05, + "loss": 0.0949, "step": 83885 }, { "epoch": 3.91, - "learning_rate": 1.2228212460737894e-05, - "loss": 0.2065, + "learning_rate": 2.2240338258468165e-05, + "loss": 0.1102, "step": 83890 }, { "epoch": 3.91, - "learning_rate": 1.2227743659462753e-05, - "loss": 0.411, + "learning_rate": 2.2239870188632145e-05, + "loss": 0.2114, "step": 83895 }, { "epoch": 3.91, - "learning_rate": 1.2227274858187615e-05, - "loss": 0.0139, + "learning_rate": 2.2239402118796125e-05, + "loss": 0.0815, "step": 83900 }, { "epoch": 3.92, - "learning_rate": 1.2226806056912477e-05, - "loss": 0.0444, + "learning_rate": 2.2238934048960108e-05, + "loss": 0.016, "step": 83905 }, { "epoch": 3.92, - "learning_rate": 1.2226337255637337e-05, - "loss": 0.0592, + "learning_rate": 2.2238465979124088e-05, + "loss": 0.0197, "step": 83910 }, { "epoch": 3.92, - "learning_rate": 1.2225868454362198e-05, - "loss": 0.0458, + "learning_rate": 2.2237997909288064e-05, + "loss": 0.02, "step": 83915 }, { "epoch": 3.92, - "learning_rate": 1.2225399653087058e-05, - "loss": 0.0628, + "learning_rate": 2.2237529839452044e-05, + "loss": 0.0981, "step": 83920 }, { "epoch": 3.92, - "learning_rate": 1.2224930851811918e-05, - "loss": 0.0943, + "learning_rate": 2.2237061769616027e-05, + "loss": 0.0501, "step": 83925 }, { "epoch": 3.92, - "learning_rate": 1.2224462050536778e-05, - "loss": 0.0753, + "learning_rate": 2.2236593699780007e-05, + "loss": 0.0449, "step": 83930 }, { "epoch": 3.92, - "learning_rate": 1.2223993249261638e-05, - "loss": 0.2237, + "learning_rate": 2.2236125629943987e-05, + "loss": 0.1394, "step": 83935 }, { "epoch": 3.92, - "learning_rate": 1.22235244479865e-05, - "loss": 0.1878, + "learning_rate": 2.2235657560107967e-05, + "loss": 0.2036, "step": 83940 }, { "epoch": 3.92, - "learning_rate": 1.222305564671136e-05, - "loss": 0.3213, + "learning_rate": 2.223518949027195e-05, + "loss": 0.2613, "step": 83945 }, { "epoch": 3.92, - "learning_rate": 1.222258684543622e-05, - "loss": 0.0989, + "learning_rate": 2.223472142043593e-05, + "loss": 0.0609, "step": 83950 }, { "epoch": 3.92, - "learning_rate": 1.2222118044161083e-05, - "loss": 0.033, + "learning_rate": 2.223425335059991e-05, + "loss": 0.0434, "step": 83955 }, { "epoch": 3.92, - "learning_rate": 1.2221649242885943e-05, - "loss": 0.0802, + "learning_rate": 2.2233785280763893e-05, + "loss": 0.0407, "step": 83960 }, { "epoch": 3.92, - "learning_rate": 1.2221180441610803e-05, - "loss": 0.0214, + "learning_rate": 2.2233317210927873e-05, + "loss": 0.0315, "step": 83965 }, { "epoch": 3.92, - "learning_rate": 1.2220711640335663e-05, - "loss": 0.0314, + "learning_rate": 2.2232849141091853e-05, + "loss": 0.073, "step": 83970 }, { "epoch": 3.92, - "learning_rate": 1.2220242839060523e-05, - "loss": 0.1221, + "learning_rate": 2.2232381071255832e-05, + "loss": 0.0569, "step": 83975 }, { "epoch": 3.92, - "learning_rate": 1.2219774037785384e-05, - "loss": 0.1475, + "learning_rate": 2.2231913001419812e-05, + "loss": 0.0988, "step": 83980 }, { "epoch": 3.92, - "learning_rate": 1.2219305236510244e-05, - "loss": 0.1117, + "learning_rate": 2.2231444931583792e-05, + "loss": 0.1382, "step": 83985 }, { "epoch": 3.92, - "learning_rate": 1.2218836435235104e-05, - "loss": 0.3094, + "learning_rate": 2.2230976861747772e-05, + "loss": 0.1619, "step": 83990 }, { "epoch": 3.92, - "learning_rate": 1.2218367633959964e-05, - "loss": 0.2421, + "learning_rate": 2.223050879191175e-05, + "loss": 0.2634, "step": 83995 }, { "epoch": 3.92, - "learning_rate": 1.2217898832684827e-05, - "loss": 0.0479, + "learning_rate": 2.2230040722075735e-05, + "loss": 0.0611, "step": 84000 }, { "epoch": 3.92, - "learning_rate": 1.2217430031409687e-05, - "loss": 0.0401, + "learning_rate": 2.2229572652239715e-05, + "loss": 0.023, "step": 84005 }, { "epoch": 3.92, - "learning_rate": 1.2216961230134547e-05, - "loss": 0.0075, + "learning_rate": 2.2229104582403694e-05, + "loss": 0.0328, "step": 84010 }, { "epoch": 3.92, - "learning_rate": 1.2216492428859407e-05, - "loss": 0.0173, + "learning_rate": 2.2228636512567678e-05, + "loss": 0.0339, "step": 84015 }, { "epoch": 3.92, - "learning_rate": 1.2216023627584269e-05, - "loss": 0.0428, + "learning_rate": 2.2228168442731657e-05, + "loss": 0.0318, "step": 84020 }, { "epoch": 3.92, - "learning_rate": 1.2215554826309129e-05, - "loss": 0.0481, + "learning_rate": 2.2227700372895637e-05, + "loss": 0.1181, "step": 84025 }, { "epoch": 3.92, - "learning_rate": 1.2215086025033989e-05, - "loss": 0.0569, + "learning_rate": 2.2227232303059617e-05, + "loss": 0.123, "step": 84030 }, { "epoch": 3.92, - "learning_rate": 1.2214617223758849e-05, - "loss": 0.2363, + "learning_rate": 2.22267642332236e-05, + "loss": 0.0996, "step": 84035 }, { "epoch": 3.92, - "learning_rate": 1.2214148422483709e-05, - "loss": 0.1255, + "learning_rate": 2.2226296163387577e-05, + "loss": 0.0988, "step": 84040 }, { "epoch": 3.92, - "learning_rate": 1.2213679621208572e-05, - "loss": 0.2421, + "learning_rate": 2.2225828093551557e-05, + "loss": 0.2563, "step": 84045 }, { "epoch": 3.92, - "learning_rate": 1.2213210819933432e-05, - "loss": 0.0725, + "learning_rate": 2.2225360023715536e-05, + "loss": 0.0953, "step": 84050 }, { "epoch": 3.92, - "learning_rate": 1.2212742018658292e-05, - "loss": 0.0362, + "learning_rate": 2.222489195387952e-05, + "loss": 0.028, "step": 84055 }, { "epoch": 3.92, - "learning_rate": 1.2212273217383153e-05, - "loss": 0.0547, + "learning_rate": 2.22244238840435e-05, + "loss": 0.0176, "step": 84060 }, { "epoch": 3.92, - "learning_rate": 1.2211804416108013e-05, - "loss": 0.0224, + "learning_rate": 2.222395581420748e-05, + "loss": 0.0245, "step": 84065 }, { "epoch": 3.92, - "learning_rate": 1.2211335614832873e-05, - "loss": 0.0658, + "learning_rate": 2.2223487744371462e-05, + "loss": 0.1077, "step": 84070 }, { "epoch": 3.92, - "learning_rate": 1.2210866813557733e-05, - "loss": 0.0599, + "learning_rate": 2.2223019674535442e-05, + "loss": 0.07, "step": 84075 }, { "epoch": 3.92, - "learning_rate": 1.2210398012282593e-05, - "loss": 0.0865, + "learning_rate": 2.2222551604699422e-05, + "loss": 0.0771, "step": 84080 }, { "epoch": 3.92, - "learning_rate": 1.2209929211007455e-05, - "loss": 0.1166, + "learning_rate": 2.2222083534863402e-05, + "loss": 0.1158, "step": 84085 }, { "epoch": 3.92, - "learning_rate": 1.2209460409732315e-05, - "loss": 0.1242, + "learning_rate": 2.2221615465027385e-05, + "loss": 0.213, "step": 84090 }, { "epoch": 3.92, - "learning_rate": 1.2208991608457176e-05, - "loss": 0.1641, + "learning_rate": 2.2221147395191365e-05, + "loss": 0.1933, "step": 84095 }, { "epoch": 3.92, - "learning_rate": 1.2208522807182038e-05, - "loss": 0.0467, + "learning_rate": 2.2220679325355345e-05, + "loss": 0.1248, "step": 84100 }, { "epoch": 3.92, - "learning_rate": 1.2208054005906898e-05, - "loss": 0.0167, + "learning_rate": 2.222021125551932e-05, + "loss": 0.0427, "step": 84105 }, { "epoch": 3.92, - "learning_rate": 1.2207585204631758e-05, - "loss": 0.0463, + "learning_rate": 2.2219743185683304e-05, + "loss": 0.047, "step": 84110 }, { "epoch": 3.92, - "learning_rate": 1.2207116403356618e-05, - "loss": 0.0289, + "learning_rate": 2.2219275115847284e-05, + "loss": 0.0413, "step": 84115 }, { "epoch": 3.93, - "learning_rate": 1.2206647602081478e-05, - "loss": 0.0876, + "learning_rate": 2.2218807046011264e-05, + "loss": 0.0746, "step": 84120 }, { "epoch": 3.93, - "learning_rate": 1.220617880080634e-05, - "loss": 0.0997, + "learning_rate": 2.2218338976175247e-05, + "loss": 0.1061, "step": 84125 }, { "epoch": 3.93, - "learning_rate": 1.22057099995312e-05, - "loss": 0.0724, + "learning_rate": 2.2217870906339227e-05, + "loss": 0.0813, "step": 84130 }, { "epoch": 3.93, - "learning_rate": 1.2205241198256059e-05, - "loss": 0.1029, + "learning_rate": 2.2217402836503207e-05, + "loss": 0.0756, "step": 84135 }, { "epoch": 3.93, - "learning_rate": 1.2204772396980922e-05, - "loss": 0.1901, + "learning_rate": 2.2216934766667187e-05, + "loss": 0.168, "step": 84140 }, { "epoch": 3.93, - "learning_rate": 1.2204303595705782e-05, - "loss": 0.1756, + "learning_rate": 2.221646669683117e-05, + "loss": 0.1579, "step": 84145 }, { "epoch": 3.93, - "learning_rate": 1.2203834794430642e-05, - "loss": 0.0648, + "learning_rate": 2.221599862699515e-05, + "loss": 0.0476, "step": 84150 }, { "epoch": 3.93, - "learning_rate": 1.2203365993155502e-05, - "loss": 0.0376, + "learning_rate": 2.221553055715913e-05, + "loss": 0.0271, "step": 84155 }, { "epoch": 3.93, - "learning_rate": 1.2202897191880362e-05, - "loss": 0.0418, + "learning_rate": 2.221506248732311e-05, + "loss": 0.0268, "step": 84160 }, { "epoch": 3.93, - "learning_rate": 1.2202428390605224e-05, - "loss": 0.0209, + "learning_rate": 2.221459441748709e-05, + "loss": 0.0532, "step": 84165 }, { "epoch": 3.93, - "learning_rate": 1.2201959589330084e-05, - "loss": 0.0336, + "learning_rate": 2.221412634765107e-05, + "loss": 0.0415, "step": 84170 }, { "epoch": 3.93, - "learning_rate": 1.2201490788054944e-05, - "loss": 0.0429, + "learning_rate": 2.221365827781505e-05, + "loss": 0.0559, "step": 84175 }, { "epoch": 3.93, - "learning_rate": 1.2201021986779804e-05, - "loss": 0.2253, + "learning_rate": 2.221319020797903e-05, + "loss": 0.0889, "step": 84180 }, { "epoch": 3.93, - "learning_rate": 1.2200553185504667e-05, - "loss": 0.0823, + "learning_rate": 2.2212722138143012e-05, + "loss": 0.1486, "step": 84185 }, { "epoch": 3.93, - "learning_rate": 1.2200084384229527e-05, - "loss": 0.2294, + "learning_rate": 2.221225406830699e-05, + "loss": 0.1689, "step": 84190 }, { "epoch": 3.93, - "learning_rate": 1.2199615582954387e-05, - "loss": 0.3767, + "learning_rate": 2.221178599847097e-05, + "loss": 0.2939, "step": 84195 }, { "epoch": 3.93, - "learning_rate": 1.2199146781679247e-05, - "loss": 0.0257, + "learning_rate": 2.2211317928634955e-05, + "loss": 0.0314, "step": 84200 }, { "epoch": 3.93, - "learning_rate": 1.2198677980404108e-05, - "loss": 0.0578, + "learning_rate": 2.2210849858798934e-05, + "loss": 0.0441, "step": 84205 }, { "epoch": 3.93, - "learning_rate": 1.2198209179128968e-05, - "loss": 0.096, + "learning_rate": 2.2210381788962914e-05, + "loss": 0.0099, "step": 84210 }, { "epoch": 3.93, - "learning_rate": 1.2197740377853828e-05, - "loss": 0.0353, + "learning_rate": 2.2209913719126894e-05, + "loss": 0.057, "step": 84215 }, { "epoch": 3.93, - "learning_rate": 1.2197271576578688e-05, - "loss": 0.017, + "learning_rate": 2.2209445649290877e-05, + "loss": 0.0498, "step": 84220 }, { "epoch": 3.93, - "learning_rate": 1.219680277530355e-05, - "loss": 0.0899, + "learning_rate": 2.2208977579454857e-05, + "loss": 0.0563, "step": 84225 }, { "epoch": 3.93, - "learning_rate": 1.2196333974028411e-05, - "loss": 0.1363, + "learning_rate": 2.2208509509618834e-05, + "loss": 0.0628, "step": 84230 }, { "epoch": 3.93, - "learning_rate": 1.2195865172753271e-05, - "loss": 0.1552, + "learning_rate": 2.2208041439782813e-05, + "loss": 0.0887, "step": 84235 }, { "epoch": 3.93, - "learning_rate": 1.2195396371478131e-05, - "loss": 0.3038, + "learning_rate": 2.2207573369946797e-05, + "loss": 0.1601, "step": 84240 }, { "epoch": 3.93, - "learning_rate": 1.2194927570202993e-05, - "loss": 0.1944, + "learning_rate": 2.2207105300110776e-05, + "loss": 0.198, "step": 84245 }, { "epoch": 3.93, - "learning_rate": 1.2194458768927853e-05, - "loss": 0.083, + "learning_rate": 2.2206637230274756e-05, + "loss": 0.1024, "step": 84250 }, { "epoch": 3.93, - "learning_rate": 1.2193989967652713e-05, - "loss": 0.0862, + "learning_rate": 2.220616916043874e-05, + "loss": 0.0343, "step": 84255 }, { "epoch": 3.93, - "learning_rate": 1.2193521166377573e-05, - "loss": 0.0484, + "learning_rate": 2.220570109060272e-05, + "loss": 0.0209, "step": 84260 }, { "epoch": 3.93, - "learning_rate": 1.2193052365102434e-05, - "loss": 0.0431, + "learning_rate": 2.22052330207667e-05, + "loss": 0.0293, "step": 84265 }, { "epoch": 3.93, - "learning_rate": 1.2192583563827294e-05, - "loss": 0.0566, + "learning_rate": 2.220476495093068e-05, + "loss": 0.0195, "step": 84270 }, { "epoch": 3.93, - "learning_rate": 1.2192114762552154e-05, - "loss": 0.0449, + "learning_rate": 2.2204296881094662e-05, + "loss": 0.1161, "step": 84275 }, { "epoch": 3.93, - "learning_rate": 1.2191645961277016e-05, - "loss": 0.1187, + "learning_rate": 2.2203828811258642e-05, + "loss": 0.1116, "step": 84280 }, { "epoch": 3.93, - "learning_rate": 1.2191177160001877e-05, - "loss": 0.0942, + "learning_rate": 2.220336074142262e-05, + "loss": 0.1121, "step": 84285 }, { "epoch": 3.93, - "learning_rate": 1.2190708358726737e-05, - "loss": 0.1475, + "learning_rate": 2.22028926715866e-05, + "loss": 0.2406, "step": 84290 }, { "epoch": 3.93, - "learning_rate": 1.2190239557451597e-05, - "loss": 0.1864, + "learning_rate": 2.220242460175058e-05, + "loss": 0.1602, "step": 84295 }, { "epoch": 3.93, - "learning_rate": 1.2189770756176457e-05, - "loss": 0.0969, + "learning_rate": 2.220195653191456e-05, + "loss": 0.0239, "step": 84300 }, { "epoch": 3.93, - "learning_rate": 1.2189301954901319e-05, - "loss": 0.0383, + "learning_rate": 2.220148846207854e-05, + "loss": 0.0762, "step": 84305 }, { "epoch": 3.93, - "learning_rate": 1.2188833153626179e-05, - "loss": 0.0172, + "learning_rate": 2.2201020392242524e-05, + "loss": 0.0542, "step": 84310 }, { "epoch": 3.93, - "learning_rate": 1.2188364352351039e-05, - "loss": 0.036, + "learning_rate": 2.2200552322406504e-05, + "loss": 0.1013, "step": 84315 }, { "epoch": 3.93, - "learning_rate": 1.2187895551075899e-05, - "loss": 0.0517, + "learning_rate": 2.2200084252570484e-05, + "loss": 0.0258, "step": 84320 }, { "epoch": 3.93, - "learning_rate": 1.2187426749800762e-05, - "loss": 0.0623, + "learning_rate": 2.2199616182734464e-05, + "loss": 0.0567, "step": 84325 }, { "epoch": 3.93, - "learning_rate": 1.2186957948525622e-05, - "loss": 0.1455, + "learning_rate": 2.2199148112898447e-05, + "loss": 0.025, "step": 84330 }, { "epoch": 3.94, - "learning_rate": 1.2186489147250482e-05, - "loss": 0.0808, + "learning_rate": 2.2198680043062427e-05, + "loss": 0.1012, "step": 84335 }, { "epoch": 3.94, - "learning_rate": 1.2186020345975342e-05, - "loss": 0.2054, + "learning_rate": 2.2198211973226406e-05, + "loss": 0.1515, "step": 84340 }, { "epoch": 3.94, - "learning_rate": 1.2185551544700203e-05, - "loss": 0.2095, + "learning_rate": 2.2197743903390386e-05, + "loss": 0.1918, "step": 84345 }, { "epoch": 3.94, - "learning_rate": 1.2185082743425063e-05, - "loss": 0.0996, + "learning_rate": 2.219727583355437e-05, + "loss": 0.0394, "step": 84350 }, { "epoch": 3.94, - "learning_rate": 1.2184613942149923e-05, - "loss": 0.016, + "learning_rate": 2.2196807763718346e-05, + "loss": 0.0339, "step": 84355 }, { "epoch": 3.94, - "learning_rate": 1.2184145140874783e-05, - "loss": 0.0279, + "learning_rate": 2.2196339693882326e-05, + "loss": 0.0174, "step": 84360 }, { "epoch": 3.94, - "learning_rate": 1.2183676339599643e-05, - "loss": 0.0278, + "learning_rate": 2.2195871624046306e-05, + "loss": 0.028, "step": 84365 }, { "epoch": 3.94, - "learning_rate": 1.2183207538324506e-05, - "loss": 0.0713, + "learning_rate": 2.219540355421029e-05, + "loss": 0.0837, "step": 84370 }, { "epoch": 3.94, - "learning_rate": 1.2182738737049366e-05, - "loss": 0.1478, + "learning_rate": 2.219493548437427e-05, + "loss": 0.1136, "step": 84375 }, { "epoch": 3.94, - "learning_rate": 1.2182269935774226e-05, - "loss": 0.0615, + "learning_rate": 2.219446741453825e-05, + "loss": 0.0859, "step": 84380 }, { "epoch": 3.94, - "learning_rate": 1.2181801134499088e-05, - "loss": 0.0657, + "learning_rate": 2.219399934470223e-05, + "loss": 0.0407, "step": 84385 }, { "epoch": 3.94, - "learning_rate": 1.2181332333223948e-05, - "loss": 0.1012, + "learning_rate": 2.219353127486621e-05, + "loss": 0.1393, "step": 84390 }, { "epoch": 3.94, - "learning_rate": 1.2180863531948808e-05, - "loss": 0.2686, + "learning_rate": 2.219306320503019e-05, + "loss": 0.3359, "step": 84395 }, { "epoch": 3.94, - "learning_rate": 1.2180394730673668e-05, - "loss": 0.06, + "learning_rate": 2.219259513519417e-05, + "loss": 0.0271, "step": 84400 }, { "epoch": 3.94, - "learning_rate": 1.2179925929398528e-05, - "loss": 0.0194, + "learning_rate": 2.2192127065358154e-05, + "loss": 0.0354, "step": 84405 }, { "epoch": 3.94, - "learning_rate": 1.217945712812339e-05, - "loss": 0.0553, + "learning_rate": 2.2191658995522134e-05, + "loss": 0.0752, "step": 84410 }, { "epoch": 3.94, - "learning_rate": 1.217898832684825e-05, - "loss": 0.096, + "learning_rate": 2.2191190925686114e-05, + "loss": 0.0308, "step": 84415 }, { "epoch": 3.94, - "learning_rate": 1.2178519525573111e-05, - "loss": 0.0314, + "learning_rate": 2.219072285585009e-05, + "loss": 0.0837, "step": 84420 }, { "epoch": 3.94, - "learning_rate": 1.2178050724297973e-05, - "loss": 0.0546, + "learning_rate": 2.2190254786014074e-05, + "loss": 0.1144, "step": 84425 }, { "epoch": 3.94, - "learning_rate": 1.2177581923022832e-05, - "loss": 0.1234, + "learning_rate": 2.2189786716178053e-05, + "loss": 0.0689, "step": 84430 }, { "epoch": 3.94, - "learning_rate": 1.2177113121747692e-05, - "loss": 0.1008, + "learning_rate": 2.2189318646342033e-05, + "loss": 0.1491, "step": 84435 }, { "epoch": 3.94, - "learning_rate": 1.2176644320472552e-05, - "loss": 0.0997, + "learning_rate": 2.2188850576506016e-05, + "loss": 0.2832, "step": 84440 }, { "epoch": 3.94, - "learning_rate": 1.2176175519197412e-05, - "loss": 0.2987, + "learning_rate": 2.2188382506669996e-05, + "loss": 0.2857, "step": 84445 }, { "epoch": 3.94, - "learning_rate": 1.2175706717922274e-05, - "loss": 0.0427, + "learning_rate": 2.2187914436833976e-05, + "loss": 0.0505, "step": 84450 }, { "epoch": 3.94, - "learning_rate": 1.2175237916647134e-05, - "loss": 0.0185, + "learning_rate": 2.2187446366997956e-05, + "loss": 0.0274, "step": 84455 }, { "epoch": 3.94, - "learning_rate": 1.2174769115371994e-05, - "loss": 0.0178, + "learning_rate": 2.218697829716194e-05, + "loss": 0.0564, "step": 84460 }, { "epoch": 3.94, - "learning_rate": 1.2174300314096857e-05, - "loss": 0.0259, + "learning_rate": 2.218651022732592e-05, + "loss": 0.0709, "step": 84465 }, { "epoch": 3.94, - "learning_rate": 1.2173831512821717e-05, - "loss": 0.0739, + "learning_rate": 2.21860421574899e-05, + "loss": 0.0572, "step": 84470 }, { "epoch": 3.94, - "learning_rate": 1.2173362711546577e-05, - "loss": 0.0694, + "learning_rate": 2.218557408765388e-05, + "loss": 0.109, "step": 84475 }, { "epoch": 3.94, - "learning_rate": 1.2172893910271437e-05, - "loss": 0.1191, + "learning_rate": 2.2185106017817858e-05, + "loss": 0.1434, "step": 84480 }, { "epoch": 3.94, - "learning_rate": 1.2172425108996297e-05, - "loss": 0.1201, + "learning_rate": 2.2184637947981838e-05, + "loss": 0.1123, "step": 84485 }, { "epoch": 3.94, - "learning_rate": 1.2171956307721158e-05, - "loss": 0.1859, + "learning_rate": 2.2184169878145818e-05, + "loss": 0.1096, "step": 84490 }, { "epoch": 3.94, - "learning_rate": 1.2171487506446018e-05, - "loss": 0.2759, + "learning_rate": 2.21837018083098e-05, + "loss": 0.2999, "step": 84495 }, { "epoch": 3.94, - "learning_rate": 1.2171018705170878e-05, - "loss": 0.0926, + "learning_rate": 2.218323373847378e-05, + "loss": 0.0498, "step": 84500 }, { "epoch": 3.94, - "learning_rate": 1.2170549903895738e-05, - "loss": 0.0321, + "learning_rate": 2.218276566863776e-05, + "loss": 0.0671, "step": 84505 }, { "epoch": 3.94, - "learning_rate": 1.2170081102620602e-05, - "loss": 0.0295, + "learning_rate": 2.218229759880174e-05, + "loss": 0.0418, "step": 84510 }, { "epoch": 3.94, - "learning_rate": 1.2169612301345462e-05, - "loss": 0.0777, + "learning_rate": 2.2181829528965724e-05, + "loss": 0.0588, "step": 84515 }, { "epoch": 3.94, - "learning_rate": 1.2169143500070321e-05, - "loss": 0.0552, + "learning_rate": 2.2181361459129704e-05, + "loss": 0.0763, "step": 84520 }, { "epoch": 3.94, - "learning_rate": 1.2168674698795181e-05, - "loss": 0.1077, + "learning_rate": 2.2180893389293683e-05, + "loss": 0.0722, "step": 84525 }, { "epoch": 3.94, - "learning_rate": 1.2168205897520043e-05, - "loss": 0.1633, + "learning_rate": 2.2180425319457663e-05, + "loss": 0.0563, "step": 84530 }, { "epoch": 3.94, - "learning_rate": 1.2167737096244903e-05, - "loss": 0.1222, + "learning_rate": 2.2179957249621646e-05, + "loss": 0.1669, "step": 84535 }, { "epoch": 3.94, - "learning_rate": 1.2167268294969763e-05, - "loss": 0.1673, + "learning_rate": 2.2179489179785626e-05, + "loss": 0.1743, "step": 84540 }, { "epoch": 3.94, - "learning_rate": 1.2166799493694623e-05, - "loss": 0.2728, + "learning_rate": 2.2179021109949603e-05, + "loss": 0.1689, "step": 84545 }, { "epoch": 3.95, - "learning_rate": 1.2166330692419483e-05, - "loss": 0.0829, + "learning_rate": 2.2178553040113583e-05, + "loss": 0.0537, "step": 84550 }, { "epoch": 3.95, - "learning_rate": 1.2165861891144346e-05, - "loss": 0.0173, + "learning_rate": 2.2178084970277566e-05, + "loss": 0.0449, "step": 84555 }, { "epoch": 3.95, - "learning_rate": 1.2165393089869206e-05, - "loss": 0.0413, + "learning_rate": 2.2177616900441546e-05, + "loss": 0.0541, "step": 84560 }, { "epoch": 3.95, - "learning_rate": 1.2164924288594066e-05, - "loss": 0.0687, + "learning_rate": 2.2177148830605525e-05, + "loss": 0.0506, "step": 84565 }, { "epoch": 3.95, - "learning_rate": 1.2164455487318928e-05, - "loss": 0.0593, + "learning_rate": 2.217668076076951e-05, + "loss": 0.0258, "step": 84570 }, { "epoch": 3.95, - "learning_rate": 1.2163986686043787e-05, - "loss": 0.087, + "learning_rate": 2.217621269093349e-05, + "loss": 0.0288, "step": 84575 }, { "epoch": 3.95, - "learning_rate": 1.2163517884768647e-05, - "loss": 0.1026, + "learning_rate": 2.2175744621097468e-05, + "loss": 0.1153, "step": 84580 }, { "epoch": 3.95, - "learning_rate": 1.2163049083493507e-05, - "loss": 0.102, + "learning_rate": 2.2175276551261448e-05, + "loss": 0.1789, "step": 84585 }, { "epoch": 3.95, - "learning_rate": 1.2162580282218367e-05, - "loss": 0.2065, + "learning_rate": 2.217480848142543e-05, + "loss": 0.1418, "step": 84590 }, { "epoch": 3.95, - "learning_rate": 1.2162111480943229e-05, - "loss": 0.1685, + "learning_rate": 2.217434041158941e-05, + "loss": 0.2224, "step": 84595 }, { "epoch": 3.95, - "learning_rate": 1.2161642679668089e-05, - "loss": 0.1001, + "learning_rate": 2.217387234175339e-05, + "loss": 0.0326, "step": 84600 }, { "epoch": 3.95, - "learning_rate": 1.216117387839295e-05, - "loss": 0.0342, + "learning_rate": 2.217340427191737e-05, + "loss": 0.0214, "step": 84605 }, { "epoch": 3.95, - "learning_rate": 1.2160705077117812e-05, - "loss": 0.0525, + "learning_rate": 2.217293620208135e-05, + "loss": 0.0727, "step": 84610 }, { "epoch": 3.95, - "learning_rate": 1.2160236275842672e-05, - "loss": 0.0481, + "learning_rate": 2.217246813224533e-05, + "loss": 0.0665, "step": 84615 }, { "epoch": 3.95, - "learning_rate": 1.2159767474567532e-05, - "loss": 0.149, + "learning_rate": 2.217200006240931e-05, + "loss": 0.1164, "step": 84620 }, { "epoch": 3.95, - "learning_rate": 1.2159298673292392e-05, - "loss": 0.1458, + "learning_rate": 2.2171531992573293e-05, + "loss": 0.037, "step": 84625 }, { "epoch": 3.95, - "learning_rate": 1.2158829872017252e-05, - "loss": 0.1162, + "learning_rate": 2.2171063922737273e-05, + "loss": 0.0882, "step": 84630 }, { "epoch": 3.95, - "learning_rate": 1.2158361070742113e-05, - "loss": 0.1173, + "learning_rate": 2.2170595852901253e-05, + "loss": 0.1409, "step": 84635 }, { "epoch": 3.95, - "learning_rate": 1.2157892269466973e-05, - "loss": 0.1814, + "learning_rate": 2.2170127783065233e-05, + "loss": 0.1753, "step": 84640 }, { "epoch": 3.95, - "learning_rate": 1.2157423468191833e-05, - "loss": 0.2595, + "learning_rate": 2.2169659713229216e-05, + "loss": 0.2664, "step": 84645 }, { "epoch": 3.95, - "learning_rate": 1.2156954666916697e-05, - "loss": 0.0722, + "learning_rate": 2.2169191643393196e-05, + "loss": 0.0829, "step": 84650 }, { "epoch": 3.95, - "learning_rate": 1.2156485865641557e-05, - "loss": 0.039, + "learning_rate": 2.2168723573557176e-05, + "loss": 0.0465, "step": 84655 }, { "epoch": 3.95, - "learning_rate": 1.2156017064366417e-05, - "loss": 0.0383, + "learning_rate": 2.2168255503721155e-05, + "loss": 0.0265, "step": 84660 }, { "epoch": 3.95, - "learning_rate": 1.2155548263091276e-05, - "loss": 0.1506, + "learning_rate": 2.216778743388514e-05, + "loss": 0.0456, "step": 84665 }, { "epoch": 3.95, - "learning_rate": 1.2155079461816136e-05, - "loss": 0.0744, + "learning_rate": 2.2167319364049115e-05, + "loss": 0.0608, "step": 84670 }, { "epoch": 3.95, - "learning_rate": 1.2154610660540998e-05, - "loss": 0.0485, + "learning_rate": 2.2166851294213095e-05, + "loss": 0.0714, "step": 84675 }, { "epoch": 3.95, - "learning_rate": 1.2154141859265858e-05, - "loss": 0.0927, + "learning_rate": 2.2166383224377078e-05, + "loss": 0.0717, "step": 84680 }, { "epoch": 3.95, - "learning_rate": 1.2153673057990718e-05, - "loss": 0.1012, + "learning_rate": 2.2165915154541058e-05, + "loss": 0.1133, "step": 84685 }, { "epoch": 3.95, - "learning_rate": 1.2153204256715578e-05, - "loss": 0.128, + "learning_rate": 2.2165447084705038e-05, + "loss": 0.2277, "step": 84690 }, { "epoch": 3.95, - "learning_rate": 1.2152735455440441e-05, - "loss": 0.2211, + "learning_rate": 2.2164979014869018e-05, + "loss": 0.3942, "step": 84695 }, { "epoch": 3.95, - "learning_rate": 1.2152266654165301e-05, - "loss": 0.098, + "learning_rate": 2.2164510945033e-05, + "loss": 0.0689, "step": 84700 }, { "epoch": 3.95, - "learning_rate": 1.2151797852890161e-05, - "loss": 0.0205, + "learning_rate": 2.216404287519698e-05, + "loss": 0.0233, "step": 84705 }, { "epoch": 3.95, - "learning_rate": 1.2151329051615021e-05, - "loss": 0.0545, + "learning_rate": 2.216357480536096e-05, + "loss": 0.0421, "step": 84710 }, { "epoch": 3.95, - "learning_rate": 1.2150860250339883e-05, - "loss": 0.0574, + "learning_rate": 2.216310673552494e-05, + "loss": 0.0727, "step": 84715 }, { "epoch": 3.95, - "learning_rate": 1.2150391449064743e-05, - "loss": 0.1062, + "learning_rate": 2.2162638665688923e-05, + "loss": 0.0426, "step": 84720 }, { "epoch": 3.95, - "learning_rate": 1.2149922647789602e-05, - "loss": 0.034, + "learning_rate": 2.2162170595852903e-05, + "loss": 0.0591, "step": 84725 }, { "epoch": 3.95, - "learning_rate": 1.2149453846514462e-05, - "loss": 0.0577, + "learning_rate": 2.2161702526016883e-05, + "loss": 0.0912, "step": 84730 }, { "epoch": 3.95, - "learning_rate": 1.2148985045239324e-05, - "loss": 0.0859, + "learning_rate": 2.2161234456180863e-05, + "loss": 0.1518, "step": 84735 }, { "epoch": 3.95, - "learning_rate": 1.2148516243964184e-05, - "loss": 0.1521, + "learning_rate": 2.2160766386344843e-05, + "loss": 0.1132, "step": 84740 }, { "epoch": 3.95, - "learning_rate": 1.2148047442689046e-05, - "loss": 0.2548, + "learning_rate": 2.2160298316508823e-05, + "loss": 0.2787, "step": 84745 }, { "epoch": 3.95, - "learning_rate": 1.2147578641413905e-05, - "loss": 0.0839, + "learning_rate": 2.2159830246672802e-05, + "loss": 0.0525, "step": 84750 }, { "epoch": 3.95, - "learning_rate": 1.2147109840138767e-05, - "loss": 0.0285, + "learning_rate": 2.2159362176836786e-05, + "loss": 0.0411, "step": 84755 }, { "epoch": 3.96, - "learning_rate": 1.2146641038863627e-05, - "loss": 0.0183, + "learning_rate": 2.2158894107000765e-05, + "loss": 0.085, "step": 84760 }, { "epoch": 3.96, - "learning_rate": 1.2146172237588487e-05, - "loss": 0.0776, + "learning_rate": 2.2158426037164745e-05, + "loss": 0.057, "step": 84765 }, { "epoch": 3.96, - "learning_rate": 1.2145703436313347e-05, - "loss": 0.0921, + "learning_rate": 2.2157957967328725e-05, + "loss": 0.1149, "step": 84770 }, { "epoch": 3.96, - "learning_rate": 1.2145234635038209e-05, - "loss": 0.0571, + "learning_rate": 2.2157489897492708e-05, + "loss": 0.0386, "step": 84775 }, { "epoch": 3.96, - "learning_rate": 1.2144765833763068e-05, - "loss": 0.1153, + "learning_rate": 2.2157021827656688e-05, + "loss": 0.1238, "step": 84780 }, { "epoch": 3.96, - "learning_rate": 1.2144297032487928e-05, - "loss": 0.1005, + "learning_rate": 2.2156553757820668e-05, + "loss": 0.1191, "step": 84785 }, { "epoch": 3.96, - "learning_rate": 1.214382823121279e-05, - "loss": 0.1477, + "learning_rate": 2.2156085687984648e-05, + "loss": 0.1675, "step": 84790 }, { "epoch": 3.96, - "learning_rate": 1.2143359429937652e-05, - "loss": 0.2054, + "learning_rate": 2.215561761814863e-05, + "loss": 0.2175, "step": 84795 }, { "epoch": 3.96, - "learning_rate": 1.2142890628662512e-05, - "loss": 0.0746, + "learning_rate": 2.2155149548312607e-05, + "loss": 0.0647, "step": 84800 }, { "epoch": 3.96, - "learning_rate": 1.2142421827387372e-05, - "loss": 0.0249, + "learning_rate": 2.2154681478476587e-05, + "loss": 0.025, "step": 84805 }, { "epoch": 3.96, - "learning_rate": 1.2141953026112231e-05, - "loss": 0.0197, + "learning_rate": 2.215421340864057e-05, + "loss": 0.0547, "step": 84810 }, { "epoch": 3.96, - "learning_rate": 1.2141484224837093e-05, - "loss": 0.0673, + "learning_rate": 2.215374533880455e-05, + "loss": 0.031, "step": 84815 }, { "epoch": 3.96, - "learning_rate": 1.2141015423561953e-05, - "loss": 0.091, + "learning_rate": 2.215327726896853e-05, + "loss": 0.0577, "step": 84820 }, { "epoch": 3.96, - "learning_rate": 1.2140546622286813e-05, - "loss": 0.042, + "learning_rate": 2.215280919913251e-05, + "loss": 0.055, "step": 84825 }, { "epoch": 3.96, - "learning_rate": 1.2140077821011673e-05, - "loss": 0.0678, + "learning_rate": 2.2152341129296493e-05, + "loss": 0.0557, "step": 84830 }, { "epoch": 3.96, - "learning_rate": 1.2139609019736536e-05, - "loss": 0.1079, + "learning_rate": 2.2151873059460473e-05, + "loss": 0.163, "step": 84835 }, { "epoch": 3.96, - "learning_rate": 1.2139140218461396e-05, - "loss": 0.1308, + "learning_rate": 2.2151404989624453e-05, + "loss": 0.269, "step": 84840 }, { "epoch": 3.96, - "learning_rate": 1.2138671417186256e-05, - "loss": 0.1827, + "learning_rate": 2.2150936919788432e-05, + "loss": 0.2503, "step": 84845 }, { "epoch": 3.96, - "learning_rate": 1.2138202615911116e-05, - "loss": 0.0779, + "learning_rate": 2.2150468849952416e-05, + "loss": 0.0904, "step": 84850 }, { "epoch": 3.96, - "learning_rate": 1.2137733814635978e-05, - "loss": 0.0242, + "learning_rate": 2.2150000780116395e-05, + "loss": 0.0297, "step": 84855 }, { "epoch": 3.96, - "learning_rate": 1.2137265013360838e-05, - "loss": 0.0863, + "learning_rate": 2.2149532710280372e-05, + "loss": 0.026, "step": 84860 }, { "epoch": 3.96, - "learning_rate": 1.2136796212085698e-05, - "loss": 0.0686, + "learning_rate": 2.2149064640444355e-05, + "loss": 0.0116, "step": 84865 }, { "epoch": 3.96, - "learning_rate": 1.2136327410810557e-05, - "loss": 0.0695, + "learning_rate": 2.2148596570608335e-05, + "loss": 0.023, "step": 84870 }, { "epoch": 3.96, - "learning_rate": 1.2135858609535417e-05, - "loss": 0.0646, + "learning_rate": 2.2148128500772315e-05, + "loss": 0.089, "step": 84875 }, { "epoch": 3.96, - "learning_rate": 1.213538980826028e-05, - "loss": 0.0794, + "learning_rate": 2.2147660430936295e-05, + "loss": 0.0656, "step": 84880 }, { "epoch": 3.96, - "learning_rate": 1.213492100698514e-05, - "loss": 0.0826, + "learning_rate": 2.2147192361100278e-05, + "loss": 0.0745, "step": 84885 }, { "epoch": 3.96, - "learning_rate": 1.213445220571e-05, - "loss": 0.208, + "learning_rate": 2.2146724291264258e-05, + "loss": 0.2293, "step": 84890 }, { "epoch": 3.96, - "learning_rate": 1.2133983404434862e-05, - "loss": 0.407, + "learning_rate": 2.2146256221428237e-05, + "loss": 0.3333, "step": 84895 }, { "epoch": 3.96, - "learning_rate": 1.2133514603159722e-05, - "loss": 0.0326, + "learning_rate": 2.2145788151592217e-05, + "loss": 0.0445, "step": 84900 }, { "epoch": 3.96, - "learning_rate": 1.2133045801884582e-05, - "loss": 0.0356, + "learning_rate": 2.21453200817562e-05, + "loss": 0.0803, "step": 84905 }, { "epoch": 3.96, - "learning_rate": 1.2132577000609442e-05, - "loss": 0.0131, + "learning_rate": 2.214485201192018e-05, + "loss": 0.0377, "step": 84910 }, { "epoch": 3.96, - "learning_rate": 1.2132108199334302e-05, - "loss": 0.0433, + "learning_rate": 2.214438394208416e-05, + "loss": 0.0308, "step": 84915 }, { "epoch": 3.96, - "learning_rate": 1.2131639398059164e-05, - "loss": 0.0459, + "learning_rate": 2.2143915872248143e-05, + "loss": 0.0353, "step": 84920 }, { "epoch": 3.96, - "learning_rate": 1.2131170596784023e-05, - "loss": 0.0983, + "learning_rate": 2.214344780241212e-05, + "loss": 0.088, "step": 84925 }, { "epoch": 3.96, - "learning_rate": 1.2130701795508885e-05, - "loss": 0.1348, + "learning_rate": 2.21429797325761e-05, + "loss": 0.1289, "step": 84930 }, { "epoch": 3.96, - "learning_rate": 1.2130232994233747e-05, - "loss": 0.1374, + "learning_rate": 2.214251166274008e-05, + "loss": 0.1591, "step": 84935 }, { "epoch": 3.96, - "learning_rate": 1.2129764192958607e-05, - "loss": 0.2749, + "learning_rate": 2.2142043592904062e-05, + "loss": 0.2122, "step": 84940 }, { "epoch": 3.96, - "learning_rate": 1.2129295391683467e-05, - "loss": 0.2818, + "learning_rate": 2.2141575523068042e-05, + "loss": 0.3087, "step": 84945 }, { "epoch": 3.96, - "learning_rate": 1.2128826590408327e-05, - "loss": 0.0536, + "learning_rate": 2.2141107453232022e-05, + "loss": 0.0951, "step": 84950 }, { "epoch": 3.96, - "learning_rate": 1.2128357789133186e-05, - "loss": 0.0252, + "learning_rate": 2.2140639383396002e-05, + "loss": 0.057, "step": 84955 }, { "epoch": 3.96, - "learning_rate": 1.2127888987858048e-05, - "loss": 0.0497, + "learning_rate": 2.2140171313559985e-05, + "loss": 0.0184, "step": 84960 }, { "epoch": 3.96, - "learning_rate": 1.2127420186582908e-05, - "loss": 0.0617, + "learning_rate": 2.2139703243723965e-05, + "loss": 0.0501, "step": 84965 }, { "epoch": 3.96, - "learning_rate": 1.2126951385307768e-05, - "loss": 0.0436, + "learning_rate": 2.2139235173887945e-05, + "loss": 0.0942, "step": 84970 }, { "epoch": 3.97, - "learning_rate": 1.2126482584032631e-05, - "loss": 0.073, + "learning_rate": 2.2138767104051925e-05, + "loss": 0.1287, "step": 84975 }, { "epoch": 3.97, - "learning_rate": 1.2126013782757491e-05, - "loss": 0.1061, + "learning_rate": 2.2138299034215908e-05, + "loss": 0.0537, "step": 84980 }, { "epoch": 3.97, - "learning_rate": 1.2125544981482351e-05, - "loss": 0.0889, + "learning_rate": 2.2137830964379888e-05, + "loss": 0.1158, "step": 84985 }, { "epoch": 3.97, - "learning_rate": 1.2125076180207211e-05, - "loss": 0.1823, + "learning_rate": 2.2137362894543864e-05, + "loss": 0.2418, "step": 84990 }, { "epoch": 3.97, - "learning_rate": 1.2124607378932071e-05, - "loss": 0.2849, + "learning_rate": 2.2136894824707847e-05, + "loss": 0.2223, "step": 84995 }, { "epoch": 3.97, - "learning_rate": 1.2124138577656933e-05, - "loss": 0.0793, + "learning_rate": 2.2136426754871827e-05, + "loss": 0.0441, "step": 85000 }, { "epoch": 3.97, - "learning_rate": 1.2123669776381793e-05, - "loss": 0.0345, + "learning_rate": 2.2135958685035807e-05, + "loss": 0.0348, "step": 85005 }, { "epoch": 3.97, - "learning_rate": 1.2123200975106653e-05, - "loss": 0.0192, + "learning_rate": 2.2135490615199787e-05, + "loss": 0.032, "step": 85010 }, { "epoch": 3.97, - "learning_rate": 1.2122732173831512e-05, - "loss": 0.0744, + "learning_rate": 2.213502254536377e-05, + "loss": 0.0286, "step": 85015 }, { "epoch": 3.97, - "learning_rate": 1.2122263372556376e-05, - "loss": 0.034, + "learning_rate": 2.213455447552775e-05, + "loss": 0.0611, "step": 85020 }, { "epoch": 3.97, - "learning_rate": 1.2121794571281236e-05, - "loss": 0.06, + "learning_rate": 2.213408640569173e-05, + "loss": 0.0741, "step": 85025 }, { "epoch": 3.97, - "learning_rate": 1.2121325770006096e-05, - "loss": 0.0513, + "learning_rate": 2.213361833585571e-05, + "loss": 0.1012, "step": 85030 }, { "epoch": 3.97, - "learning_rate": 1.2120856968730956e-05, - "loss": 0.121, + "learning_rate": 2.2133150266019693e-05, + "loss": 0.1241, "step": 85035 }, { "epoch": 3.97, - "learning_rate": 1.2120388167455817e-05, - "loss": 0.1783, + "learning_rate": 2.2132682196183672e-05, + "loss": 0.1722, "step": 85040 }, { "epoch": 3.97, - "learning_rate": 1.2119919366180677e-05, - "loss": 0.2662, + "learning_rate": 2.2132214126347652e-05, + "loss": 0.3152, "step": 85045 }, { "epoch": 3.97, - "learning_rate": 1.2119450564905537e-05, - "loss": 0.0381, + "learning_rate": 2.2131746056511632e-05, + "loss": 0.0667, "step": 85050 }, { "epoch": 3.97, - "learning_rate": 1.2118981763630397e-05, - "loss": 0.0177, + "learning_rate": 2.2131277986675612e-05, + "loss": 0.0679, "step": 85055 }, { "epoch": 3.97, - "learning_rate": 1.2118512962355257e-05, - "loss": 0.0451, + "learning_rate": 2.213080991683959e-05, + "loss": 0.0216, "step": 85060 }, { "epoch": 3.97, - "learning_rate": 1.2118044161080119e-05, - "loss": 0.0189, + "learning_rate": 2.213034184700357e-05, + "loss": 0.0832, "step": 85065 }, { "epoch": 3.97, - "learning_rate": 1.211757535980498e-05, - "loss": 0.0686, + "learning_rate": 2.2129873777167555e-05, + "loss": 0.0296, "step": 85070 }, { "epoch": 3.97, - "learning_rate": 1.211710655852984e-05, - "loss": 0.0908, + "learning_rate": 2.2129405707331535e-05, + "loss": 0.0788, "step": 85075 }, { "epoch": 3.97, - "learning_rate": 1.2116637757254702e-05, - "loss": 0.0855, + "learning_rate": 2.2128937637495514e-05, + "loss": 0.0468, "step": 85080 }, { "epoch": 3.97, - "learning_rate": 1.2116168955979562e-05, - "loss": 0.1953, + "learning_rate": 2.2128469567659494e-05, + "loss": 0.102, "step": 85085 }, { "epoch": 3.97, - "learning_rate": 1.2115700154704422e-05, - "loss": 0.1753, + "learning_rate": 2.2128001497823477e-05, + "loss": 0.1622, "step": 85090 }, { "epoch": 3.97, - "learning_rate": 1.2115231353429282e-05, - "loss": 0.2956, + "learning_rate": 2.2127533427987457e-05, + "loss": 0.3832, "step": 85095 }, { "epoch": 3.97, - "learning_rate": 1.2114762552154142e-05, - "loss": 0.0359, + "learning_rate": 2.2127065358151437e-05, + "loss": 0.0563, "step": 85100 }, { "epoch": 3.97, - "learning_rate": 1.2114293750879003e-05, - "loss": 0.0425, + "learning_rate": 2.212659728831542e-05, + "loss": 0.0541, "step": 85105 }, { "epoch": 3.97, - "learning_rate": 1.2113824949603863e-05, - "loss": 0.022, + "learning_rate": 2.21261292184794e-05, + "loss": 0.0118, "step": 85110 }, { "epoch": 3.97, - "learning_rate": 1.2113356148328725e-05, - "loss": 0.0516, + "learning_rate": 2.2125661148643376e-05, + "loss": 0.0864, "step": 85115 }, { "epoch": 3.97, - "learning_rate": 1.2112887347053586e-05, - "loss": 0.0781, + "learning_rate": 2.2125193078807356e-05, + "loss": 0.0434, "step": 85120 }, { "epoch": 3.97, - "learning_rate": 1.2112418545778446e-05, - "loss": 0.083, + "learning_rate": 2.212472500897134e-05, + "loss": 0.0888, "step": 85125 }, { "epoch": 3.97, - "learning_rate": 1.2111949744503306e-05, - "loss": 0.0752, + "learning_rate": 2.212425693913532e-05, + "loss": 0.11, "step": 85130 }, { "epoch": 3.97, - "learning_rate": 1.2111480943228166e-05, - "loss": 0.0996, + "learning_rate": 2.21237888692993e-05, + "loss": 0.1078, "step": 85135 }, { "epoch": 3.97, - "learning_rate": 1.2111012141953026e-05, - "loss": 0.1423, + "learning_rate": 2.212332079946328e-05, + "loss": 0.1475, "step": 85140 }, { "epoch": 3.97, - "learning_rate": 1.2110543340677888e-05, - "loss": 0.2798, + "learning_rate": 2.2122852729627262e-05, + "loss": 0.3563, "step": 85145 }, { "epoch": 3.97, - "learning_rate": 1.2110074539402748e-05, - "loss": 0.0164, + "learning_rate": 2.2122384659791242e-05, + "loss": 0.082, "step": 85150 }, { "epoch": 3.97, - "learning_rate": 1.2109605738127608e-05, - "loss": 0.0169, + "learning_rate": 2.2121916589955222e-05, + "loss": 0.038, "step": 85155 }, { "epoch": 3.97, - "learning_rate": 1.2109136936852471e-05, - "loss": 0.0546, + "learning_rate": 2.21214485201192e-05, + "loss": 0.0329, "step": 85160 }, { "epoch": 3.97, - "learning_rate": 1.210866813557733e-05, - "loss": 0.0847, + "learning_rate": 2.2120980450283185e-05, + "loss": 0.0413, "step": 85165 }, { "epoch": 3.97, - "learning_rate": 1.210819933430219e-05, - "loss": 0.0182, + "learning_rate": 2.2120512380447165e-05, + "loss": 0.06, "step": 85170 }, { "epoch": 3.97, - "learning_rate": 1.210773053302705e-05, - "loss": 0.0705, + "learning_rate": 2.2120044310611144e-05, + "loss": 0.1649, "step": 85175 }, { "epoch": 3.97, - "learning_rate": 1.210726173175191e-05, - "loss": 0.1267, + "learning_rate": 2.2119576240775124e-05, + "loss": 0.0651, "step": 85180 }, { "epoch": 3.97, - "learning_rate": 1.2106792930476772e-05, - "loss": 0.165, + "learning_rate": 2.2119108170939104e-05, + "loss": 0.1549, "step": 85185 }, { "epoch": 3.98, - "learning_rate": 1.2106324129201632e-05, - "loss": 0.2124, + "learning_rate": 2.2118640101103084e-05, + "loss": 0.1368, "step": 85190 }, { "epoch": 3.98, - "learning_rate": 1.2105855327926492e-05, - "loss": 0.2334, + "learning_rate": 2.2118172031267064e-05, + "loss": 0.2455, "step": 85195 }, { "epoch": 3.98, - "learning_rate": 1.2105386526651352e-05, - "loss": 0.0667, + "learning_rate": 2.2117703961431047e-05, + "loss": 0.0575, "step": 85200 }, { "epoch": 3.98, - "learning_rate": 1.2104917725376215e-05, - "loss": 0.0356, + "learning_rate": 2.2117235891595027e-05, + "loss": 0.0195, "step": 85205 }, { "epoch": 3.98, - "learning_rate": 1.2104448924101075e-05, - "loss": 0.0628, + "learning_rate": 2.2116767821759007e-05, + "loss": 0.0216, "step": 85210 }, { "epoch": 3.98, - "learning_rate": 1.2103980122825935e-05, - "loss": 0.0778, + "learning_rate": 2.2116299751922986e-05, + "loss": 0.0964, "step": 85215 }, { "epoch": 3.98, - "learning_rate": 1.2103511321550795e-05, - "loss": 0.0354, + "learning_rate": 2.211583168208697e-05, + "loss": 0.0527, "step": 85220 }, { "epoch": 3.98, - "learning_rate": 1.2103042520275657e-05, - "loss": 0.0464, + "learning_rate": 2.211536361225095e-05, + "loss": 0.1026, "step": 85225 }, { "epoch": 3.98, - "learning_rate": 1.2102573719000517e-05, - "loss": 0.0755, + "learning_rate": 2.211489554241493e-05, + "loss": 0.0937, "step": 85230 }, { "epoch": 3.98, - "learning_rate": 1.2102104917725377e-05, - "loss": 0.1194, + "learning_rate": 2.2114427472578912e-05, + "loss": 0.1946, "step": 85235 }, { "epoch": 3.98, - "learning_rate": 1.2101636116450237e-05, - "loss": 0.1531, + "learning_rate": 2.211395940274289e-05, + "loss": 0.1736, "step": 85240 }, { "epoch": 3.98, - "learning_rate": 1.2101167315175098e-05, - "loss": 0.3248, + "learning_rate": 2.211349133290687e-05, + "loss": 0.3098, "step": 85245 }, { "epoch": 3.98, - "learning_rate": 1.2100698513899958e-05, - "loss": 0.1072, + "learning_rate": 2.211302326307085e-05, + "loss": 0.0665, "step": 85250 }, { "epoch": 3.98, - "learning_rate": 1.210022971262482e-05, - "loss": 0.016, + "learning_rate": 2.211255519323483e-05, + "loss": 0.0378, "step": 85255 }, { "epoch": 3.98, - "learning_rate": 1.2099760911349681e-05, - "loss": 0.0689, + "learning_rate": 2.211208712339881e-05, + "loss": 0.0494, "step": 85260 }, { "epoch": 3.98, - "learning_rate": 1.2099292110074541e-05, - "loss": 0.0442, + "learning_rate": 2.211161905356279e-05, + "loss": 0.0596, "step": 85265 }, { "epoch": 3.98, - "learning_rate": 1.2098823308799401e-05, - "loss": 0.0702, + "learning_rate": 2.211115098372677e-05, + "loss": 0.0688, "step": 85270 }, { "epoch": 3.98, - "learning_rate": 1.2098354507524261e-05, - "loss": 0.0641, + "learning_rate": 2.2110682913890754e-05, + "loss": 0.0871, "step": 85275 }, { "epoch": 3.98, - "learning_rate": 1.2097885706249121e-05, - "loss": 0.1959, + "learning_rate": 2.2110214844054734e-05, + "loss": 0.0922, "step": 85280 }, { "epoch": 3.98, - "learning_rate": 1.2097416904973983e-05, - "loss": 0.1061, + "learning_rate": 2.2109746774218714e-05, + "loss": 0.1318, "step": 85285 }, { "epoch": 3.98, - "learning_rate": 1.2096948103698843e-05, - "loss": 0.1526, + "learning_rate": 2.2109278704382697e-05, + "loss": 0.1546, "step": 85290 }, { "epoch": 3.98, - "learning_rate": 1.2096479302423703e-05, - "loss": 0.3181, + "learning_rate": 2.2108810634546677e-05, + "loss": 0.209, "step": 85295 }, { "epoch": 3.98, - "learning_rate": 1.2096010501148566e-05, - "loss": 0.0794, + "learning_rate": 2.2108342564710657e-05, + "loss": 0.0339, "step": 85300 }, { "epoch": 3.98, - "learning_rate": 1.2095541699873426e-05, - "loss": 0.0377, + "learning_rate": 2.2107874494874633e-05, + "loss": 0.0473, "step": 85305 }, { "epoch": 3.98, - "learning_rate": 1.2095072898598286e-05, - "loss": 0.0753, + "learning_rate": 2.2107406425038616e-05, + "loss": 0.0247, "step": 85310 }, { "epoch": 3.98, - "learning_rate": 1.2094604097323146e-05, - "loss": 0.0725, + "learning_rate": 2.2106938355202596e-05, + "loss": 0.0172, "step": 85315 }, { "epoch": 3.98, - "learning_rate": 1.2094135296048006e-05, - "loss": 0.0682, + "learning_rate": 2.2106470285366576e-05, + "loss": 0.0387, "step": 85320 }, { "epoch": 3.98, - "learning_rate": 1.2093666494772867e-05, - "loss": 0.1105, + "learning_rate": 2.2106002215530556e-05, + "loss": 0.1023, "step": 85325 }, { "epoch": 3.98, - "learning_rate": 1.2093197693497727e-05, - "loss": 0.091, + "learning_rate": 2.210553414569454e-05, + "loss": 0.1985, "step": 85330 }, { "epoch": 3.98, - "learning_rate": 1.2092728892222587e-05, - "loss": 0.1482, + "learning_rate": 2.210506607585852e-05, + "loss": 0.1354, "step": 85335 }, { "epoch": 3.98, - "learning_rate": 1.2092260090947447e-05, - "loss": 0.2113, + "learning_rate": 2.21045980060225e-05, + "loss": 0.1199, "step": 85340 }, { "epoch": 3.98, - "learning_rate": 1.209179128967231e-05, - "loss": 0.2973, + "learning_rate": 2.210412993618648e-05, + "loss": 0.2243, "step": 85345 }, { "epoch": 3.98, - "learning_rate": 1.209132248839717e-05, - "loss": 0.079, + "learning_rate": 2.2103661866350462e-05, + "loss": 0.0273, "step": 85350 }, { "epoch": 3.98, - "learning_rate": 1.209085368712203e-05, - "loss": 0.0591, + "learning_rate": 2.210319379651444e-05, + "loss": 0.0234, "step": 85355 }, { "epoch": 3.98, - "learning_rate": 1.209038488584689e-05, - "loss": 0.0622, + "learning_rate": 2.210272572667842e-05, + "loss": 0.0216, "step": 85360 }, { "epoch": 3.98, - "learning_rate": 1.2089916084571752e-05, - "loss": 0.0578, + "learning_rate": 2.21022576568424e-05, + "loss": 0.0742, "step": 85365 }, { "epoch": 3.98, - "learning_rate": 1.2089447283296612e-05, - "loss": 0.0683, + "learning_rate": 2.210178958700638e-05, + "loss": 0.0341, "step": 85370 }, { "epoch": 3.98, - "learning_rate": 1.2088978482021472e-05, - "loss": 0.056, + "learning_rate": 2.210132151717036e-05, + "loss": 0.0859, "step": 85375 }, { "epoch": 3.98, - "learning_rate": 1.2088509680746332e-05, - "loss": 0.0709, + "learning_rate": 2.210085344733434e-05, + "loss": 0.1176, "step": 85380 }, { "epoch": 3.98, - "learning_rate": 1.2088040879471192e-05, - "loss": 0.2024, + "learning_rate": 2.2100385377498324e-05, + "loss": 0.076, "step": 85385 }, { "epoch": 3.98, - "learning_rate": 1.2087572078196053e-05, - "loss": 0.1626, + "learning_rate": 2.2099917307662304e-05, + "loss": 0.1483, "step": 85390 }, { "epoch": 3.98, - "learning_rate": 1.2087103276920915e-05, - "loss": 0.2486, + "learning_rate": 2.2099449237826284e-05, + "loss": 0.2785, "step": 85395 }, { "epoch": 3.98, - "learning_rate": 1.2086634475645775e-05, - "loss": 0.0634, + "learning_rate": 2.2098981167990263e-05, + "loss": 0.0633, "step": 85400 }, { "epoch": 3.99, - "learning_rate": 1.2086165674370636e-05, - "loss": 0.0103, + "learning_rate": 2.2098513098154247e-05, + "loss": 0.0098, "step": 85405 }, { "epoch": 3.99, - "learning_rate": 1.2085696873095496e-05, - "loss": 0.0332, + "learning_rate": 2.2098045028318226e-05, + "loss": 0.0761, "step": 85410 }, { "epoch": 3.99, - "learning_rate": 1.2085228071820356e-05, - "loss": 0.0428, + "learning_rate": 2.2097576958482206e-05, + "loss": 0.0578, "step": 85415 }, { "epoch": 3.99, - "learning_rate": 1.2084759270545216e-05, - "loss": 0.0711, + "learning_rate": 2.209710888864619e-05, + "loss": 0.0146, "step": 85420 }, { "epoch": 3.99, - "learning_rate": 1.2084290469270076e-05, - "loss": 0.0546, + "learning_rate": 2.209664081881017e-05, + "loss": 0.0263, "step": 85425 }, { "epoch": 3.99, - "learning_rate": 1.2083821667994938e-05, - "loss": 0.0558, + "learning_rate": 2.2096172748974146e-05, + "loss": 0.1091, "step": 85430 }, { "epoch": 3.99, - "learning_rate": 1.2083352866719798e-05, - "loss": 0.1043, + "learning_rate": 2.2095704679138125e-05, + "loss": 0.2149, "step": 85435 }, { "epoch": 3.99, - "learning_rate": 1.208288406544466e-05, - "loss": 0.1153, + "learning_rate": 2.209523660930211e-05, + "loss": 0.2683, "step": 85440 }, { "epoch": 3.99, - "learning_rate": 1.2082415264169521e-05, - "loss": 0.3828, + "learning_rate": 2.209476853946609e-05, + "loss": 0.2508, "step": 85445 }, { "epoch": 3.99, - "learning_rate": 1.2081946462894381e-05, - "loss": 0.0537, + "learning_rate": 2.2094300469630068e-05, + "loss": 0.0293, "step": 85450 }, { "epoch": 3.99, - "learning_rate": 1.208147766161924e-05, - "loss": 0.0323, + "learning_rate": 2.2093832399794048e-05, + "loss": 0.0642, "step": 85455 }, { "epoch": 3.99, - "learning_rate": 1.20810088603441e-05, - "loss": 0.0638, + "learning_rate": 2.209336432995803e-05, + "loss": 0.0143, "step": 85460 }, { "epoch": 3.99, - "learning_rate": 1.208054005906896e-05, - "loss": 0.0546, + "learning_rate": 2.209289626012201e-05, + "loss": 0.0771, "step": 85465 }, { "epoch": 3.99, - "learning_rate": 1.2080071257793822e-05, - "loss": 0.0969, + "learning_rate": 2.209242819028599e-05, + "loss": 0.0824, "step": 85470 }, { "epoch": 3.99, - "learning_rate": 1.2079602456518682e-05, - "loss": 0.0441, + "learning_rate": 2.2091960120449974e-05, + "loss": 0.1392, "step": 85475 }, { "epoch": 3.99, - "learning_rate": 1.2079133655243542e-05, - "loss": 0.0426, + "learning_rate": 2.2091492050613954e-05, + "loss": 0.0829, "step": 85480 }, { "epoch": 3.99, - "learning_rate": 1.2078664853968406e-05, - "loss": 0.207, + "learning_rate": 2.2091023980777934e-05, + "loss": 0.1512, "step": 85485 }, { "epoch": 3.99, - "learning_rate": 1.2078196052693265e-05, - "loss": 0.1455, + "learning_rate": 2.2090555910941914e-05, + "loss": 0.1644, "step": 85490 }, { "epoch": 3.99, - "learning_rate": 1.2077727251418125e-05, - "loss": 0.3497, + "learning_rate": 2.2090087841105893e-05, + "loss": 0.1223, "step": 85495 }, { "epoch": 3.99, - "learning_rate": 1.2077258450142985e-05, - "loss": 0.0732, + "learning_rate": 2.2089619771269873e-05, + "loss": 0.0296, "step": 85500 }, { "epoch": 3.99, - "learning_rate": 1.2076789648867845e-05, - "loss": 0.0068, + "learning_rate": 2.2089151701433853e-05, + "loss": 0.0383, "step": 85505 }, { "epoch": 3.99, - "learning_rate": 1.2076320847592707e-05, - "loss": 0.0237, + "learning_rate": 2.2088683631597833e-05, + "loss": 0.0543, "step": 85510 }, { "epoch": 3.99, - "learning_rate": 1.2075852046317567e-05, - "loss": 0.0273, + "learning_rate": 2.2088215561761816e-05, + "loss": 0.0328, "step": 85515 }, { "epoch": 3.99, - "learning_rate": 1.2075383245042427e-05, - "loss": 0.0467, + "learning_rate": 2.2087747491925796e-05, + "loss": 0.0776, "step": 85520 }, { "epoch": 3.99, - "learning_rate": 1.2074914443767287e-05, - "loss": 0.1015, + "learning_rate": 2.2087279422089776e-05, + "loss": 0.073, "step": 85525 }, { "epoch": 3.99, - "learning_rate": 1.2074445642492147e-05, - "loss": 0.08, + "learning_rate": 2.208681135225376e-05, + "loss": 0.0873, "step": 85530 }, { "epoch": 3.99, - "learning_rate": 1.207397684121701e-05, - "loss": 0.1203, + "learning_rate": 2.208634328241774e-05, + "loss": 0.1414, "step": 85535 }, { "epoch": 3.99, - "learning_rate": 1.207350803994187e-05, - "loss": 0.1713, + "learning_rate": 2.208587521258172e-05, + "loss": 0.2211, "step": 85540 }, { "epoch": 3.99, - "learning_rate": 1.207303923866673e-05, - "loss": 0.3559, + "learning_rate": 2.20854071427457e-05, + "loss": 0.2941, "step": 85545 }, { "epoch": 3.99, - "learning_rate": 1.2072570437391591e-05, - "loss": 0.0424, + "learning_rate": 2.208493907290968e-05, + "loss": 0.0392, "step": 85550 }, { "epoch": 3.99, - "learning_rate": 1.2072101636116451e-05, - "loss": 0.0211, + "learning_rate": 2.2084471003073658e-05, + "loss": 0.038, "step": 85555 }, { "epoch": 3.99, - "learning_rate": 1.2071632834841311e-05, - "loss": 0.0303, + "learning_rate": 2.2084002933237638e-05, + "loss": 0.0223, "step": 85560 }, { "epoch": 3.99, - "learning_rate": 1.2071164033566171e-05, - "loss": 0.0616, + "learning_rate": 2.2083534863401618e-05, + "loss": 0.0221, "step": 85565 }, { "epoch": 3.99, - "learning_rate": 1.2070695232291031e-05, - "loss": 0.0772, + "learning_rate": 2.20830667935656e-05, + "loss": 0.0473, "step": 85570 }, { "epoch": 3.99, - "learning_rate": 1.2070226431015893e-05, - "loss": 0.0717, + "learning_rate": 2.208259872372958e-05, + "loss": 0.0322, "step": 85575 }, { "epoch": 3.99, - "learning_rate": 1.2069757629740754e-05, - "loss": 0.104, + "learning_rate": 2.208213065389356e-05, + "loss": 0.1162, "step": 85580 }, { "epoch": 3.99, - "learning_rate": 1.2069288828465614e-05, - "loss": 0.1114, + "learning_rate": 2.208166258405754e-05, + "loss": 0.1457, "step": 85585 }, { "epoch": 3.99, - "learning_rate": 1.2068820027190476e-05, - "loss": 0.1281, + "learning_rate": 2.2081194514221523e-05, + "loss": 0.1838, "step": 85590 }, { "epoch": 3.99, - "learning_rate": 1.2068351225915336e-05, - "loss": 0.3246, + "learning_rate": 2.2080726444385503e-05, + "loss": 0.2478, "step": 85595 }, { "epoch": 3.99, - "learning_rate": 1.2067882424640196e-05, - "loss": 0.0502, + "learning_rate": 2.2080258374549483e-05, + "loss": 0.0548, "step": 85600 }, { "epoch": 3.99, - "learning_rate": 1.2067413623365056e-05, - "loss": 0.0084, + "learning_rate": 2.2079790304713466e-05, + "loss": 0.022, "step": 85605 }, { "epoch": 3.99, - "learning_rate": 1.2066944822089917e-05, - "loss": 0.0513, + "learning_rate": 2.2079322234877446e-05, + "loss": 0.0572, "step": 85610 }, { "epoch": 3.99, - "learning_rate": 1.2066476020814777e-05, - "loss": 0.0499, + "learning_rate": 2.2078854165041426e-05, + "loss": 0.0356, "step": 85615 }, { "epoch": 4.0, - "learning_rate": 1.2066007219539637e-05, - "loss": 0.1073, + "learning_rate": 2.2078386095205402e-05, + "loss": 0.0322, "step": 85620 }, { "epoch": 4.0, - "learning_rate": 1.2065538418264499e-05, - "loss": 0.1085, + "learning_rate": 2.2077918025369386e-05, + "loss": 0.1288, "step": 85625 }, { "epoch": 4.0, - "learning_rate": 1.206506961698936e-05, - "loss": 0.0658, + "learning_rate": 2.2077449955533365e-05, + "loss": 0.0752, "step": 85630 }, { "epoch": 4.0, - "learning_rate": 1.206460081571422e-05, - "loss": 0.1206, + "learning_rate": 2.2076981885697345e-05, + "loss": 0.1208, "step": 85635 }, { "epoch": 4.0, - "learning_rate": 1.206413201443908e-05, - "loss": 0.2309, + "learning_rate": 2.2076513815861325e-05, + "loss": 0.2597, "step": 85640 }, { "epoch": 4.0, - "learning_rate": 1.206366321316394e-05, - "loss": 0.2057, + "learning_rate": 2.2076045746025308e-05, + "loss": 0.2356, "step": 85645 }, { "epoch": 4.0, - "learning_rate": 1.2063194411888802e-05, - "loss": 0.0522, + "learning_rate": 2.2075577676189288e-05, + "loss": 0.0697, "step": 85650 }, { "epoch": 4.0, - "learning_rate": 1.2062725610613662e-05, - "loss": 0.0993, + "learning_rate": 2.2075109606353268e-05, + "loss": 0.0115, "step": 85655 }, { "epoch": 4.0, - "learning_rate": 1.2062256809338522e-05, - "loss": 0.0346, + "learning_rate": 2.207464153651725e-05, + "loss": 0.0664, "step": 85660 }, { "epoch": 4.0, - "learning_rate": 1.2061788008063382e-05, - "loss": 0.0709, + "learning_rate": 2.207417346668123e-05, + "loss": 0.052, "step": 85665 }, { "epoch": 4.0, - "learning_rate": 1.2061319206788245e-05, - "loss": 0.0602, + "learning_rate": 2.207370539684521e-05, + "loss": 0.0858, "step": 85670 }, { "epoch": 4.0, - "learning_rate": 1.2060850405513105e-05, - "loss": 0.0756, + "learning_rate": 2.207323732700919e-05, + "loss": 0.1018, "step": 85675 }, { "epoch": 4.0, - "learning_rate": 1.2060381604237965e-05, - "loss": 0.088, + "learning_rate": 2.207276925717317e-05, + "loss": 0.0639, "step": 85680 }, { "epoch": 4.0, - "learning_rate": 1.2059912802962825e-05, - "loss": 0.1051, + "learning_rate": 2.207230118733715e-05, + "loss": 0.1507, "step": 85685 }, { "epoch": 4.0, - "learning_rate": 1.2059444001687687e-05, - "loss": 0.2163, + "learning_rate": 2.207183311750113e-05, + "loss": 0.1872, "step": 85690 }, { "epoch": 4.0, - "learning_rate": 1.2058975200412546e-05, - "loss": 0.353, + "learning_rate": 2.207136504766511e-05, + "loss": 0.468, "step": 85695 }, { "epoch": 4.0, - "learning_rate": 1.2058506399137406e-05, - "loss": 0.0459, + "learning_rate": 2.2070896977829093e-05, + "loss": 0.0622, "step": 85700 }, { "epoch": 4.0, - "learning_rate": 1.2058037597862266e-05, - "loss": 0.0301, + "learning_rate": 2.2070428907993073e-05, + "loss": 0.0569, "step": 85705 }, { "epoch": 4.0, - "learning_rate": 1.2057568796587126e-05, - "loss": 0.0574, + "learning_rate": 2.2069960838157053e-05, + "loss": 0.0712, "step": 85710 }, { "epoch": 4.0, - "learning_rate": 1.2057099995311988e-05, - "loss": 0.0805, + "learning_rate": 2.2069492768321036e-05, + "loss": 0.0664, "step": 85715 }, { "epoch": 4.0, - "learning_rate": 1.205663119403685e-05, - "loss": 0.1654, + "learning_rate": 2.2069024698485016e-05, + "loss": 0.0775, "step": 85720 }, { "epoch": 4.0, - "eval_cer": 0.01052703752799744, - "eval_loss": 0.09995592385530472, - "eval_runtime": 386.2, - "eval_samples_per_second": 49.327, - "eval_steps_per_second": 12.333, - "eval_wer": 0.08902480462113489, + "eval_cer": 0.011984304725240149, + "eval_loss": 0.04588849097490311, + "eval_runtime": 393.5406, + "eval_samples_per_second": 48.407, + "eval_steps_per_second": 12.103, + "eval_wer": 0.09509413745599266, "step": 85724 }, { "epoch": 4.0, - "learning_rate": 1.205616239276171e-05, - "loss": 0.4197, + "learning_rate": 2.2068556628648996e-05, + "loss": 0.3099, "step": 85725 }, { "epoch": 4.0, - "learning_rate": 1.2055693591486571e-05, - "loss": 0.1284, + "learning_rate": 2.2068088558812975e-05, + "loss": 0.1177, "step": 85730 }, { "epoch": 4.0, - "learning_rate": 1.2055224790211431e-05, - "loss": 0.0131, + "learning_rate": 2.206762048897696e-05, + "loss": 0.0119, "step": 85735 }, { "epoch": 4.0, - "learning_rate": 1.2054755988936291e-05, - "loss": 0.0433, + "learning_rate": 2.206715241914094e-05, + "loss": 0.0366, "step": 85740 }, { "epoch": 4.0, - "learning_rate": 1.2054287187661151e-05, - "loss": 0.0229, + "learning_rate": 2.2066684349304915e-05, + "loss": 0.0562, "step": 85745 }, { "epoch": 4.0, - "learning_rate": 1.205381838638601e-05, - "loss": 0.0597, + "learning_rate": 2.2066216279468895e-05, + "loss": 0.0557, "step": 85750 }, { "epoch": 4.0, - "learning_rate": 1.2053349585110872e-05, - "loss": 0.0687, + "learning_rate": 2.2065748209632878e-05, + "loss": 0.0964, "step": 85755 }, { "epoch": 4.0, - "learning_rate": 1.2052880783835732e-05, - "loss": 0.0817, + "learning_rate": 2.2065280139796858e-05, + "loss": 0.1266, "step": 85760 }, { "epoch": 4.0, - "learning_rate": 1.2052411982560594e-05, - "loss": 0.1638, + "learning_rate": 2.2064812069960837e-05, + "loss": 0.1865, "step": 85765 }, { "epoch": 4.0, - "learning_rate": 1.2051943181285456e-05, - "loss": 0.1613, + "learning_rate": 2.2064344000124817e-05, + "loss": 0.1871, "step": 85770 }, { "epoch": 4.0, - "learning_rate": 1.2051474380010316e-05, - "loss": 0.2871, + "learning_rate": 2.20638759302888e-05, + "loss": 0.2434, "step": 85775 }, { "epoch": 4.0, - "learning_rate": 1.2051005578735175e-05, - "loss": 0.0609, + "learning_rate": 2.206340786045278e-05, + "loss": 0.0914, "step": 85780 }, { "epoch": 4.0, - "learning_rate": 1.2050536777460035e-05, - "loss": 0.0114, + "learning_rate": 2.206293979061676e-05, + "loss": 0.042, "step": 85785 }, { "epoch": 4.0, - "learning_rate": 1.2050067976184895e-05, - "loss": 0.0659, + "learning_rate": 2.2062471720780743e-05, + "loss": 0.0623, "step": 85790 }, { "epoch": 4.0, - "learning_rate": 1.2049599174909757e-05, - "loss": 0.0619, + "learning_rate": 2.2062003650944723e-05, + "loss": 0.1082, "step": 85795 }, { "epoch": 4.0, - "learning_rate": 1.2049130373634617e-05, - "loss": 0.0618, + "learning_rate": 2.2061535581108703e-05, + "loss": 0.0353, "step": 85800 }, { "epoch": 4.0, - "learning_rate": 1.2048661572359477e-05, - "loss": 0.0857, + "learning_rate": 2.2061067511272683e-05, + "loss": 0.0718, "step": 85805 }, { "epoch": 4.0, - "learning_rate": 1.204819277108434e-05, - "loss": 0.0764, + "learning_rate": 2.2060599441436663e-05, + "loss": 0.1308, "step": 85810 }, { "epoch": 4.0, - "learning_rate": 1.20477239698092e-05, - "loss": 0.0716, + "learning_rate": 2.2060131371600642e-05, + "loss": 0.0833, "step": 85815 }, { "epoch": 4.0, - "learning_rate": 1.204725516853406e-05, - "loss": 0.2912, + "learning_rate": 2.2059663301764622e-05, + "loss": 0.1326, "step": 85820 }, { "epoch": 4.0, - "learning_rate": 1.204678636725892e-05, - "loss": 0.1991, + "learning_rate": 2.2059195231928602e-05, + "loss": 0.3358, "step": 85825 }, { "epoch": 4.0, - "learning_rate": 1.204631756598378e-05, - "loss": 0.0563, + "learning_rate": 2.2058727162092585e-05, + "loss": 0.0518, "step": 85830 }, { "epoch": 4.01, - "learning_rate": 1.2045848764708642e-05, - "loss": 0.0287, + "learning_rate": 2.2058259092256565e-05, + "loss": 0.0175, "step": 85835 }, { "epoch": 4.01, - "learning_rate": 1.2045379963433501e-05, - "loss": 0.038, + "learning_rate": 2.2057791022420545e-05, + "loss": 0.0424, "step": 85840 }, { "epoch": 4.01, - "learning_rate": 1.2044911162158361e-05, - "loss": 0.0949, + "learning_rate": 2.2057322952584528e-05, + "loss": 0.0401, "step": 85845 }, { "epoch": 4.01, - "learning_rate": 1.2044442360883221e-05, - "loss": 0.0285, + "learning_rate": 2.2056854882748508e-05, + "loss": 0.0584, "step": 85850 }, { "epoch": 4.01, - "learning_rate": 1.2043973559608081e-05, - "loss": 0.0395, + "learning_rate": 2.2056386812912488e-05, + "loss": 0.0548, "step": 85855 }, { "epoch": 4.01, - "learning_rate": 1.2043504758332945e-05, - "loss": 0.116, + "learning_rate": 2.2055918743076468e-05, + "loss": 0.0919, "step": 85860 }, { "epoch": 4.01, - "learning_rate": 1.2043035957057805e-05, - "loss": 0.1158, + "learning_rate": 2.205545067324045e-05, + "loss": 0.1234, "step": 85865 }, { "epoch": 4.01, - "learning_rate": 1.2042567155782664e-05, - "loss": 0.1392, + "learning_rate": 2.2054982603404427e-05, + "loss": 0.2057, "step": 85870 }, { "epoch": 4.01, - "learning_rate": 1.2042098354507526e-05, - "loss": 0.1574, + "learning_rate": 2.2054514533568407e-05, + "loss": 0.3035, "step": 85875 }, { "epoch": 4.01, - "learning_rate": 1.2041629553232386e-05, - "loss": 0.0739, + "learning_rate": 2.2054046463732387e-05, + "loss": 0.0729, "step": 85880 }, { "epoch": 4.01, - "learning_rate": 1.2041160751957246e-05, - "loss": 0.0182, + "learning_rate": 2.205357839389637e-05, + "loss": 0.012, "step": 85885 }, { "epoch": 4.01, - "learning_rate": 1.2040691950682106e-05, - "loss": 0.026, + "learning_rate": 2.205311032406035e-05, + "loss": 0.0499, "step": 85890 }, { "epoch": 4.01, - "learning_rate": 1.2040223149406966e-05, - "loss": 0.0572, + "learning_rate": 2.205264225422433e-05, + "loss": 0.0177, "step": 85895 }, { "epoch": 4.01, - "learning_rate": 1.2039754348131827e-05, - "loss": 0.0655, + "learning_rate": 2.2052174184388313e-05, + "loss": 0.0323, "step": 85900 }, { "epoch": 4.01, - "learning_rate": 1.2039285546856689e-05, - "loss": 0.0803, + "learning_rate": 2.2051706114552293e-05, + "loss": 0.1812, "step": 85905 }, { "epoch": 4.01, - "learning_rate": 1.2038816745581549e-05, - "loss": 0.1215, + "learning_rate": 2.2051238044716272e-05, + "loss": 0.0678, "step": 85910 }, { "epoch": 4.01, - "learning_rate": 1.203834794430641e-05, - "loss": 0.2066, + "learning_rate": 2.2050769974880252e-05, + "loss": 0.2052, "step": 85915 }, { "epoch": 4.01, - "learning_rate": 1.203787914303127e-05, - "loss": 0.1278, + "learning_rate": 2.2050301905044236e-05, + "loss": 0.1158, "step": 85920 }, { "epoch": 4.01, - "learning_rate": 1.203741034175613e-05, - "loss": 0.4348, + "learning_rate": 2.2049833835208215e-05, + "loss": 0.2267, "step": 85925 }, { "epoch": 4.01, - "learning_rate": 1.203694154048099e-05, - "loss": 0.0525, + "learning_rate": 2.2049365765372195e-05, + "loss": 0.0623, "step": 85930 }, { "epoch": 4.01, - "learning_rate": 1.203647273920585e-05, - "loss": 0.0071, + "learning_rate": 2.204889769553617e-05, + "loss": 0.0142, "step": 85935 }, { "epoch": 4.01, - "learning_rate": 1.2036003937930712e-05, - "loss": 0.032, + "learning_rate": 2.2048429625700155e-05, + "loss": 0.047, "step": 85940 }, { "epoch": 4.01, - "learning_rate": 1.2035535136655572e-05, - "loss": 0.0231, + "learning_rate": 2.2047961555864135e-05, + "loss": 0.0285, "step": 85945 }, { "epoch": 4.01, - "learning_rate": 1.2035066335380434e-05, - "loss": 0.0631, + "learning_rate": 2.2047493486028114e-05, + "loss": 0.0712, "step": 85950 }, { "epoch": 4.01, - "learning_rate": 1.2034597534105295e-05, - "loss": 0.0885, + "learning_rate": 2.2047025416192094e-05, + "loss": 0.0502, "step": 85955 }, { "epoch": 4.01, - "learning_rate": 1.2034128732830155e-05, - "loss": 0.0834, + "learning_rate": 2.2046557346356077e-05, + "loss": 0.0447, "step": 85960 }, { "epoch": 4.01, - "learning_rate": 1.2033659931555015e-05, - "loss": 0.1309, + "learning_rate": 2.2046089276520057e-05, + "loss": 0.0771, "step": 85965 }, { "epoch": 4.01, - "learning_rate": 1.2033191130279875e-05, - "loss": 0.1026, + "learning_rate": 2.2045621206684037e-05, + "loss": 0.2218, "step": 85970 }, { "epoch": 4.01, - "learning_rate": 1.2032722329004735e-05, - "loss": 0.4151, + "learning_rate": 2.204515313684802e-05, + "loss": 0.1519, "step": 85975 }, { "epoch": 4.01, - "learning_rate": 1.2032253527729597e-05, - "loss": 0.1058, + "learning_rate": 2.2044685067012e-05, + "loss": 0.0928, "step": 85980 }, { "epoch": 4.01, - "learning_rate": 1.2031784726454456e-05, - "loss": 0.024, + "learning_rate": 2.204421699717598e-05, + "loss": 0.0178, "step": 85985 }, { "epoch": 4.01, - "learning_rate": 1.2031315925179316e-05, - "loss": 0.0085, + "learning_rate": 2.204374892733996e-05, + "loss": 0.064, "step": 85990 }, { "epoch": 4.01, - "learning_rate": 1.203084712390418e-05, - "loss": 0.0561, + "learning_rate": 2.2043280857503943e-05, + "loss": 0.0247, "step": 85995 }, { "epoch": 4.01, - "learning_rate": 1.203037832262904e-05, - "loss": 0.0677, + "learning_rate": 2.204281278766792e-05, + "loss": 0.0434, "step": 86000 }, { "epoch": 4.01, - "learning_rate": 1.20299095213539e-05, - "loss": 0.0989, + "learning_rate": 2.20423447178319e-05, + "loss": 0.0678, "step": 86005 }, { "epoch": 4.01, - "learning_rate": 1.202944072007876e-05, - "loss": 0.0582, + "learning_rate": 2.204187664799588e-05, + "loss": 0.1177, "step": 86010 }, { "epoch": 4.01, - "learning_rate": 1.202897191880362e-05, - "loss": 0.1444, + "learning_rate": 2.2041408578159862e-05, + "loss": 0.0669, "step": 86015 }, { "epoch": 4.01, - "learning_rate": 1.2028503117528481e-05, - "loss": 0.1608, + "learning_rate": 2.2040940508323842e-05, + "loss": 0.0712, "step": 86020 }, { "epoch": 4.01, - "learning_rate": 1.2028034316253341e-05, - "loss": 0.3524, + "learning_rate": 2.2040472438487822e-05, + "loss": 0.2023, "step": 86025 }, { "epoch": 4.01, - "learning_rate": 1.2027565514978201e-05, - "loss": 0.0627, + "learning_rate": 2.2040004368651805e-05, + "loss": 0.0938, "step": 86030 }, { "epoch": 4.01, - "learning_rate": 1.2027096713703061e-05, - "loss": 0.0291, + "learning_rate": 2.2039536298815785e-05, + "loss": 0.0401, "step": 86035 }, { "epoch": 4.01, - "learning_rate": 1.2026627912427923e-05, - "loss": 0.0183, + "learning_rate": 2.2039068228979765e-05, + "loss": 0.0537, "step": 86040 }, { "epoch": 4.01, - "learning_rate": 1.2026159111152784e-05, - "loss": 0.0537, + "learning_rate": 2.2038600159143745e-05, + "loss": 0.026, "step": 86045 }, { "epoch": 4.02, - "learning_rate": 1.2025690309877644e-05, - "loss": 0.0536, + "learning_rate": 2.2038132089307728e-05, + "loss": 0.0445, "step": 86050 }, { "epoch": 4.02, - "learning_rate": 1.2025221508602504e-05, - "loss": 0.0536, + "learning_rate": 2.2037664019471708e-05, + "loss": 0.0736, "step": 86055 }, { "epoch": 4.02, - "learning_rate": 1.2024752707327366e-05, - "loss": 0.1036, + "learning_rate": 2.2037195949635684e-05, + "loss": 0.1486, "step": 86060 }, { "epoch": 4.02, - "learning_rate": 1.2024283906052226e-05, - "loss": 0.0556, + "learning_rate": 2.2036727879799664e-05, + "loss": 0.1102, "step": 86065 }, { "epoch": 4.02, - "learning_rate": 1.2023815104777086e-05, - "loss": 0.2033, + "learning_rate": 2.2036259809963647e-05, + "loss": 0.1204, "step": 86070 }, { "epoch": 4.02, - "learning_rate": 1.2023346303501945e-05, - "loss": 0.2803, + "learning_rate": 2.2035791740127627e-05, + "loss": 0.304, "step": 86075 }, { "epoch": 4.02, - "learning_rate": 1.2022877502226807e-05, - "loss": 0.0971, + "learning_rate": 2.2035323670291607e-05, + "loss": 0.0598, "step": 86080 }, { "epoch": 4.02, - "learning_rate": 1.2022408700951667e-05, - "loss": 0.0376, + "learning_rate": 2.203485560045559e-05, + "loss": 0.0666, "step": 86085 }, { "epoch": 4.02, - "learning_rate": 1.2021939899676529e-05, - "loss": 0.0532, + "learning_rate": 2.203438753061957e-05, + "loss": 0.0134, "step": 86090 }, { "epoch": 4.02, - "learning_rate": 1.2021471098401389e-05, - "loss": 0.0816, + "learning_rate": 2.203391946078355e-05, + "loss": 0.0621, "step": 86095 }, { "epoch": 4.02, - "learning_rate": 1.202100229712625e-05, - "loss": 0.0376, + "learning_rate": 2.203345139094753e-05, + "loss": 0.0668, "step": 86100 }, { "epoch": 4.02, - "learning_rate": 1.202053349585111e-05, - "loss": 0.0546, + "learning_rate": 2.2032983321111512e-05, + "loss": 0.1054, "step": 86105 }, { "epoch": 4.02, - "learning_rate": 1.202006469457597e-05, - "loss": 0.0291, + "learning_rate": 2.2032515251275492e-05, + "loss": 0.093, "step": 86110 }, { "epoch": 4.02, - "learning_rate": 1.201959589330083e-05, - "loss": 0.0721, + "learning_rate": 2.2032047181439472e-05, + "loss": 0.1842, "step": 86115 }, { "epoch": 4.02, - "learning_rate": 1.2019127092025692e-05, - "loss": 0.1596, + "learning_rate": 2.2031579111603452e-05, + "loss": 0.2382, "step": 86120 }, { "epoch": 4.02, - "learning_rate": 1.2018658290750552e-05, - "loss": 0.1853, + "learning_rate": 2.2031111041767432e-05, + "loss": 0.3083, "step": 86125 }, { "epoch": 4.02, - "learning_rate": 1.2018189489475412e-05, - "loss": 0.0676, + "learning_rate": 2.203064297193141e-05, + "loss": 0.0964, "step": 86130 }, { "epoch": 4.02, - "learning_rate": 1.2017720688200273e-05, - "loss": 0.017, + "learning_rate": 2.203017490209539e-05, + "loss": 0.033, "step": 86135 }, { "epoch": 4.02, - "learning_rate": 1.2017251886925135e-05, - "loss": 0.0247, + "learning_rate": 2.2029706832259375e-05, + "loss": 0.0476, "step": 86140 }, { "epoch": 4.02, - "learning_rate": 1.2016783085649995e-05, - "loss": 0.023, + "learning_rate": 2.2029238762423354e-05, + "loss": 0.036, "step": 86145 }, { "epoch": 4.02, - "learning_rate": 1.2016314284374855e-05, - "loss": 0.0669, + "learning_rate": 2.2028770692587334e-05, + "loss": 0.0635, "step": 86150 }, { "epoch": 4.02, - "learning_rate": 1.2015845483099715e-05, - "loss": 0.0811, + "learning_rate": 2.2028302622751314e-05, + "loss": 0.0899, "step": 86155 }, { "epoch": 4.02, - "learning_rate": 1.2015376681824576e-05, - "loss": 0.147, + "learning_rate": 2.2027834552915297e-05, + "loss": 0.0925, "step": 86160 }, { "epoch": 4.02, - "learning_rate": 1.2014907880549436e-05, - "loss": 0.1308, + "learning_rate": 2.2027366483079277e-05, + "loss": 0.1154, "step": 86165 }, { "epoch": 4.02, - "learning_rate": 1.2014439079274296e-05, - "loss": 0.1823, + "learning_rate": 2.2026898413243257e-05, + "loss": 0.1085, "step": 86170 }, { "epoch": 4.02, - "learning_rate": 1.2013970277999156e-05, - "loss": 0.2708, + "learning_rate": 2.2026430343407237e-05, + "loss": 0.2105, "step": 86175 }, { "epoch": 4.02, - "learning_rate": 1.2013501476724016e-05, - "loss": 0.0748, + "learning_rate": 2.202596227357122e-05, + "loss": 0.0871, "step": 86180 }, { "epoch": 4.02, - "learning_rate": 1.201303267544888e-05, - "loss": 0.0506, + "learning_rate": 2.20254942037352e-05, + "loss": 0.0224, "step": 86185 }, { "epoch": 4.02, - "learning_rate": 1.201256387417374e-05, - "loss": 0.0432, + "learning_rate": 2.2025026133899176e-05, + "loss": 0.0324, "step": 86190 }, { "epoch": 4.02, - "learning_rate": 1.2012095072898599e-05, - "loss": 0.0347, + "learning_rate": 2.2024558064063156e-05, + "loss": 0.0684, "step": 86195 }, { "epoch": 4.02, - "learning_rate": 1.201162627162346e-05, - "loss": 0.0965, + "learning_rate": 2.202408999422714e-05, + "loss": 0.0833, "step": 86200 }, { "epoch": 4.02, - "learning_rate": 1.201115747034832e-05, - "loss": 0.0806, + "learning_rate": 2.202362192439112e-05, + "loss": 0.09, "step": 86205 }, { "epoch": 4.02, - "learning_rate": 1.201068866907318e-05, - "loss": 0.0727, + "learning_rate": 2.20231538545551e-05, + "loss": 0.0869, "step": 86210 }, { "epoch": 4.02, - "learning_rate": 1.201021986779804e-05, - "loss": 0.0547, + "learning_rate": 2.2022685784719082e-05, + "loss": 0.1744, "step": 86215 }, { "epoch": 4.02, - "learning_rate": 1.20097510665229e-05, - "loss": 0.1392, + "learning_rate": 2.2022217714883062e-05, + "loss": 0.1247, "step": 86220 }, { "epoch": 4.02, - "learning_rate": 1.2009282265247762e-05, - "loss": 0.3181, + "learning_rate": 2.202174964504704e-05, + "loss": 0.3012, "step": 86225 }, { "epoch": 4.02, - "learning_rate": 1.2008813463972624e-05, - "loss": 0.0705, + "learning_rate": 2.202128157521102e-05, + "loss": 0.0719, "step": 86230 }, { "epoch": 4.02, - "learning_rate": 1.2008344662697484e-05, - "loss": 0.0555, + "learning_rate": 2.2020813505375005e-05, + "loss": 0.0464, "step": 86235 }, { "epoch": 4.02, - "learning_rate": 1.2007875861422345e-05, - "loss": 0.0531, + "learning_rate": 2.2020345435538984e-05, + "loss": 0.0283, "step": 86240 }, { "epoch": 4.02, - "learning_rate": 1.2007407060147205e-05, - "loss": 0.0358, + "learning_rate": 2.2019877365702964e-05, + "loss": 0.0334, "step": 86245 }, { "epoch": 4.02, - "learning_rate": 1.2006938258872065e-05, - "loss": 0.0923, + "learning_rate": 2.201940929586694e-05, + "loss": 0.0261, "step": 86250 }, { "epoch": 4.02, - "learning_rate": 1.2006469457596925e-05, - "loss": 0.0677, + "learning_rate": 2.2018941226030924e-05, + "loss": 0.0371, "step": 86255 }, { "epoch": 4.03, - "learning_rate": 1.2006000656321785e-05, - "loss": 0.0699, + "learning_rate": 2.2018473156194904e-05, + "loss": 0.1252, "step": 86260 }, { "epoch": 4.03, - "learning_rate": 1.2005531855046647e-05, - "loss": 0.1537, + "learning_rate": 2.2018005086358884e-05, + "loss": 0.0915, "step": 86265 }, { "epoch": 4.03, - "learning_rate": 1.2005063053771507e-05, - "loss": 0.195, + "learning_rate": 2.2017537016522867e-05, + "loss": 0.2365, "step": 86270 }, { "epoch": 4.03, - "learning_rate": 1.2004594252496368e-05, - "loss": 0.2251, + "learning_rate": 2.2017068946686847e-05, + "loss": 0.3978, "step": 86275 }, { "epoch": 4.03, - "learning_rate": 1.200412545122123e-05, - "loss": 0.0748, + "learning_rate": 2.2016600876850826e-05, + "loss": 0.0498, "step": 86280 }, { "epoch": 4.03, - "learning_rate": 1.200365664994609e-05, - "loss": 0.0121, + "learning_rate": 2.2016132807014806e-05, + "loss": 0.0049, "step": 86285 }, { "epoch": 4.03, - "learning_rate": 1.200318784867095e-05, - "loss": 0.0332, + "learning_rate": 2.201566473717879e-05, + "loss": 0.0205, "step": 86290 }, { "epoch": 4.03, - "learning_rate": 1.200271904739581e-05, - "loss": 0.0416, + "learning_rate": 2.201519666734277e-05, + "loss": 0.0766, "step": 86295 }, { "epoch": 4.03, - "learning_rate": 1.200225024612067e-05, - "loss": 0.0202, + "learning_rate": 2.201472859750675e-05, + "loss": 0.0454, "step": 86300 }, { "epoch": 4.03, - "learning_rate": 1.2001781444845531e-05, - "loss": 0.0398, + "learning_rate": 2.201426052767073e-05, + "loss": 0.0498, "step": 86305 }, { "epoch": 4.03, - "learning_rate": 1.2001312643570391e-05, - "loss": 0.041, + "learning_rate": 2.2013792457834712e-05, + "loss": 0.0873, "step": 86310 }, { "epoch": 4.03, - "learning_rate": 1.2000843842295251e-05, - "loss": 0.1806, + "learning_rate": 2.201332438799869e-05, + "loss": 0.103, "step": 86315 }, { "epoch": 4.03, - "learning_rate": 1.2000375041020114e-05, - "loss": 0.1158, + "learning_rate": 2.201285631816267e-05, + "loss": 0.1496, "step": 86320 }, { "epoch": 4.03, - "learning_rate": 1.1999906239744974e-05, - "loss": 0.1668, + "learning_rate": 2.201238824832665e-05, + "loss": 0.2956, "step": 86325 }, { "epoch": 4.03, - "learning_rate": 1.1999437438469834e-05, - "loss": 0.0735, + "learning_rate": 2.201192017849063e-05, + "loss": 0.0642, "step": 86330 }, { "epoch": 4.03, - "learning_rate": 1.1998968637194694e-05, - "loss": 0.0184, + "learning_rate": 2.201145210865461e-05, + "loss": 0.0437, "step": 86335 }, { "epoch": 4.03, - "learning_rate": 1.1998499835919554e-05, - "loss": 0.0445, + "learning_rate": 2.201098403881859e-05, + "loss": 0.0438, "step": 86340 }, { "epoch": 4.03, - "learning_rate": 1.1998031034644416e-05, - "loss": 0.023, + "learning_rate": 2.2010515968982574e-05, + "loss": 0.0313, "step": 86345 }, { "epoch": 4.03, - "learning_rate": 1.1997562233369276e-05, - "loss": 0.0438, + "learning_rate": 2.2010047899146554e-05, + "loss": 0.0826, "step": 86350 }, { "epoch": 4.03, - "learning_rate": 1.1997093432094136e-05, - "loss": 0.0444, + "learning_rate": 2.2009579829310534e-05, + "loss": 0.0638, "step": 86355 }, { "epoch": 4.03, - "learning_rate": 1.1996624630818996e-05, - "loss": 0.0491, + "learning_rate": 2.2009111759474514e-05, + "loss": 0.1278, "step": 86360 }, { "epoch": 4.03, - "learning_rate": 1.1996155829543855e-05, - "loss": 0.1171, + "learning_rate": 2.2008643689638497e-05, + "loss": 0.182, "step": 86365 }, { "epoch": 4.03, - "learning_rate": 1.1995687028268719e-05, - "loss": 0.1871, + "learning_rate": 2.2008175619802477e-05, + "loss": 0.1623, "step": 86370 }, { "epoch": 4.03, - "learning_rate": 1.1995218226993579e-05, - "loss": 0.2566, + "learning_rate": 2.2007707549966457e-05, + "loss": 0.2398, "step": 86375 }, { "epoch": 4.03, - "learning_rate": 1.1994749425718439e-05, - "loss": 0.0699, + "learning_rate": 2.2007239480130433e-05, + "loss": 0.0551, "step": 86380 }, { "epoch": 4.03, - "learning_rate": 1.19942806244433e-05, - "loss": 0.0023, + "learning_rate": 2.2006771410294416e-05, + "loss": 0.0089, "step": 86385 }, { "epoch": 4.03, - "learning_rate": 1.199381182316816e-05, - "loss": 0.0388, + "learning_rate": 2.2006303340458396e-05, + "loss": 0.0345, "step": 86390 }, { "epoch": 4.03, - "learning_rate": 1.199334302189302e-05, - "loss": 0.065, + "learning_rate": 2.2005835270622376e-05, + "loss": 0.0253, "step": 86395 }, { "epoch": 4.03, - "learning_rate": 1.199287422061788e-05, - "loss": 0.0478, + "learning_rate": 2.200536720078636e-05, + "loss": 0.0238, "step": 86400 }, { "epoch": 4.03, - "learning_rate": 1.199240541934274e-05, - "loss": 0.1402, + "learning_rate": 2.200489913095034e-05, + "loss": 0.0327, "step": 86405 }, { "epoch": 4.03, - "learning_rate": 1.1991936618067602e-05, - "loss": 0.0579, + "learning_rate": 2.200443106111432e-05, + "loss": 0.1357, "step": 86410 }, { "epoch": 4.03, - "learning_rate": 1.1991467816792463e-05, - "loss": 0.1218, + "learning_rate": 2.20039629912783e-05, + "loss": 0.0486, "step": 86415 }, { "epoch": 4.03, - "learning_rate": 1.1990999015517323e-05, - "loss": 0.1371, + "learning_rate": 2.200349492144228e-05, + "loss": 0.1714, "step": 86420 }, { "epoch": 4.03, - "learning_rate": 1.1990530214242185e-05, - "loss": 0.259, + "learning_rate": 2.200302685160626e-05, + "loss": 0.2037, "step": 86425 }, { "epoch": 4.03, - "learning_rate": 1.1990061412967045e-05, - "loss": 0.1506, + "learning_rate": 2.200255878177024e-05, + "loss": 0.0548, "step": 86430 }, { "epoch": 4.03, - "learning_rate": 1.1989592611691905e-05, - "loss": 0.0274, + "learning_rate": 2.200209071193422e-05, + "loss": 0.0562, "step": 86435 }, { "epoch": 4.03, - "learning_rate": 1.1989123810416765e-05, - "loss": 0.0322, + "learning_rate": 2.20016226420982e-05, + "loss": 0.0306, "step": 86440 }, { "epoch": 4.03, - "learning_rate": 1.1988655009141625e-05, - "loss": 0.0835, + "learning_rate": 2.200115457226218e-05, + "loss": 0.0795, "step": 86445 }, { "epoch": 4.03, - "learning_rate": 1.1988186207866486e-05, - "loss": 0.0417, + "learning_rate": 2.200068650242616e-05, + "loss": 0.0365, "step": 86450 }, { "epoch": 4.03, - "learning_rate": 1.1987717406591346e-05, - "loss": 0.0588, + "learning_rate": 2.2000218432590144e-05, + "loss": 0.139, "step": 86455 }, { "epoch": 4.03, - "learning_rate": 1.1987248605316208e-05, - "loss": 0.089, + "learning_rate": 2.1999750362754124e-05, + "loss": 0.1529, "step": 86460 }, { "epoch": 4.03, - "learning_rate": 1.198677980404107e-05, - "loss": 0.1077, + "learning_rate": 2.1999282292918103e-05, + "loss": 0.0616, "step": 86465 }, { "epoch": 4.03, - "learning_rate": 1.198631100276593e-05, - "loss": 0.1568, + "learning_rate": 2.1998814223082083e-05, + "loss": 0.1053, "step": 86470 }, { "epoch": 4.04, - "learning_rate": 1.198584220149079e-05, - "loss": 0.4361, + "learning_rate": 2.1998346153246066e-05, + "loss": 0.2775, "step": 86475 }, { "epoch": 4.04, - "learning_rate": 1.198537340021565e-05, - "loss": 0.1228, + "learning_rate": 2.1997878083410046e-05, + "loss": 0.097, "step": 86480 }, { "epoch": 4.04, - "learning_rate": 1.1984904598940509e-05, - "loss": 0.0356, + "learning_rate": 2.1997410013574026e-05, + "loss": 0.0256, "step": 86485 }, { "epoch": 4.04, - "learning_rate": 1.198443579766537e-05, - "loss": 0.01, + "learning_rate": 2.1996941943738006e-05, + "loss": 0.0132, "step": 86490 }, { "epoch": 4.04, - "learning_rate": 1.198396699639023e-05, - "loss": 0.0745, + "learning_rate": 2.199647387390199e-05, + "loss": 0.0622, "step": 86495 }, { "epoch": 4.04, - "learning_rate": 1.198349819511509e-05, - "loss": 0.0558, + "learning_rate": 2.199600580406597e-05, + "loss": 0.0226, "step": 86500 }, { "epoch": 4.04, - "learning_rate": 1.198302939383995e-05, - "loss": 0.0393, + "learning_rate": 2.1995537734229945e-05, + "loss": 0.0785, "step": 86505 }, { "epoch": 4.04, - "learning_rate": 1.1982560592564814e-05, - "loss": 0.1151, + "learning_rate": 2.199506966439393e-05, + "loss": 0.0545, "step": 86510 }, { "epoch": 4.04, - "learning_rate": 1.1982091791289674e-05, - "loss": 0.1022, + "learning_rate": 2.199460159455791e-05, + "loss": 0.1494, "step": 86515 }, { "epoch": 4.04, - "learning_rate": 1.1981622990014534e-05, - "loss": 0.0857, + "learning_rate": 2.1994133524721888e-05, + "loss": 0.14, "step": 86520 }, { "epoch": 4.04, - "learning_rate": 1.1981154188739394e-05, - "loss": 0.3631, + "learning_rate": 2.1993665454885868e-05, + "loss": 0.3222, "step": 86525 }, { "epoch": 4.04, - "learning_rate": 1.1980685387464255e-05, - "loss": 0.1152, + "learning_rate": 2.199319738504985e-05, + "loss": 0.0663, "step": 86530 }, { "epoch": 4.04, - "learning_rate": 1.1980216586189115e-05, - "loss": 0.0289, + "learning_rate": 2.199272931521383e-05, + "loss": 0.0479, "step": 86535 }, { "epoch": 4.04, - "learning_rate": 1.1979747784913975e-05, - "loss": 0.0444, + "learning_rate": 2.199226124537781e-05, + "loss": 0.0064, "step": 86540 }, { "epoch": 4.04, - "learning_rate": 1.1979278983638835e-05, - "loss": 0.0932, + "learning_rate": 2.199179317554179e-05, + "loss": 0.0248, "step": 86545 }, { "epoch": 4.04, - "learning_rate": 1.1978810182363697e-05, - "loss": 0.0505, + "learning_rate": 2.1991325105705774e-05, + "loss": 0.0631, "step": 86550 }, { "epoch": 4.04, - "learning_rate": 1.1978341381088558e-05, - "loss": 0.0459, + "learning_rate": 2.1990857035869754e-05, + "loss": 0.1721, "step": 86555 }, { "epoch": 4.04, - "learning_rate": 1.1977872579813418e-05, - "loss": 0.174, + "learning_rate": 2.1990388966033733e-05, + "loss": 0.0625, "step": 86560 }, { "epoch": 4.04, - "learning_rate": 1.1977403778538278e-05, - "loss": 0.1497, + "learning_rate": 2.1989920896197713e-05, + "loss": 0.1172, "step": 86565 }, { "epoch": 4.04, - "learning_rate": 1.197693497726314e-05, - "loss": 0.1939, + "learning_rate": 2.1989452826361693e-05, + "loss": 0.181, "step": 86570 }, { "epoch": 4.04, - "learning_rate": 1.1976466175988e-05, - "loss": 0.2241, + "learning_rate": 2.1988984756525673e-05, + "loss": 0.3617, "step": 86575 }, { "epoch": 4.04, - "learning_rate": 1.197599737471286e-05, - "loss": 0.0899, + "learning_rate": 2.1988516686689653e-05, + "loss": 0.0803, "step": 86580 }, { "epoch": 4.04, - "learning_rate": 1.197552857343772e-05, - "loss": 0.0443, + "learning_rate": 2.1988048616853636e-05, + "loss": 0.0138, "step": 86585 }, { "epoch": 4.04, - "learning_rate": 1.1975059772162581e-05, - "loss": 0.0571, + "learning_rate": 2.1987580547017616e-05, + "loss": 0.0587, "step": 86590 }, { "epoch": 4.04, - "learning_rate": 1.1974590970887441e-05, - "loss": 0.0522, + "learning_rate": 2.1987112477181596e-05, + "loss": 0.0336, "step": 86595 }, { "epoch": 4.04, - "learning_rate": 1.1974122169612303e-05, - "loss": 0.0935, + "learning_rate": 2.1986644407345575e-05, + "loss": 0.0853, "step": 86600 }, { "epoch": 4.04, - "learning_rate": 1.1973653368337163e-05, - "loss": 0.0466, + "learning_rate": 2.198617633750956e-05, + "loss": 0.0329, "step": 86605 }, { "epoch": 4.04, - "learning_rate": 1.1973184567062024e-05, - "loss": 0.117, + "learning_rate": 2.198570826767354e-05, + "loss": 0.0856, "step": 86610 }, { "epoch": 4.04, - "learning_rate": 1.1972715765786884e-05, - "loss": 0.1251, + "learning_rate": 2.1985240197837518e-05, + "loss": 0.1107, "step": 86615 }, { "epoch": 4.04, - "learning_rate": 1.1972246964511744e-05, - "loss": 0.1509, + "learning_rate": 2.1984772128001498e-05, + "loss": 0.1964, "step": 86620 }, { "epoch": 4.04, - "learning_rate": 1.1971778163236604e-05, - "loss": 0.1627, + "learning_rate": 2.198430405816548e-05, + "loss": 0.2396, "step": 86625 }, { "epoch": 4.04, - "learning_rate": 1.1971309361961466e-05, - "loss": 0.0744, + "learning_rate": 2.1983835988329458e-05, + "loss": 0.0961, "step": 86630 }, { "epoch": 4.04, - "learning_rate": 1.1970840560686326e-05, - "loss": 0.043, + "learning_rate": 2.1983367918493438e-05, + "loss": 0.0129, "step": 86635 }, { "epoch": 4.04, - "learning_rate": 1.1970371759411186e-05, - "loss": 0.0451, + "learning_rate": 2.198289984865742e-05, + "loss": 0.0392, "step": 86640 }, { "epoch": 4.04, - "learning_rate": 1.1969902958136047e-05, - "loss": 0.0292, + "learning_rate": 2.19824317788214e-05, + "loss": 0.0301, "step": 86645 }, { "epoch": 4.04, - "learning_rate": 1.1969434156860909e-05, - "loss": 0.0933, + "learning_rate": 2.198196370898538e-05, + "loss": 0.0436, "step": 86650 }, { "epoch": 4.04, - "learning_rate": 1.1968965355585769e-05, - "loss": 0.0681, + "learning_rate": 2.198149563914936e-05, + "loss": 0.066, "step": 86655 }, { "epoch": 4.04, - "learning_rate": 1.1968496554310629e-05, - "loss": 0.0521, + "learning_rate": 2.1981027569313343e-05, + "loss": 0.0572, "step": 86660 }, { "epoch": 4.04, - "learning_rate": 1.1968027753035489e-05, - "loss": 0.0842, + "learning_rate": 2.1980559499477323e-05, + "loss": 0.1206, "step": 86665 }, { "epoch": 4.04, - "learning_rate": 1.196755895176035e-05, - "loss": 0.1892, + "learning_rate": 2.1980091429641303e-05, + "loss": 0.0729, "step": 86670 }, { "epoch": 4.04, - "learning_rate": 1.196709015048521e-05, - "loss": 0.3711, + "learning_rate": 2.1979623359805283e-05, + "loss": 0.32, "step": 86675 }, { "epoch": 4.04, - "learning_rate": 1.196662134921007e-05, - "loss": 0.0664, + "learning_rate": 2.1979155289969266e-05, + "loss": 0.0912, "step": 86680 }, { "epoch": 4.04, - "learning_rate": 1.196615254793493e-05, - "loss": 0.0181, + "learning_rate": 2.1978687220133246e-05, + "loss": 0.0107, "step": 86685 }, { "epoch": 4.05, - "learning_rate": 1.196568374665979e-05, - "loss": 0.042, + "learning_rate": 2.1978219150297226e-05, + "loss": 0.0748, "step": 86690 }, { "epoch": 4.05, - "learning_rate": 1.1965214945384653e-05, - "loss": 0.0395, + "learning_rate": 2.1977751080461206e-05, + "loss": 0.0546, "step": 86695 }, { "epoch": 4.05, - "learning_rate": 1.1964746144109513e-05, - "loss": 0.0535, + "learning_rate": 2.1977283010625185e-05, + "loss": 0.0559, "step": 86700 }, { "epoch": 4.05, - "learning_rate": 1.1964277342834373e-05, - "loss": 0.1296, + "learning_rate": 2.1976814940789165e-05, + "loss": 0.0553, "step": 86705 }, { "epoch": 4.05, - "learning_rate": 1.1963808541559235e-05, - "loss": 0.0728, + "learning_rate": 2.1976346870953145e-05, + "loss": 0.0383, "step": 86710 }, { "epoch": 4.05, - "learning_rate": 1.1963339740284095e-05, - "loss": 0.1151, + "learning_rate": 2.1975878801117128e-05, + "loss": 0.2138, "step": 86715 }, { "epoch": 4.05, - "learning_rate": 1.1962870939008955e-05, - "loss": 0.1472, + "learning_rate": 2.1975410731281108e-05, + "loss": 0.1925, "step": 86720 }, { "epoch": 4.05, - "learning_rate": 1.1962402137733815e-05, - "loss": 0.3731, + "learning_rate": 2.1974942661445088e-05, + "loss": 0.2555, "step": 86725 }, { "epoch": 4.05, - "learning_rate": 1.1961933336458675e-05, - "loss": 0.0713, + "learning_rate": 2.1974474591609068e-05, + "loss": 0.0554, "step": 86730 }, { "epoch": 4.05, - "learning_rate": 1.1961464535183536e-05, - "loss": 0.054, + "learning_rate": 2.197400652177305e-05, + "loss": 0.0222, "step": 86735 }, { "epoch": 4.05, - "learning_rate": 1.1960995733908398e-05, - "loss": 0.0754, + "learning_rate": 2.197353845193703e-05, + "loss": 0.0331, "step": 86740 }, { "epoch": 4.05, - "learning_rate": 1.1960526932633258e-05, - "loss": 0.0406, + "learning_rate": 2.197307038210101e-05, + "loss": 0.0637, "step": 86745 }, { "epoch": 4.05, - "learning_rate": 1.196005813135812e-05, - "loss": 0.0433, + "learning_rate": 2.197260231226499e-05, + "loss": 0.032, "step": 86750 }, { "epoch": 4.05, - "learning_rate": 1.195958933008298e-05, - "loss": 0.0618, + "learning_rate": 2.197213424242897e-05, + "loss": 0.0824, "step": 86755 }, { "epoch": 4.05, - "learning_rate": 1.195912052880784e-05, - "loss": 0.1214, + "learning_rate": 2.197166617259295e-05, + "loss": 0.1305, "step": 86760 }, { "epoch": 4.05, - "learning_rate": 1.19586517275327e-05, - "loss": 0.0823, + "learning_rate": 2.197119810275693e-05, + "loss": 0.1622, "step": 86765 }, { "epoch": 4.05, - "learning_rate": 1.195818292625756e-05, - "loss": 0.2509, + "learning_rate": 2.1970730032920913e-05, + "loss": 0.2243, "step": 86770 }, { "epoch": 4.05, - "learning_rate": 1.1957714124982421e-05, - "loss": 0.281, + "learning_rate": 2.1970261963084893e-05, + "loss": 0.2745, "step": 86775 }, { "epoch": 4.05, - "learning_rate": 1.195724532370728e-05, - "loss": 0.0714, + "learning_rate": 2.1969793893248873e-05, + "loss": 0.0607, "step": 86780 }, { "epoch": 4.05, - "learning_rate": 1.1956776522432142e-05, - "loss": 0.0125, + "learning_rate": 2.1969325823412852e-05, + "loss": 0.0255, "step": 86785 }, { "epoch": 4.05, - "learning_rate": 1.1956307721157004e-05, - "loss": 0.0437, + "learning_rate": 2.1968857753576836e-05, + "loss": 0.0681, "step": 86790 }, { "epoch": 4.05, - "learning_rate": 1.1955838919881864e-05, - "loss": 0.02, + "learning_rate": 2.1968389683740815e-05, + "loss": 0.0147, "step": 86795 }, { "epoch": 4.05, - "learning_rate": 1.1955370118606724e-05, - "loss": 0.0802, + "learning_rate": 2.1967921613904795e-05, + "loss": 0.0842, "step": 86800 }, { "epoch": 4.05, - "learning_rate": 1.1954901317331584e-05, - "loss": 0.0699, + "learning_rate": 2.1967453544068775e-05, + "loss": 0.088, "step": 86805 }, { "epoch": 4.05, - "learning_rate": 1.1954432516056444e-05, - "loss": 0.0943, + "learning_rate": 2.1966985474232758e-05, + "loss": 0.0626, "step": 86810 }, { "epoch": 4.05, - "learning_rate": 1.1953963714781305e-05, - "loss": 0.0623, + "learning_rate": 2.1966517404396738e-05, + "loss": 0.1063, "step": 86815 }, { "epoch": 4.05, - "learning_rate": 1.1953494913506165e-05, - "loss": 0.176, + "learning_rate": 2.1966049334560714e-05, + "loss": 0.2239, "step": 86820 }, { "epoch": 4.05, - "learning_rate": 1.1953026112231025e-05, - "loss": 0.2118, + "learning_rate": 2.1965581264724698e-05, + "loss": 0.1932, "step": 86825 }, { "epoch": 4.05, - "learning_rate": 1.1952557310955885e-05, - "loss": 0.0911, + "learning_rate": 2.1965113194888678e-05, + "loss": 0.0627, "step": 86830 }, { "epoch": 4.05, - "learning_rate": 1.1952088509680749e-05, - "loss": 0.0413, + "learning_rate": 2.1964645125052657e-05, + "loss": 0.0116, "step": 86835 }, { "epoch": 4.05, - "learning_rate": 1.1951619708405608e-05, - "loss": 0.0189, + "learning_rate": 2.1964177055216637e-05, + "loss": 0.0608, "step": 86840 }, { "epoch": 4.05, - "learning_rate": 1.1951150907130468e-05, - "loss": 0.0124, + "learning_rate": 2.196370898538062e-05, + "loss": 0.0391, "step": 86845 }, { "epoch": 4.05, - "learning_rate": 1.1950682105855328e-05, - "loss": 0.0306, + "learning_rate": 2.19632409155446e-05, + "loss": 0.0543, "step": 86850 }, { "epoch": 4.05, - "learning_rate": 1.195021330458019e-05, - "loss": 0.0734, + "learning_rate": 2.196277284570858e-05, + "loss": 0.0533, "step": 86855 }, { "epoch": 4.05, - "learning_rate": 1.194974450330505e-05, - "loss": 0.0863, + "learning_rate": 2.196230477587256e-05, + "loss": 0.1216, "step": 86860 }, { "epoch": 4.05, - "learning_rate": 1.194927570202991e-05, - "loss": 0.191, + "learning_rate": 2.1961836706036543e-05, + "loss": 0.0923, "step": 86865 }, { "epoch": 4.05, - "learning_rate": 1.194880690075477e-05, - "loss": 0.1266, + "learning_rate": 2.1961368636200523e-05, + "loss": 0.2205, "step": 86870 }, { "epoch": 4.05, - "learning_rate": 1.194833809947963e-05, - "loss": 0.2328, + "learning_rate": 2.1960900566364503e-05, + "loss": 0.3452, "step": 86875 }, { "epoch": 4.05, - "learning_rate": 1.1947869298204493e-05, - "loss": 0.09, + "learning_rate": 2.1960432496528486e-05, + "loss": 0.0783, "step": 86880 }, { "epoch": 4.05, - "learning_rate": 1.1947400496929353e-05, - "loss": 0.0347, + "learning_rate": 2.1959964426692462e-05, + "loss": 0.0415, "step": 86885 }, { "epoch": 4.05, - "learning_rate": 1.1946931695654213e-05, - "loss": 0.0266, + "learning_rate": 2.1959496356856442e-05, + "loss": 0.017, "step": 86890 }, { "epoch": 4.05, - "learning_rate": 1.1946462894379075e-05, - "loss": 0.053, + "learning_rate": 2.1959028287020422e-05, + "loss": 0.0489, "step": 86895 }, { "epoch": 4.05, - "learning_rate": 1.1945994093103934e-05, - "loss": 0.0394, + "learning_rate": 2.1958560217184405e-05, + "loss": 0.0452, "step": 86900 }, { "epoch": 4.06, - "learning_rate": 1.1945525291828794e-05, - "loss": 0.0295, + "learning_rate": 2.1958092147348385e-05, + "loss": 0.0558, "step": 86905 }, { "epoch": 4.06, - "learning_rate": 1.1945056490553654e-05, - "loss": 0.0788, + "learning_rate": 2.1957624077512365e-05, + "loss": 0.0609, "step": 86910 }, { "epoch": 4.06, - "learning_rate": 1.1944587689278514e-05, - "loss": 0.1111, + "learning_rate": 2.1957156007676345e-05, + "loss": 0.0916, "step": 86915 }, { "epoch": 4.06, - "learning_rate": 1.1944118888003376e-05, - "loss": 0.1836, + "learning_rate": 2.1956687937840328e-05, + "loss": 0.288, "step": 86920 }, { "epoch": 4.06, - "learning_rate": 1.1943650086728238e-05, - "loss": 0.2362, + "learning_rate": 2.1956219868004308e-05, + "loss": 0.3839, "step": 86925 }, { "epoch": 4.06, - "learning_rate": 1.1943181285453097e-05, - "loss": 0.1221, + "learning_rate": 2.1955751798168287e-05, + "loss": 0.094, "step": 86930 }, { "epoch": 4.06, - "learning_rate": 1.1942712484177959e-05, - "loss": 0.004, + "learning_rate": 2.195528372833227e-05, + "loss": 0.0594, "step": 86935 }, { "epoch": 4.06, - "learning_rate": 1.1942243682902819e-05, - "loss": 0.0897, + "learning_rate": 2.195481565849625e-05, + "loss": 0.0212, "step": 86940 }, { "epoch": 4.06, - "learning_rate": 1.1941774881627679e-05, - "loss": 0.0156, + "learning_rate": 2.1954347588660227e-05, + "loss": 0.0379, "step": 86945 }, { "epoch": 4.06, - "learning_rate": 1.1941306080352539e-05, - "loss": 0.0265, + "learning_rate": 2.1953879518824207e-05, + "loss": 0.029, "step": 86950 }, { "epoch": 4.06, - "learning_rate": 1.1940837279077399e-05, - "loss": 0.125, + "learning_rate": 2.195341144898819e-05, + "loss": 0.1147, "step": 86955 }, { "epoch": 4.06, - "learning_rate": 1.194036847780226e-05, - "loss": 0.1269, + "learning_rate": 2.195294337915217e-05, + "loss": 0.0553, "step": 86960 }, { "epoch": 4.06, - "learning_rate": 1.193989967652712e-05, - "loss": 0.1611, + "learning_rate": 2.195247530931615e-05, + "loss": 0.1225, "step": 86965 }, { "epoch": 4.06, - "learning_rate": 1.1939430875251982e-05, - "loss": 0.2753, + "learning_rate": 2.195200723948013e-05, + "loss": 0.0849, "step": 86970 }, { "epoch": 4.06, - "learning_rate": 1.1938962073976844e-05, - "loss": 0.1691, + "learning_rate": 2.1951539169644113e-05, + "loss": 0.2831, "step": 86975 }, { "epoch": 4.06, - "learning_rate": 1.1938493272701704e-05, - "loss": 0.0731, + "learning_rate": 2.1951071099808092e-05, + "loss": 0.1125, "step": 86980 }, { "epoch": 4.06, - "learning_rate": 1.1938024471426563e-05, - "loss": 0.0174, + "learning_rate": 2.1950603029972072e-05, + "loss": 0.0421, "step": 86985 }, { "epoch": 4.06, - "learning_rate": 1.1937555670151423e-05, - "loss": 0.0531, + "learning_rate": 2.1950134960136052e-05, + "loss": 0.0399, "step": 86990 }, { "epoch": 4.06, - "learning_rate": 1.1937086868876285e-05, - "loss": 0.1098, + "learning_rate": 2.1949666890300035e-05, + "loss": 0.0132, "step": 86995 }, { "epoch": 4.06, - "learning_rate": 1.1936618067601145e-05, - "loss": 0.0883, + "learning_rate": 2.1949198820464015e-05, + "loss": 0.0767, "step": 87000 }, { "epoch": 4.06, - "learning_rate": 1.1936149266326005e-05, - "loss": 0.1077, + "learning_rate": 2.1948730750627995e-05, + "loss": 0.0926, "step": 87005 }, { "epoch": 4.06, - "learning_rate": 1.1935680465050865e-05, - "loss": 0.0682, + "learning_rate": 2.1948262680791975e-05, + "loss": 0.1099, "step": 87010 }, { "epoch": 4.06, - "learning_rate": 1.1935211663775725e-05, - "loss": 0.0773, + "learning_rate": 2.1947794610955954e-05, + "loss": 0.066, "step": 87015 }, { "epoch": 4.06, - "learning_rate": 1.1934742862500588e-05, - "loss": 0.2073, + "learning_rate": 2.1947326541119934e-05, + "loss": 0.1541, "step": 87020 }, { "epoch": 4.06, - "learning_rate": 1.1934274061225448e-05, - "loss": 0.3685, + "learning_rate": 2.1946858471283914e-05, + "loss": 0.1978, "step": 87025 }, { "epoch": 4.06, - "learning_rate": 1.1933805259950308e-05, - "loss": 0.0654, + "learning_rate": 2.1946390401447897e-05, + "loss": 0.0652, "step": 87030 }, { "epoch": 4.06, - "learning_rate": 1.193333645867517e-05, - "loss": 0.0332, + "learning_rate": 2.1945922331611877e-05, + "loss": 0.0341, "step": 87035 }, { "epoch": 4.06, - "learning_rate": 1.193286765740003e-05, - "loss": 0.0198, + "learning_rate": 2.1945454261775857e-05, + "loss": 0.013, "step": 87040 }, { "epoch": 4.06, - "learning_rate": 1.193239885612489e-05, - "loss": 0.0684, + "learning_rate": 2.1944986191939837e-05, + "loss": 0.0885, "step": 87045 }, { "epoch": 4.06, - "learning_rate": 1.193193005484975e-05, - "loss": 0.0284, + "learning_rate": 2.194451812210382e-05, + "loss": 0.046, "step": 87050 }, { "epoch": 4.06, - "learning_rate": 1.193146125357461e-05, - "loss": 0.0423, + "learning_rate": 2.19440500522678e-05, + "loss": 0.1074, "step": 87055 }, { "epoch": 4.06, - "learning_rate": 1.1930992452299471e-05, - "loss": 0.0619, + "learning_rate": 2.194358198243178e-05, + "loss": 0.1178, "step": 87060 }, { "epoch": 4.06, - "learning_rate": 1.1930523651024333e-05, - "loss": 0.157, + "learning_rate": 2.1943113912595763e-05, + "loss": 0.0808, "step": 87065 }, { "epoch": 4.06, - "learning_rate": 1.1930054849749193e-05, - "loss": 0.119, + "learning_rate": 2.194264584275974e-05, + "loss": 0.6305, "step": 87070 }, { "epoch": 4.06, - "learning_rate": 1.1929586048474054e-05, - "loss": 0.2258, + "learning_rate": 2.194217777292372e-05, + "loss": 0.2743, "step": 87075 }, { "epoch": 4.06, - "learning_rate": 1.1929117247198914e-05, - "loss": 0.0805, + "learning_rate": 2.19417097030877e-05, + "loss": 0.042, "step": 87080 }, { "epoch": 4.06, - "learning_rate": 1.1928648445923774e-05, - "loss": 0.0118, + "learning_rate": 2.1941241633251682e-05, + "loss": 0.0106, "step": 87085 }, { "epoch": 4.06, - "learning_rate": 1.1928179644648634e-05, - "loss": 0.0255, + "learning_rate": 2.1940773563415662e-05, + "loss": 0.045, "step": 87090 }, { "epoch": 4.06, - "learning_rate": 1.1927710843373494e-05, - "loss": 0.0596, + "learning_rate": 2.1940305493579642e-05, + "loss": 0.0182, "step": 87095 }, { "epoch": 4.06, - "learning_rate": 1.1927242042098356e-05, - "loss": 0.1062, + "learning_rate": 2.193983742374362e-05, + "loss": 0.0446, "step": 87100 }, { "epoch": 4.06, - "learning_rate": 1.1926773240823215e-05, - "loss": 0.0723, + "learning_rate": 2.1939369353907605e-05, + "loss": 0.1292, "step": 87105 }, { "epoch": 4.06, - "learning_rate": 1.1926304439548077e-05, - "loss": 0.1052, + "learning_rate": 2.1938901284071585e-05, + "loss": 0.1061, "step": 87110 }, { "epoch": 4.06, - "learning_rate": 1.1925835638272939e-05, - "loss": 0.1319, + "learning_rate": 2.1938433214235564e-05, + "loss": 0.0849, "step": 87115 }, { "epoch": 4.07, - "learning_rate": 1.1925366836997799e-05, - "loss": 0.2386, + "learning_rate": 2.1937965144399548e-05, + "loss": 0.1324, "step": 87120 }, { "epoch": 4.07, - "learning_rate": 1.1924898035722659e-05, - "loss": 0.4077, + "learning_rate": 2.1937497074563527e-05, + "loss": 0.3246, "step": 87125 }, { "epoch": 4.07, - "learning_rate": 1.1924429234447519e-05, - "loss": 0.0996, + "learning_rate": 2.1937029004727507e-05, + "loss": 0.0622, "step": 87130 }, { "epoch": 4.07, - "learning_rate": 1.1923960433172378e-05, - "loss": 0.0111, + "learning_rate": 2.1936560934891484e-05, + "loss": 0.0168, "step": 87135 }, { "epoch": 4.07, - "learning_rate": 1.192349163189724e-05, - "loss": 0.0213, + "learning_rate": 2.1936092865055467e-05, + "loss": 0.0214, "step": 87140 }, { "epoch": 4.07, - "learning_rate": 1.19230228306221e-05, - "loss": 0.0942, + "learning_rate": 2.1935624795219447e-05, + "loss": 0.0228, "step": 87145 }, { "epoch": 4.07, - "learning_rate": 1.192255402934696e-05, - "loss": 0.0585, + "learning_rate": 2.1935156725383427e-05, + "loss": 0.0372, "step": 87150 }, { "epoch": 4.07, - "learning_rate": 1.192208522807182e-05, - "loss": 0.09, + "learning_rate": 2.1934688655547406e-05, + "loss": 0.061, "step": 87155 }, { "epoch": 4.07, - "learning_rate": 1.1921616426796683e-05, - "loss": 0.1602, + "learning_rate": 2.193422058571139e-05, + "loss": 0.0847, "step": 87160 }, { "epoch": 4.07, - "learning_rate": 1.1921147625521543e-05, - "loss": 0.0582, + "learning_rate": 2.193375251587537e-05, + "loss": 0.1317, "step": 87165 }, { "epoch": 4.07, - "learning_rate": 1.1920678824246403e-05, - "loss": 0.1511, + "learning_rate": 2.193328444603935e-05, + "loss": 0.0881, "step": 87170 }, { "epoch": 4.07, - "learning_rate": 1.1920210022971263e-05, - "loss": 0.1997, + "learning_rate": 2.193281637620333e-05, + "loss": 0.261, "step": 87175 }, { "epoch": 4.07, - "learning_rate": 1.1919741221696125e-05, - "loss": 0.1347, + "learning_rate": 2.1932348306367312e-05, + "loss": 0.0653, "step": 87180 }, { "epoch": 4.07, - "learning_rate": 1.1919272420420985e-05, - "loss": 0.0469, + "learning_rate": 2.1931880236531292e-05, + "loss": 0.0742, "step": 87185 }, { "epoch": 4.07, - "learning_rate": 1.1918803619145844e-05, - "loss": 0.0572, + "learning_rate": 2.1931412166695272e-05, + "loss": 0.0278, "step": 87190 }, { "epoch": 4.07, - "learning_rate": 1.1918334817870704e-05, - "loss": 0.0742, + "learning_rate": 2.1930944096859255e-05, + "loss": 0.075, "step": 87195 }, { "epoch": 4.07, - "learning_rate": 1.1917866016595564e-05, - "loss": 0.0608, + "learning_rate": 2.193047602702323e-05, + "loss": 0.0737, "step": 87200 }, { "epoch": 4.07, - "learning_rate": 1.1917397215320428e-05, - "loss": 0.08, + "learning_rate": 2.193000795718721e-05, + "loss": 0.0793, "step": 87205 }, { "epoch": 4.07, - "learning_rate": 1.1916928414045288e-05, - "loss": 0.0855, + "learning_rate": 2.192953988735119e-05, + "loss": 0.0816, "step": 87210 }, { "epoch": 4.07, - "learning_rate": 1.1916459612770148e-05, - "loss": 0.0958, + "learning_rate": 2.1929071817515174e-05, + "loss": 0.1442, "step": 87215 }, { "epoch": 4.07, - "learning_rate": 1.191599081149501e-05, - "loss": 0.2308, + "learning_rate": 2.1928603747679154e-05, + "loss": 0.2249, "step": 87220 }, { "epoch": 4.07, - "learning_rate": 1.1915522010219869e-05, - "loss": 0.2714, + "learning_rate": 2.1928135677843134e-05, + "loss": 0.2571, "step": 87225 }, { "epoch": 4.07, - "learning_rate": 1.1915053208944729e-05, - "loss": 0.0695, + "learning_rate": 2.1927667608007114e-05, + "loss": 0.0943, "step": 87230 }, { "epoch": 4.07, - "learning_rate": 1.1914584407669589e-05, - "loss": 0.009, + "learning_rate": 2.1927199538171097e-05, + "loss": 0.0076, "step": 87235 }, { "epoch": 4.07, - "learning_rate": 1.1914115606394449e-05, - "loss": 0.0141, + "learning_rate": 2.1926731468335077e-05, + "loss": 0.0705, "step": 87240 }, { "epoch": 4.07, - "learning_rate": 1.191364680511931e-05, - "loss": 0.0225, + "learning_rate": 2.1926263398499057e-05, + "loss": 0.0141, "step": 87245 }, { "epoch": 4.07, - "learning_rate": 1.1913178003844172e-05, - "loss": 0.0423, + "learning_rate": 2.192579532866304e-05, + "loss": 0.0194, "step": 87250 }, { "epoch": 4.07, - "learning_rate": 1.1912709202569032e-05, - "loss": 0.0474, + "learning_rate": 2.192532725882702e-05, + "loss": 0.0351, "step": 87255 }, { "epoch": 4.07, - "learning_rate": 1.1912240401293894e-05, - "loss": 0.0693, + "learning_rate": 2.1924859188990996e-05, + "loss": 0.0547, "step": 87260 }, { "epoch": 4.07, - "learning_rate": 1.1911771600018754e-05, - "loss": 0.0688, + "learning_rate": 2.1924391119154976e-05, + "loss": 0.1083, "step": 87265 }, { "epoch": 4.07, - "learning_rate": 1.1911302798743614e-05, - "loss": 0.1978, + "learning_rate": 2.192392304931896e-05, + "loss": 0.1001, "step": 87270 }, { "epoch": 4.07, - "learning_rate": 1.1910833997468474e-05, - "loss": 0.3268, + "learning_rate": 2.192345497948294e-05, + "loss": 0.3542, "step": 87275 }, { "epoch": 4.07, - "learning_rate": 1.1910365196193333e-05, - "loss": 0.1005, + "learning_rate": 2.192298690964692e-05, + "loss": 0.0621, "step": 87280 }, { "epoch": 4.07, - "learning_rate": 1.1909896394918195e-05, - "loss": 0.024, + "learning_rate": 2.19225188398109e-05, + "loss": 0.0152, "step": 87285 }, { "epoch": 4.07, - "learning_rate": 1.1909427593643055e-05, - "loss": 0.0411, + "learning_rate": 2.1922050769974882e-05, + "loss": 0.0173, "step": 87290 }, { "epoch": 4.07, - "learning_rate": 1.1908958792367917e-05, - "loss": 0.0444, + "learning_rate": 2.192158270013886e-05, + "loss": 0.0596, "step": 87295 }, { "epoch": 4.07, - "learning_rate": 1.1908489991092778e-05, - "loss": 0.0589, + "learning_rate": 2.192111463030284e-05, + "loss": 0.0848, "step": 87300 }, { "epoch": 4.07, - "learning_rate": 1.1908021189817638e-05, - "loss": 0.0539, + "learning_rate": 2.1920646560466825e-05, + "loss": 0.0542, "step": 87305 }, { "epoch": 4.07, - "learning_rate": 1.1907552388542498e-05, - "loss": 0.1432, + "learning_rate": 2.1920178490630804e-05, + "loss": 0.0972, "step": 87310 }, { "epoch": 4.07, - "learning_rate": 1.1907083587267358e-05, - "loss": 0.0775, + "learning_rate": 2.1919710420794784e-05, + "loss": 0.1462, "step": 87315 }, { "epoch": 4.07, - "learning_rate": 1.1906614785992218e-05, - "loss": 0.1898, + "learning_rate": 2.1919242350958764e-05, + "loss": 0.1395, "step": 87320 }, { "epoch": 4.07, - "learning_rate": 1.190614598471708e-05, - "loss": 0.3632, + "learning_rate": 2.1918774281122744e-05, + "loss": 0.1881, "step": 87325 }, { "epoch": 4.07, - "learning_rate": 1.190567718344194e-05, - "loss": 0.0627, + "learning_rate": 2.1918306211286724e-05, + "loss": 0.0871, "step": 87330 }, { "epoch": 4.08, - "learning_rate": 1.19052083821668e-05, - "loss": 0.0112, + "learning_rate": 2.1917838141450703e-05, + "loss": 0.0091, "step": 87335 }, { "epoch": 4.08, - "learning_rate": 1.190473958089166e-05, - "loss": 0.0243, + "learning_rate": 2.1917370071614683e-05, + "loss": 0.0282, "step": 87340 }, { "epoch": 4.08, - "learning_rate": 1.1904270779616523e-05, - "loss": 0.0383, + "learning_rate": 2.1916902001778667e-05, + "loss": 0.0229, "step": 87345 }, { "epoch": 4.08, - "learning_rate": 1.1903801978341383e-05, - "loss": 0.0393, + "learning_rate": 2.1916433931942646e-05, + "loss": 0.0495, "step": 87350 }, { "epoch": 4.08, - "learning_rate": 1.1903333177066243e-05, - "loss": 0.0794, + "learning_rate": 2.1915965862106626e-05, + "loss": 0.1327, "step": 87355 }, { "epoch": 4.08, - "learning_rate": 1.1902864375791103e-05, - "loss": 0.0875, + "learning_rate": 2.1915497792270606e-05, + "loss": 0.0523, "step": 87360 }, { "epoch": 4.08, - "learning_rate": 1.1902395574515964e-05, - "loss": 0.0768, + "learning_rate": 2.191502972243459e-05, + "loss": 0.1189, "step": 87365 }, { "epoch": 4.08, - "learning_rate": 1.1901926773240824e-05, - "loss": 0.137, + "learning_rate": 2.191456165259857e-05, + "loss": 0.1822, "step": 87370 }, { "epoch": 4.08, - "learning_rate": 1.1901457971965684e-05, - "loss": 0.2204, + "learning_rate": 2.191409358276255e-05, + "loss": 0.3843, "step": 87375 }, { "epoch": 4.08, - "learning_rate": 1.1900989170690544e-05, - "loss": 0.0299, + "learning_rate": 2.1913625512926532e-05, + "loss": 0.1062, "step": 87380 }, { "epoch": 4.08, - "learning_rate": 1.1900520369415406e-05, - "loss": 0.0461, + "learning_rate": 2.1913157443090512e-05, + "loss": 0.0128, "step": 87385 }, { "epoch": 4.08, - "learning_rate": 1.1900051568140267e-05, - "loss": 0.0408, + "learning_rate": 2.1912689373254488e-05, + "loss": 0.016, "step": 87390 }, { "epoch": 4.08, - "learning_rate": 1.1899582766865127e-05, - "loss": 0.0258, + "learning_rate": 2.1912221303418468e-05, + "loss": 0.0543, "step": 87395 }, { "epoch": 4.08, - "learning_rate": 1.1899113965589987e-05, - "loss": 0.0267, + "learning_rate": 2.191175323358245e-05, + "loss": 0.0188, "step": 87400 }, { "epoch": 4.08, - "learning_rate": 1.1898645164314849e-05, - "loss": 0.0925, + "learning_rate": 2.191128516374643e-05, + "loss": 0.0385, "step": 87405 }, { "epoch": 4.08, - "learning_rate": 1.1898176363039709e-05, - "loss": 0.0924, + "learning_rate": 2.191081709391041e-05, + "loss": 0.0836, "step": 87410 }, { "epoch": 4.08, - "learning_rate": 1.1897707561764569e-05, - "loss": 0.109, + "learning_rate": 2.191034902407439e-05, + "loss": 0.0773, "step": 87415 }, { "epoch": 4.08, - "learning_rate": 1.1897238760489429e-05, - "loss": 0.2402, + "learning_rate": 2.1909880954238374e-05, + "loss": 0.155, "step": 87420 }, { "epoch": 4.08, - "learning_rate": 1.189676995921429e-05, - "loss": 0.2597, + "learning_rate": 2.1909412884402354e-05, + "loss": 0.2735, "step": 87425 }, { "epoch": 4.08, - "learning_rate": 1.189630115793915e-05, - "loss": 0.0844, + "learning_rate": 2.1908944814566334e-05, + "loss": 0.0835, "step": 87430 }, { "epoch": 4.08, - "learning_rate": 1.1895832356664012e-05, - "loss": 0.0202, + "learning_rate": 2.1908476744730317e-05, + "loss": 0.013, "step": 87435 }, { "epoch": 4.08, - "learning_rate": 1.1895363555388872e-05, - "loss": 0.0664, + "learning_rate": 2.1908008674894297e-05, + "loss": 0.0348, "step": 87440 }, { "epoch": 4.08, - "learning_rate": 1.1894894754113733e-05, - "loss": 0.0505, + "learning_rate": 2.1907540605058276e-05, + "loss": 0.0422, "step": 87445 }, { "epoch": 4.08, - "learning_rate": 1.1894425952838593e-05, - "loss": 0.0556, + "learning_rate": 2.1907072535222253e-05, + "loss": 0.0548, "step": 87450 }, { "epoch": 4.08, - "learning_rate": 1.1893957151563453e-05, - "loss": 0.1165, + "learning_rate": 2.1906604465386236e-05, + "loss": 0.0552, "step": 87455 }, { "epoch": 4.08, - "learning_rate": 1.1893488350288313e-05, - "loss": 0.0669, + "learning_rate": 2.1906136395550216e-05, + "loss": 0.0534, "step": 87460 }, { "epoch": 4.08, - "learning_rate": 1.1893019549013175e-05, - "loss": 0.1502, + "learning_rate": 2.1905668325714196e-05, + "loss": 0.0805, "step": 87465 }, { "epoch": 4.08, - "learning_rate": 1.1892550747738035e-05, - "loss": 0.3517, + "learning_rate": 2.1905200255878175e-05, + "loss": 0.2372, "step": 87470 }, { "epoch": 4.08, - "learning_rate": 1.1892081946462895e-05, - "loss": 0.2206, + "learning_rate": 2.190473218604216e-05, + "loss": 0.3136, "step": 87475 }, { "epoch": 4.08, - "learning_rate": 1.1891613145187755e-05, - "loss": 0.0826, + "learning_rate": 2.190426411620614e-05, + "loss": 0.1033, "step": 87480 }, { "epoch": 4.08, - "learning_rate": 1.1891144343912618e-05, - "loss": 0.0292, + "learning_rate": 2.190379604637012e-05, + "loss": 0.0448, "step": 87485 }, { "epoch": 4.08, - "learning_rate": 1.1890675542637478e-05, - "loss": 0.0348, + "learning_rate": 2.19033279765341e-05, + "loss": 0.0539, "step": 87490 }, { "epoch": 4.08, - "learning_rate": 1.1890206741362338e-05, - "loss": 0.0419, + "learning_rate": 2.190285990669808e-05, + "loss": 0.0333, "step": 87495 }, { "epoch": 4.08, - "learning_rate": 1.1889737940087198e-05, - "loss": 0.0687, + "learning_rate": 2.190239183686206e-05, + "loss": 0.1014, "step": 87500 }, { "epoch": 4.08, - "learning_rate": 1.188926913881206e-05, - "loss": 0.0166, + "learning_rate": 2.190192376702604e-05, + "loss": 0.0772, "step": 87505 }, { "epoch": 4.08, - "learning_rate": 1.188880033753692e-05, - "loss": 0.0892, + "learning_rate": 2.1901455697190024e-05, + "loss": 0.0524, "step": 87510 }, { "epoch": 4.08, - "learning_rate": 1.1888331536261779e-05, - "loss": 0.1468, + "learning_rate": 2.1900987627354e-05, + "loss": 0.1289, "step": 87515 }, { "epoch": 4.08, - "learning_rate": 1.1887862734986639e-05, - "loss": 0.1371, + "learning_rate": 2.190051955751798e-05, + "loss": 0.0939, "step": 87520 }, { "epoch": 4.08, - "learning_rate": 1.1887393933711499e-05, - "loss": 0.229, + "learning_rate": 2.190005148768196e-05, + "loss": 0.2248, "step": 87525 }, { "epoch": 4.08, - "learning_rate": 1.1886925132436362e-05, - "loss": 0.1088, + "learning_rate": 2.1899583417845943e-05, + "loss": 0.0486, "step": 87530 }, { "epoch": 4.08, - "learning_rate": 1.1886456331161222e-05, - "loss": 0.0123, + "learning_rate": 2.1899115348009923e-05, + "loss": 0.0214, "step": 87535 }, { "epoch": 4.08, - "learning_rate": 1.1885987529886082e-05, - "loss": 0.0759, + "learning_rate": 2.1898647278173903e-05, + "loss": 0.0322, "step": 87540 }, { "epoch": 4.08, - "learning_rate": 1.1885518728610944e-05, - "loss": 0.03, + "learning_rate": 2.1898179208337886e-05, + "loss": 0.036, "step": 87545 }, { "epoch": 4.09, - "learning_rate": 1.1885049927335804e-05, - "loss": 0.0509, + "learning_rate": 2.1897711138501866e-05, + "loss": 0.0297, "step": 87550 }, { "epoch": 4.09, - "learning_rate": 1.1884581126060664e-05, - "loss": 0.073, + "learning_rate": 2.1897243068665846e-05, + "loss": 0.1082, "step": 87555 }, { "epoch": 4.09, - "learning_rate": 1.1884112324785524e-05, - "loss": 0.1191, + "learning_rate": 2.1896774998829826e-05, + "loss": 0.0814, "step": 87560 }, { "epoch": 4.09, - "learning_rate": 1.1883643523510384e-05, - "loss": 0.0545, + "learning_rate": 2.189630692899381e-05, + "loss": 0.1035, "step": 87565 }, { "epoch": 4.09, - "learning_rate": 1.1883174722235245e-05, - "loss": 0.0946, + "learning_rate": 2.189583885915779e-05, + "loss": 0.1442, "step": 87570 }, { "epoch": 4.09, - "learning_rate": 1.1882705920960107e-05, - "loss": 0.2369, + "learning_rate": 2.189537078932177e-05, + "loss": 0.3322, "step": 87575 }, { "epoch": 4.09, - "learning_rate": 1.1882237119684967e-05, - "loss": 0.052, + "learning_rate": 2.1894902719485745e-05, + "loss": 0.0531, "step": 87580 }, { "epoch": 4.09, - "learning_rate": 1.1881768318409828e-05, - "loss": 0.0259, + "learning_rate": 2.1894434649649728e-05, + "loss": 0.0064, "step": 87585 }, { "epoch": 4.09, - "learning_rate": 1.1881299517134688e-05, - "loss": 0.0106, + "learning_rate": 2.1893966579813708e-05, + "loss": 0.0426, "step": 87590 }, { "epoch": 4.09, - "learning_rate": 1.1880830715859548e-05, - "loss": 0.0452, + "learning_rate": 2.1893498509977688e-05, + "loss": 0.0442, "step": 87595 }, { "epoch": 4.09, - "learning_rate": 1.1880361914584408e-05, - "loss": 0.0208, + "learning_rate": 2.1893030440141668e-05, + "loss": 0.0742, "step": 87600 }, { "epoch": 4.09, - "learning_rate": 1.1879893113309268e-05, - "loss": 0.0497, + "learning_rate": 2.189256237030565e-05, + "loss": 0.0648, "step": 87605 }, { "epoch": 4.09, - "learning_rate": 1.187942431203413e-05, - "loss": 0.0882, + "learning_rate": 2.189209430046963e-05, + "loss": 0.0797, "step": 87610 }, { "epoch": 4.09, - "learning_rate": 1.187895551075899e-05, - "loss": 0.17, + "learning_rate": 2.189162623063361e-05, + "loss": 0.0806, "step": 87615 }, { "epoch": 4.09, - "learning_rate": 1.1878486709483851e-05, - "loss": 0.1611, + "learning_rate": 2.1891158160797594e-05, + "loss": 0.0947, "step": 87620 }, { "epoch": 4.09, - "learning_rate": 1.1878017908208713e-05, - "loss": 0.2782, + "learning_rate": 2.1890690090961574e-05, + "loss": 0.3165, "step": 87625 }, { "epoch": 4.09, - "learning_rate": 1.1877549106933573e-05, - "loss": 0.0584, + "learning_rate": 2.1890222021125553e-05, + "loss": 0.1064, "step": 87630 }, { "epoch": 4.09, - "learning_rate": 1.1877080305658433e-05, - "loss": 0.0585, + "learning_rate": 2.1889753951289533e-05, + "loss": 0.0323, "step": 87635 }, { "epoch": 4.09, - "learning_rate": 1.1876611504383293e-05, - "loss": 0.0534, + "learning_rate": 2.1889285881453513e-05, + "loss": 0.057, "step": 87640 }, { "epoch": 4.09, - "learning_rate": 1.1876142703108153e-05, - "loss": 0.0818, + "learning_rate": 2.1888817811617493e-05, + "loss": 0.0756, "step": 87645 }, { "epoch": 4.09, - "learning_rate": 1.1875673901833014e-05, - "loss": 0.0938, + "learning_rate": 2.1888349741781473e-05, + "loss": 0.0512, "step": 87650 }, { "epoch": 4.09, - "learning_rate": 1.1875205100557874e-05, - "loss": 0.0645, + "learning_rate": 2.1887881671945452e-05, + "loss": 0.0538, "step": 87655 }, { "epoch": 4.09, - "learning_rate": 1.1874736299282734e-05, - "loss": 0.058, + "learning_rate": 2.1887413602109436e-05, + "loss": 0.0438, "step": 87660 }, { "epoch": 4.09, - "learning_rate": 1.1874267498007594e-05, - "loss": 0.1157, + "learning_rate": 2.1886945532273415e-05, + "loss": 0.1384, "step": 87665 }, { "epoch": 4.09, - "learning_rate": 1.1873798696732457e-05, - "loss": 0.1999, + "learning_rate": 2.1886477462437395e-05, + "loss": 0.0906, "step": 87670 }, { "epoch": 4.09, - "learning_rate": 1.1873329895457317e-05, - "loss": 0.3274, + "learning_rate": 2.188600939260138e-05, + "loss": 0.2356, "step": 87675 }, { "epoch": 4.09, - "learning_rate": 1.1872861094182177e-05, - "loss": 0.121, + "learning_rate": 2.188554132276536e-05, + "loss": 0.1064, "step": 87680 }, { "epoch": 4.09, - "learning_rate": 1.1872392292907037e-05, - "loss": 0.0206, + "learning_rate": 2.1885073252929338e-05, + "loss": 0.0119, "step": 87685 }, { "epoch": 4.09, - "learning_rate": 1.1871923491631899e-05, - "loss": 0.0536, + "learning_rate": 2.1884605183093318e-05, + "loss": 0.0214, "step": 87690 }, { "epoch": 4.09, - "learning_rate": 1.1871454690356759e-05, - "loss": 0.054, + "learning_rate": 2.18841371132573e-05, + "loss": 0.0345, "step": 87695 }, { "epoch": 4.09, - "learning_rate": 1.1870985889081619e-05, - "loss": 0.0702, + "learning_rate": 2.188366904342128e-05, + "loss": 0.0691, "step": 87700 }, { "epoch": 4.09, - "learning_rate": 1.1870517087806479e-05, - "loss": 0.0894, + "learning_rate": 2.1883200973585257e-05, + "loss": 0.0322, "step": 87705 }, { "epoch": 4.09, - "learning_rate": 1.1870048286531339e-05, - "loss": 0.0561, + "learning_rate": 2.1882732903749237e-05, + "loss": 0.0243, "step": 87710 }, { "epoch": 4.09, - "learning_rate": 1.1869579485256202e-05, - "loss": 0.1382, + "learning_rate": 2.188226483391322e-05, + "loss": 0.091, "step": 87715 }, { "epoch": 4.09, - "learning_rate": 1.1869110683981062e-05, - "loss": 0.0828, + "learning_rate": 2.18817967640772e-05, + "loss": 0.2202, "step": 87720 }, { "epoch": 4.09, - "learning_rate": 1.1868641882705922e-05, - "loss": 0.2575, + "learning_rate": 2.188132869424118e-05, + "loss": 0.3173, "step": 87725 }, { "epoch": 4.09, - "learning_rate": 1.1868173081430783e-05, - "loss": 0.105, + "learning_rate": 2.1880860624405163e-05, + "loss": 0.0484, "step": 87730 }, { "epoch": 4.09, - "learning_rate": 1.1867704280155643e-05, - "loss": 0.0239, + "learning_rate": 2.1880392554569143e-05, + "loss": 0.026, "step": 87735 }, { "epoch": 4.09, - "learning_rate": 1.1867235478880503e-05, - "loss": 0.031, + "learning_rate": 2.1879924484733123e-05, + "loss": 0.0215, "step": 87740 }, { "epoch": 4.09, - "learning_rate": 1.1866766677605363e-05, - "loss": 0.0219, + "learning_rate": 2.1879456414897103e-05, + "loss": 0.0208, "step": 87745 }, { "epoch": 4.09, - "learning_rate": 1.1866297876330223e-05, - "loss": 0.0578, + "learning_rate": 2.1878988345061086e-05, + "loss": 0.0373, "step": 87750 }, { "epoch": 4.09, - "learning_rate": 1.1865829075055085e-05, - "loss": 0.0634, + "learning_rate": 2.1878520275225066e-05, + "loss": 0.0263, "step": 87755 }, { "epoch": 4.1, - "learning_rate": 1.1865360273779946e-05, - "loss": 0.0727, + "learning_rate": 2.1878052205389046e-05, + "loss": 0.146, "step": 87760 }, { "epoch": 4.1, - "learning_rate": 1.1864891472504806e-05, - "loss": 0.1068, + "learning_rate": 2.1877584135553025e-05, + "loss": 0.0693, "step": 87765 }, { "epoch": 4.1, - "learning_rate": 1.1864422671229668e-05, - "loss": 0.2169, + "learning_rate": 2.1877116065717005e-05, + "loss": 0.1801, "step": 87770 }, { "epoch": 4.1, - "learning_rate": 1.1863953869954528e-05, - "loss": 0.3113, + "learning_rate": 2.1876647995880985e-05, + "loss": 0.2143, "step": 87775 }, { "epoch": 4.1, - "learning_rate": 1.1863485068679388e-05, - "loss": 0.0885, + "learning_rate": 2.1876179926044965e-05, + "loss": 0.1182, "step": 87780 }, { "epoch": 4.1, - "learning_rate": 1.1863016267404248e-05, - "loss": 0.0041, + "learning_rate": 2.1875711856208945e-05, + "loss": 0.0247, "step": 87785 }, { "epoch": 4.1, - "learning_rate": 1.1862547466129108e-05, - "loss": 0.0168, + "learning_rate": 2.1875243786372928e-05, + "loss": 0.0559, "step": 87790 }, { "epoch": 4.1, - "learning_rate": 1.186207866485397e-05, - "loss": 0.0328, + "learning_rate": 2.1874775716536908e-05, + "loss": 0.0396, "step": 87795 }, { "epoch": 4.1, - "learning_rate": 1.186160986357883e-05, - "loss": 0.0349, + "learning_rate": 2.1874307646700888e-05, + "loss": 0.0341, "step": 87800 }, { "epoch": 4.1, - "learning_rate": 1.186114106230369e-05, - "loss": 0.0933, + "learning_rate": 2.187383957686487e-05, + "loss": 0.1028, "step": 87805 }, { "epoch": 4.1, - "learning_rate": 1.1860672261028552e-05, - "loss": 0.0872, + "learning_rate": 2.187337150702885e-05, + "loss": 0.0494, "step": 87810 }, { "epoch": 4.1, - "learning_rate": 1.1860203459753412e-05, - "loss": 0.0645, + "learning_rate": 2.187290343719283e-05, + "loss": 0.1219, "step": 87815 }, { "epoch": 4.1, - "learning_rate": 1.1859734658478272e-05, - "loss": 0.0875, + "learning_rate": 2.187243536735681e-05, + "loss": 0.1796, "step": 87820 }, { "epoch": 4.1, - "learning_rate": 1.1859265857203132e-05, - "loss": 0.3078, + "learning_rate": 2.1871967297520793e-05, + "loss": 0.3003, "step": 87825 }, { "epoch": 4.1, - "learning_rate": 1.1858797055927992e-05, - "loss": 0.0885, + "learning_rate": 2.187149922768477e-05, + "loss": 0.0731, "step": 87830 }, { "epoch": 4.1, - "learning_rate": 1.1858328254652854e-05, - "loss": 0.0082, + "learning_rate": 2.187103115784875e-05, + "loss": 0.0606, "step": 87835 }, { "epoch": 4.1, - "learning_rate": 1.1857859453377714e-05, - "loss": 0.0125, + "learning_rate": 2.187056308801273e-05, + "loss": 0.0478, "step": 87840 }, { "epoch": 4.1, - "learning_rate": 1.1857390652102574e-05, - "loss": 0.0745, + "learning_rate": 2.1870095018176713e-05, + "loss": 0.019, "step": 87845 }, { "epoch": 4.1, - "learning_rate": 1.1856921850827434e-05, - "loss": 0.0627, + "learning_rate": 2.1869626948340692e-05, + "loss": 0.0303, "step": 87850 }, { "epoch": 4.1, - "learning_rate": 1.1856453049552297e-05, - "loss": 0.1043, + "learning_rate": 2.1869158878504672e-05, + "loss": 0.0509, "step": 87855 }, { "epoch": 4.1, - "learning_rate": 1.1855984248277157e-05, - "loss": 0.1207, + "learning_rate": 2.1868690808668655e-05, + "loss": 0.0908, "step": 87860 }, { "epoch": 4.1, - "learning_rate": 1.1855515447002017e-05, - "loss": 0.0784, + "learning_rate": 2.1868222738832635e-05, + "loss": 0.214, "step": 87865 }, { "epoch": 4.1, - "learning_rate": 1.1855046645726877e-05, - "loss": 0.0709, + "learning_rate": 2.1867754668996615e-05, + "loss": 0.26, "step": 87870 }, { "epoch": 4.1, - "learning_rate": 1.1854577844451738e-05, - "loss": 0.3188, + "learning_rate": 2.1867286599160595e-05, + "loss": 0.208, "step": 87875 }, { "epoch": 4.1, - "learning_rate": 1.1854109043176598e-05, - "loss": 0.097, + "learning_rate": 2.1866818529324578e-05, + "loss": 0.0766, "step": 87880 }, { "epoch": 4.1, - "learning_rate": 1.1853640241901458e-05, - "loss": 0.0212, + "learning_rate": 2.1866350459488558e-05, + "loss": 0.0329, "step": 87885 }, { "epoch": 4.1, - "learning_rate": 1.1853171440626318e-05, - "loss": 0.0323, + "learning_rate": 2.1865882389652538e-05, + "loss": 0.0285, "step": 87890 }, { "epoch": 4.1, - "learning_rate": 1.185270263935118e-05, - "loss": 0.0622, + "learning_rate": 2.1865414319816514e-05, + "loss": 0.0366, "step": 87895 }, { "epoch": 4.1, - "learning_rate": 1.1852233838076041e-05, - "loss": 0.0712, + "learning_rate": 2.1864946249980497e-05, + "loss": 0.0579, "step": 87900 }, { "epoch": 4.1, - "learning_rate": 1.1851765036800901e-05, - "loss": 0.0905, + "learning_rate": 2.1864478180144477e-05, + "loss": 0.0613, "step": 87905 }, { "epoch": 4.1, - "learning_rate": 1.1851296235525761e-05, - "loss": 0.1088, + "learning_rate": 2.1864010110308457e-05, + "loss": 0.0535, "step": 87910 }, { "epoch": 4.1, - "learning_rate": 1.1850827434250623e-05, - "loss": 0.1603, + "learning_rate": 2.186354204047244e-05, + "loss": 0.0822, "step": 87915 }, { "epoch": 4.1, - "learning_rate": 1.1850358632975483e-05, - "loss": 0.1569, + "learning_rate": 2.186307397063642e-05, + "loss": 0.1147, "step": 87920 }, { "epoch": 4.1, - "learning_rate": 1.1849889831700343e-05, - "loss": 0.4106, + "learning_rate": 2.18626059008004e-05, + "loss": 0.1806, "step": 87925 }, { "epoch": 4.1, - "learning_rate": 1.1849421030425203e-05, - "loss": 0.0888, + "learning_rate": 2.186213783096438e-05, + "loss": 0.0769, "step": 87930 }, { "epoch": 4.1, - "learning_rate": 1.1848952229150064e-05, - "loss": 0.0147, + "learning_rate": 2.1861669761128363e-05, + "loss": 0.0466, "step": 87935 }, { "epoch": 4.1, - "learning_rate": 1.1848483427874924e-05, - "loss": 0.0204, + "learning_rate": 2.1861201691292343e-05, + "loss": 0.0462, "step": 87940 }, { "epoch": 4.1, - "learning_rate": 1.1848014626599786e-05, - "loss": 0.0484, + "learning_rate": 2.1860733621456323e-05, + "loss": 0.0318, "step": 87945 }, { "epoch": 4.1, - "learning_rate": 1.1847545825324646e-05, - "loss": 0.0552, + "learning_rate": 2.1860265551620302e-05, + "loss": 0.0303, "step": 87950 }, { "epoch": 4.1, - "learning_rate": 1.1847077024049508e-05, - "loss": 0.1071, + "learning_rate": 2.1859797481784282e-05, + "loss": 0.0748, "step": 87955 }, { "epoch": 4.1, - "learning_rate": 1.1846608222774367e-05, - "loss": 0.0732, + "learning_rate": 2.1859329411948262e-05, + "loss": 0.1268, "step": 87960 }, { "epoch": 4.1, - "learning_rate": 1.1846139421499227e-05, - "loss": 0.1531, + "learning_rate": 2.1858861342112242e-05, + "loss": 0.0978, "step": 87965 }, { "epoch": 4.1, - "learning_rate": 1.1845670620224087e-05, - "loss": 0.1324, + "learning_rate": 2.185839327227622e-05, + "loss": 0.1581, "step": 87970 }, { "epoch": 4.11, - "learning_rate": 1.1845201818948949e-05, - "loss": 0.3174, + "learning_rate": 2.1857925202440205e-05, + "loss": 0.2448, "step": 87975 }, { "epoch": 4.11, - "learning_rate": 1.1844733017673809e-05, - "loss": 0.0821, + "learning_rate": 2.1857457132604185e-05, + "loss": 0.0661, "step": 87980 }, { "epoch": 4.11, - "learning_rate": 1.1844264216398669e-05, - "loss": 0.0447, + "learning_rate": 2.1856989062768164e-05, + "loss": 0.005, "step": 87985 }, { "epoch": 4.11, - "learning_rate": 1.1843795415123529e-05, - "loss": 0.0748, + "learning_rate": 2.1856520992932148e-05, + "loss": 0.0197, "step": 87990 }, { "epoch": 4.11, - "learning_rate": 1.1843326613848392e-05, - "loss": 0.0336, + "learning_rate": 2.1856052923096128e-05, + "loss": 0.0265, "step": 87995 }, { "epoch": 4.11, - "learning_rate": 1.1842857812573252e-05, - "loss": 0.0854, + "learning_rate": 2.1855584853260107e-05, + "loss": 0.0648, "step": 88000 }, { "epoch": 4.11, - "learning_rate": 1.1842389011298112e-05, - "loss": 0.084, + "learning_rate": 2.1855116783424087e-05, + "loss": 0.0491, "step": 88005 }, { "epoch": 4.11, - "learning_rate": 1.1841920210022972e-05, - "loss": 0.08, + "learning_rate": 2.185464871358807e-05, + "loss": 0.0998, "step": 88010 }, { "epoch": 4.11, - "learning_rate": 1.1841451408747833e-05, - "loss": 0.1339, + "learning_rate": 2.185418064375205e-05, + "loss": 0.1069, "step": 88015 }, { "epoch": 4.11, - "learning_rate": 1.1840982607472693e-05, - "loss": 0.1688, + "learning_rate": 2.1853712573916027e-05, + "loss": 0.1824, "step": 88020 }, { "epoch": 4.11, - "learning_rate": 1.1840513806197553e-05, - "loss": 0.2253, + "learning_rate": 2.1853244504080006e-05, + "loss": 0.2942, "step": 88025 }, { "epoch": 4.11, - "learning_rate": 1.1840045004922413e-05, - "loss": 0.0564, + "learning_rate": 2.185277643424399e-05, + "loss": 0.0556, "step": 88030 }, { "epoch": 4.11, - "learning_rate": 1.1839576203647273e-05, - "loss": 0.0427, + "learning_rate": 2.185230836440797e-05, + "loss": 0.0353, "step": 88035 }, { "epoch": 4.11, - "learning_rate": 1.1839107402372137e-05, - "loss": 0.0204, + "learning_rate": 2.185184029457195e-05, + "loss": 0.0175, "step": 88040 }, { "epoch": 4.11, - "learning_rate": 1.1838638601096996e-05, - "loss": 0.0069, + "learning_rate": 2.1851372224735932e-05, + "loss": 0.0413, "step": 88045 }, { "epoch": 4.11, - "learning_rate": 1.1838169799821856e-05, - "loss": 0.0884, + "learning_rate": 2.1850904154899912e-05, + "loss": 0.021, "step": 88050 }, { "epoch": 4.11, - "learning_rate": 1.1837700998546718e-05, - "loss": 0.1743, + "learning_rate": 2.1850436085063892e-05, + "loss": 0.0665, "step": 88055 }, { "epoch": 4.11, - "learning_rate": 1.1837232197271578e-05, - "loss": 0.0626, + "learning_rate": 2.1849968015227872e-05, + "loss": 0.0666, "step": 88060 }, { "epoch": 4.11, - "learning_rate": 1.1836763395996438e-05, - "loss": 0.1187, + "learning_rate": 2.1849499945391855e-05, + "loss": 0.1015, "step": 88065 }, { "epoch": 4.11, - "learning_rate": 1.1836294594721298e-05, - "loss": 0.114, + "learning_rate": 2.1849031875555835e-05, + "loss": 0.1513, "step": 88070 }, { "epoch": 4.11, - "learning_rate": 1.1835825793446158e-05, - "loss": 0.2401, + "learning_rate": 2.1848563805719815e-05, + "loss": 0.3774, "step": 88075 }, { "epoch": 4.11, - "learning_rate": 1.183535699217102e-05, - "loss": 0.0934, + "learning_rate": 2.1848095735883795e-05, + "loss": 0.0613, "step": 88080 }, { "epoch": 4.11, - "learning_rate": 1.1834888190895881e-05, - "loss": 0.0237, + "learning_rate": 2.1847627666047774e-05, + "loss": 0.0135, "step": 88085 }, { "epoch": 4.11, - "learning_rate": 1.1834419389620741e-05, - "loss": 0.0265, + "learning_rate": 2.1847159596211754e-05, + "loss": 0.0367, "step": 88090 }, { "epoch": 4.11, - "learning_rate": 1.1833950588345603e-05, - "loss": 0.0286, + "learning_rate": 2.1846691526375734e-05, + "loss": 0.0242, "step": 88095 }, { "epoch": 4.11, - "learning_rate": 1.1833481787070463e-05, - "loss": 0.0595, + "learning_rate": 2.1846223456539717e-05, + "loss": 0.0937, "step": 88100 }, { "epoch": 4.11, - "learning_rate": 1.1833012985795322e-05, - "loss": 0.1194, + "learning_rate": 2.1845755386703697e-05, + "loss": 0.0497, "step": 88105 }, { "epoch": 4.11, - "learning_rate": 1.1832544184520182e-05, - "loss": 0.0379, + "learning_rate": 2.1845287316867677e-05, + "loss": 0.0457, "step": 88110 }, { "epoch": 4.11, - "learning_rate": 1.1832075383245042e-05, - "loss": 0.0703, + "learning_rate": 2.1844819247031657e-05, + "loss": 0.1698, "step": 88115 }, { "epoch": 4.11, - "learning_rate": 1.1831606581969904e-05, - "loss": 0.1492, + "learning_rate": 2.184435117719564e-05, + "loss": 0.0903, "step": 88120 }, { "epoch": 4.11, - "learning_rate": 1.1831137780694764e-05, - "loss": 0.2666, + "learning_rate": 2.184388310735962e-05, + "loss": 0.2749, "step": 88125 }, { "epoch": 4.11, - "learning_rate": 1.1830668979419624e-05, - "loss": 0.1059, + "learning_rate": 2.18434150375236e-05, + "loss": 0.0893, "step": 88130 }, { "epoch": 4.11, - "learning_rate": 1.1830200178144487e-05, - "loss": 0.0229, + "learning_rate": 2.184294696768758e-05, + "loss": 0.0374, "step": 88135 }, { "epoch": 4.11, - "learning_rate": 1.1829731376869347e-05, - "loss": 0.0534, + "learning_rate": 2.1842478897851563e-05, + "loss": 0.012, "step": 88140 }, { "epoch": 4.11, - "learning_rate": 1.1829262575594207e-05, - "loss": 0.0788, + "learning_rate": 2.184201082801554e-05, + "loss": 0.0376, "step": 88145 }, { "epoch": 4.11, - "learning_rate": 1.1828793774319067e-05, - "loss": 0.0502, + "learning_rate": 2.184154275817952e-05, + "loss": 0.0954, "step": 88150 }, { "epoch": 4.11, - "learning_rate": 1.1828324973043927e-05, - "loss": 0.0303, + "learning_rate": 2.18410746883435e-05, + "loss": 0.0626, "step": 88155 }, { "epoch": 4.11, - "learning_rate": 1.1827856171768789e-05, - "loss": 0.1054, + "learning_rate": 2.1840606618507482e-05, + "loss": 0.0993, "step": 88160 }, { "epoch": 4.11, - "learning_rate": 1.1827387370493648e-05, - "loss": 0.0756, + "learning_rate": 2.184013854867146e-05, + "loss": 0.0609, "step": 88165 }, { "epoch": 4.11, - "learning_rate": 1.1826918569218508e-05, - "loss": 0.1272, + "learning_rate": 2.183967047883544e-05, + "loss": 0.1246, "step": 88170 }, { "epoch": 4.11, - "learning_rate": 1.1826449767943368e-05, - "loss": 0.2779, + "learning_rate": 2.1839202408999425e-05, + "loss": 0.2616, "step": 88175 }, { "epoch": 4.11, - "learning_rate": 1.1825980966668232e-05, - "loss": 0.0624, + "learning_rate": 2.1838734339163404e-05, + "loss": 0.0367, "step": 88180 }, { "epoch": 4.11, - "learning_rate": 1.1825512165393092e-05, - "loss": 0.038, + "learning_rate": 2.1838266269327384e-05, + "loss": 0.033, "step": 88185 }, { "epoch": 4.12, - "learning_rate": 1.1825043364117951e-05, - "loss": 0.015, + "learning_rate": 2.1837798199491364e-05, + "loss": 0.0365, "step": 88190 }, { "epoch": 4.12, - "learning_rate": 1.1824574562842811e-05, - "loss": 0.0463, + "learning_rate": 2.1837330129655347e-05, + "loss": 0.0582, "step": 88195 }, { "epoch": 4.12, - "learning_rate": 1.1824105761567673e-05, - "loss": 0.0571, + "learning_rate": 2.1836862059819327e-05, + "loss": 0.0645, "step": 88200 }, { "epoch": 4.12, - "learning_rate": 1.1823636960292533e-05, - "loss": 0.1115, + "learning_rate": 2.1836393989983307e-05, + "loss": 0.0592, "step": 88205 }, { "epoch": 4.12, - "learning_rate": 1.1823168159017393e-05, - "loss": 0.1003, + "learning_rate": 2.1835925920147283e-05, + "loss": 0.0666, "step": 88210 }, { "epoch": 4.12, - "learning_rate": 1.1822699357742253e-05, - "loss": 0.1697, + "learning_rate": 2.1835457850311267e-05, + "loss": 0.1248, "step": 88215 }, { "epoch": 4.12, - "learning_rate": 1.1822230556467113e-05, - "loss": 0.1415, + "learning_rate": 2.1834989780475246e-05, + "loss": 0.1172, "step": 88220 }, { "epoch": 4.12, - "learning_rate": 1.1821761755191976e-05, - "loss": 0.4007, + "learning_rate": 2.1834521710639226e-05, + "loss": 0.205, "step": 88225 }, { "epoch": 4.12, - "learning_rate": 1.1821292953916836e-05, - "loss": 0.0847, + "learning_rate": 2.183405364080321e-05, + "loss": 0.0789, "step": 88230 }, { "epoch": 4.12, - "learning_rate": 1.1820824152641696e-05, - "loss": 0.0565, + "learning_rate": 2.183358557096719e-05, + "loss": 0.0394, "step": 88235 }, { "epoch": 4.12, - "learning_rate": 1.1820355351366558e-05, - "loss": 0.045, + "learning_rate": 2.183311750113117e-05, + "loss": 0.0545, "step": 88240 }, { "epoch": 4.12, - "learning_rate": 1.1819886550091418e-05, - "loss": 0.0273, + "learning_rate": 2.183264943129515e-05, + "loss": 0.0358, "step": 88245 }, { "epoch": 4.12, - "learning_rate": 1.1819417748816277e-05, - "loss": 0.0581, + "learning_rate": 2.1832181361459132e-05, + "loss": 0.0385, "step": 88250 }, { "epoch": 4.12, - "learning_rate": 1.1818948947541137e-05, - "loss": 0.0596, + "learning_rate": 2.1831713291623112e-05, + "loss": 0.1018, "step": 88255 }, { "epoch": 4.12, - "learning_rate": 1.1818480146265997e-05, - "loss": 0.104, + "learning_rate": 2.1831245221787092e-05, + "loss": 0.0701, "step": 88260 }, { "epoch": 4.12, - "learning_rate": 1.1818011344990859e-05, - "loss": 0.066, + "learning_rate": 2.183077715195107e-05, + "loss": 0.1283, "step": 88265 }, { "epoch": 4.12, - "learning_rate": 1.181754254371572e-05, - "loss": 0.1752, + "learning_rate": 2.1830309082115055e-05, + "loss": 0.1503, "step": 88270 }, { "epoch": 4.12, - "learning_rate": 1.181707374244058e-05, - "loss": 0.1842, + "learning_rate": 2.182984101227903e-05, + "loss": 0.2098, "step": 88275 }, { "epoch": 4.12, - "learning_rate": 1.1816604941165442e-05, - "loss": 0.0878, + "learning_rate": 2.182937294244301e-05, + "loss": 0.0773, "step": 88280 }, { "epoch": 4.12, - "learning_rate": 1.1816136139890302e-05, - "loss": 0.0034, + "learning_rate": 2.1828904872606994e-05, + "loss": 0.0548, "step": 88285 }, { "epoch": 4.12, - "learning_rate": 1.1815667338615162e-05, - "loss": 0.0331, + "learning_rate": 2.1828436802770974e-05, + "loss": 0.017, "step": 88290 }, { "epoch": 4.12, - "learning_rate": 1.1815198537340022e-05, - "loss": 0.062, + "learning_rate": 2.1827968732934954e-05, + "loss": 0.0223, "step": 88295 }, { "epoch": 4.12, - "learning_rate": 1.1814729736064882e-05, - "loss": 0.0553, + "learning_rate": 2.1827500663098934e-05, + "loss": 0.0547, "step": 88300 }, { "epoch": 4.12, - "learning_rate": 1.1814260934789744e-05, - "loss": 0.0708, + "learning_rate": 2.1827032593262917e-05, + "loss": 0.0946, "step": 88305 }, { "epoch": 4.12, - "learning_rate": 1.1813792133514603e-05, - "loss": 0.118, + "learning_rate": 2.1826564523426897e-05, + "loss": 0.0936, "step": 88310 }, { "epoch": 4.12, - "learning_rate": 1.1813323332239463e-05, - "loss": 0.031, + "learning_rate": 2.1826096453590876e-05, + "loss": 0.122, "step": 88315 }, { "epoch": 4.12, - "learning_rate": 1.1812854530964327e-05, - "loss": 0.2011, + "learning_rate": 2.1825628383754856e-05, + "loss": 0.1302, "step": 88320 }, { "epoch": 4.12, - "learning_rate": 1.1812385729689187e-05, - "loss": 0.2144, + "learning_rate": 2.182516031391884e-05, + "loss": 0.2529, "step": 88325 }, { "epoch": 4.12, - "learning_rate": 1.1811916928414047e-05, - "loss": 0.0985, + "learning_rate": 2.182469224408282e-05, + "loss": 0.1053, "step": 88330 }, { "epoch": 4.12, - "learning_rate": 1.1811448127138907e-05, - "loss": 0.0294, + "learning_rate": 2.1824224174246796e-05, + "loss": 0.0328, "step": 88335 }, { "epoch": 4.12, - "learning_rate": 1.1810979325863766e-05, - "loss": 0.0454, + "learning_rate": 2.182375610441078e-05, + "loss": 0.0339, "step": 88340 }, { "epoch": 4.12, - "learning_rate": 1.1810510524588628e-05, - "loss": 0.0536, + "learning_rate": 2.182328803457476e-05, + "loss": 0.0635, "step": 88345 }, { "epoch": 4.12, - "learning_rate": 1.1810041723313488e-05, - "loss": 0.0362, + "learning_rate": 2.182281996473874e-05, + "loss": 0.0453, "step": 88350 }, { "epoch": 4.12, - "learning_rate": 1.1809572922038348e-05, - "loss": 0.0595, + "learning_rate": 2.182235189490272e-05, + "loss": 0.1195, "step": 88355 }, { "epoch": 4.12, - "learning_rate": 1.1809104120763208e-05, - "loss": 0.1038, + "learning_rate": 2.18218838250667e-05, + "loss": 0.0468, "step": 88360 }, { "epoch": 4.12, - "learning_rate": 1.1808635319488071e-05, - "loss": 0.1226, + "learning_rate": 2.182141575523068e-05, + "loss": 0.0791, "step": 88365 }, { "epoch": 4.12, - "learning_rate": 1.1808166518212931e-05, - "loss": 0.1687, + "learning_rate": 2.182094768539466e-05, + "loss": 0.1459, "step": 88370 }, { "epoch": 4.12, - "learning_rate": 1.1807697716937791e-05, - "loss": 0.2789, + "learning_rate": 2.182047961555864e-05, + "loss": 0.3577, "step": 88375 }, { "epoch": 4.12, - "learning_rate": 1.1807228915662651e-05, - "loss": 0.1013, + "learning_rate": 2.1820011545722624e-05, + "loss": 0.1202, "step": 88380 }, { "epoch": 4.12, - "learning_rate": 1.1806760114387513e-05, - "loss": 0.0386, + "learning_rate": 2.1819543475886604e-05, + "loss": 0.0049, "step": 88385 }, { "epoch": 4.12, - "learning_rate": 1.1806291313112373e-05, - "loss": 0.059, + "learning_rate": 2.1819075406050584e-05, + "loss": 0.0209, "step": 88390 }, { "epoch": 4.12, - "learning_rate": 1.1805822511837232e-05, - "loss": 0.0712, + "learning_rate": 2.1818607336214564e-05, + "loss": 0.0265, "step": 88395 }, { "epoch": 4.12, - "learning_rate": 1.1805353710562092e-05, - "loss": 0.0891, + "learning_rate": 2.1818139266378544e-05, + "loss": 0.0638, "step": 88400 }, { "epoch": 4.13, - "learning_rate": 1.1804884909286954e-05, - "loss": 0.0921, + "learning_rate": 2.1817671196542523e-05, + "loss": 0.1006, "step": 88405 }, { "epoch": 4.13, - "learning_rate": 1.1804416108011816e-05, - "loss": 0.083, + "learning_rate": 2.1817203126706503e-05, + "loss": 0.0618, "step": 88410 }, { "epoch": 4.13, - "learning_rate": 1.1803947306736676e-05, - "loss": 0.053, + "learning_rate": 2.1816735056870486e-05, + "loss": 0.1018, "step": 88415 }, { "epoch": 4.13, - "learning_rate": 1.1803478505461537e-05, - "loss": 0.1004, + "learning_rate": 2.1816266987034466e-05, + "loss": 0.1326, "step": 88420 }, { "epoch": 4.13, - "learning_rate": 1.1803009704186397e-05, - "loss": 0.3463, + "learning_rate": 2.1815798917198446e-05, + "loss": 0.219, "step": 88425 }, { "epoch": 4.13, - "learning_rate": 1.1802540902911257e-05, - "loss": 0.0652, + "learning_rate": 2.1815330847362426e-05, + "loss": 0.0574, "step": 88430 }, { "epoch": 4.13, - "learning_rate": 1.1802072101636117e-05, - "loss": 0.0415, + "learning_rate": 2.181486277752641e-05, + "loss": 0.0596, "step": 88435 }, { "epoch": 4.13, - "learning_rate": 1.1801603300360977e-05, - "loss": 0.0458, + "learning_rate": 2.181439470769039e-05, + "loss": 0.035, "step": 88440 }, { "epoch": 4.13, - "learning_rate": 1.1801134499085839e-05, - "loss": 0.0418, + "learning_rate": 2.181392663785437e-05, + "loss": 0.0561, "step": 88445 }, { "epoch": 4.13, - "learning_rate": 1.1800665697810699e-05, - "loss": 0.0401, + "learning_rate": 2.181345856801835e-05, + "loss": 0.0482, "step": 88450 }, { "epoch": 4.13, - "learning_rate": 1.1800196896535558e-05, - "loss": 0.0574, + "learning_rate": 2.1812990498182332e-05, + "loss": 0.0277, "step": 88455 }, { "epoch": 4.13, - "learning_rate": 1.1799728095260422e-05, - "loss": 0.0761, + "learning_rate": 2.1812522428346308e-05, + "loss": 0.1141, "step": 88460 }, { "epoch": 4.13, - "learning_rate": 1.1799259293985282e-05, - "loss": 0.0803, + "learning_rate": 2.1812054358510288e-05, + "loss": 0.1254, "step": 88465 }, { "epoch": 4.13, - "learning_rate": 1.1798790492710142e-05, - "loss": 0.1139, + "learning_rate": 2.181158628867427e-05, + "loss": 0.1962, "step": 88470 }, { "epoch": 4.13, - "learning_rate": 1.1798321691435002e-05, - "loss": 0.2541, + "learning_rate": 2.181111821883825e-05, + "loss": 0.3182, "step": 88475 }, { "epoch": 4.13, - "learning_rate": 1.1797852890159862e-05, - "loss": 0.0633, + "learning_rate": 2.181065014900223e-05, + "loss": 0.1179, "step": 88480 }, { "epoch": 4.13, - "learning_rate": 1.1797384088884723e-05, - "loss": 0.0298, + "learning_rate": 2.181018207916621e-05, + "loss": 0.0225, "step": 88485 }, { "epoch": 4.13, - "learning_rate": 1.1796915287609583e-05, - "loss": 0.0426, + "learning_rate": 2.1809714009330194e-05, + "loss": 0.0354, "step": 88490 }, { "epoch": 4.13, - "learning_rate": 1.1796446486334443e-05, - "loss": 0.0683, + "learning_rate": 2.1809245939494174e-05, + "loss": 0.0275, "step": 88495 }, { "epoch": 4.13, - "learning_rate": 1.1795977685059303e-05, - "loss": 0.0408, + "learning_rate": 2.1808777869658153e-05, + "loss": 0.1133, "step": 88500 }, { "epoch": 4.13, - "learning_rate": 1.1795508883784166e-05, - "loss": 0.0765, + "learning_rate": 2.1808309799822133e-05, + "loss": 0.0807, "step": 88505 }, { "epoch": 4.13, - "learning_rate": 1.1795040082509026e-05, - "loss": 0.0715, + "learning_rate": 2.1807841729986116e-05, + "loss": 0.1142, "step": 88510 }, { "epoch": 4.13, - "learning_rate": 1.1794571281233886e-05, - "loss": 0.12, + "learning_rate": 2.1807373660150096e-05, + "loss": 0.1437, "step": 88515 }, { "epoch": 4.13, - "learning_rate": 1.1794102479958746e-05, - "loss": 0.1333, + "learning_rate": 2.1806905590314076e-05, + "loss": 0.2475, "step": 88520 }, { "epoch": 4.13, - "learning_rate": 1.1793633678683608e-05, - "loss": 0.2048, + "learning_rate": 2.1806437520478056e-05, + "loss": 0.249, "step": 88525 }, { "epoch": 4.13, - "learning_rate": 1.1793164877408468e-05, - "loss": 0.1168, + "learning_rate": 2.1805969450642036e-05, + "loss": 0.0684, "step": 88530 }, { "epoch": 4.13, - "learning_rate": 1.1792696076133328e-05, - "loss": 0.0439, + "learning_rate": 2.1805501380806016e-05, + "loss": 0.0623, "step": 88535 }, { "epoch": 4.13, - "learning_rate": 1.1792227274858188e-05, - "loss": 0.0645, + "learning_rate": 2.1805033310969995e-05, + "loss": 0.0238, "step": 88540 }, { "epoch": 4.13, - "learning_rate": 1.1791758473583047e-05, - "loss": 0.0506, + "learning_rate": 2.180456524113398e-05, + "loss": 0.0648, "step": 88545 }, { "epoch": 4.13, - "learning_rate": 1.179128967230791e-05, - "loss": 0.025, + "learning_rate": 2.180409717129796e-05, + "loss": 0.0934, "step": 88550 }, { "epoch": 4.13, - "learning_rate": 1.179082087103277e-05, - "loss": 0.0817, + "learning_rate": 2.1803629101461938e-05, + "loss": 0.0596, "step": 88555 }, { "epoch": 4.13, - "learning_rate": 1.179035206975763e-05, - "loss": 0.1231, + "learning_rate": 2.1803161031625918e-05, + "loss": 0.1362, "step": 88560 }, { "epoch": 4.13, - "learning_rate": 1.1789883268482492e-05, - "loss": 0.1282, + "learning_rate": 2.18026929617899e-05, + "loss": 0.1312, "step": 88565 }, { "epoch": 4.13, - "learning_rate": 1.1789414467207352e-05, - "loss": 0.2506, + "learning_rate": 2.180222489195388e-05, + "loss": 0.1537, "step": 88570 }, { "epoch": 4.13, - "learning_rate": 1.1788945665932212e-05, - "loss": 0.3161, + "learning_rate": 2.180175682211786e-05, + "loss": 0.2068, "step": 88575 }, { "epoch": 4.13, - "learning_rate": 1.1788476864657072e-05, - "loss": 0.0732, + "learning_rate": 2.180128875228184e-05, + "loss": 0.0961, "step": 88580 }, { "epoch": 4.13, - "learning_rate": 1.1788008063381932e-05, - "loss": 0.0347, + "learning_rate": 2.1800820682445824e-05, + "loss": 0.0204, "step": 88585 }, { "epoch": 4.13, - "learning_rate": 1.1787539262106794e-05, - "loss": 0.0442, + "learning_rate": 2.18003526126098e-05, + "loss": 0.0245, "step": 88590 }, { "epoch": 4.13, - "learning_rate": 1.1787070460831655e-05, - "loss": 0.0373, + "learning_rate": 2.179988454277378e-05, + "loss": 0.0264, "step": 88595 }, { "epoch": 4.13, - "learning_rate": 1.1786601659556515e-05, - "loss": 0.1332, + "learning_rate": 2.1799416472937763e-05, + "loss": 0.0634, "step": 88600 }, { "epoch": 4.13, - "learning_rate": 1.1786132858281377e-05, - "loss": 0.0915, + "learning_rate": 2.1798948403101743e-05, + "loss": 0.0785, "step": 88605 }, { "epoch": 4.13, - "learning_rate": 1.1785664057006237e-05, - "loss": 0.0812, + "learning_rate": 2.1798480333265723e-05, + "loss": 0.0399, "step": 88610 }, { "epoch": 4.13, - "learning_rate": 1.1785195255731097e-05, - "loss": 0.2585, + "learning_rate": 2.1798012263429703e-05, + "loss": 0.1062, "step": 88615 }, { "epoch": 4.14, - "learning_rate": 1.1784726454455957e-05, - "loss": 0.1337, + "learning_rate": 2.1797544193593686e-05, + "loss": 0.1316, "step": 88620 }, { "epoch": 4.14, - "learning_rate": 1.1784257653180817e-05, - "loss": 0.243, + "learning_rate": 2.1797076123757666e-05, + "loss": 0.2738, "step": 88625 }, { "epoch": 4.14, - "learning_rate": 1.1783788851905678e-05, - "loss": 0.0926, + "learning_rate": 2.1796608053921646e-05, + "loss": 0.07, "step": 88630 }, { "epoch": 4.14, - "learning_rate": 1.1783320050630538e-05, - "loss": 0.0232, + "learning_rate": 2.1796139984085625e-05, + "loss": 0.0344, "step": 88635 }, { "epoch": 4.14, - "learning_rate": 1.1782851249355398e-05, - "loss": 0.0341, + "learning_rate": 2.179567191424961e-05, + "loss": 0.0885, "step": 88640 }, { "epoch": 4.14, - "learning_rate": 1.1782382448080261e-05, - "loss": 0.0606, + "learning_rate": 2.179520384441359e-05, + "loss": 0.0217, "step": 88645 }, { "epoch": 4.14, - "learning_rate": 1.1781913646805121e-05, - "loss": 0.0525, + "learning_rate": 2.1794735774577565e-05, + "loss": 0.0689, "step": 88650 }, { "epoch": 4.14, - "learning_rate": 1.1781444845529981e-05, - "loss": 0.0898, + "learning_rate": 2.1794267704741548e-05, + "loss": 0.0456, "step": 88655 }, { "epoch": 4.14, - "learning_rate": 1.1780976044254841e-05, - "loss": 0.1552, + "learning_rate": 2.1793799634905528e-05, + "loss": 0.0803, "step": 88660 }, { "epoch": 4.14, - "learning_rate": 1.1780507242979701e-05, - "loss": 0.0678, + "learning_rate": 2.1793331565069508e-05, + "loss": 0.1219, "step": 88665 }, { "epoch": 4.14, - "learning_rate": 1.1780038441704563e-05, - "loss": 0.1634, + "learning_rate": 2.1792863495233488e-05, + "loss": 0.0822, "step": 88670 }, { "epoch": 4.14, - "learning_rate": 1.1779569640429423e-05, - "loss": 0.2653, + "learning_rate": 2.179239542539747e-05, + "loss": 0.1919, "step": 88675 }, { "epoch": 4.14, - "learning_rate": 1.1779100839154283e-05, - "loss": 0.0456, + "learning_rate": 2.179192735556145e-05, + "loss": 0.082, "step": 88680 }, { "epoch": 4.14, - "learning_rate": 1.1778632037879143e-05, - "loss": 0.0381, + "learning_rate": 2.179145928572543e-05, + "loss": 0.0335, "step": 88685 }, { "epoch": 4.14, - "learning_rate": 1.1778163236604006e-05, - "loss": 0.0179, + "learning_rate": 2.179099121588941e-05, + "loss": 0.0163, "step": 88690 }, { "epoch": 4.14, - "learning_rate": 1.1777694435328866e-05, - "loss": 0.0408, + "learning_rate": 2.1790523146053393e-05, + "loss": 0.0161, "step": 88695 }, { "epoch": 4.14, - "learning_rate": 1.1777225634053726e-05, - "loss": 0.062, + "learning_rate": 2.1790055076217373e-05, + "loss": 0.0843, "step": 88700 }, { "epoch": 4.14, - "learning_rate": 1.1776756832778586e-05, - "loss": 0.0861, + "learning_rate": 2.1789587006381353e-05, + "loss": 0.104, "step": 88705 }, { "epoch": 4.14, - "learning_rate": 1.1776288031503447e-05, - "loss": 0.1181, + "learning_rate": 2.1789118936545336e-05, + "loss": 0.1546, "step": 88710 }, { "epoch": 4.14, - "learning_rate": 1.1775819230228307e-05, - "loss": 0.1903, + "learning_rate": 2.1788650866709313e-05, + "loss": 0.0725, "step": 88715 }, { "epoch": 4.14, - "learning_rate": 1.1775350428953167e-05, - "loss": 0.109, + "learning_rate": 2.1788182796873293e-05, + "loss": 0.0892, "step": 88720 }, { "epoch": 4.14, - "learning_rate": 1.1774881627678027e-05, - "loss": 0.2255, + "learning_rate": 2.1787714727037272e-05, + "loss": 0.1866, "step": 88725 }, { "epoch": 4.14, - "learning_rate": 1.1774412826402887e-05, - "loss": 0.0749, + "learning_rate": 2.1787246657201256e-05, + "loss": 0.097, "step": 88730 }, { "epoch": 4.14, - "learning_rate": 1.177394402512775e-05, - "loss": 0.0773, + "learning_rate": 2.1786778587365235e-05, + "loss": 0.0241, "step": 88735 }, { "epoch": 4.14, - "learning_rate": 1.177347522385261e-05, - "loss": 0.0129, + "learning_rate": 2.1786310517529215e-05, + "loss": 0.0293, "step": 88740 }, { "epoch": 4.14, - "learning_rate": 1.177300642257747e-05, - "loss": 0.063, + "learning_rate": 2.1785842447693195e-05, + "loss": 0.0791, "step": 88745 }, { "epoch": 4.14, - "learning_rate": 1.1772537621302332e-05, - "loss": 0.0323, + "learning_rate": 2.1785374377857178e-05, + "loss": 0.07, "step": 88750 }, { "epoch": 4.14, - "learning_rate": 1.1772068820027192e-05, - "loss": 0.087, + "learning_rate": 2.1784906308021158e-05, + "loss": 0.0167, "step": 88755 }, { "epoch": 4.14, - "learning_rate": 1.1771600018752052e-05, - "loss": 0.152, + "learning_rate": 2.1784438238185138e-05, + "loss": 0.0704, "step": 88760 }, { "epoch": 4.14, - "learning_rate": 1.1771131217476912e-05, - "loss": 0.169, + "learning_rate": 2.1783970168349118e-05, + "loss": 0.1413, "step": 88765 }, { "epoch": 4.14, - "learning_rate": 1.1770662416201773e-05, - "loss": 0.1258, + "learning_rate": 2.17835020985131e-05, + "loss": 0.3223, "step": 88770 }, { "epoch": 4.14, - "learning_rate": 1.1770193614926633e-05, - "loss": 0.2229, + "learning_rate": 2.178303402867708e-05, + "loss": 0.2282, "step": 88775 }, { "epoch": 4.14, - "learning_rate": 1.1769724813651493e-05, - "loss": 0.059, + "learning_rate": 2.1782565958841057e-05, + "loss": 0.088, "step": 88780 }, { "epoch": 4.14, - "learning_rate": 1.1769256012376355e-05, - "loss": 0.0488, + "learning_rate": 2.178209788900504e-05, + "loss": 0.03, "step": 88785 }, { "epoch": 4.14, - "learning_rate": 1.1768787211101216e-05, - "loss": 0.0462, + "learning_rate": 2.178162981916902e-05, + "loss": 0.0158, "step": 88790 }, { "epoch": 4.14, - "learning_rate": 1.1768318409826076e-05, - "loss": 0.0595, + "learning_rate": 2.1781161749333e-05, + "loss": 0.0577, "step": 88795 }, { "epoch": 4.14, - "learning_rate": 1.1767849608550936e-05, - "loss": 0.0647, + "learning_rate": 2.178069367949698e-05, + "loss": 0.1158, "step": 88800 }, { "epoch": 4.14, - "learning_rate": 1.1767380807275796e-05, - "loss": 0.0766, + "learning_rate": 2.1780225609660963e-05, + "loss": 0.0852, "step": 88805 }, { "epoch": 4.14, - "learning_rate": 1.1766912006000658e-05, - "loss": 0.0546, + "learning_rate": 2.1779757539824943e-05, + "loss": 0.1088, "step": 88810 }, { "epoch": 4.14, - "learning_rate": 1.1766443204725518e-05, - "loss": 0.0309, + "learning_rate": 2.1779289469988923e-05, + "loss": 0.1481, "step": 88815 }, { "epoch": 4.14, - "learning_rate": 1.1765974403450378e-05, - "loss": 0.0951, + "learning_rate": 2.1778821400152902e-05, + "loss": 0.173, "step": 88820 }, { "epoch": 4.14, - "learning_rate": 1.1765505602175238e-05, - "loss": 0.1486, + "learning_rate": 2.1778353330316886e-05, + "loss": 0.3599, "step": 88825 }, { "epoch": 4.14, - "learning_rate": 1.1765036800900101e-05, - "loss": 0.0767, + "learning_rate": 2.1777885260480865e-05, + "loss": 0.0814, "step": 88830 }, { "epoch": 4.15, - "learning_rate": 1.1764567999624961e-05, - "loss": 0.0409, + "learning_rate": 2.1777417190644845e-05, + "loss": 0.0078, "step": 88835 }, { "epoch": 4.15, - "learning_rate": 1.176409919834982e-05, - "loss": 0.0294, + "learning_rate": 2.1776949120808825e-05, + "loss": 0.0553, "step": 88840 }, { "epoch": 4.15, - "learning_rate": 1.176363039707468e-05, - "loss": 0.0727, + "learning_rate": 2.1776481050972805e-05, + "loss": 0.0302, "step": 88845 }, { "epoch": 4.15, - "learning_rate": 1.1763161595799542e-05, - "loss": 0.0128, + "learning_rate": 2.1776012981136785e-05, + "loss": 0.107, "step": 88850 }, { "epoch": 4.15, - "learning_rate": 1.1762692794524402e-05, - "loss": 0.0553, + "learning_rate": 2.1775544911300765e-05, + "loss": 0.0551, "step": 88855 }, { "epoch": 4.15, - "learning_rate": 1.1762223993249262e-05, - "loss": 0.0832, + "learning_rate": 2.1775076841464748e-05, + "loss": 0.1123, "step": 88860 }, { "epoch": 4.15, - "learning_rate": 1.1761755191974122e-05, - "loss": 0.0824, + "learning_rate": 2.1774608771628728e-05, + "loss": 0.1512, "step": 88865 }, { "epoch": 4.15, - "learning_rate": 1.1761286390698982e-05, - "loss": 0.1037, + "learning_rate": 2.1774140701792707e-05, + "loss": 0.1058, "step": 88870 }, { "epoch": 4.15, - "learning_rate": 1.1760817589423845e-05, - "loss": 0.4061, + "learning_rate": 2.1773672631956687e-05, + "loss": 0.2026, "step": 88875 }, { "epoch": 4.15, - "learning_rate": 1.1760348788148705e-05, - "loss": 0.1145, + "learning_rate": 2.177320456212067e-05, + "loss": 0.1047, "step": 88880 }, { "epoch": 4.15, - "learning_rate": 1.1759879986873565e-05, - "loss": 0.2368, + "learning_rate": 2.177273649228465e-05, + "loss": 0.0143, "step": 88885 }, { "epoch": 4.15, - "learning_rate": 1.1759411185598427e-05, - "loss": 0.0173, + "learning_rate": 2.177226842244863e-05, + "loss": 0.0326, "step": 88890 }, { "epoch": 4.15, - "learning_rate": 1.1758942384323287e-05, - "loss": 0.0621, + "learning_rate": 2.1771800352612613e-05, + "loss": 0.0739, "step": 88895 }, { "epoch": 4.15, - "learning_rate": 1.1758473583048147e-05, - "loss": 0.0346, + "learning_rate": 2.1771332282776593e-05, + "loss": 0.0464, "step": 88900 }, { "epoch": 4.15, - "learning_rate": 1.1758004781773007e-05, - "loss": 0.0817, + "learning_rate": 2.177086421294057e-05, + "loss": 0.0793, "step": 88905 }, { "epoch": 4.15, - "learning_rate": 1.1757535980497867e-05, - "loss": 0.1684, + "learning_rate": 2.177039614310455e-05, + "loss": 0.2582, "step": 88910 }, { "epoch": 4.15, - "learning_rate": 1.1757067179222728e-05, - "loss": 0.1637, + "learning_rate": 2.1769928073268533e-05, + "loss": 0.036, "step": 88915 }, { "epoch": 4.15, - "learning_rate": 1.175659837794759e-05, - "loss": 0.1615, + "learning_rate": 2.1769460003432512e-05, + "loss": 0.1929, "step": 88920 }, { "epoch": 4.15, - "learning_rate": 1.175612957667245e-05, - "loss": 0.1003, + "learning_rate": 2.1768991933596492e-05, + "loss": 0.2144, "step": 88925 }, { "epoch": 4.15, - "learning_rate": 1.1755660775397311e-05, - "loss": 0.1181, + "learning_rate": 2.1768523863760472e-05, + "loss": 0.0616, "step": 88930 }, { "epoch": 4.15, - "learning_rate": 1.1755191974122171e-05, - "loss": 0.0264, + "learning_rate": 2.1768055793924455e-05, + "loss": 0.0204, "step": 88935 }, { "epoch": 4.15, - "learning_rate": 1.1754723172847031e-05, - "loss": 0.0501, + "learning_rate": 2.1767587724088435e-05, + "loss": 0.0454, "step": 88940 }, { "epoch": 4.15, - "learning_rate": 1.1754254371571891e-05, - "loss": 0.0896, + "learning_rate": 2.1767119654252415e-05, + "loss": 0.0573, "step": 88945 }, { "epoch": 4.15, - "learning_rate": 1.1753785570296751e-05, - "loss": 0.0177, + "learning_rate": 2.1766651584416398e-05, + "loss": 0.0399, "step": 88950 }, { "epoch": 4.15, - "learning_rate": 1.1753316769021613e-05, - "loss": 0.0574, + "learning_rate": 2.1766183514580378e-05, + "loss": 0.0646, "step": 88955 }, { "epoch": 4.15, - "learning_rate": 1.1752847967746473e-05, - "loss": 0.0604, + "learning_rate": 2.1765715444744358e-05, + "loss": 0.0499, "step": 88960 }, { "epoch": 4.15, - "learning_rate": 1.1752379166471333e-05, - "loss": 0.1172, + "learning_rate": 2.1765247374908337e-05, + "loss": 0.1148, "step": 88965 }, { "epoch": 4.15, - "learning_rate": 1.1751910365196196e-05, - "loss": 0.075, + "learning_rate": 2.1764779305072317e-05, + "loss": 0.1139, "step": 88970 }, { "epoch": 4.15, - "learning_rate": 1.1751441563921056e-05, - "loss": 0.1926, + "learning_rate": 2.1764311235236297e-05, + "loss": 0.2459, "step": 88975 }, { "epoch": 4.15, - "learning_rate": 1.1750972762645916e-05, - "loss": 0.1254, + "learning_rate": 2.1763843165400277e-05, + "loss": 0.0464, "step": 88980 }, { "epoch": 4.15, - "learning_rate": 1.1750503961370776e-05, - "loss": 0.0377, + "learning_rate": 2.1763375095564257e-05, + "loss": 0.028, "step": 88985 }, { "epoch": 4.15, - "learning_rate": 1.1750035160095636e-05, - "loss": 0.0124, + "learning_rate": 2.176290702572824e-05, + "loss": 0.03, "step": 88990 }, { "epoch": 4.15, - "learning_rate": 1.1749566358820497e-05, - "loss": 0.0527, + "learning_rate": 2.176243895589222e-05, + "loss": 0.054, "step": 88995 }, { "epoch": 4.15, - "learning_rate": 1.1749097557545357e-05, - "loss": 0.0684, + "learning_rate": 2.17619708860562e-05, + "loss": 0.0397, "step": 89000 }, { "epoch": 4.15, - "learning_rate": 1.1748628756270217e-05, - "loss": 0.036, + "learning_rate": 2.176150281622018e-05, + "loss": 0.0232, "step": 89005 }, { "epoch": 4.15, - "learning_rate": 1.1748159954995077e-05, - "loss": 0.0722, + "learning_rate": 2.1761034746384163e-05, + "loss": 0.0773, "step": 89010 }, { "epoch": 4.15, - "learning_rate": 1.174769115371994e-05, - "loss": 0.0797, + "learning_rate": 2.1760566676548142e-05, + "loss": 0.1476, "step": 89015 }, { "epoch": 4.15, - "learning_rate": 1.17472223524448e-05, - "loss": 0.1527, + "learning_rate": 2.1760098606712122e-05, + "loss": 0.1636, "step": 89020 }, { "epoch": 4.15, - "learning_rate": 1.174675355116966e-05, - "loss": 0.2188, + "learning_rate": 2.1759630536876105e-05, + "loss": 0.3525, "step": 89025 }, { "epoch": 4.15, - "learning_rate": 1.174628474989452e-05, - "loss": 0.089, + "learning_rate": 2.1759162467040082e-05, + "loss": 0.0539, "step": 89030 }, { "epoch": 4.15, - "learning_rate": 1.1745815948619382e-05, - "loss": 0.0208, + "learning_rate": 2.1758694397204062e-05, + "loss": 0.0283, "step": 89035 }, { "epoch": 4.15, - "learning_rate": 1.1745347147344242e-05, - "loss": 0.0321, + "learning_rate": 2.175822632736804e-05, + "loss": 0.0333, "step": 89040 }, { "epoch": 4.15, - "learning_rate": 1.1744878346069102e-05, - "loss": 0.0461, + "learning_rate": 2.1757758257532025e-05, + "loss": 0.032, "step": 89045 }, { "epoch": 4.16, - "learning_rate": 1.1744409544793962e-05, - "loss": 0.0726, + "learning_rate": 2.1757290187696005e-05, + "loss": 0.0216, "step": 89050 }, { "epoch": 4.16, - "learning_rate": 1.1743940743518822e-05, - "loss": 0.1387, + "learning_rate": 2.1756822117859984e-05, + "loss": 0.0727, "step": 89055 }, { "epoch": 4.16, - "learning_rate": 1.1743471942243685e-05, - "loss": 0.1153, + "learning_rate": 2.1756354048023964e-05, + "loss": 0.0753, "step": 89060 }, { "epoch": 4.16, - "learning_rate": 1.1743003140968545e-05, - "loss": 0.1698, + "learning_rate": 2.1755885978187947e-05, + "loss": 0.0786, "step": 89065 }, { "epoch": 4.16, - "learning_rate": 1.1742534339693405e-05, - "loss": 0.1889, + "learning_rate": 2.1755417908351927e-05, + "loss": 0.1136, "step": 89070 }, { "epoch": 4.16, - "learning_rate": 1.1742065538418266e-05, - "loss": 0.3885, + "learning_rate": 2.1754949838515907e-05, + "loss": 0.2164, "step": 89075 }, { "epoch": 4.16, - "learning_rate": 1.1741596737143126e-05, - "loss": 0.075, + "learning_rate": 2.175448176867989e-05, + "loss": 0.0667, "step": 89080 }, { "epoch": 4.16, - "learning_rate": 1.1741127935867986e-05, - "loss": 0.0027, + "learning_rate": 2.175401369884387e-05, + "loss": 0.037, "step": 89085 }, { "epoch": 4.16, - "learning_rate": 1.1740659134592846e-05, - "loss": 0.0209, + "learning_rate": 2.175354562900785e-05, + "loss": 0.021, "step": 89090 }, { "epoch": 4.16, - "learning_rate": 1.1740190333317706e-05, - "loss": 0.1213, + "learning_rate": 2.1753077559171826e-05, + "loss": 0.0342, "step": 89095 }, { "epoch": 4.16, - "learning_rate": 1.1739721532042568e-05, - "loss": 0.0651, + "learning_rate": 2.175260948933581e-05, + "loss": 0.0958, "step": 89100 }, { "epoch": 4.16, - "learning_rate": 1.1739252730767428e-05, - "loss": 0.0386, + "learning_rate": 2.175214141949979e-05, + "loss": 0.0325, "step": 89105 }, { "epoch": 4.16, - "learning_rate": 1.173878392949229e-05, - "loss": 0.0728, + "learning_rate": 2.175167334966377e-05, + "loss": 0.0972, "step": 89110 }, { "epoch": 4.16, - "learning_rate": 1.1738315128217151e-05, - "loss": 0.1389, + "learning_rate": 2.175120527982775e-05, + "loss": 0.1195, "step": 89115 }, { "epoch": 4.16, - "learning_rate": 1.1737846326942011e-05, - "loss": 0.1604, + "learning_rate": 2.1750737209991732e-05, + "loss": 0.1996, "step": 89120 }, { "epoch": 4.16, - "learning_rate": 1.1737377525666871e-05, - "loss": 0.292, + "learning_rate": 2.1750269140155712e-05, + "loss": 0.2093, "step": 89125 }, { "epoch": 4.16, - "learning_rate": 1.173690872439173e-05, - "loss": 0.0724, + "learning_rate": 2.1749801070319692e-05, + "loss": 0.0615, "step": 89130 }, { "epoch": 4.16, - "learning_rate": 1.173643992311659e-05, - "loss": 0.0395, + "learning_rate": 2.1749333000483675e-05, + "loss": 0.0124, "step": 89135 }, { "epoch": 4.16, - "learning_rate": 1.1735971121841452e-05, - "loss": 0.026, + "learning_rate": 2.1748864930647655e-05, + "loss": 0.0213, "step": 89140 }, { "epoch": 4.16, - "learning_rate": 1.1735502320566312e-05, - "loss": 0.0254, + "learning_rate": 2.1748396860811635e-05, + "loss": 0.0602, "step": 89145 }, { "epoch": 4.16, - "learning_rate": 1.1735033519291172e-05, - "loss": 0.0568, + "learning_rate": 2.1747928790975614e-05, + "loss": 0.0523, "step": 89150 }, { "epoch": 4.16, - "learning_rate": 1.1734564718016036e-05, - "loss": 0.051, + "learning_rate": 2.1747460721139594e-05, + "loss": 0.1304, "step": 89155 }, { "epoch": 4.16, - "learning_rate": 1.1734095916740896e-05, - "loss": 0.069, + "learning_rate": 2.1746992651303574e-05, + "loss": 0.0843, "step": 89160 }, { "epoch": 4.16, - "learning_rate": 1.1733627115465755e-05, - "loss": 0.0804, + "learning_rate": 2.1746524581467554e-05, + "loss": 0.1015, "step": 89165 }, { "epoch": 4.16, - "learning_rate": 1.1733158314190615e-05, - "loss": 0.1007, + "learning_rate": 2.1746056511631534e-05, + "loss": 0.2622, "step": 89170 }, { "epoch": 4.16, - "learning_rate": 1.1732689512915475e-05, - "loss": 0.3237, + "learning_rate": 2.1745588441795517e-05, + "loss": 0.2156, "step": 89175 }, { "epoch": 4.16, - "learning_rate": 1.1732220711640337e-05, - "loss": 0.0378, + "learning_rate": 2.1745120371959497e-05, + "loss": 0.0821, "step": 89180 }, { "epoch": 4.16, - "learning_rate": 1.1731751910365197e-05, - "loss": 0.0284, + "learning_rate": 2.1744652302123477e-05, + "loss": 0.0375, "step": 89185 }, { "epoch": 4.16, - "learning_rate": 1.1731283109090057e-05, - "loss": 0.0374, + "learning_rate": 2.1744184232287456e-05, + "loss": 0.0299, "step": 89190 }, { "epoch": 4.16, - "learning_rate": 1.1730814307814917e-05, - "loss": 0.0462, + "learning_rate": 2.174371616245144e-05, + "loss": 0.1019, "step": 89195 }, { "epoch": 4.16, - "learning_rate": 1.173034550653978e-05, - "loss": 0.0606, + "learning_rate": 2.174324809261542e-05, + "loss": 0.0202, "step": 89200 }, { "epoch": 4.16, - "learning_rate": 1.172987670526464e-05, - "loss": 0.0739, + "learning_rate": 2.17427800227794e-05, + "loss": 0.0822, "step": 89205 }, { "epoch": 4.16, - "learning_rate": 1.17294079039895e-05, - "loss": 0.0919, + "learning_rate": 2.1742311952943382e-05, + "loss": 0.1113, "step": 89210 }, { "epoch": 4.16, - "learning_rate": 1.172893910271436e-05, - "loss": 0.1291, + "learning_rate": 2.1741843883107362e-05, + "loss": 0.1077, "step": 89215 }, { "epoch": 4.16, - "learning_rate": 1.1728470301439221e-05, - "loss": 0.1922, + "learning_rate": 2.174137581327134e-05, + "loss": 0.13, "step": 89220 }, { "epoch": 4.16, - "learning_rate": 1.1728001500164081e-05, - "loss": 0.3333, + "learning_rate": 2.174090774343532e-05, + "loss": 0.3006, "step": 89225 }, { "epoch": 4.16, - "learning_rate": 1.1727532698888941e-05, - "loss": 0.095, + "learning_rate": 2.1740439673599302e-05, + "loss": 0.0962, "step": 89230 }, { "epoch": 4.16, - "learning_rate": 1.1727063897613801e-05, - "loss": 0.0126, + "learning_rate": 2.173997160376328e-05, + "loss": 0.0132, "step": 89235 }, { "epoch": 4.16, - "learning_rate": 1.1726595096338663e-05, - "loss": 0.0252, + "learning_rate": 2.173950353392726e-05, + "loss": 0.0133, "step": 89240 }, { "epoch": 4.16, - "learning_rate": 1.1726126295063525e-05, - "loss": 0.0179, + "learning_rate": 2.173903546409124e-05, + "loss": 0.0841, "step": 89245 }, { "epoch": 4.16, - "learning_rate": 1.1725657493788384e-05, - "loss": 0.0795, + "learning_rate": 2.1738567394255224e-05, + "loss": 0.0685, "step": 89250 }, { "epoch": 4.16, - "learning_rate": 1.1725188692513244e-05, - "loss": 0.1216, + "learning_rate": 2.1738099324419204e-05, + "loss": 0.1035, "step": 89255 }, { "epoch": 4.16, - "learning_rate": 1.1724719891238106e-05, - "loss": 0.0173, + "learning_rate": 2.1737631254583184e-05, + "loss": 0.0988, "step": 89260 }, { "epoch": 4.17, - "learning_rate": 1.1724251089962966e-05, - "loss": 0.1934, + "learning_rate": 2.1737163184747167e-05, + "loss": 0.066, "step": 89265 }, { "epoch": 4.17, - "learning_rate": 1.1723782288687826e-05, - "loss": 0.2105, + "learning_rate": 2.1736695114911147e-05, + "loss": 0.195, "step": 89270 }, { "epoch": 4.17, - "learning_rate": 1.1723313487412686e-05, - "loss": 0.2147, + "learning_rate": 2.1736227045075127e-05, + "loss": 0.3362, "step": 89275 }, { "epoch": 4.17, - "learning_rate": 1.1722844686137547e-05, - "loss": 0.1033, + "learning_rate": 2.1735758975239107e-05, + "loss": 0.0619, "step": 89280 }, { "epoch": 4.17, - "learning_rate": 1.1722375884862407e-05, - "loss": 0.0282, + "learning_rate": 2.1735290905403086e-05, + "loss": 0.0071, "step": 89285 }, { "epoch": 4.17, - "learning_rate": 1.1721907083587267e-05, - "loss": 0.0108, + "learning_rate": 2.1734822835567066e-05, + "loss": 0.0151, "step": 89290 }, { "epoch": 4.17, - "learning_rate": 1.1721438282312129e-05, - "loss": 0.0072, + "learning_rate": 2.1734354765731046e-05, + "loss": 0.0539, "step": 89295 }, { "epoch": 4.17, - "learning_rate": 1.172096948103699e-05, - "loss": 0.1576, + "learning_rate": 2.1733886695895026e-05, + "loss": 0.0295, "step": 89300 }, { "epoch": 4.17, - "learning_rate": 1.172050067976185e-05, - "loss": 0.0357, + "learning_rate": 2.173341862605901e-05, + "loss": 0.0296, "step": 89305 }, { "epoch": 4.17, - "learning_rate": 1.172003187848671e-05, - "loss": 0.0539, + "learning_rate": 2.173295055622299e-05, + "loss": 0.0783, "step": 89310 }, { "epoch": 4.17, - "learning_rate": 1.171956307721157e-05, - "loss": 0.1258, + "learning_rate": 2.173248248638697e-05, + "loss": 0.0658, "step": 89315 }, { "epoch": 4.17, - "learning_rate": 1.1719094275936432e-05, - "loss": 0.0998, + "learning_rate": 2.1732014416550952e-05, + "loss": 0.0973, "step": 89320 }, { "epoch": 4.17, - "learning_rate": 1.1718625474661292e-05, - "loss": 0.2966, + "learning_rate": 2.1731546346714932e-05, + "loss": 0.3352, "step": 89325 }, { "epoch": 4.17, - "learning_rate": 1.1718156673386152e-05, - "loss": 0.103, + "learning_rate": 2.173107827687891e-05, + "loss": 0.0864, "step": 89330 }, { "epoch": 4.17, - "learning_rate": 1.1717687872111012e-05, - "loss": 0.0213, + "learning_rate": 2.173061020704289e-05, + "loss": 0.031, "step": 89335 }, { "epoch": 4.17, - "learning_rate": 1.1717219070835875e-05, - "loss": 0.0429, + "learning_rate": 2.1730142137206875e-05, + "loss": 0.0503, "step": 89340 }, { "epoch": 4.17, - "learning_rate": 1.1716750269560735e-05, - "loss": 0.0503, + "learning_rate": 2.172967406737085e-05, + "loss": 0.1243, "step": 89345 }, { "epoch": 4.17, - "learning_rate": 1.1716281468285595e-05, - "loss": 0.0322, + "learning_rate": 2.172920599753483e-05, + "loss": 0.0666, "step": 89350 }, { "epoch": 4.17, - "learning_rate": 1.1715812667010455e-05, - "loss": 0.0499, + "learning_rate": 2.172873792769881e-05, + "loss": 0.1137, "step": 89355 }, { "epoch": 4.17, - "learning_rate": 1.1715343865735317e-05, - "loss": 0.1094, + "learning_rate": 2.1728269857862794e-05, + "loss": 0.1218, "step": 89360 }, { "epoch": 4.17, - "learning_rate": 1.1714875064460177e-05, - "loss": 0.1059, + "learning_rate": 2.1727801788026774e-05, + "loss": 0.1143, "step": 89365 }, { "epoch": 4.17, - "learning_rate": 1.1714406263185036e-05, - "loss": 0.3074, + "learning_rate": 2.1727333718190754e-05, + "loss": 0.1646, "step": 89370 }, { "epoch": 4.17, - "learning_rate": 1.1713937461909896e-05, - "loss": 0.2705, + "learning_rate": 2.1726865648354733e-05, + "loss": 0.1988, "step": 89375 }, { "epoch": 4.17, - "learning_rate": 1.1713468660634756e-05, - "loss": 0.0501, + "learning_rate": 2.1726397578518717e-05, + "loss": 0.0544, "step": 89380 }, { "epoch": 4.17, - "learning_rate": 1.171299985935962e-05, - "loss": 0.0824, + "learning_rate": 2.1725929508682696e-05, + "loss": 0.0151, "step": 89385 }, { "epoch": 4.17, - "learning_rate": 1.171253105808448e-05, - "loss": 0.0481, + "learning_rate": 2.1725461438846676e-05, + "loss": 0.0287, "step": 89390 }, { "epoch": 4.17, - "learning_rate": 1.171206225680934e-05, - "loss": 0.0364, + "learning_rate": 2.172499336901066e-05, + "loss": 0.024, "step": 89395 }, { "epoch": 4.17, - "learning_rate": 1.1711593455534201e-05, - "loss": 0.058, + "learning_rate": 2.172452529917464e-05, + "loss": 0.0424, "step": 89400 }, { "epoch": 4.17, - "learning_rate": 1.1711124654259061e-05, - "loss": 0.1301, + "learning_rate": 2.172405722933862e-05, + "loss": 0.1138, "step": 89405 }, { "epoch": 4.17, - "learning_rate": 1.1710655852983921e-05, - "loss": 0.0709, + "learning_rate": 2.1723589159502595e-05, + "loss": 0.0561, "step": 89410 }, { "epoch": 4.17, - "learning_rate": 1.1710187051708781e-05, - "loss": 0.1452, + "learning_rate": 2.172312108966658e-05, + "loss": 0.0907, "step": 89415 }, { "epoch": 4.17, - "learning_rate": 1.1709718250433641e-05, - "loss": 0.1915, + "learning_rate": 2.172265301983056e-05, + "loss": 0.199, "step": 89420 }, { "epoch": 4.17, - "learning_rate": 1.1709249449158502e-05, - "loss": 0.4685, + "learning_rate": 2.172218494999454e-05, + "loss": 0.2765, "step": 89425 }, { "epoch": 4.17, - "learning_rate": 1.1708780647883362e-05, - "loss": 0.092, + "learning_rate": 2.1721716880158518e-05, + "loss": 0.102, "step": 89430 }, { "epoch": 4.17, - "learning_rate": 1.1708311846608224e-05, - "loss": 0.0195, + "learning_rate": 2.17212488103225e-05, + "loss": 0.0068, "step": 89435 }, { "epoch": 4.17, - "learning_rate": 1.1707843045333086e-05, - "loss": 0.0541, + "learning_rate": 2.172078074048648e-05, + "loss": 0.04, "step": 89440 }, { "epoch": 4.17, - "learning_rate": 1.1707374244057946e-05, - "loss": 0.0734, + "learning_rate": 2.172031267065046e-05, + "loss": 0.0454, "step": 89445 }, { "epoch": 4.17, - "learning_rate": 1.1706905442782806e-05, - "loss": 0.0399, + "learning_rate": 2.1719844600814444e-05, + "loss": 0.0545, "step": 89450 }, { "epoch": 4.17, - "learning_rate": 1.1706436641507665e-05, - "loss": 0.0425, + "learning_rate": 2.1719376530978424e-05, + "loss": 0.056, "step": 89455 }, { "epoch": 4.17, - "learning_rate": 1.1705967840232525e-05, - "loss": 0.087, + "learning_rate": 2.1718908461142404e-05, + "loss": 0.1433, "step": 89460 }, { "epoch": 4.17, - "learning_rate": 1.1705499038957387e-05, - "loss": 0.0955, + "learning_rate": 2.1718440391306384e-05, + "loss": 0.1506, "step": 89465 }, { "epoch": 4.17, - "learning_rate": 1.1705030237682247e-05, - "loss": 0.1549, + "learning_rate": 2.1717972321470367e-05, + "loss": 0.1674, "step": 89470 }, { "epoch": 4.18, - "learning_rate": 1.1704561436407107e-05, - "loss": 0.2599, + "learning_rate": 2.1717504251634343e-05, + "loss": 0.2905, "step": 89475 }, { "epoch": 4.18, - "learning_rate": 1.170409263513197e-05, - "loss": 0.0578, + "learning_rate": 2.1717036181798323e-05, + "loss": 0.0621, "step": 89480 }, { "epoch": 4.18, - "learning_rate": 1.170362383385683e-05, - "loss": 0.0126, + "learning_rate": 2.1716568111962303e-05, + "loss": 0.0705, "step": 89485 }, { "epoch": 4.18, - "learning_rate": 1.170315503258169e-05, - "loss": 0.0218, + "learning_rate": 2.1716100042126286e-05, + "loss": 0.0151, "step": 89490 }, { "epoch": 4.18, - "learning_rate": 1.170268623130655e-05, - "loss": 0.0482, + "learning_rate": 2.1715631972290266e-05, + "loss": 0.0541, "step": 89495 }, { "epoch": 4.18, - "learning_rate": 1.170221743003141e-05, - "loss": 0.0779, + "learning_rate": 2.1715163902454246e-05, + "loss": 0.0387, "step": 89500 }, { "epoch": 4.18, - "learning_rate": 1.1701748628756272e-05, - "loss": 0.0651, + "learning_rate": 2.171469583261823e-05, + "loss": 0.065, "step": 89505 }, { "epoch": 4.18, - "learning_rate": 1.1701279827481132e-05, - "loss": 0.0489, + "learning_rate": 2.171422776278221e-05, + "loss": 0.1134, "step": 89510 }, { "epoch": 4.18, - "learning_rate": 1.1700811026205991e-05, - "loss": 0.0632, + "learning_rate": 2.171375969294619e-05, + "loss": 0.1499, "step": 89515 }, { "epoch": 4.18, - "learning_rate": 1.1700342224930851e-05, - "loss": 0.1013, + "learning_rate": 2.171329162311017e-05, + "loss": 0.169, "step": 89520 }, { "epoch": 4.18, - "learning_rate": 1.1699873423655715e-05, - "loss": 0.3354, + "learning_rate": 2.171282355327415e-05, + "loss": 0.2755, "step": 89525 }, { "epoch": 4.18, - "learning_rate": 1.1699404622380575e-05, - "loss": 0.0844, + "learning_rate": 2.171235548343813e-05, + "loss": 0.0621, "step": 89530 }, { "epoch": 4.18, - "learning_rate": 1.1698935821105435e-05, - "loss": 0.0415, + "learning_rate": 2.1711887413602108e-05, + "loss": 0.0102, "step": 89535 }, { "epoch": 4.18, - "learning_rate": 1.1698467019830295e-05, - "loss": 0.0494, + "learning_rate": 2.1711419343766088e-05, + "loss": 0.0266, "step": 89540 }, { "epoch": 4.18, - "learning_rate": 1.1697998218555156e-05, - "loss": 0.0566, + "learning_rate": 2.171095127393007e-05, + "loss": 0.0795, "step": 89545 }, { "epoch": 4.18, - "learning_rate": 1.1697529417280016e-05, - "loss": 0.0648, + "learning_rate": 2.171048320409405e-05, + "loss": 0.0964, "step": 89550 }, { "epoch": 4.18, - "learning_rate": 1.1697060616004876e-05, - "loss": 0.0941, + "learning_rate": 2.171001513425803e-05, + "loss": 0.0698, "step": 89555 }, { "epoch": 4.18, - "learning_rate": 1.1696591814729736e-05, - "loss": 0.1188, + "learning_rate": 2.1709547064422014e-05, + "loss": 0.1632, "step": 89560 }, { "epoch": 4.18, - "learning_rate": 1.1696123013454596e-05, - "loss": 0.1267, + "learning_rate": 2.1709078994585994e-05, + "loss": 0.0803, "step": 89565 }, { "epoch": 4.18, - "learning_rate": 1.169565421217946e-05, - "loss": 0.184, + "learning_rate": 2.1708610924749973e-05, + "loss": 0.1164, "step": 89570 }, { "epoch": 4.18, - "learning_rate": 1.1695185410904319e-05, - "loss": 0.5252, + "learning_rate": 2.1708142854913953e-05, + "loss": 0.1899, "step": 89575 }, { "epoch": 4.18, - "learning_rate": 1.1694716609629179e-05, - "loss": 0.0873, + "learning_rate": 2.1707674785077936e-05, + "loss": 0.0859, "step": 89580 }, { "epoch": 4.18, - "learning_rate": 1.169424780835404e-05, - "loss": 0.0438, + "learning_rate": 2.1707206715241916e-05, + "loss": 0.0182, "step": 89585 }, { "epoch": 4.18, - "learning_rate": 1.16937790070789e-05, - "loss": 0.063, + "learning_rate": 2.1706738645405896e-05, + "loss": 0.0512, "step": 89590 }, { "epoch": 4.18, - "learning_rate": 1.169331020580376e-05, - "loss": 0.0517, + "learning_rate": 2.1706270575569876e-05, + "loss": 0.0563, "step": 89595 }, { "epoch": 4.18, - "learning_rate": 1.169284140452862e-05, - "loss": 0.0161, + "learning_rate": 2.1705802505733856e-05, + "loss": 0.1255, "step": 89600 }, { "epoch": 4.18, - "learning_rate": 1.169237260325348e-05, - "loss": 0.0823, + "learning_rate": 2.1705334435897835e-05, + "loss": 0.0428, "step": 89605 }, { "epoch": 4.18, - "learning_rate": 1.1691903801978342e-05, - "loss": 0.0543, + "learning_rate": 2.1704866366061815e-05, + "loss": 0.0301, "step": 89610 }, { "epoch": 4.18, - "learning_rate": 1.1691435000703202e-05, - "loss": 0.0828, + "learning_rate": 2.1704398296225795e-05, + "loss": 0.126, "step": 89615 }, { "epoch": 4.18, - "learning_rate": 1.1690966199428064e-05, - "loss": 0.2017, + "learning_rate": 2.170393022638978e-05, + "loss": 0.194, "step": 89620 }, { "epoch": 4.18, - "learning_rate": 1.1690497398152925e-05, - "loss": 0.3275, + "learning_rate": 2.1703462156553758e-05, + "loss": 0.1766, "step": 89625 }, { "epoch": 4.18, - "learning_rate": 1.1690028596877785e-05, - "loss": 0.0912, + "learning_rate": 2.1702994086717738e-05, + "loss": 0.0466, "step": 89630 }, { "epoch": 4.18, - "learning_rate": 1.1689559795602645e-05, - "loss": 0.0215, + "learning_rate": 2.170252601688172e-05, + "loss": 0.0341, "step": 89635 }, { "epoch": 4.18, - "learning_rate": 1.1689090994327505e-05, - "loss": 0.0207, + "learning_rate": 2.17020579470457e-05, + "loss": 0.0101, "step": 89640 }, { "epoch": 4.18, - "learning_rate": 1.1688622193052365e-05, - "loss": 0.0594, + "learning_rate": 2.170158987720968e-05, + "loss": 0.0302, "step": 89645 }, { "epoch": 4.18, - "learning_rate": 1.1688153391777227e-05, - "loss": 0.0652, + "learning_rate": 2.170112180737366e-05, + "loss": 0.02, "step": 89650 }, { "epoch": 4.18, - "learning_rate": 1.1687684590502087e-05, - "loss": 0.0914, + "learning_rate": 2.1700653737537644e-05, + "loss": 0.1171, "step": 89655 }, { "epoch": 4.18, - "learning_rate": 1.1687215789226946e-05, - "loss": 0.088, + "learning_rate": 2.170018566770162e-05, + "loss": 0.0718, "step": 89660 }, { "epoch": 4.18, - "learning_rate": 1.168674698795181e-05, - "loss": 0.1658, + "learning_rate": 2.16997175978656e-05, + "loss": 0.1474, "step": 89665 }, { "epoch": 4.18, - "learning_rate": 1.168627818667667e-05, - "loss": 0.1631, + "learning_rate": 2.169924952802958e-05, + "loss": 0.1317, "step": 89670 }, { "epoch": 4.18, - "learning_rate": 1.168580938540153e-05, - "loss": 0.1616, + "learning_rate": 2.1698781458193563e-05, + "loss": 0.3305, "step": 89675 }, { "epoch": 4.18, - "learning_rate": 1.168534058412639e-05, - "loss": 0.0536, + "learning_rate": 2.1698313388357543e-05, + "loss": 0.0655, "step": 89680 }, { "epoch": 4.18, - "learning_rate": 1.168487178285125e-05, - "loss": 0.0344, + "learning_rate": 2.1697845318521523e-05, + "loss": 0.0185, "step": 89685 }, { "epoch": 4.19, - "learning_rate": 1.1684402981576111e-05, - "loss": 0.0607, + "learning_rate": 2.1697377248685506e-05, + "loss": 0.0194, "step": 89690 }, { "epoch": 4.19, - "learning_rate": 1.1683934180300971e-05, - "loss": 0.0244, + "learning_rate": 2.1696909178849486e-05, + "loss": 0.0517, "step": 89695 }, { "epoch": 4.19, - "learning_rate": 1.1683465379025831e-05, - "loss": 0.096, + "learning_rate": 2.1696441109013466e-05, + "loss": 0.0777, "step": 89700 }, { "epoch": 4.19, - "learning_rate": 1.1682996577750691e-05, - "loss": 0.0742, + "learning_rate": 2.1695973039177445e-05, + "loss": 0.0558, "step": 89705 }, { "epoch": 4.19, - "learning_rate": 1.1682527776475554e-05, - "loss": 0.1275, + "learning_rate": 2.169550496934143e-05, + "loss": 0.1115, "step": 89710 }, { "epoch": 4.19, - "learning_rate": 1.1682058975200414e-05, - "loss": 0.0448, + "learning_rate": 2.169503689950541e-05, + "loss": 0.168, "step": 89715 }, { "epoch": 4.19, - "learning_rate": 1.1681590173925274e-05, - "loss": 0.1787, + "learning_rate": 2.1694568829669388e-05, + "loss": 0.077, "step": 89720 }, { "epoch": 4.19, - "learning_rate": 1.1681121372650134e-05, - "loss": 0.3124, + "learning_rate": 2.1694100759833365e-05, + "loss": 0.2126, "step": 89725 }, { "epoch": 4.19, - "learning_rate": 1.1680652571374996e-05, - "loss": 0.1043, + "learning_rate": 2.1693632689997348e-05, + "loss": 0.0568, "step": 89730 }, { "epoch": 4.19, - "learning_rate": 1.1680183770099856e-05, - "loss": 0.0047, + "learning_rate": 2.1693164620161328e-05, + "loss": 0.0362, "step": 89735 }, { "epoch": 4.19, - "learning_rate": 1.1679714968824716e-05, - "loss": 0.0341, + "learning_rate": 2.1692696550325307e-05, + "loss": 0.0394, "step": 89740 }, { "epoch": 4.19, - "learning_rate": 1.1679246167549576e-05, - "loss": 0.046, + "learning_rate": 2.169222848048929e-05, + "loss": 0.0303, "step": 89745 }, { "epoch": 4.19, - "learning_rate": 1.1678777366274437e-05, - "loss": 0.0342, + "learning_rate": 2.169176041065327e-05, + "loss": 0.0534, "step": 89750 }, { "epoch": 4.19, - "learning_rate": 1.1678308564999297e-05, - "loss": 0.0527, + "learning_rate": 2.169129234081725e-05, + "loss": 0.0548, "step": 89755 }, { "epoch": 4.19, - "learning_rate": 1.1677839763724159e-05, - "loss": 0.164, + "learning_rate": 2.169082427098123e-05, + "loss": 0.0371, "step": 89760 }, { "epoch": 4.19, - "learning_rate": 1.1677370962449019e-05, - "loss": 0.061, + "learning_rate": 2.1690356201145213e-05, + "loss": 0.1142, "step": 89765 }, { "epoch": 4.19, - "learning_rate": 1.167690216117388e-05, - "loss": 0.19, + "learning_rate": 2.1689888131309193e-05, + "loss": 0.0938, "step": 89770 }, { "epoch": 4.19, - "learning_rate": 1.167643335989874e-05, - "loss": 0.219, + "learning_rate": 2.1689420061473173e-05, + "loss": 0.2837, "step": 89775 }, { "epoch": 4.19, - "learning_rate": 1.16759645586236e-05, - "loss": 0.0776, + "learning_rate": 2.1688951991637153e-05, + "loss": 0.1181, "step": 89780 }, { "epoch": 4.19, - "learning_rate": 1.167549575734846e-05, - "loss": 0.0106, + "learning_rate": 2.1688483921801136e-05, + "loss": 0.035, "step": 89785 }, { "epoch": 4.19, - "learning_rate": 1.1675026956073322e-05, - "loss": 0.021, + "learning_rate": 2.1688015851965112e-05, + "loss": 0.0246, "step": 89790 }, { "epoch": 4.19, - "learning_rate": 1.1674558154798182e-05, - "loss": 0.033, + "learning_rate": 2.1687547782129092e-05, + "loss": 0.0223, "step": 89795 }, { "epoch": 4.19, - "learning_rate": 1.1674089353523042e-05, - "loss": 0.0703, + "learning_rate": 2.1687079712293072e-05, + "loss": 0.0614, "step": 89800 }, { "epoch": 4.19, - "learning_rate": 1.1673620552247903e-05, - "loss": 0.1027, + "learning_rate": 2.1686611642457055e-05, + "loss": 0.0179, "step": 89805 }, { "epoch": 4.19, - "learning_rate": 1.1673151750972765e-05, - "loss": 0.0881, + "learning_rate": 2.1686143572621035e-05, + "loss": 0.149, "step": 89810 }, { "epoch": 4.19, - "learning_rate": 1.1672682949697625e-05, - "loss": 0.0987, + "learning_rate": 2.1685675502785015e-05, + "loss": 0.1675, "step": 89815 }, { "epoch": 4.19, - "learning_rate": 1.1672214148422485e-05, - "loss": 0.2062, + "learning_rate": 2.1685207432948998e-05, + "loss": 0.1311, "step": 89820 }, { "epoch": 4.19, - "learning_rate": 1.1671745347147345e-05, - "loss": 0.3137, + "learning_rate": 2.1684739363112978e-05, + "loss": 0.2425, "step": 89825 }, { "epoch": 4.19, - "learning_rate": 1.1671276545872206e-05, - "loss": 0.113, + "learning_rate": 2.1684271293276958e-05, + "loss": 0.0726, "step": 89830 }, { "epoch": 4.19, - "learning_rate": 1.1670807744597066e-05, + "learning_rate": 2.1683803223440938e-05, "loss": 0.0464, "step": 89835 }, { "epoch": 4.19, - "learning_rate": 1.1670338943321926e-05, - "loss": 0.0121, + "learning_rate": 2.168333515360492e-05, + "loss": 0.0499, "step": 89840 }, { "epoch": 4.19, - "learning_rate": 1.1669870142046786e-05, - "loss": 0.0268, + "learning_rate": 2.16828670837689e-05, + "loss": 0.0303, "step": 89845 }, { "epoch": 4.19, - "learning_rate": 1.166940134077165e-05, - "loss": 0.0599, + "learning_rate": 2.1682399013932877e-05, + "loss": 0.0835, "step": 89850 }, { "epoch": 4.19, - "learning_rate": 1.166893253949651e-05, - "loss": 0.0611, + "learning_rate": 2.1681930944096857e-05, + "loss": 0.0961, "step": 89855 }, { "epoch": 4.19, - "learning_rate": 1.166846373822137e-05, - "loss": 0.0742, + "learning_rate": 2.168146287426084e-05, + "loss": 0.0702, "step": 89860 }, { "epoch": 4.19, - "learning_rate": 1.166799493694623e-05, - "loss": 0.1433, + "learning_rate": 2.168099480442482e-05, + "loss": 0.2056, "step": 89865 }, { "epoch": 4.19, - "learning_rate": 1.166752613567109e-05, - "loss": 0.1798, + "learning_rate": 2.16805267345888e-05, + "loss": 0.1328, "step": 89870 }, { "epoch": 4.19, - "learning_rate": 1.166705733439595e-05, - "loss": 0.2045, + "learning_rate": 2.1680058664752783e-05, + "loss": 0.3375, "step": 89875 }, { "epoch": 4.19, - "learning_rate": 1.166658853312081e-05, - "loss": 0.0834, + "learning_rate": 2.1679590594916763e-05, + "loss": 0.1319, "step": 89880 }, { "epoch": 4.19, - "learning_rate": 1.166611973184567e-05, - "loss": 0.0339, + "learning_rate": 2.1679122525080743e-05, + "loss": 0.0293, "step": 89885 }, { "epoch": 4.19, - "learning_rate": 1.166565093057053e-05, - "loss": 0.0613, + "learning_rate": 2.1678654455244722e-05, + "loss": 0.0154, "step": 89890 }, { "epoch": 4.19, - "learning_rate": 1.1665182129295394e-05, - "loss": 0.0092, + "learning_rate": 2.1678186385408706e-05, + "loss": 0.027, "step": 89895 }, { "epoch": 4.19, - "learning_rate": 1.1664713328020254e-05, - "loss": 0.0491, + "learning_rate": 2.1677718315572685e-05, + "loss": 0.0636, "step": 89900 }, { "epoch": 4.2, - "learning_rate": 1.1664244526745114e-05, - "loss": 0.0116, + "learning_rate": 2.1677250245736665e-05, + "loss": 0.1259, "step": 89905 }, { "epoch": 4.2, - "learning_rate": 1.1663775725469975e-05, - "loss": 0.0667, + "learning_rate": 2.1676782175900645e-05, + "loss": 0.0637, "step": 89910 }, { "epoch": 4.2, - "learning_rate": 1.1663306924194835e-05, - "loss": 0.1023, + "learning_rate": 2.1676314106064625e-05, + "loss": 0.0533, "step": 89915 }, { "epoch": 4.2, - "learning_rate": 1.1662838122919695e-05, - "loss": 0.1983, + "learning_rate": 2.1675846036228605e-05, + "loss": 0.1625, "step": 89920 }, { "epoch": 4.2, - "learning_rate": 1.1662369321644555e-05, - "loss": 0.3117, + "learning_rate": 2.1675377966392584e-05, + "loss": 0.1751, "step": 89925 }, { "epoch": 4.2, - "learning_rate": 1.1661900520369415e-05, - "loss": 0.085, + "learning_rate": 2.1674909896556568e-05, + "loss": 0.0844, "step": 89930 }, { "epoch": 4.2, - "learning_rate": 1.1661431719094277e-05, - "loss": 0.0078, + "learning_rate": 2.1674441826720547e-05, + "loss": 0.0062, "step": 89935 }, { "epoch": 4.2, - "learning_rate": 1.1660962917819137e-05, - "loss": 0.0456, + "learning_rate": 2.1673973756884527e-05, + "loss": 0.0279, "step": 89940 }, { "epoch": 4.2, - "learning_rate": 1.1660494116543998e-05, - "loss": 0.0304, + "learning_rate": 2.1673505687048507e-05, + "loss": 0.0397, "step": 89945 }, { "epoch": 4.2, - "learning_rate": 1.166002531526886e-05, - "loss": 0.0178, + "learning_rate": 2.167303761721249e-05, + "loss": 0.0225, "step": 89950 }, { "epoch": 4.2, - "learning_rate": 1.165955651399372e-05, - "loss": 0.0747, + "learning_rate": 2.167256954737647e-05, + "loss": 0.023, "step": 89955 }, { "epoch": 4.2, - "learning_rate": 1.165908771271858e-05, - "loss": 0.0913, + "learning_rate": 2.167210147754045e-05, + "loss": 0.1211, "step": 89960 }, { "epoch": 4.2, - "learning_rate": 1.165861891144344e-05, - "loss": 0.0636, + "learning_rate": 2.167163340770443e-05, + "loss": 0.1782, "step": 89965 }, { "epoch": 4.2, - "learning_rate": 1.16581501101683e-05, - "loss": 0.1351, + "learning_rate": 2.1671165337868413e-05, + "loss": 0.0947, "step": 89970 }, { "epoch": 4.2, - "learning_rate": 1.1657681308893161e-05, - "loss": 0.3487, + "learning_rate": 2.1670697268032393e-05, + "loss": 0.1964, "step": 89975 }, { "epoch": 4.2, - "learning_rate": 1.1657212507618021e-05, - "loss": 0.0798, + "learning_rate": 2.167022919819637e-05, + "loss": 0.0589, "step": 89980 }, { "epoch": 4.2, - "learning_rate": 1.1656743706342881e-05, - "loss": 0.0109, + "learning_rate": 2.166976112836035e-05, + "loss": 0.0365, "step": 89985 }, { "epoch": 4.2, - "learning_rate": 1.1656274905067744e-05, - "loss": 0.0226, + "learning_rate": 2.1669293058524332e-05, + "loss": 0.0496, "step": 89990 }, { "epoch": 4.2, - "learning_rate": 1.1655806103792604e-05, - "loss": 0.0188, + "learning_rate": 2.1668824988688312e-05, + "loss": 0.08, "step": 89995 }, { "epoch": 4.2, - "learning_rate": 1.1655337302517464e-05, - "loss": 0.0779, + "learning_rate": 2.1668356918852292e-05, + "loss": 0.0371, "step": 90000 }, { "epoch": 4.2, - "learning_rate": 1.1654868501242324e-05, - "loss": 0.0679, + "learning_rate": 2.1667888849016275e-05, + "loss": 0.0543, "step": 90005 }, { "epoch": 4.2, - "learning_rate": 1.1654399699967184e-05, - "loss": 0.081, + "learning_rate": 2.1667420779180255e-05, + "loss": 0.1352, "step": 90010 }, { "epoch": 4.2, - "learning_rate": 1.1653930898692046e-05, - "loss": 0.0588, + "learning_rate": 2.1666952709344235e-05, + "loss": 0.1711, "step": 90015 }, { "epoch": 4.2, - "learning_rate": 1.1653462097416906e-05, - "loss": 0.2129, + "learning_rate": 2.1666484639508215e-05, + "loss": 0.0971, "step": 90020 }, { "epoch": 4.2, - "learning_rate": 1.1652993296141766e-05, - "loss": 0.1604, + "learning_rate": 2.1666016569672198e-05, + "loss": 0.5047, "step": 90025 }, { "epoch": 4.2, - "learning_rate": 1.1652524494866626e-05, - "loss": 0.0807, + "learning_rate": 2.1665548499836178e-05, + "loss": 0.0559, "step": 90030 }, { "epoch": 4.2, - "learning_rate": 1.1652055693591489e-05, - "loss": 0.0473, + "learning_rate": 2.1665080430000157e-05, + "loss": 0.0385, "step": 90035 }, { "epoch": 4.2, - "learning_rate": 1.1651586892316349e-05, - "loss": 0.0271, + "learning_rate": 2.1664612360164134e-05, + "loss": 0.0144, "step": 90040 }, { "epoch": 4.2, - "learning_rate": 1.1651118091041209e-05, - "loss": 0.0115, + "learning_rate": 2.1664144290328117e-05, + "loss": 0.0441, "step": 90045 }, { "epoch": 4.2, - "learning_rate": 1.1650649289766069e-05, - "loss": 0.0769, + "learning_rate": 2.1663676220492097e-05, + "loss": 0.0718, "step": 90050 }, { "epoch": 4.2, - "learning_rate": 1.165018048849093e-05, - "loss": 0.0499, + "learning_rate": 2.1663208150656077e-05, + "loss": 0.051, "step": 90055 }, { "epoch": 4.2, - "learning_rate": 1.164971168721579e-05, - "loss": 0.113, + "learning_rate": 2.166274008082006e-05, + "loss": 0.0623, "step": 90060 }, { "epoch": 4.2, - "learning_rate": 1.164924288594065e-05, - "loss": 0.1396, + "learning_rate": 2.166227201098404e-05, + "loss": 0.1201, "step": 90065 }, { "epoch": 4.2, - "learning_rate": 1.164877408466551e-05, - "loss": 0.1436, + "learning_rate": 2.166180394114802e-05, + "loss": 0.118, "step": 90070 }, { "epoch": 4.2, - "learning_rate": 1.164830528339037e-05, - "loss": 0.3258, + "learning_rate": 2.1661335871312e-05, + "loss": 0.2831, "step": 90075 }, { "epoch": 4.2, - "learning_rate": 1.1647836482115232e-05, - "loss": 0.0535, + "learning_rate": 2.1660867801475983e-05, + "loss": 0.0926, "step": 90080 }, { "epoch": 4.2, - "learning_rate": 1.1647367680840093e-05, - "loss": 0.0282, + "learning_rate": 2.1660399731639962e-05, + "loss": 0.0571, "step": 90085 }, { "epoch": 4.2, - "learning_rate": 1.1646898879564953e-05, - "loss": 0.046, + "learning_rate": 2.1659931661803942e-05, + "loss": 0.0633, "step": 90090 }, { "epoch": 4.2, - "learning_rate": 1.1646430078289815e-05, - "loss": 0.0267, + "learning_rate": 2.1659463591967922e-05, + "loss": 0.0169, "step": 90095 }, { "epoch": 4.2, - "learning_rate": 1.1645961277014675e-05, - "loss": 0.0636, + "learning_rate": 2.1658995522131905e-05, + "loss": 0.0306, "step": 90100 }, { "epoch": 4.2, - "learning_rate": 1.1645492475739535e-05, - "loss": 0.1151, + "learning_rate": 2.165852745229588e-05, + "loss": 0.0889, "step": 90105 }, { "epoch": 4.2, - "learning_rate": 1.1645023674464395e-05, - "loss": 0.0741, + "learning_rate": 2.165805938245986e-05, + "loss": 0.1051, "step": 90110 }, { "epoch": 4.2, - "learning_rate": 1.1644554873189255e-05, - "loss": 0.0809, + "learning_rate": 2.1657591312623845e-05, + "loss": 0.1247, "step": 90115 }, { "epoch": 4.21, - "learning_rate": 1.1644086071914116e-05, - "loss": 0.1993, + "learning_rate": 2.1657123242787824e-05, + "loss": 0.1085, "step": 90120 }, { "epoch": 4.21, - "learning_rate": 1.1643617270638976e-05, - "loss": 0.2685, + "learning_rate": 2.1656655172951804e-05, + "loss": 0.2601, "step": 90125 }, { "epoch": 4.21, - "learning_rate": 1.1643148469363838e-05, - "loss": 0.0708, + "learning_rate": 2.1656187103115784e-05, + "loss": 0.0541, "step": 90130 }, { "epoch": 4.21, - "learning_rate": 1.16426796680887e-05, - "loss": 0.0637, + "learning_rate": 2.1655719033279767e-05, + "loss": 0.0112, "step": 90135 }, { "epoch": 4.21, - "learning_rate": 1.164221086681356e-05, - "loss": 0.0553, + "learning_rate": 2.1655250963443747e-05, + "loss": 0.0386, "step": 90140 }, { "epoch": 4.21, - "learning_rate": 1.164174206553842e-05, - "loss": 0.061, + "learning_rate": 2.1654782893607727e-05, + "loss": 0.0439, "step": 90145 }, { "epoch": 4.21, - "learning_rate": 1.164127326426328e-05, - "loss": 0.0785, + "learning_rate": 2.1654314823771707e-05, + "loss": 0.0436, "step": 90150 }, { "epoch": 4.21, - "learning_rate": 1.164080446298814e-05, - "loss": 0.0905, + "learning_rate": 2.165384675393569e-05, + "loss": 0.1417, "step": 90155 }, { "epoch": 4.21, - "learning_rate": 1.1640335661713e-05, - "loss": 0.0879, + "learning_rate": 2.165337868409967e-05, + "loss": 0.094, "step": 90160 }, { "epoch": 4.21, - "learning_rate": 1.163986686043786e-05, - "loss": 0.1455, + "learning_rate": 2.165291061426365e-05, + "loss": 0.1749, "step": 90165 }, { "epoch": 4.21, - "learning_rate": 1.163939805916272e-05, - "loss": 0.0863, + "learning_rate": 2.1652442544427626e-05, + "loss": 0.0974, "step": 90170 }, { "epoch": 4.21, - "learning_rate": 1.1638929257887584e-05, - "loss": 0.3679, + "learning_rate": 2.165197447459161e-05, + "loss": 0.2187, "step": 90175 }, { "epoch": 4.21, - "learning_rate": 1.1638460456612444e-05, - "loss": 0.0826, + "learning_rate": 2.165150640475559e-05, + "loss": 0.0665, "step": 90180 }, { "epoch": 4.21, - "learning_rate": 1.1637991655337304e-05, - "loss": 0.0402, + "learning_rate": 2.165103833491957e-05, + "loss": 0.0564, "step": 90185 }, { "epoch": 4.21, - "learning_rate": 1.1637522854062164e-05, - "loss": 0.024, + "learning_rate": 2.1650570265083552e-05, + "loss": 0.037, "step": 90190 }, { "epoch": 4.21, - "learning_rate": 1.1637054052787025e-05, - "loss": 0.0203, + "learning_rate": 2.1650102195247532e-05, + "loss": 0.0334, "step": 90195 }, { "epoch": 4.21, - "learning_rate": 1.1636585251511885e-05, - "loss": 0.0285, + "learning_rate": 2.1649634125411512e-05, + "loss": 0.0823, "step": 90200 }, { "epoch": 4.21, - "learning_rate": 1.1636116450236745e-05, - "loss": 0.123, + "learning_rate": 2.164916605557549e-05, + "loss": 0.0581, "step": 90205 }, { "epoch": 4.21, - "learning_rate": 1.1635647648961605e-05, - "loss": 0.0205, + "learning_rate": 2.1648697985739475e-05, + "loss": 0.1109, "step": 90210 }, { "epoch": 4.21, - "learning_rate": 1.1635178847686465e-05, - "loss": 0.1254, + "learning_rate": 2.1648229915903455e-05, + "loss": 0.1184, "step": 90215 }, { "epoch": 4.21, - "learning_rate": 1.1634710046411329e-05, - "loss": 0.2179, + "learning_rate": 2.1647761846067434e-05, + "loss": 0.2159, "step": 90220 }, { "epoch": 4.21, - "learning_rate": 1.1634241245136188e-05, - "loss": 0.3697, + "learning_rate": 2.1647293776231414e-05, + "loss": 0.2555, "step": 90225 }, { "epoch": 4.21, - "learning_rate": 1.1633772443861048e-05, - "loss": 0.1076, + "learning_rate": 2.1646825706395394e-05, + "loss": 0.0911, "step": 90230 }, { "epoch": 4.21, - "learning_rate": 1.163330364258591e-05, - "loss": 0.0421, + "learning_rate": 2.1646357636559374e-05, + "loss": 0.0262, "step": 90235 }, { "epoch": 4.21, - "learning_rate": 1.163283484131077e-05, - "loss": 0.0345, + "learning_rate": 2.1645889566723354e-05, + "loss": 0.0201, "step": 90240 }, { "epoch": 4.21, - "learning_rate": 1.163236604003563e-05, - "loss": 0.0681, + "learning_rate": 2.1645421496887337e-05, + "loss": 0.0035, "step": 90245 }, { "epoch": 4.21, - "learning_rate": 1.163189723876049e-05, - "loss": 0.0593, + "learning_rate": 2.1644953427051317e-05, + "loss": 0.0272, "step": 90250 }, { "epoch": 4.21, - "learning_rate": 1.163142843748535e-05, - "loss": 0.0574, + "learning_rate": 2.1644485357215296e-05, + "loss": 0.0812, "step": 90255 }, { "epoch": 4.21, - "learning_rate": 1.1630959636210211e-05, - "loss": 0.0452, + "learning_rate": 2.1644017287379276e-05, + "loss": 0.1042, "step": 90260 }, { "epoch": 4.21, - "learning_rate": 1.1630490834935071e-05, - "loss": 0.1625, + "learning_rate": 2.164354921754326e-05, + "loss": 0.1133, "step": 90265 }, { "epoch": 4.21, - "learning_rate": 1.1630022033659933e-05, - "loss": 0.2212, + "learning_rate": 2.164308114770724e-05, + "loss": 0.0978, "step": 90270 }, { "epoch": 4.21, - "learning_rate": 1.1629553232384795e-05, - "loss": 0.2688, + "learning_rate": 2.164261307787122e-05, + "loss": 0.2986, "step": 90275 }, { "epoch": 4.21, - "learning_rate": 1.1629084431109654e-05, - "loss": 0.0968, + "learning_rate": 2.16421450080352e-05, + "loss": 0.058, "step": 90280 }, { "epoch": 4.21, - "learning_rate": 1.1628615629834514e-05, - "loss": 0.0236, + "learning_rate": 2.1641676938199182e-05, + "loss": 0.0186, "step": 90285 }, { "epoch": 4.21, - "learning_rate": 1.1628146828559374e-05, - "loss": 0.0285, + "learning_rate": 2.1641208868363162e-05, + "loss": 0.0468, "step": 90290 }, { "epoch": 4.21, - "learning_rate": 1.1627678027284234e-05, - "loss": 0.1006, + "learning_rate": 2.164074079852714e-05, + "loss": 0.0387, "step": 90295 }, { "epoch": 4.21, - "learning_rate": 1.1627209226009096e-05, - "loss": 0.0232, + "learning_rate": 2.164027272869112e-05, + "loss": 0.1515, "step": 90300 }, { "epoch": 4.21, - "learning_rate": 1.1626740424733956e-05, - "loss": 0.1043, + "learning_rate": 2.16398046588551e-05, + "loss": 0.0543, "step": 90305 }, { "epoch": 4.21, - "learning_rate": 1.1626271623458816e-05, - "loss": 0.1092, + "learning_rate": 2.163933658901908e-05, + "loss": 0.0378, "step": 90310 }, { "epoch": 4.21, - "learning_rate": 1.1625802822183679e-05, - "loss": 0.0979, + "learning_rate": 2.163886851918306e-05, + "loss": 0.0759, "step": 90315 }, { "epoch": 4.21, - "learning_rate": 1.1625334020908539e-05, - "loss": 0.2579, + "learning_rate": 2.1638400449347044e-05, + "loss": 0.1269, "step": 90320 }, { "epoch": 4.21, - "learning_rate": 1.1624865219633399e-05, - "loss": 0.4228, + "learning_rate": 2.1637932379511024e-05, + "loss": 0.2222, "step": 90325 }, { "epoch": 4.21, - "learning_rate": 1.1624396418358259e-05, - "loss": 0.1107, + "learning_rate": 2.1637464309675004e-05, + "loss": 0.0784, "step": 90330 }, { "epoch": 4.22, - "learning_rate": 1.1623927617083119e-05, - "loss": 0.038, + "learning_rate": 2.1636996239838984e-05, + "loss": 0.013, "step": 90335 }, { "epoch": 4.22, - "learning_rate": 1.162345881580798e-05, - "loss": 0.0136, + "learning_rate": 2.1636528170002967e-05, + "loss": 0.0105, "step": 90340 }, { "epoch": 4.22, - "learning_rate": 1.162299001453284e-05, - "loss": 0.0173, + "learning_rate": 2.1636060100166947e-05, + "loss": 0.0141, "step": 90345 }, { "epoch": 4.22, - "learning_rate": 1.16225212132577e-05, - "loss": 0.0287, + "learning_rate": 2.1635592030330927e-05, + "loss": 0.0538, "step": 90350 }, { "epoch": 4.22, - "learning_rate": 1.162205241198256e-05, - "loss": 0.0393, + "learning_rate": 2.1635123960494906e-05, + "loss": 0.1042, "step": 90355 }, { "epoch": 4.22, - "learning_rate": 1.1621583610707424e-05, - "loss": 0.0761, + "learning_rate": 2.1634655890658886e-05, + "loss": 0.0611, "step": 90360 }, { "epoch": 4.22, - "learning_rate": 1.1621114809432284e-05, - "loss": 0.1793, + "learning_rate": 2.1634187820822866e-05, + "loss": 0.1356, "step": 90365 }, { "epoch": 4.22, - "learning_rate": 1.1620646008157143e-05, - "loss": 0.2109, + "learning_rate": 2.1633719750986846e-05, + "loss": 0.1381, "step": 90370 }, { "epoch": 4.22, - "learning_rate": 1.1620177206882003e-05, - "loss": 0.2366, + "learning_rate": 2.163325168115083e-05, + "loss": 0.2049, "step": 90375 }, { "epoch": 4.22, - "learning_rate": 1.1619708405606865e-05, - "loss": 0.0787, + "learning_rate": 2.163278361131481e-05, + "loss": 0.0837, "step": 90380 }, { "epoch": 4.22, - "learning_rate": 1.1619239604331725e-05, - "loss": 0.0256, + "learning_rate": 2.163231554147879e-05, + "loss": 0.0327, "step": 90385 }, { "epoch": 4.22, - "learning_rate": 1.1618770803056585e-05, - "loss": 0.0247, + "learning_rate": 2.163184747164277e-05, + "loss": 0.1187, "step": 90390 }, { "epoch": 4.22, - "learning_rate": 1.1618302001781445e-05, - "loss": 0.0377, + "learning_rate": 2.163137940180675e-05, + "loss": 0.1283, "step": 90395 }, { "epoch": 4.22, - "learning_rate": 1.1617833200506305e-05, - "loss": 0.0484, + "learning_rate": 2.163091133197073e-05, + "loss": 0.0368, "step": 90400 }, { "epoch": 4.22, - "learning_rate": 1.1617364399231166e-05, - "loss": 0.082, + "learning_rate": 2.163044326213471e-05, + "loss": 0.07, "step": 90405 }, { "epoch": 4.22, - "learning_rate": 1.1616895597956028e-05, - "loss": 0.056, + "learning_rate": 2.162997519229869e-05, + "loss": 0.0897, "step": 90410 }, { "epoch": 4.22, - "learning_rate": 1.1616426796680888e-05, - "loss": 0.1347, + "learning_rate": 2.1629507122462674e-05, + "loss": 0.0924, "step": 90415 }, { "epoch": 4.22, - "learning_rate": 1.161595799540575e-05, - "loss": 0.1939, + "learning_rate": 2.162903905262665e-05, + "loss": 0.1605, "step": 90420 }, { "epoch": 4.22, - "learning_rate": 1.161548919413061e-05, - "loss": 0.2519, + "learning_rate": 2.162857098279063e-05, + "loss": 0.2614, "step": 90425 }, { "epoch": 4.22, - "learning_rate": 1.161502039285547e-05, - "loss": 0.0694, + "learning_rate": 2.1628102912954614e-05, + "loss": 0.0548, "step": 90430 }, { "epoch": 4.22, - "learning_rate": 1.161455159158033e-05, - "loss": 0.0292, + "learning_rate": 2.1627634843118594e-05, + "loss": 0.0153, "step": 90435 }, { "epoch": 4.22, - "learning_rate": 1.161408279030519e-05, - "loss": 0.0794, + "learning_rate": 2.1627166773282573e-05, + "loss": 0.0038, "step": 90440 }, { "epoch": 4.22, - "learning_rate": 1.1613613989030051e-05, - "loss": 0.0355, + "learning_rate": 2.1626698703446553e-05, + "loss": 0.0414, "step": 90445 }, { "epoch": 4.22, - "learning_rate": 1.1613145187754911e-05, - "loss": 0.0542, + "learning_rate": 2.1626230633610536e-05, + "loss": 0.064, "step": 90450 }, { "epoch": 4.22, - "learning_rate": 1.1612676386479772e-05, - "loss": 0.0475, + "learning_rate": 2.1625762563774516e-05, + "loss": 0.0364, "step": 90455 }, { "epoch": 4.22, - "learning_rate": 1.1612207585204634e-05, - "loss": 0.1139, + "learning_rate": 2.1625294493938496e-05, + "loss": 0.0768, "step": 90460 }, { "epoch": 4.22, - "learning_rate": 1.1611738783929494e-05, - "loss": 0.0833, + "learning_rate": 2.1624826424102476e-05, + "loss": 0.1397, "step": 90465 }, { "epoch": 4.22, - "learning_rate": 1.1611269982654354e-05, - "loss": 0.1518, + "learning_rate": 2.162435835426646e-05, + "loss": 0.1599, "step": 90470 }, { "epoch": 4.22, - "learning_rate": 1.1610801181379214e-05, - "loss": 0.2468, + "learning_rate": 2.162389028443044e-05, + "loss": 0.3434, "step": 90475 }, { "epoch": 4.22, - "learning_rate": 1.1610332380104074e-05, - "loss": 0.082, + "learning_rate": 2.162342221459442e-05, + "loss": 0.0858, "step": 90480 }, { "epoch": 4.22, - "learning_rate": 1.1609863578828935e-05, - "loss": 0.0228, + "learning_rate": 2.16229541447584e-05, + "loss": 0.0357, "step": 90485 }, { "epoch": 4.22, - "learning_rate": 1.1609394777553795e-05, - "loss": 0.0282, + "learning_rate": 2.162248607492238e-05, + "loss": 0.0128, "step": 90490 }, { "epoch": 4.22, - "learning_rate": 1.1608925976278655e-05, - "loss": 0.0354, + "learning_rate": 2.1622018005086358e-05, + "loss": 0.0423, "step": 90495 }, { "epoch": 4.22, - "learning_rate": 1.1608457175003519e-05, - "loss": 0.0689, + "learning_rate": 2.1621549935250338e-05, + "loss": 0.053, "step": 90500 }, { "epoch": 4.22, - "learning_rate": 1.1607988373728379e-05, - "loss": 0.0947, + "learning_rate": 2.162108186541432e-05, + "loss": 0.0837, "step": 90505 }, { "epoch": 4.22, - "learning_rate": 1.1607519572453239e-05, - "loss": 0.1315, + "learning_rate": 2.16206137955783e-05, + "loss": 0.0776, "step": 90510 }, { "epoch": 4.22, - "learning_rate": 1.1607050771178098e-05, - "loss": 0.088, + "learning_rate": 2.162014572574228e-05, + "loss": 0.1152, "step": 90515 }, { "epoch": 4.22, - "learning_rate": 1.1606581969902958e-05, - "loss": 0.2375, + "learning_rate": 2.161967765590626e-05, + "loss": 0.1675, "step": 90520 }, { "epoch": 4.22, - "learning_rate": 1.160611316862782e-05, - "loss": 0.2542, + "learning_rate": 2.1619209586070244e-05, + "loss": 0.2945, "step": 90525 }, { "epoch": 4.22, - "learning_rate": 1.160564436735268e-05, - "loss": 0.022, + "learning_rate": 2.1618741516234224e-05, + "loss": 0.0858, "step": 90530 }, { "epoch": 4.22, - "learning_rate": 1.160517556607754e-05, - "loss": 0.0536, + "learning_rate": 2.1618273446398204e-05, + "loss": 0.0319, "step": 90535 }, { "epoch": 4.22, - "learning_rate": 1.16047067648024e-05, - "loss": 0.0599, + "learning_rate": 2.1617805376562187e-05, + "loss": 0.0242, "step": 90540 }, { "epoch": 4.22, - "learning_rate": 1.1604237963527263e-05, - "loss": 0.0576, + "learning_rate": 2.1617337306726163e-05, + "loss": 0.0452, "step": 90545 }, { "epoch": 4.23, - "learning_rate": 1.1603769162252123e-05, - "loss": 0.135, + "learning_rate": 2.1616869236890143e-05, + "loss": 0.0638, "step": 90550 }, { "epoch": 4.23, - "learning_rate": 1.1603300360976983e-05, - "loss": 0.028, + "learning_rate": 2.1616401167054123e-05, + "loss": 0.0292, "step": 90555 }, { "epoch": 4.23, - "learning_rate": 1.1602831559701843e-05, - "loss": 0.1132, + "learning_rate": 2.1615933097218106e-05, + "loss": 0.0718, "step": 90560 }, { "epoch": 4.23, - "learning_rate": 1.1602362758426705e-05, - "loss": 0.1297, + "learning_rate": 2.1615465027382086e-05, + "loss": 0.0592, "step": 90565 }, { "epoch": 4.23, - "learning_rate": 1.1601893957151565e-05, - "loss": 0.1117, + "learning_rate": 2.1614996957546066e-05, + "loss": 0.2091, "step": 90570 }, { "epoch": 4.23, - "learning_rate": 1.1601425155876424e-05, - "loss": 0.3883, + "learning_rate": 2.1614528887710045e-05, + "loss": 0.3008, "step": 90575 }, { "epoch": 4.23, - "learning_rate": 1.1600956354601284e-05, - "loss": 0.0677, + "learning_rate": 2.161406081787403e-05, + "loss": 0.1003, "step": 90580 }, { "epoch": 4.23, - "learning_rate": 1.1600487553326146e-05, - "loss": 0.0066, + "learning_rate": 2.161359274803801e-05, + "loss": 0.016, "step": 90585 }, { "epoch": 4.23, - "learning_rate": 1.1600018752051006e-05, - "loss": 0.0376, + "learning_rate": 2.1613124678201988e-05, + "loss": 0.0213, "step": 90590 }, { "epoch": 4.23, - "learning_rate": 1.1599549950775868e-05, - "loss": 0.0679, + "learning_rate": 2.1612656608365968e-05, + "loss": 0.0117, "step": 90595 }, { "epoch": 4.23, - "learning_rate": 1.1599081149500728e-05, - "loss": 0.0643, + "learning_rate": 2.161218853852995e-05, + "loss": 0.0514, "step": 90600 }, { "epoch": 4.23, - "learning_rate": 1.1598612348225589e-05, - "loss": 0.0511, + "learning_rate": 2.161172046869393e-05, + "loss": 0.0643, "step": 90605 }, { "epoch": 4.23, - "learning_rate": 1.1598143546950449e-05, - "loss": 0.0937, + "learning_rate": 2.1611252398857908e-05, + "loss": 0.0958, "step": 90610 }, { "epoch": 4.23, - "learning_rate": 1.1597674745675309e-05, - "loss": 0.1283, + "learning_rate": 2.161078432902189e-05, + "loss": 0.146, "step": 90615 }, { "epoch": 4.23, - "learning_rate": 1.1597205944400169e-05, - "loss": 0.0704, + "learning_rate": 2.161031625918587e-05, + "loss": 0.0929, "step": 90620 }, { "epoch": 4.23, - "learning_rate": 1.159673714312503e-05, - "loss": 0.2658, + "learning_rate": 2.160984818934985e-05, + "loss": 0.2433, "step": 90625 }, { "epoch": 4.23, - "learning_rate": 1.159626834184989e-05, - "loss": 0.0897, + "learning_rate": 2.160938011951383e-05, + "loss": 0.0827, "step": 90630 }, { "epoch": 4.23, - "learning_rate": 1.159579954057475e-05, - "loss": 0.0046, + "learning_rate": 2.1608912049677813e-05, + "loss": 0.0214, "step": 90635 }, { "epoch": 4.23, - "learning_rate": 1.1595330739299612e-05, - "loss": 0.0517, + "learning_rate": 2.1608443979841793e-05, + "loss": 0.0498, "step": 90640 }, { "epoch": 4.23, - "learning_rate": 1.1594861938024474e-05, - "loss": 0.0436, + "learning_rate": 2.1607975910005773e-05, + "loss": 0.0724, "step": 90645 }, { "epoch": 4.23, - "learning_rate": 1.1594393136749334e-05, - "loss": 0.0218, + "learning_rate": 2.1607507840169753e-05, + "loss": 0.1462, "step": 90650 }, { "epoch": 4.23, - "learning_rate": 1.1593924335474194e-05, - "loss": 0.0381, + "learning_rate": 2.1607039770333736e-05, + "loss": 0.0779, "step": 90655 }, { "epoch": 4.23, - "learning_rate": 1.1593455534199053e-05, - "loss": 0.0627, + "learning_rate": 2.1606571700497716e-05, + "loss": 0.0757, "step": 90660 }, { "epoch": 4.23, - "learning_rate": 1.1592986732923915e-05, - "loss": 0.1014, + "learning_rate": 2.1606103630661696e-05, + "loss": 0.1154, "step": 90665 }, { "epoch": 4.23, - "learning_rate": 1.1592517931648775e-05, - "loss": 0.0805, + "learning_rate": 2.160563556082568e-05, + "loss": 0.1773, "step": 90670 }, { "epoch": 4.23, - "learning_rate": 1.1592049130373635e-05, - "loss": 0.2807, + "learning_rate": 2.1605167490989655e-05, + "loss": 0.3573, "step": 90675 }, { "epoch": 4.23, - "learning_rate": 1.1591580329098495e-05, - "loss": 0.0884, + "learning_rate": 2.1604699421153635e-05, + "loss": 0.1099, "step": 90680 }, { "epoch": 4.23, - "learning_rate": 1.1591111527823358e-05, - "loss": 0.0178, + "learning_rate": 2.1604231351317615e-05, + "loss": 0.0201, "step": 90685 }, { "epoch": 4.23, - "learning_rate": 1.1590642726548218e-05, - "loss": 0.0513, + "learning_rate": 2.1603763281481598e-05, + "loss": 0.0537, "step": 90690 }, { "epoch": 4.23, - "learning_rate": 1.1590173925273078e-05, - "loss": 0.0441, + "learning_rate": 2.1603295211645578e-05, + "loss": 0.0576, "step": 90695 }, { "epoch": 4.23, - "learning_rate": 1.1589705123997938e-05, - "loss": 0.0771, + "learning_rate": 2.1602827141809558e-05, + "loss": 0.0641, "step": 90700 }, { "epoch": 4.23, - "learning_rate": 1.15892363227228e-05, - "loss": 0.0506, + "learning_rate": 2.1602359071973538e-05, + "loss": 0.1006, "step": 90705 }, { "epoch": 4.23, - "learning_rate": 1.158876752144766e-05, - "loss": 0.1414, + "learning_rate": 2.160189100213752e-05, + "loss": 0.0426, "step": 90710 }, { "epoch": 4.23, - "learning_rate": 1.158829872017252e-05, - "loss": 0.0922, + "learning_rate": 2.16014229323015e-05, + "loss": 0.1145, "step": 90715 }, { "epoch": 4.23, - "learning_rate": 1.158782991889738e-05, - "loss": 0.1986, + "learning_rate": 2.160095486246548e-05, + "loss": 0.144, "step": 90720 }, { "epoch": 4.23, - "learning_rate": 1.158736111762224e-05, - "loss": 0.2862, + "learning_rate": 2.1600486792629464e-05, + "loss": 0.2014, "step": 90725 }, { "epoch": 4.23, - "learning_rate": 1.1586892316347101e-05, - "loss": 0.1357, + "learning_rate": 2.1600018722793444e-05, + "loss": 0.0722, "step": 90730 }, { "epoch": 4.23, - "learning_rate": 1.1586423515071963e-05, - "loss": 0.0374, + "learning_rate": 2.159955065295742e-05, + "loss": 0.0269, "step": 90735 }, { "epoch": 4.23, - "learning_rate": 1.1585954713796823e-05, - "loss": 0.0397, + "learning_rate": 2.15990825831214e-05, + "loss": 0.0364, "step": 90740 }, { "epoch": 4.23, - "learning_rate": 1.1585485912521684e-05, - "loss": 0.0721, + "learning_rate": 2.1598614513285383e-05, + "loss": 0.03, "step": 90745 }, { "epoch": 4.23, - "learning_rate": 1.1585017111246544e-05, - "loss": 0.0833, + "learning_rate": 2.1598146443449363e-05, + "loss": 0.0837, "step": 90750 }, { "epoch": 4.23, - "learning_rate": 1.1584548309971404e-05, - "loss": 0.0783, + "learning_rate": 2.1597678373613343e-05, + "loss": 0.1186, "step": 90755 }, { "epoch": 4.23, - "learning_rate": 1.1584079508696264e-05, - "loss": 0.1488, + "learning_rate": 2.1597210303777322e-05, + "loss": 0.0246, "step": 90760 }, { "epoch": 4.24, - "learning_rate": 1.1583610707421124e-05, - "loss": 0.1002, + "learning_rate": 2.1596742233941306e-05, + "loss": 0.1213, "step": 90765 }, { "epoch": 4.24, - "learning_rate": 1.1583141906145986e-05, - "loss": 0.2139, + "learning_rate": 2.1596274164105285e-05, + "loss": 0.1188, "step": 90770 }, { "epoch": 4.24, - "learning_rate": 1.1582673104870846e-05, - "loss": 0.3166, + "learning_rate": 2.1595806094269265e-05, + "loss": 0.2603, "step": 90775 }, { "epoch": 4.24, - "learning_rate": 1.1582204303595707e-05, - "loss": 0.1548, + "learning_rate": 2.1595338024433245e-05, + "loss": 0.059, "step": 90780 }, { "epoch": 4.24, - "learning_rate": 1.1581735502320569e-05, - "loss": 0.017, + "learning_rate": 2.1594869954597228e-05, + "loss": 0.0068, "step": 90785 }, { "epoch": 4.24, - "learning_rate": 1.1581266701045429e-05, - "loss": 0.0412, + "learning_rate": 2.1594401884761208e-05, + "loss": 0.0196, "step": 90790 }, { "epoch": 4.24, - "learning_rate": 1.1580797899770289e-05, - "loss": 0.0272, + "learning_rate": 2.1593933814925188e-05, + "loss": 0.0443, "step": 90795 }, { "epoch": 4.24, - "learning_rate": 1.1580329098495149e-05, - "loss": 0.0953, + "learning_rate": 2.1593465745089168e-05, + "loss": 0.0751, "step": 90800 }, { "epoch": 4.24, - "learning_rate": 1.1579860297220008e-05, - "loss": 0.2096, + "learning_rate": 2.1592997675253148e-05, + "loss": 0.0716, "step": 90805 }, { "epoch": 4.24, - "learning_rate": 1.157939149594487e-05, - "loss": 0.0589, + "learning_rate": 2.1592529605417127e-05, + "loss": 0.0929, "step": 90810 }, { "epoch": 4.24, - "learning_rate": 1.157892269466973e-05, - "loss": 0.1322, + "learning_rate": 2.1592061535581107e-05, + "loss": 0.2608, "step": 90815 }, { "epoch": 4.24, - "learning_rate": 1.157845389339459e-05, - "loss": 0.2027, + "learning_rate": 2.159159346574509e-05, + "loss": 0.067, "step": 90820 }, { "epoch": 4.24, - "learning_rate": 1.1577985092119453e-05, - "loss": 0.2512, + "learning_rate": 2.159112539590907e-05, + "loss": 0.2137, "step": 90825 }, { "epoch": 4.24, - "learning_rate": 1.1577516290844313e-05, - "loss": 0.0679, + "learning_rate": 2.159065732607305e-05, + "loss": 0.0805, "step": 90830 }, { "epoch": 4.24, - "learning_rate": 1.1577047489569173e-05, - "loss": 0.0326, + "learning_rate": 2.159018925623703e-05, + "loss": 0.0146, "step": 90835 }, { "epoch": 4.24, - "learning_rate": 1.1576578688294033e-05, - "loss": 0.041, + "learning_rate": 2.1589721186401013e-05, + "loss": 0.0297, "step": 90840 }, { "epoch": 4.24, - "learning_rate": 1.1576109887018893e-05, - "loss": 0.0604, + "learning_rate": 2.1589253116564993e-05, + "loss": 0.0605, "step": 90845 }, { "epoch": 4.24, - "learning_rate": 1.1575641085743755e-05, - "loss": 0.0379, + "learning_rate": 2.1588785046728973e-05, + "loss": 0.0378, "step": 90850 }, { "epoch": 4.24, - "learning_rate": 1.1575172284468615e-05, - "loss": 0.0647, + "learning_rate": 2.1588316976892956e-05, + "loss": 0.055, "step": 90855 }, { "epoch": 4.24, - "learning_rate": 1.1574703483193475e-05, - "loss": 0.0726, + "learning_rate": 2.1587848907056936e-05, + "loss": 0.0452, "step": 90860 }, { "epoch": 4.24, - "learning_rate": 1.1574234681918334e-05, - "loss": 0.0912, + "learning_rate": 2.1587380837220912e-05, + "loss": 0.1267, "step": 90865 }, { "epoch": 4.24, - "learning_rate": 1.1573765880643198e-05, - "loss": 0.0761, + "learning_rate": 2.1586912767384892e-05, + "loss": 0.072, "step": 90870 }, { "epoch": 4.24, - "learning_rate": 1.1573297079368058e-05, - "loss": 0.1042, + "learning_rate": 2.1586444697548875e-05, + "loss": 0.1715, "step": 90875 }, { "epoch": 4.24, - "learning_rate": 1.1572828278092918e-05, - "loss": 0.0479, + "learning_rate": 2.1585976627712855e-05, + "loss": 0.0771, "step": 90880 }, { "epoch": 4.24, - "learning_rate": 1.1572359476817778e-05, - "loss": 0.0345, + "learning_rate": 2.1585508557876835e-05, + "loss": 0.0158, "step": 90885 }, { "epoch": 4.24, - "learning_rate": 1.157189067554264e-05, - "loss": 0.0204, + "learning_rate": 2.1585040488040815e-05, + "loss": 0.0206, "step": 90890 }, { "epoch": 4.24, - "learning_rate": 1.15714218742675e-05, - "loss": 0.0444, + "learning_rate": 2.1584572418204798e-05, + "loss": 0.0403, "step": 90895 }, { "epoch": 4.24, - "learning_rate": 1.1570953072992359e-05, - "loss": 0.0423, + "learning_rate": 2.1584104348368778e-05, + "loss": 0.0343, "step": 90900 }, { "epoch": 4.24, - "learning_rate": 1.1570484271717219e-05, - "loss": 0.0727, + "learning_rate": 2.1583636278532757e-05, + "loss": 0.0523, "step": 90905 }, { "epoch": 4.24, - "learning_rate": 1.1570015470442079e-05, - "loss": 0.1087, + "learning_rate": 2.158316820869674e-05, + "loss": 0.0843, "step": 90910 }, { "epoch": 4.24, - "learning_rate": 1.156954666916694e-05, - "loss": 0.0887, + "learning_rate": 2.158270013886072e-05, + "loss": 0.0528, "step": 90915 }, { "epoch": 4.24, - "learning_rate": 1.1569077867891802e-05, - "loss": 0.1516, + "learning_rate": 2.15822320690247e-05, + "loss": 0.2703, "step": 90920 }, { "epoch": 4.24, - "learning_rate": 1.1568609066616662e-05, - "loss": 0.3522, + "learning_rate": 2.1581763999188677e-05, + "loss": 0.3033, "step": 90925 }, { "epoch": 4.24, - "learning_rate": 1.1568140265341524e-05, - "loss": 0.0869, + "learning_rate": 2.158129592935266e-05, + "loss": 0.0812, "step": 90930 }, { "epoch": 4.24, - "learning_rate": 1.1567671464066384e-05, - "loss": 0.0037, + "learning_rate": 2.158082785951664e-05, + "loss": 0.132, "step": 90935 }, { "epoch": 4.24, - "learning_rate": 1.1567202662791244e-05, - "loss": 0.0325, + "learning_rate": 2.158035978968062e-05, + "loss": 0.0373, "step": 90940 }, { "epoch": 4.24, - "learning_rate": 1.1566733861516104e-05, - "loss": 0.0397, + "learning_rate": 2.15798917198446e-05, + "loss": 0.0415, "step": 90945 }, { "epoch": 4.24, - "learning_rate": 1.1566265060240964e-05, - "loss": 0.0766, + "learning_rate": 2.1579423650008583e-05, + "loss": 0.0537, "step": 90950 }, { "epoch": 4.24, - "learning_rate": 1.1565796258965825e-05, - "loss": 0.0681, + "learning_rate": 2.1578955580172562e-05, + "loss": 0.0671, "step": 90955 }, { "epoch": 4.24, - "learning_rate": 1.1565327457690685e-05, - "loss": 0.0819, + "learning_rate": 2.1578487510336542e-05, + "loss": 0.0644, "step": 90960 }, { "epoch": 4.24, - "learning_rate": 1.1564858656415547e-05, - "loss": 0.1397, + "learning_rate": 2.1578019440500525e-05, + "loss": 0.0951, "step": 90965 }, { "epoch": 4.24, - "learning_rate": 1.1564389855140408e-05, - "loss": 0.1983, + "learning_rate": 2.1577551370664505e-05, + "loss": 0.1204, "step": 90970 }, { "epoch": 4.25, - "learning_rate": 1.1563921053865268e-05, - "loss": 0.2272, + "learning_rate": 2.1577083300828485e-05, + "loss": 0.125, "step": 90975 }, { "epoch": 4.25, - "learning_rate": 1.1563452252590128e-05, - "loss": 0.075, + "learning_rate": 2.1576615230992465e-05, + "loss": 0.0639, "step": 90980 }, { "epoch": 4.25, - "learning_rate": 1.1562983451314988e-05, - "loss": 0.084, + "learning_rate": 2.1576147161156448e-05, + "loss": 0.0908, "step": 90985 }, { "epoch": 4.25, - "learning_rate": 1.1562514650039848e-05, - "loss": 0.0441, + "learning_rate": 2.1575679091320425e-05, + "loss": 0.0109, "step": 90990 }, { "epoch": 4.25, - "learning_rate": 1.156204584876471e-05, - "loss": 0.1087, + "learning_rate": 2.1575211021484404e-05, + "loss": 0.0454, "step": 90995 }, { "epoch": 4.25, - "learning_rate": 1.156157704748957e-05, - "loss": 0.0661, + "learning_rate": 2.1574742951648384e-05, + "loss": 0.0288, "step": 91000 }, { "epoch": 4.25, - "learning_rate": 1.156110824621443e-05, - "loss": 0.1206, + "learning_rate": 2.1574274881812367e-05, + "loss": 0.0459, "step": 91005 }, { "epoch": 4.25, - "learning_rate": 1.1560639444939293e-05, - "loss": 0.0591, + "learning_rate": 2.1573806811976347e-05, + "loss": 0.062, "step": 91010 }, { "epoch": 4.25, - "learning_rate": 1.1560170643664153e-05, - "loss": 0.1529, + "learning_rate": 2.1573338742140327e-05, + "loss": 0.1107, "step": 91015 }, { "epoch": 4.25, - "learning_rate": 1.1559701842389013e-05, - "loss": 0.1996, + "learning_rate": 2.1572870672304307e-05, + "loss": 0.118, "step": 91020 }, { "epoch": 4.25, - "learning_rate": 1.1559233041113873e-05, - "loss": 0.3524, + "learning_rate": 2.157240260246829e-05, + "loss": 0.2292, "step": 91025 }, { "epoch": 4.25, - "learning_rate": 1.1558764239838733e-05, - "loss": 0.0991, + "learning_rate": 2.157193453263227e-05, + "loss": 0.0604, "step": 91030 }, { "epoch": 4.25, - "learning_rate": 1.1558295438563594e-05, - "loss": 0.0129, + "learning_rate": 2.157146646279625e-05, + "loss": 0.0176, "step": 91035 }, { "epoch": 4.25, - "learning_rate": 1.1557826637288454e-05, - "loss": 0.0498, + "learning_rate": 2.1570998392960233e-05, + "loss": 0.0614, "step": 91040 }, { "epoch": 4.25, - "learning_rate": 1.1557357836013314e-05, - "loss": 0.0546, + "learning_rate": 2.1570530323124213e-05, + "loss": 0.0274, "step": 91045 }, { "epoch": 4.25, - "learning_rate": 1.1556889034738174e-05, - "loss": 0.0598, + "learning_rate": 2.157006225328819e-05, + "loss": 0.0568, "step": 91050 }, { "epoch": 4.25, - "learning_rate": 1.1556420233463036e-05, - "loss": 0.0731, + "learning_rate": 2.156959418345217e-05, + "loss": 0.0992, "step": 91055 }, { "epoch": 4.25, - "learning_rate": 1.1555951432187897e-05, - "loss": 0.1104, + "learning_rate": 2.1569126113616152e-05, + "loss": 0.0587, "step": 91060 }, { "epoch": 4.25, - "learning_rate": 1.1555482630912757e-05, - "loss": 0.1065, + "learning_rate": 2.1568658043780132e-05, + "loss": 0.1212, "step": 91065 }, { "epoch": 4.25, - "learning_rate": 1.1555013829637617e-05, - "loss": 0.243, + "learning_rate": 2.1568189973944112e-05, + "loss": 0.1668, "step": 91070 }, { "epoch": 4.25, - "learning_rate": 1.1554545028362479e-05, - "loss": 0.2123, + "learning_rate": 2.156772190410809e-05, + "loss": 0.2372, "step": 91075 }, { "epoch": 4.25, - "learning_rate": 1.1554076227087339e-05, - "loss": 0.0422, + "learning_rate": 2.1567253834272075e-05, + "loss": 0.0517, "step": 91080 }, { "epoch": 4.25, - "learning_rate": 1.1553607425812199e-05, - "loss": 0.0264, + "learning_rate": 2.1566785764436055e-05, + "loss": 0.0125, "step": 91085 }, { "epoch": 4.25, - "learning_rate": 1.1553138624537059e-05, - "loss": 0.0343, + "learning_rate": 2.1566317694600034e-05, + "loss": 0.0598, "step": 91090 }, { "epoch": 4.25, - "learning_rate": 1.155266982326192e-05, - "loss": 0.0504, + "learning_rate": 2.1565849624764018e-05, + "loss": 0.0434, "step": 91095 }, { "epoch": 4.25, - "learning_rate": 1.155220102198678e-05, - "loss": 0.0373, + "learning_rate": 2.1565381554927997e-05, + "loss": 0.0561, "step": 91100 }, { "epoch": 4.25, - "learning_rate": 1.1551732220711642e-05, - "loss": 0.0501, + "learning_rate": 2.1564913485091977e-05, + "loss": 0.1006, "step": 91105 }, { "epoch": 4.25, - "learning_rate": 1.1551263419436502e-05, - "loss": 0.0663, + "learning_rate": 2.1564445415255957e-05, + "loss": 0.0679, "step": 91110 }, { "epoch": 4.25, - "learning_rate": 1.1550794618161363e-05, - "loss": 0.0887, + "learning_rate": 2.1563977345419937e-05, + "loss": 0.073, "step": 91115 }, { "epoch": 4.25, - "learning_rate": 1.1550325816886223e-05, - "loss": 0.1535, + "learning_rate": 2.1563509275583917e-05, + "loss": 0.112, "step": 91120 }, { "epoch": 4.25, - "learning_rate": 1.1549857015611083e-05, - "loss": 0.2512, + "learning_rate": 2.1563041205747897e-05, + "loss": 0.2283, "step": 91125 }, { "epoch": 4.25, - "learning_rate": 1.1549388214335943e-05, - "loss": 0.0946, + "learning_rate": 2.1562573135911876e-05, + "loss": 0.1051, "step": 91130 }, { "epoch": 4.25, - "learning_rate": 1.1548919413060805e-05, - "loss": 0.0098, + "learning_rate": 2.156210506607586e-05, + "loss": 0.0097, "step": 91135 }, { "epoch": 4.25, - "learning_rate": 1.1548450611785665e-05, - "loss": 0.0319, + "learning_rate": 2.156163699623984e-05, + "loss": 0.0191, "step": 91140 }, { "epoch": 4.25, - "learning_rate": 1.1547981810510525e-05, - "loss": 0.0227, + "learning_rate": 2.156116892640382e-05, + "loss": 0.0453, "step": 91145 }, { "epoch": 4.25, - "learning_rate": 1.1547513009235386e-05, - "loss": 0.0815, + "learning_rate": 2.1560700856567802e-05, + "loss": 0.0791, "step": 91150 }, { "epoch": 4.25, - "learning_rate": 1.1547044207960248e-05, - "loss": 0.0746, + "learning_rate": 2.1560232786731782e-05, + "loss": 0.099, "step": 91155 }, { "epoch": 4.25, - "learning_rate": 1.1546575406685108e-05, - "loss": 0.1372, + "learning_rate": 2.1559764716895762e-05, + "loss": 0.0385, "step": 91160 }, { "epoch": 4.25, - "learning_rate": 1.1546106605409968e-05, - "loss": 0.0507, + "learning_rate": 2.1559296647059742e-05, + "loss": 0.0706, "step": 91165 }, { "epoch": 4.25, - "learning_rate": 1.1545637804134828e-05, - "loss": 0.2027, + "learning_rate": 2.1558828577223725e-05, + "loss": 0.1438, "step": 91170 }, { "epoch": 4.25, - "learning_rate": 1.154516900285969e-05, - "loss": 0.1872, + "learning_rate": 2.1558360507387705e-05, + "loss": 0.2672, "step": 91175 }, { "epoch": 4.25, - "learning_rate": 1.154470020158455e-05, - "loss": 0.0551, + "learning_rate": 2.155789243755168e-05, + "loss": 0.094, "step": 91180 }, { "epoch": 4.25, - "learning_rate": 1.154423140030941e-05, - "loss": 0.046, + "learning_rate": 2.155742436771566e-05, + "loss": 0.0476, "step": 91185 }, { "epoch": 4.26, - "learning_rate": 1.1543762599034269e-05, - "loss": 0.028, + "learning_rate": 2.1556956297879644e-05, + "loss": 0.0433, "step": 91190 }, { "epoch": 4.26, - "learning_rate": 1.1543293797759132e-05, - "loss": 0.0266, + "learning_rate": 2.1556488228043624e-05, + "loss": 0.0521, "step": 91195 }, { "epoch": 4.26, - "learning_rate": 1.1542824996483992e-05, - "loss": 0.067, + "learning_rate": 2.1556020158207604e-05, + "loss": 0.1223, "step": 91200 }, { "epoch": 4.26, - "learning_rate": 1.1542356195208852e-05, - "loss": 0.0767, + "learning_rate": 2.1555552088371584e-05, + "loss": 0.0273, "step": 91205 }, { "epoch": 4.26, - "learning_rate": 1.1541887393933712e-05, - "loss": 0.0836, + "learning_rate": 2.1555084018535567e-05, + "loss": 0.0776, "step": 91210 }, { "epoch": 4.26, - "learning_rate": 1.1541418592658574e-05, - "loss": 0.1233, + "learning_rate": 2.1554615948699547e-05, + "loss": 0.2161, "step": 91215 }, { "epoch": 4.26, - "learning_rate": 1.1540949791383434e-05, - "loss": 0.1101, + "learning_rate": 2.1554147878863527e-05, + "loss": 0.2016, "step": 91220 }, { "epoch": 4.26, - "learning_rate": 1.1540480990108294e-05, - "loss": 0.1987, + "learning_rate": 2.155367980902751e-05, + "loss": 0.3704, "step": 91225 }, { "epoch": 4.26, - "learning_rate": 1.1540012188833154e-05, - "loss": 0.057, + "learning_rate": 2.155321173919149e-05, + "loss": 0.0955, "step": 91230 }, { "epoch": 4.26, - "learning_rate": 1.1539543387558014e-05, - "loss": 0.0362, + "learning_rate": 2.155274366935547e-05, + "loss": 0.0455, "step": 91235 }, { "epoch": 4.26, - "learning_rate": 1.1539074586282875e-05, - "loss": 0.0245, + "learning_rate": 2.1552275599519446e-05, + "loss": 0.0508, "step": 91240 }, { "epoch": 4.26, - "learning_rate": 1.1538605785007737e-05, - "loss": 0.0242, + "learning_rate": 2.155180752968343e-05, + "loss": 0.0564, "step": 91245 }, { "epoch": 4.26, - "learning_rate": 1.1538136983732597e-05, - "loss": 0.1013, + "learning_rate": 2.155133945984741e-05, + "loss": 0.0604, "step": 91250 }, { "epoch": 4.26, - "learning_rate": 1.1537668182457458e-05, - "loss": 0.0648, + "learning_rate": 2.155087139001139e-05, + "loss": 0.0565, "step": 91255 }, { "epoch": 4.26, - "learning_rate": 1.1537199381182318e-05, - "loss": 0.0399, + "learning_rate": 2.155040332017537e-05, + "loss": 0.0568, "step": 91260 }, { "epoch": 4.26, - "learning_rate": 1.1536730579907178e-05, - "loss": 0.0487, + "learning_rate": 2.1549935250339352e-05, + "loss": 0.1572, "step": 91265 }, { "epoch": 4.26, - "learning_rate": 1.1536261778632038e-05, - "loss": 0.2504, + "learning_rate": 2.154946718050333e-05, + "loss": 0.1262, "step": 91270 }, { "epoch": 4.26, - "learning_rate": 1.1535792977356898e-05, - "loss": 0.151, + "learning_rate": 2.154899911066731e-05, + "loss": 0.26, "step": 91275 }, { "epoch": 4.26, - "learning_rate": 1.153532417608176e-05, - "loss": 0.0723, + "learning_rate": 2.1548531040831295e-05, + "loss": 0.1309, "step": 91280 }, { "epoch": 4.26, - "learning_rate": 1.153485537480662e-05, - "loss": 0.0751, + "learning_rate": 2.1548062970995274e-05, + "loss": 0.0169, "step": 91285 }, { "epoch": 4.26, - "learning_rate": 1.1534386573531481e-05, - "loss": 0.0415, + "learning_rate": 2.1547594901159254e-05, + "loss": 0.0186, "step": 91290 }, { "epoch": 4.26, - "learning_rate": 1.1533917772256343e-05, - "loss": 0.0405, + "learning_rate": 2.1547126831323234e-05, + "loss": 0.0568, "step": 91295 }, { "epoch": 4.26, - "learning_rate": 1.1533448970981203e-05, - "loss": 0.0208, + "learning_rate": 2.1546658761487217e-05, + "loss": 0.0366, "step": 91300 }, { "epoch": 4.26, - "learning_rate": 1.1532980169706063e-05, - "loss": 0.025, + "learning_rate": 2.1546190691651194e-05, + "loss": 0.0275, "step": 91305 }, { "epoch": 4.26, - "learning_rate": 1.1532511368430923e-05, - "loss": 0.0711, + "learning_rate": 2.1545722621815174e-05, + "loss": 0.0608, "step": 91310 }, { "epoch": 4.26, - "learning_rate": 1.1532042567155783e-05, - "loss": 0.0693, + "learning_rate": 2.1545254551979153e-05, + "loss": 0.1344, "step": 91315 }, { "epoch": 4.26, - "learning_rate": 1.1531573765880644e-05, - "loss": 0.2113, + "learning_rate": 2.1544786482143137e-05, + "loss": 0.1155, "step": 91320 }, { "epoch": 4.26, - "learning_rate": 1.1531104964605504e-05, - "loss": 0.1946, + "learning_rate": 2.1544318412307116e-05, + "loss": 0.1856, "step": 91325 }, { "epoch": 4.26, - "learning_rate": 1.1530636163330364e-05, - "loss": 0.0494, + "learning_rate": 2.1543850342471096e-05, + "loss": 0.0874, "step": 91330 }, { "epoch": 4.26, - "learning_rate": 1.1530167362055228e-05, - "loss": 0.0235, + "learning_rate": 2.154338227263508e-05, + "loss": 0.0507, "step": 91335 }, { "epoch": 4.26, - "learning_rate": 1.1529698560780087e-05, - "loss": 0.0209, + "learning_rate": 2.154291420279906e-05, + "loss": 0.0148, "step": 91340 }, { "epoch": 4.26, - "learning_rate": 1.1529229759504947e-05, - "loss": 0.0524, + "learning_rate": 2.154244613296304e-05, + "loss": 0.0537, "step": 91345 }, { "epoch": 4.26, - "learning_rate": 1.1528760958229807e-05, - "loss": 0.0715, + "learning_rate": 2.154197806312702e-05, + "loss": 0.0341, "step": 91350 }, { "epoch": 4.26, - "learning_rate": 1.1528292156954667e-05, - "loss": 0.0529, + "learning_rate": 2.1541509993291002e-05, + "loss": 0.0461, "step": 91355 }, { "epoch": 4.26, - "learning_rate": 1.1527823355679529e-05, - "loss": 0.0976, + "learning_rate": 2.1541041923454982e-05, + "loss": 0.1653, "step": 91360 }, { "epoch": 4.26, - "learning_rate": 1.1527354554404389e-05, - "loss": 0.2976, + "learning_rate": 2.154057385361896e-05, + "loss": 0.1148, "step": 91365 }, { "epoch": 4.26, - "learning_rate": 1.1526885753129249e-05, - "loss": 0.1699, + "learning_rate": 2.1540105783782938e-05, + "loss": 0.2113, "step": 91370 }, { "epoch": 4.26, - "learning_rate": 1.1526416951854109e-05, - "loss": 0.1805, + "learning_rate": 2.153963771394692e-05, + "loss": 0.2968, "step": 91375 }, { "epoch": 4.26, - "learning_rate": 1.1525948150578969e-05, - "loss": 0.0969, + "learning_rate": 2.15391696441109e-05, + "loss": 0.0837, "step": 91380 }, { "epoch": 4.26, - "learning_rate": 1.1525479349303832e-05, - "loss": 0.0386, + "learning_rate": 2.153870157427488e-05, + "loss": 0.0857, "step": 91385 }, { "epoch": 4.26, - "learning_rate": 1.1525010548028692e-05, - "loss": 0.0124, + "learning_rate": 2.153823350443886e-05, + "loss": 0.0227, "step": 91390 }, { "epoch": 4.26, - "learning_rate": 1.1524541746753552e-05, - "loss": 0.081, + "learning_rate": 2.1537765434602844e-05, + "loss": 0.0285, "step": 91395 }, { "epoch": 4.26, - "learning_rate": 1.1524072945478413e-05, - "loss": 0.0536, + "learning_rate": 2.1537297364766824e-05, + "loss": 0.0208, "step": 91400 }, { "epoch": 4.27, - "learning_rate": 1.1523604144203273e-05, - "loss": 0.0579, + "learning_rate": 2.1536829294930804e-05, + "loss": 0.0864, "step": 91405 }, { "epoch": 4.27, - "learning_rate": 1.1523135342928133e-05, - "loss": 0.0607, + "learning_rate": 2.1536361225094787e-05, + "loss": 0.0828, "step": 91410 }, { "epoch": 4.27, - "learning_rate": 1.1522666541652993e-05, - "loss": 0.1101, + "learning_rate": 2.1535893155258767e-05, + "loss": 0.1288, "step": 91415 }, { "epoch": 4.27, - "learning_rate": 1.1522197740377853e-05, - "loss": 0.2055, + "learning_rate": 2.1535425085422746e-05, + "loss": 0.1451, "step": 91420 }, { "epoch": 4.27, - "learning_rate": 1.1521728939102715e-05, - "loss": 0.284, + "learning_rate": 2.1534957015586726e-05, + "loss": 0.4224, "step": 91425 }, { "epoch": 4.27, - "learning_rate": 1.1521260137827576e-05, - "loss": 0.1254, + "learning_rate": 2.1534488945750706e-05, + "loss": 0.0565, "step": 91430 }, { "epoch": 4.27, - "learning_rate": 1.1520791336552436e-05, - "loss": 0.0306, + "learning_rate": 2.1534020875914686e-05, + "loss": 0.0283, "step": 91435 }, { "epoch": 4.27, - "learning_rate": 1.1520322535277298e-05, - "loss": 0.0168, + "learning_rate": 2.1533552806078666e-05, + "loss": 0.0208, "step": 91440 }, { "epoch": 4.27, - "learning_rate": 1.1519853734002158e-05, - "loss": 0.059, + "learning_rate": 2.1533084736242646e-05, + "loss": 0.0329, "step": 91445 }, { "epoch": 4.27, - "learning_rate": 1.1519384932727018e-05, - "loss": 0.0459, + "learning_rate": 2.153261666640663e-05, + "loss": 0.0286, "step": 91450 }, { "epoch": 4.27, - "learning_rate": 1.1518916131451878e-05, - "loss": 0.069, + "learning_rate": 2.153214859657061e-05, + "loss": 0.0664, "step": 91455 }, { "epoch": 4.27, - "learning_rate": 1.1518447330176738e-05, - "loss": 0.0843, + "learning_rate": 2.153168052673459e-05, + "loss": 0.1638, "step": 91460 }, { "epoch": 4.27, - "learning_rate": 1.15179785289016e-05, - "loss": 0.5684, + "learning_rate": 2.153121245689857e-05, + "loss": 0.1694, "step": 91465 }, { "epoch": 4.27, - "learning_rate": 1.151750972762646e-05, - "loss": 0.1952, + "learning_rate": 2.153074438706255e-05, + "loss": 0.0873, "step": 91470 }, { "epoch": 4.27, - "learning_rate": 1.1517040926351321e-05, - "loss": 0.3945, + "learning_rate": 2.153027631722653e-05, + "loss": 0.2008, "step": 91475 }, { "epoch": 4.27, - "learning_rate": 1.1516572125076183e-05, - "loss": 0.04, + "learning_rate": 2.152980824739051e-05, + "loss": 0.0683, "step": 91480 }, { "epoch": 4.27, - "learning_rate": 1.1516103323801042e-05, - "loss": 0.0278, + "learning_rate": 2.1529340177554494e-05, + "loss": 0.0727, "step": 91485 }, { "epoch": 4.27, - "learning_rate": 1.1515634522525902e-05, - "loss": 0.0286, + "learning_rate": 2.1528872107718474e-05, + "loss": 0.0307, "step": 91490 }, { "epoch": 4.27, - "learning_rate": 1.1515165721250762e-05, - "loss": 0.0333, + "learning_rate": 2.152840403788245e-05, + "loss": 0.0311, "step": 91495 }, { "epoch": 4.27, - "learning_rate": 1.1514696919975622e-05, - "loss": 0.084, + "learning_rate": 2.152793596804643e-05, + "loss": 0.09, "step": 91500 }, { "epoch": 4.27, - "learning_rate": 1.1514228118700484e-05, - "loss": 0.0698, + "learning_rate": 2.1527467898210414e-05, + "loss": 0.0235, "step": 91505 }, { "epoch": 4.27, - "learning_rate": 1.1513759317425344e-05, - "loss": 0.0549, + "learning_rate": 2.1526999828374393e-05, + "loss": 0.0441, "step": 91510 }, { "epoch": 4.27, - "learning_rate": 1.1513290516150204e-05, - "loss": 0.0892, + "learning_rate": 2.1526531758538373e-05, + "loss": 0.1189, "step": 91515 }, { "epoch": 4.27, - "learning_rate": 1.1512821714875067e-05, - "loss": 0.1317, + "learning_rate": 2.1526063688702356e-05, + "loss": 0.1695, "step": 91520 }, { "epoch": 4.27, - "learning_rate": 1.1512352913599927e-05, - "loss": 0.2714, + "learning_rate": 2.1525595618866336e-05, + "loss": 0.1996, "step": 91525 }, { "epoch": 4.27, - "learning_rate": 1.1511884112324787e-05, - "loss": 0.0958, + "learning_rate": 2.1525127549030316e-05, + "loss": 0.0766, "step": 91530 }, { "epoch": 4.27, - "learning_rate": 1.1511415311049647e-05, - "loss": 0.0136, + "learning_rate": 2.1524659479194296e-05, + "loss": 0.027, "step": 91535 }, { "epoch": 4.27, - "learning_rate": 1.1510946509774507e-05, - "loss": 0.0703, + "learning_rate": 2.152419140935828e-05, + "loss": 0.0297, "step": 91540 }, { "epoch": 4.27, - "learning_rate": 1.1510477708499368e-05, - "loss": 0.0202, + "learning_rate": 2.152372333952226e-05, + "loss": 0.0451, "step": 91545 }, { "epoch": 4.27, - "learning_rate": 1.1510008907224228e-05, - "loss": 0.0117, + "learning_rate": 2.152325526968624e-05, + "loss": 0.0401, "step": 91550 }, { "epoch": 4.27, - "learning_rate": 1.1509540105949088e-05, - "loss": 0.0474, + "learning_rate": 2.152278719985022e-05, + "loss": 0.0465, "step": 91555 }, { "epoch": 4.27, - "learning_rate": 1.1509071304673948e-05, - "loss": 0.0763, + "learning_rate": 2.1522319130014198e-05, + "loss": 0.072, "step": 91560 }, { "epoch": 4.27, - "learning_rate": 1.150860250339881e-05, - "loss": 0.1485, + "learning_rate": 2.1521851060178178e-05, + "loss": 0.0744, "step": 91565 }, { "epoch": 4.27, - "learning_rate": 1.1508133702123672e-05, - "loss": 0.09, + "learning_rate": 2.1521382990342158e-05, + "loss": 0.103, "step": 91570 }, { "epoch": 4.27, - "learning_rate": 1.1507664900848531e-05, - "loss": 0.2797, + "learning_rate": 2.1520914920506138e-05, + "loss": 0.2843, "step": 91575 }, { "epoch": 4.27, - "learning_rate": 1.1507196099573391e-05, - "loss": 0.1036, + "learning_rate": 2.152044685067012e-05, + "loss": 0.0631, "step": 91580 }, { "epoch": 4.27, - "learning_rate": 1.1506727298298253e-05, - "loss": 0.0242, + "learning_rate": 2.15199787808341e-05, + "loss": 0.0046, "step": 91585 }, { "epoch": 4.27, - "learning_rate": 1.1506258497023113e-05, - "loss": 0.0397, + "learning_rate": 2.151951071099808e-05, + "loss": 0.0079, "step": 91590 }, { "epoch": 4.27, - "learning_rate": 1.1505789695747973e-05, - "loss": 0.0304, + "learning_rate": 2.1519042641162064e-05, + "loss": 0.0429, "step": 91595 }, { "epoch": 4.27, - "learning_rate": 1.1505320894472833e-05, - "loss": 0.0617, + "learning_rate": 2.1518574571326044e-05, + "loss": 0.0961, "step": 91600 }, { "epoch": 4.27, - "learning_rate": 1.1504852093197694e-05, - "loss": 0.0714, + "learning_rate": 2.1518106501490023e-05, + "loss": 0.0691, "step": 91605 }, { "epoch": 4.27, - "learning_rate": 1.1504383291922554e-05, - "loss": 0.0834, + "learning_rate": 2.1517638431654003e-05, + "loss": 0.0518, "step": 91610 }, { "epoch": 4.27, - "learning_rate": 1.1503914490647416e-05, - "loss": 0.1014, + "learning_rate": 2.1517170361817986e-05, + "loss": 0.0721, "step": 91615 }, { "epoch": 4.28, - "learning_rate": 1.1503445689372278e-05, - "loss": 0.1623, + "learning_rate": 2.1516702291981963e-05, + "loss": 0.1428, "step": 91620 }, { "epoch": 4.28, - "learning_rate": 1.1502976888097138e-05, - "loss": 0.2311, + "learning_rate": 2.1516234222145943e-05, + "loss": 0.3187, "step": 91625 }, { "epoch": 4.28, - "learning_rate": 1.1502508086821998e-05, - "loss": 0.0357, + "learning_rate": 2.1515766152309923e-05, + "loss": 0.0801, "step": 91630 }, { "epoch": 4.28, - "learning_rate": 1.1502039285546857e-05, - "loss": 0.0253, + "learning_rate": 2.1515298082473906e-05, + "loss": 0.0106, "step": 91635 }, { "epoch": 4.28, - "learning_rate": 1.1501570484271717e-05, - "loss": 0.0131, + "learning_rate": 2.1514830012637886e-05, + "loss": 0.0407, "step": 91640 }, { "epoch": 4.28, - "learning_rate": 1.1501101682996579e-05, - "loss": 0.0304, + "learning_rate": 2.1514361942801865e-05, + "loss": 0.0708, "step": 91645 }, { "epoch": 4.28, - "learning_rate": 1.1500632881721439e-05, - "loss": 0.0276, + "learning_rate": 2.151389387296585e-05, + "loss": 0.021, "step": 91650 }, { "epoch": 4.28, - "learning_rate": 1.1500164080446299e-05, - "loss": 0.0578, + "learning_rate": 2.151342580312983e-05, + "loss": 0.0699, "step": 91655 }, { "epoch": 4.28, - "learning_rate": 1.1499695279171162e-05, - "loss": 0.0585, + "learning_rate": 2.1512957733293808e-05, + "loss": 0.1055, "step": 91660 }, { "epoch": 4.28, - "learning_rate": 1.1499226477896022e-05, - "loss": 0.116, + "learning_rate": 2.1512489663457788e-05, + "loss": 0.1129, "step": 91665 }, { "epoch": 4.28, - "learning_rate": 1.1498757676620882e-05, - "loss": 0.1421, + "learning_rate": 2.151202159362177e-05, + "loss": 0.1656, "step": 91670 }, { "epoch": 4.28, - "learning_rate": 1.1498288875345742e-05, - "loss": 0.2871, + "learning_rate": 2.151155352378575e-05, + "loss": 0.2301, "step": 91675 }, { "epoch": 4.28, - "learning_rate": 1.1497820074070602e-05, - "loss": 0.0859, + "learning_rate": 2.151108545394973e-05, + "loss": 0.1125, "step": 91680 }, { "epoch": 4.28, - "learning_rate": 1.1497351272795464e-05, - "loss": 0.0371, + "learning_rate": 2.1510617384113707e-05, + "loss": 0.0368, "step": 91685 }, { "epoch": 4.28, - "learning_rate": 1.1496882471520323e-05, - "loss": 0.0224, + "learning_rate": 2.151014931427769e-05, + "loss": 0.0176, "step": 91690 }, { "epoch": 4.28, - "learning_rate": 1.1496413670245183e-05, - "loss": 0.0406, + "learning_rate": 2.150968124444167e-05, + "loss": 0.0471, "step": 91695 }, { "epoch": 4.28, - "learning_rate": 1.1495944868970043e-05, - "loss": 0.0528, + "learning_rate": 2.150921317460565e-05, + "loss": 0.0224, "step": 91700 }, { "epoch": 4.28, - "learning_rate": 1.1495476067694903e-05, - "loss": 0.081, + "learning_rate": 2.1508745104769633e-05, + "loss": 0.0664, "step": 91705 }, { "epoch": 4.28, - "learning_rate": 1.1495007266419767e-05, - "loss": 0.0952, + "learning_rate": 2.1508277034933613e-05, + "loss": 0.0319, "step": 91710 }, { "epoch": 4.28, - "learning_rate": 1.1494538465144627e-05, - "loss": 0.195, + "learning_rate": 2.1507808965097593e-05, + "loss": 0.0782, "step": 91715 }, { "epoch": 4.28, - "learning_rate": 1.1494069663869486e-05, - "loss": 0.1552, + "learning_rate": 2.1507340895261573e-05, + "loss": 0.2314, "step": 91720 }, { "epoch": 4.28, - "learning_rate": 1.1493600862594348e-05, - "loss": 0.3942, + "learning_rate": 2.1506872825425556e-05, + "loss": 0.3484, "step": 91725 }, { "epoch": 4.28, - "learning_rate": 1.1493132061319208e-05, - "loss": 0.1197, + "learning_rate": 2.1506404755589536e-05, + "loss": 0.0504, "step": 91730 }, { "epoch": 4.28, - "learning_rate": 1.1492663260044068e-05, - "loss": 0.0155, + "learning_rate": 2.1505936685753516e-05, + "loss": 0.0115, "step": 91735 }, { "epoch": 4.28, - "learning_rate": 1.1492194458768928e-05, - "loss": 0.0613, + "learning_rate": 2.1505468615917495e-05, + "loss": 0.038, "step": 91740 }, { "epoch": 4.28, - "learning_rate": 1.1491725657493788e-05, - "loss": 0.0329, + "learning_rate": 2.1505000546081475e-05, + "loss": 0.0631, "step": 91745 }, { "epoch": 4.28, - "learning_rate": 1.149125685621865e-05, - "loss": 0.0667, + "learning_rate": 2.1504532476245455e-05, + "loss": 0.0871, "step": 91750 }, { "epoch": 4.28, - "learning_rate": 1.1490788054943511e-05, - "loss": 0.0855, + "learning_rate": 2.1504064406409435e-05, + "loss": 0.0748, "step": 91755 }, { "epoch": 4.28, - "learning_rate": 1.1490319253668371e-05, - "loss": 0.0843, + "learning_rate": 2.1503596336573418e-05, + "loss": 0.1113, "step": 91760 }, { "epoch": 4.28, - "learning_rate": 1.1489850452393233e-05, - "loss": 0.0893, + "learning_rate": 2.1503128266737398e-05, + "loss": 0.04, "step": 91765 }, { "epoch": 4.28, - "learning_rate": 1.1489381651118093e-05, - "loss": 0.1834, + "learning_rate": 2.1502660196901378e-05, + "loss": 0.1797, "step": 91770 }, { "epoch": 4.28, - "learning_rate": 1.1488912849842953e-05, - "loss": 0.212, + "learning_rate": 2.1502192127065358e-05, + "loss": 0.2878, "step": 91775 }, { "epoch": 4.28, - "learning_rate": 1.1488444048567812e-05, - "loss": 0.0486, + "learning_rate": 2.150172405722934e-05, + "loss": 0.0517, "step": 91780 }, { "epoch": 4.28, - "learning_rate": 1.1487975247292672e-05, - "loss": 0.0235, + "learning_rate": 2.150125598739332e-05, + "loss": 0.0354, "step": 91785 }, { "epoch": 4.28, - "learning_rate": 1.1487506446017534e-05, - "loss": 0.0296, + "learning_rate": 2.15007879175573e-05, + "loss": 0.0198, "step": 91790 }, { "epoch": 4.28, - "learning_rate": 1.1487037644742394e-05, - "loss": 0.0367, + "learning_rate": 2.150031984772128e-05, + "loss": 0.054, "step": 91795 }, { "epoch": 4.28, - "learning_rate": 1.1486568843467256e-05, - "loss": 0.0374, + "learning_rate": 2.1499851777885263e-05, + "loss": 0.0156, "step": 91800 }, { "epoch": 4.28, - "learning_rate": 1.1486100042192117e-05, - "loss": 0.1294, + "learning_rate": 2.1499383708049243e-05, + "loss": 0.0488, "step": 91805 }, { "epoch": 4.28, - "learning_rate": 1.1485631240916977e-05, - "loss": 0.0709, + "learning_rate": 2.149891563821322e-05, + "loss": 0.0504, "step": 91810 }, { "epoch": 4.28, - "learning_rate": 1.1485162439641837e-05, - "loss": 0.1642, + "learning_rate": 2.14984475683772e-05, + "loss": 0.1195, "step": 91815 }, { "epoch": 4.28, - "learning_rate": 1.1484693638366697e-05, - "loss": 0.1697, + "learning_rate": 2.1497979498541183e-05, + "loss": 0.1193, "step": 91820 }, { "epoch": 4.28, - "learning_rate": 1.1484224837091557e-05, - "loss": 0.2103, + "learning_rate": 2.1497511428705163e-05, + "loss": 0.3017, "step": 91825 }, { "epoch": 4.28, - "learning_rate": 1.1483756035816419e-05, - "loss": 0.0815, + "learning_rate": 2.1497043358869142e-05, + "loss": 0.0333, "step": 91830 }, { "epoch": 4.29, - "learning_rate": 1.1483287234541278e-05, - "loss": 0.0419, + "learning_rate": 2.1496575289033126e-05, + "loss": 0.005, "step": 91835 }, { "epoch": 4.29, - "learning_rate": 1.1482818433266138e-05, - "loss": 0.0401, + "learning_rate": 2.1496107219197105e-05, + "loss": 0.0214, "step": 91840 }, { "epoch": 4.29, - "learning_rate": 1.1482349631991002e-05, - "loss": 0.1041, + "learning_rate": 2.1495639149361085e-05, + "loss": 0.0471, "step": 91845 }, { "epoch": 4.29, - "learning_rate": 1.1481880830715862e-05, - "loss": 0.0594, + "learning_rate": 2.1495171079525065e-05, + "loss": 0.041, "step": 91850 }, { "epoch": 4.29, - "learning_rate": 1.1481412029440722e-05, - "loss": 0.049, + "learning_rate": 2.1494703009689048e-05, + "loss": 0.0538, "step": 91855 }, { "epoch": 4.29, - "learning_rate": 1.1480943228165582e-05, - "loss": 0.055, + "learning_rate": 2.1494234939853028e-05, + "loss": 0.071, "step": 91860 }, { "epoch": 4.29, - "learning_rate": 1.1480474426890441e-05, - "loss": 0.1907, + "learning_rate": 2.1493766870017008e-05, + "loss": 0.1065, "step": 91865 }, { "epoch": 4.29, - "learning_rate": 1.1480005625615303e-05, - "loss": 0.1978, + "learning_rate": 2.1493298800180988e-05, + "loss": 0.1288, "step": 91870 }, { "epoch": 4.29, - "learning_rate": 1.1479536824340163e-05, - "loss": 0.2849, + "learning_rate": 2.1492830730344967e-05, + "loss": 0.2563, "step": 91875 }, { "epoch": 4.29, - "learning_rate": 1.1479068023065023e-05, - "loss": 0.0433, + "learning_rate": 2.1492362660508947e-05, + "loss": 0.0316, "step": 91880 }, { "epoch": 4.29, - "learning_rate": 1.1478599221789883e-05, - "loss": 0.0516, + "learning_rate": 2.1491894590672927e-05, + "loss": 0.0187, "step": 91885 }, { "epoch": 4.29, - "learning_rate": 1.1478130420514743e-05, - "loss": 0.0232, + "learning_rate": 2.149142652083691e-05, + "loss": 0.0641, "step": 91890 }, { "epoch": 4.29, - "learning_rate": 1.1477661619239606e-05, - "loss": 0.0767, + "learning_rate": 2.149095845100089e-05, + "loss": 0.0661, "step": 91895 }, { "epoch": 4.29, - "learning_rate": 1.1477192817964466e-05, - "loss": 0.0341, + "learning_rate": 2.149049038116487e-05, + "loss": 0.1115, "step": 91900 }, { "epoch": 4.29, - "learning_rate": 1.1476724016689326e-05, - "loss": 0.0907, + "learning_rate": 2.149002231132885e-05, + "loss": 0.0561, "step": 91905 }, { "epoch": 4.29, - "learning_rate": 1.1476255215414188e-05, - "loss": 0.1072, + "learning_rate": 2.1489554241492833e-05, + "loss": 0.1458, "step": 91910 }, { "epoch": 4.29, - "learning_rate": 1.1475786414139048e-05, - "loss": 0.1205, + "learning_rate": 2.1489086171656813e-05, + "loss": 0.0998, "step": 91915 }, { "epoch": 4.29, - "learning_rate": 1.1475317612863908e-05, - "loss": 0.1197, + "learning_rate": 2.1488618101820793e-05, + "loss": 0.1722, "step": 91920 }, { "epoch": 4.29, - "learning_rate": 1.1474848811588767e-05, - "loss": 0.1426, + "learning_rate": 2.1488150031984772e-05, + "loss": 0.2578, "step": 91925 }, { "epoch": 4.29, - "learning_rate": 1.1474380010313629e-05, - "loss": 0.1108, + "learning_rate": 2.1487681962148756e-05, + "loss": 0.0692, "step": 91930 }, { "epoch": 4.29, - "learning_rate": 1.1473911209038489e-05, - "loss": 0.0342, + "learning_rate": 2.1487213892312732e-05, + "loss": 0.0302, "step": 91935 }, { "epoch": 4.29, - "learning_rate": 1.147344240776335e-05, - "loss": 0.029, + "learning_rate": 2.1486745822476712e-05, + "loss": 0.0643, "step": 91940 }, { "epoch": 4.29, - "learning_rate": 1.147297360648821e-05, - "loss": 0.0302, + "learning_rate": 2.1486277752640695e-05, + "loss": 0.0616, "step": 91945 }, { "epoch": 4.29, - "learning_rate": 1.1472504805213072e-05, - "loss": 0.1783, + "learning_rate": 2.1485809682804675e-05, + "loss": 0.0381, "step": 91950 }, { "epoch": 4.29, - "learning_rate": 1.1472036003937932e-05, - "loss": 0.0747, + "learning_rate": 2.1485341612968655e-05, + "loss": 0.0915, "step": 91955 }, { "epoch": 4.29, - "learning_rate": 1.1471567202662792e-05, - "loss": 0.0703, + "learning_rate": 2.1484873543132635e-05, + "loss": 0.1649, "step": 91960 }, { "epoch": 4.29, - "learning_rate": 1.1471098401387652e-05, - "loss": 0.0686, + "learning_rate": 2.1484405473296618e-05, + "loss": 0.089, "step": 91965 }, { "epoch": 4.29, - "learning_rate": 1.1470629600112514e-05, - "loss": 0.2284, + "learning_rate": 2.1483937403460598e-05, + "loss": 0.2497, "step": 91970 }, { "epoch": 4.29, - "learning_rate": 1.1470160798837374e-05, - "loss": 0.2897, + "learning_rate": 2.1483469333624577e-05, + "loss": 0.2692, "step": 91975 }, { "epoch": 4.29, - "learning_rate": 1.1469691997562234e-05, - "loss": 0.0665, + "learning_rate": 2.1483001263788557e-05, + "loss": 0.049, "step": 91980 }, { "epoch": 4.29, - "learning_rate": 1.1469223196287095e-05, - "loss": 0.0452, + "learning_rate": 2.148253319395254e-05, + "loss": 0.0372, "step": 91985 }, { "epoch": 4.29, - "learning_rate": 1.1468754395011957e-05, - "loss": 0.0176, + "learning_rate": 2.148206512411652e-05, + "loss": 0.0175, "step": 91990 }, { "epoch": 4.29, - "learning_rate": 1.1468285593736817e-05, - "loss": 0.0459, + "learning_rate": 2.14815970542805e-05, + "loss": 0.1069, "step": 91995 }, { "epoch": 4.29, - "learning_rate": 1.1467816792461677e-05, - "loss": 0.0803, + "learning_rate": 2.1481128984444476e-05, + "loss": 0.0515, "step": 92000 }, { "epoch": 4.29, - "learning_rate": 1.1467347991186537e-05, - "loss": 0.1029, + "learning_rate": 2.148066091460846e-05, + "loss": 0.0726, "step": 92005 }, { "epoch": 4.29, - "learning_rate": 1.1466879189911398e-05, - "loss": 0.118, + "learning_rate": 2.148019284477244e-05, + "loss": 0.109, "step": 92010 }, { "epoch": 4.29, - "learning_rate": 1.1466410388636258e-05, - "loss": 0.2179, + "learning_rate": 2.147972477493642e-05, + "loss": 0.0669, "step": 92015 }, { "epoch": 4.29, - "learning_rate": 1.1465941587361118e-05, - "loss": 0.1018, + "learning_rate": 2.1479256705100403e-05, + "loss": 0.2292, "step": 92020 }, { "epoch": 4.29, - "learning_rate": 1.1465472786085978e-05, - "loss": 0.1854, + "learning_rate": 2.1478788635264382e-05, + "loss": 0.2595, "step": 92025 }, { "epoch": 4.29, - "learning_rate": 1.1465003984810838e-05, - "loss": 0.0795, + "learning_rate": 2.1478320565428362e-05, + "loss": 0.0763, "step": 92030 }, { "epoch": 4.29, - "learning_rate": 1.1464535183535701e-05, - "loss": 0.0179, + "learning_rate": 2.1477852495592342e-05, + "loss": 0.0126, "step": 92035 }, { "epoch": 4.29, - "learning_rate": 1.1464066382260561e-05, - "loss": 0.0337, + "learning_rate": 2.1477384425756325e-05, + "loss": 0.0411, "step": 92040 }, { "epoch": 4.29, - "learning_rate": 1.1463597580985421e-05, - "loss": 0.0106, + "learning_rate": 2.1476916355920305e-05, + "loss": 0.0511, "step": 92045 }, { "epoch": 4.3, - "learning_rate": 1.1463128779710283e-05, - "loss": 0.0376, + "learning_rate": 2.1476448286084285e-05, + "loss": 0.0514, "step": 92050 }, { "epoch": 4.3, - "learning_rate": 1.1462659978435143e-05, - "loss": 0.0449, + "learning_rate": 2.1475980216248265e-05, + "loss": 0.0783, "step": 92055 }, { "epoch": 4.3, - "learning_rate": 1.1462191177160003e-05, - "loss": 0.0672, + "learning_rate": 2.1475512146412248e-05, + "loss": 0.0667, "step": 92060 }, { "epoch": 4.3, - "learning_rate": 1.1461722375884863e-05, - "loss": 0.088, + "learning_rate": 2.1475044076576224e-05, + "loss": 0.0855, "step": 92065 }, { "epoch": 4.3, - "learning_rate": 1.1461253574609722e-05, - "loss": 0.1922, + "learning_rate": 2.1474576006740204e-05, + "loss": 0.1089, "step": 92070 }, { "epoch": 4.3, - "learning_rate": 1.1460784773334584e-05, - "loss": 0.2762, + "learning_rate": 2.1474107936904187e-05, + "loss": 0.2828, "step": 92075 }, { "epoch": 4.3, - "learning_rate": 1.1460315972059446e-05, - "loss": 0.0693, + "learning_rate": 2.1473639867068167e-05, + "loss": 0.0702, "step": 92080 }, { "epoch": 4.3, - "learning_rate": 1.1459847170784306e-05, - "loss": 0.027, + "learning_rate": 2.1473171797232147e-05, + "loss": 0.0202, "step": 92085 }, { "epoch": 4.3, - "learning_rate": 1.1459378369509167e-05, - "loss": 0.0088, + "learning_rate": 2.1472703727396127e-05, + "loss": 0.0623, "step": 92090 }, { "epoch": 4.3, - "learning_rate": 1.1458909568234027e-05, - "loss": 0.0344, + "learning_rate": 2.147223565756011e-05, + "loss": 0.0409, "step": 92095 }, { "epoch": 4.3, - "learning_rate": 1.1458440766958887e-05, - "loss": 0.0979, + "learning_rate": 2.147176758772409e-05, + "loss": 0.0509, "step": 92100 }, { "epoch": 4.3, - "learning_rate": 1.1457971965683747e-05, - "loss": 0.0436, + "learning_rate": 2.147129951788807e-05, + "loss": 0.1186, "step": 92105 }, { "epoch": 4.3, - "learning_rate": 1.1457503164408607e-05, - "loss": 0.0458, + "learning_rate": 2.147083144805205e-05, + "loss": 0.0887, "step": 92110 }, { "epoch": 4.3, - "learning_rate": 1.1457034363133469e-05, - "loss": 0.0857, + "learning_rate": 2.1470363378216033e-05, + "loss": 0.148, "step": 92115 }, { "epoch": 4.3, - "learning_rate": 1.1456565561858329e-05, - "loss": 0.1536, + "learning_rate": 2.1469895308380012e-05, + "loss": 0.0908, "step": 92120 }, { "epoch": 4.3, - "learning_rate": 1.145609676058319e-05, - "loss": 0.3376, + "learning_rate": 2.146942723854399e-05, + "loss": 0.2314, "step": 92125 }, { "epoch": 4.3, - "learning_rate": 1.1455627959308052e-05, - "loss": 0.074, + "learning_rate": 2.1468959168707972e-05, + "loss": 0.0768, "step": 92130 }, { "epoch": 4.3, - "learning_rate": 1.1455159158032912e-05, - "loss": 0.0353, + "learning_rate": 2.1468491098871952e-05, + "loss": 0.0244, "step": 92135 }, { "epoch": 4.3, - "learning_rate": 1.1454690356757772e-05, - "loss": 0.0516, + "learning_rate": 2.146802302903593e-05, + "loss": 0.0333, "step": 92140 }, { "epoch": 4.3, - "learning_rate": 1.1454221555482632e-05, - "loss": 0.0492, + "learning_rate": 2.146755495919991e-05, + "loss": 0.0358, "step": 92145 }, { "epoch": 4.3, - "learning_rate": 1.1453752754207492e-05, - "loss": 0.0188, + "learning_rate": 2.1467086889363895e-05, + "loss": 0.0314, "step": 92150 }, { "epoch": 4.3, - "learning_rate": 1.1453283952932353e-05, - "loss": 0.0925, + "learning_rate": 2.1466618819527875e-05, + "loss": 0.0634, "step": 92155 }, { "epoch": 4.3, - "learning_rate": 1.1452815151657213e-05, - "loss": 0.05, + "learning_rate": 2.1466150749691854e-05, + "loss": 0.0926, "step": 92160 }, { "epoch": 4.3, - "learning_rate": 1.1452346350382073e-05, - "loss": 0.1509, + "learning_rate": 2.1465682679855834e-05, + "loss": 0.0443, "step": 92165 }, { "epoch": 4.3, - "learning_rate": 1.1451877549106936e-05, - "loss": 0.1062, + "learning_rate": 2.1465214610019817e-05, + "loss": 0.2431, "step": 92170 }, { "epoch": 4.3, - "learning_rate": 1.1451408747831796e-05, - "loss": 0.2546, + "learning_rate": 2.1464746540183797e-05, + "loss": 0.2937, "step": 92175 }, { "epoch": 4.3, - "learning_rate": 1.1450939946556656e-05, - "loss": 0.0603, + "learning_rate": 2.1464278470347777e-05, + "loss": 0.0893, "step": 92180 }, { "epoch": 4.3, - "learning_rate": 1.1450471145281516e-05, - "loss": 0.0328, + "learning_rate": 2.1463810400511757e-05, + "loss": 0.0169, "step": 92185 }, { "epoch": 4.3, - "learning_rate": 1.1450002344006376e-05, - "loss": 0.0494, + "learning_rate": 2.1463342330675737e-05, + "loss": 0.0439, "step": 92190 }, { "epoch": 4.3, - "learning_rate": 1.1449533542731238e-05, - "loss": 0.0678, + "learning_rate": 2.1462874260839716e-05, + "loss": 0.0307, "step": 92195 }, { "epoch": 4.3, - "learning_rate": 1.1449064741456098e-05, - "loss": 0.0348, + "learning_rate": 2.1462406191003696e-05, + "loss": 0.066, "step": 92200 }, { "epoch": 4.3, - "learning_rate": 1.1448595940180958e-05, - "loss": 0.0759, + "learning_rate": 2.146193812116768e-05, + "loss": 0.0472, "step": 92205 }, { "epoch": 4.3, - "learning_rate": 1.1448127138905818e-05, - "loss": 0.0946, + "learning_rate": 2.146147005133166e-05, + "loss": 0.1657, "step": 92210 }, { "epoch": 4.3, - "learning_rate": 1.1447658337630677e-05, - "loss": 0.0649, + "learning_rate": 2.146100198149564e-05, + "loss": 0.1463, "step": 92215 }, { "epoch": 4.3, - "learning_rate": 1.144718953635554e-05, - "loss": 0.1014, + "learning_rate": 2.146053391165962e-05, + "loss": 0.0506, "step": 92220 }, { "epoch": 4.3, - "learning_rate": 1.14467207350804e-05, - "loss": 0.2581, + "learning_rate": 2.1460065841823602e-05, + "loss": 0.1736, "step": 92225 }, { "epoch": 4.3, - "learning_rate": 1.144625193380526e-05, - "loss": 0.1092, + "learning_rate": 2.1459597771987582e-05, + "loss": 0.0749, "step": 92230 }, { "epoch": 4.3, - "learning_rate": 1.1445783132530122e-05, - "loss": 0.0297, + "learning_rate": 2.1459129702151562e-05, + "loss": 0.0093, "step": 92235 }, { "epoch": 4.3, - "learning_rate": 1.1445314331254982e-05, - "loss": 0.0514, + "learning_rate": 2.145866163231554e-05, + "loss": 0.017, "step": 92240 }, { "epoch": 4.3, - "learning_rate": 1.1444845529979842e-05, - "loss": 0.0675, + "learning_rate": 2.1458193562479525e-05, + "loss": 0.0431, "step": 92245 }, { "epoch": 4.3, - "learning_rate": 1.1444376728704702e-05, - "loss": 0.1109, + "learning_rate": 2.1457725492643505e-05, + "loss": 0.0395, "step": 92250 }, { "epoch": 4.3, - "learning_rate": 1.1443907927429562e-05, - "loss": 0.0423, + "learning_rate": 2.145725742280748e-05, + "loss": 0.0593, "step": 92255 }, { "epoch": 4.3, - "learning_rate": 1.1443439126154424e-05, - "loss": 0.0663, + "learning_rate": 2.1456789352971464e-05, + "loss": 0.0776, "step": 92260 }, { "epoch": 4.31, - "learning_rate": 1.1442970324879285e-05, - "loss": 0.1463, + "learning_rate": 2.1456321283135444e-05, + "loss": 0.0764, "step": 92265 }, { "epoch": 4.31, - "learning_rate": 1.1442501523604145e-05, - "loss": 0.1144, + "learning_rate": 2.1455853213299424e-05, + "loss": 0.1312, "step": 92270 }, { "epoch": 4.31, - "learning_rate": 1.1442032722329007e-05, - "loss": 0.2856, + "learning_rate": 2.1455385143463404e-05, + "loss": 0.1732, "step": 92275 }, { "epoch": 4.31, - "learning_rate": 1.1441563921053867e-05, - "loss": 0.0755, + "learning_rate": 2.1454917073627387e-05, + "loss": 0.0624, "step": 92280 }, { "epoch": 4.31, - "learning_rate": 1.1441095119778727e-05, - "loss": 0.0132, + "learning_rate": 2.1454449003791367e-05, + "loss": 0.0564, "step": 92285 }, { "epoch": 4.31, - "learning_rate": 1.1440626318503587e-05, - "loss": 0.0207, + "learning_rate": 2.1453980933955347e-05, + "loss": 0.0139, "step": 92290 }, { "epoch": 4.31, - "learning_rate": 1.1440157517228447e-05, - "loss": 0.0565, + "learning_rate": 2.1453512864119326e-05, + "loss": 0.0444, "step": 92295 }, { "epoch": 4.31, - "learning_rate": 1.1439688715953308e-05, - "loss": 0.0273, + "learning_rate": 2.145304479428331e-05, + "loss": 0.0636, "step": 92300 }, { "epoch": 4.31, - "learning_rate": 1.1439219914678168e-05, - "loss": 0.0751, + "learning_rate": 2.145257672444729e-05, + "loss": 0.0561, "step": 92305 }, { "epoch": 4.31, - "learning_rate": 1.143875111340303e-05, - "loss": 0.0303, + "learning_rate": 2.145210865461127e-05, + "loss": 0.0824, "step": 92310 }, { "epoch": 4.31, - "learning_rate": 1.1438282312127891e-05, - "loss": 0.1336, + "learning_rate": 2.145164058477525e-05, + "loss": 0.1261, "step": 92315 }, { "epoch": 4.31, - "learning_rate": 1.1437813510852751e-05, - "loss": 0.0892, + "learning_rate": 2.145117251493923e-05, + "loss": 0.2007, "step": 92320 }, { "epoch": 4.31, - "learning_rate": 1.1437344709577611e-05, - "loss": 0.262, + "learning_rate": 2.145070444510321e-05, + "loss": 0.1951, "step": 92325 }, { "epoch": 4.31, - "learning_rate": 1.1436875908302471e-05, - "loss": 0.0757, + "learning_rate": 2.145023637526719e-05, + "loss": 0.0794, "step": 92330 }, { "epoch": 4.31, - "learning_rate": 1.1436407107027331e-05, - "loss": 0.0104, + "learning_rate": 2.144976830543117e-05, + "loss": 0.0723, "step": 92335 }, { "epoch": 4.31, - "learning_rate": 1.1435938305752193e-05, - "loss": 0.0194, + "learning_rate": 2.144930023559515e-05, + "loss": 0.0266, "step": 92340 }, { "epoch": 4.31, - "learning_rate": 1.1435469504477053e-05, - "loss": 0.0316, + "learning_rate": 2.144883216575913e-05, + "loss": 0.0535, "step": 92345 }, { "epoch": 4.31, - "learning_rate": 1.1435000703201913e-05, - "loss": 0.0225, + "learning_rate": 2.144836409592311e-05, + "loss": 0.0238, "step": 92350 }, { "epoch": 4.31, - "learning_rate": 1.1434531901926773e-05, - "loss": 0.1253, + "learning_rate": 2.1447896026087094e-05, + "loss": 0.039, "step": 92355 }, { "epoch": 4.31, - "learning_rate": 1.1434063100651636e-05, - "loss": 0.1119, + "learning_rate": 2.1447427956251074e-05, + "loss": 0.1133, "step": 92360 }, { "epoch": 4.31, - "learning_rate": 1.1433594299376496e-05, - "loss": 0.1393, + "learning_rate": 2.1446959886415054e-05, + "loss": 0.1282, "step": 92365 }, { "epoch": 4.31, - "learning_rate": 1.1433125498101356e-05, - "loss": 0.1641, + "learning_rate": 2.1446491816579037e-05, + "loss": 0.1855, "step": 92370 }, { "epoch": 4.31, - "learning_rate": 1.1432656696826216e-05, - "loss": 0.4148, + "learning_rate": 2.1446023746743017e-05, + "loss": 0.2506, "step": 92375 }, { "epoch": 4.31, - "learning_rate": 1.1432187895551077e-05, - "loss": 0.0839, + "learning_rate": 2.1445555676906993e-05, + "loss": 0.0554, "step": 92380 }, { "epoch": 4.31, - "learning_rate": 1.1431719094275937e-05, - "loss": 0.0213, + "learning_rate": 2.1445087607070973e-05, + "loss": 0.0348, "step": 92385 }, { "epoch": 4.31, - "learning_rate": 1.1431250293000797e-05, - "loss": 0.0586, + "learning_rate": 2.1444619537234956e-05, + "loss": 0.0356, "step": 92390 }, { "epoch": 4.31, - "learning_rate": 1.1430781491725657e-05, - "loss": 0.0278, + "learning_rate": 2.1444151467398936e-05, + "loss": 0.0699, "step": 92395 }, { "epoch": 4.31, - "learning_rate": 1.1430312690450519e-05, - "loss": 0.0343, + "learning_rate": 2.1443683397562916e-05, + "loss": 0.0837, "step": 92400 }, { "epoch": 4.31, - "learning_rate": 1.142984388917538e-05, - "loss": 0.0383, + "learning_rate": 2.1443215327726896e-05, + "loss": 0.1174, "step": 92405 }, { "epoch": 4.31, - "learning_rate": 1.142937508790024e-05, - "loss": 0.0766, + "learning_rate": 2.144274725789088e-05, + "loss": 0.0386, "step": 92410 }, { "epoch": 4.31, - "learning_rate": 1.14289062866251e-05, - "loss": 0.0804, + "learning_rate": 2.144227918805486e-05, + "loss": 0.1811, "step": 92415 }, { "epoch": 4.31, - "learning_rate": 1.1428437485349962e-05, - "loss": 0.0692, + "learning_rate": 2.144181111821884e-05, + "loss": 0.1609, "step": 92420 }, { "epoch": 4.31, - "learning_rate": 1.1427968684074822e-05, - "loss": 0.1649, + "learning_rate": 2.144134304838282e-05, + "loss": 0.2364, "step": 92425 }, { "epoch": 4.31, - "learning_rate": 1.1427499882799682e-05, - "loss": 0.0648, + "learning_rate": 2.1440874978546802e-05, + "loss": 0.0595, "step": 92430 }, { "epoch": 4.31, - "learning_rate": 1.1427031081524542e-05, - "loss": 0.0405, + "learning_rate": 2.144040690871078e-05, + "loss": 0.021, "step": 92435 }, { "epoch": 4.31, - "learning_rate": 1.1426562280249403e-05, - "loss": 0.0459, + "learning_rate": 2.1439938838874758e-05, + "loss": 0.0103, "step": 92440 }, { "epoch": 4.31, - "learning_rate": 1.1426093478974263e-05, - "loss": 0.0534, + "learning_rate": 2.143947076903874e-05, + "loss": 0.058, "step": 92445 }, { "epoch": 4.31, - "learning_rate": 1.1425624677699125e-05, - "loss": 0.0571, + "learning_rate": 2.143900269920272e-05, + "loss": 0.0835, "step": 92450 }, { "epoch": 4.31, - "learning_rate": 1.1425155876423985e-05, - "loss": 0.1281, + "learning_rate": 2.14385346293667e-05, + "loss": 0.0503, "step": 92455 }, { "epoch": 4.31, - "learning_rate": 1.1424687075148846e-05, - "loss": 0.1047, + "learning_rate": 2.143806655953068e-05, + "loss": 0.0941, "step": 92460 }, { "epoch": 4.31, - "learning_rate": 1.1424218273873706e-05, - "loss": 0.1761, + "learning_rate": 2.1437598489694664e-05, + "loss": 0.0833, "step": 92465 }, { "epoch": 4.31, - "learning_rate": 1.1423749472598566e-05, - "loss": 0.2301, + "learning_rate": 2.1437130419858644e-05, + "loss": 0.1539, "step": 92470 }, { "epoch": 4.32, - "learning_rate": 1.1423280671323426e-05, - "loss": 0.2127, + "learning_rate": 2.1436662350022624e-05, + "loss": 0.2661, "step": 92475 }, { "epoch": 4.32, - "learning_rate": 1.1422811870048288e-05, - "loss": 0.0692, + "learning_rate": 2.1436194280186603e-05, + "loss": 0.066, "step": 92480 }, { "epoch": 4.32, - "learning_rate": 1.1422343068773148e-05, - "loss": 0.0307, + "learning_rate": 2.1435726210350587e-05, + "loss": 0.0129, "step": 92485 }, { "epoch": 4.32, - "learning_rate": 1.1421874267498008e-05, - "loss": 0.0709, + "learning_rate": 2.1435258140514566e-05, + "loss": 0.0127, "step": 92490 }, { "epoch": 4.32, - "learning_rate": 1.142140546622287e-05, - "loss": 0.0676, + "learning_rate": 2.1434790070678546e-05, + "loss": 0.0629, "step": 92495 }, { "epoch": 4.32, - "learning_rate": 1.1420936664947731e-05, - "loss": 0.0358, + "learning_rate": 2.143432200084253e-05, + "loss": 0.0383, "step": 92500 }, { "epoch": 4.32, - "learning_rate": 1.1420467863672591e-05, - "loss": 0.109, + "learning_rate": 2.1433853931006506e-05, + "loss": 0.0395, "step": 92505 }, { "epoch": 4.32, - "learning_rate": 1.1419999062397451e-05, - "loss": 0.1057, + "learning_rate": 2.1433385861170486e-05, + "loss": 0.0606, "step": 92510 }, { "epoch": 4.32, - "learning_rate": 1.141953026112231e-05, - "loss": 0.1385, + "learning_rate": 2.1432917791334465e-05, + "loss": 0.1207, "step": 92515 }, { "epoch": 4.32, - "learning_rate": 1.1419061459847172e-05, - "loss": 0.1163, + "learning_rate": 2.143244972149845e-05, + "loss": 0.0971, "step": 92520 }, { "epoch": 4.32, - "learning_rate": 1.1418592658572032e-05, - "loss": 0.2456, + "learning_rate": 2.143198165166243e-05, + "loss": 0.1987, "step": 92525 }, { "epoch": 4.32, - "learning_rate": 1.1418123857296892e-05, - "loss": 0.0923, + "learning_rate": 2.1431513581826408e-05, + "loss": 0.071, "step": 92530 }, { "epoch": 4.32, - "learning_rate": 1.1417655056021752e-05, - "loss": 0.0199, + "learning_rate": 2.1431045511990388e-05, + "loss": 0.0165, "step": 92535 }, { "epoch": 4.32, - "learning_rate": 1.1417186254746612e-05, - "loss": 0.0505, + "learning_rate": 2.143057744215437e-05, + "loss": 0.031, "step": 92540 }, { "epoch": 4.32, - "learning_rate": 1.1416717453471475e-05, - "loss": 0.0625, + "learning_rate": 2.143010937231835e-05, + "loss": 0.0323, "step": 92545 }, { "epoch": 4.32, - "learning_rate": 1.1416248652196335e-05, - "loss": 0.0228, + "learning_rate": 2.142964130248233e-05, + "loss": 0.0566, "step": 92550 }, { "epoch": 4.32, - "learning_rate": 1.1415779850921195e-05, - "loss": 0.0506, + "learning_rate": 2.1429173232646314e-05, + "loss": 0.0546, "step": 92555 }, { "epoch": 4.32, - "learning_rate": 1.1415311049646057e-05, - "loss": 0.0614, + "learning_rate": 2.1428705162810294e-05, + "loss": 0.105, "step": 92560 }, { "epoch": 4.32, - "learning_rate": 1.1414842248370917e-05, - "loss": 0.1146, + "learning_rate": 2.1428237092974274e-05, + "loss": 0.0885, "step": 92565 }, { "epoch": 4.32, - "learning_rate": 1.1414373447095777e-05, - "loss": 0.1423, + "learning_rate": 2.142776902313825e-05, + "loss": 0.2199, "step": 92570 }, { "epoch": 4.32, - "learning_rate": 1.1413904645820637e-05, - "loss": 0.1711, + "learning_rate": 2.1427300953302233e-05, + "loss": 0.2795, "step": 92575 }, { "epoch": 4.32, - "learning_rate": 1.1413435844545497e-05, - "loss": 0.0486, + "learning_rate": 2.1426832883466213e-05, + "loss": 0.0732, "step": 92580 }, { "epoch": 4.32, - "learning_rate": 1.1412967043270358e-05, - "loss": 0.0206, + "learning_rate": 2.1426364813630193e-05, + "loss": 0.0473, "step": 92585 }, { "epoch": 4.32, - "learning_rate": 1.141249824199522e-05, - "loss": 0.0247, + "learning_rate": 2.1425896743794173e-05, + "loss": 0.0385, "step": 92590 }, { "epoch": 4.32, - "learning_rate": 1.141202944072008e-05, - "loss": 0.0561, + "learning_rate": 2.1425428673958156e-05, + "loss": 0.0106, "step": 92595 }, { "epoch": 4.32, - "learning_rate": 1.1411560639444942e-05, - "loss": 0.0752, + "learning_rate": 2.1424960604122136e-05, + "loss": 0.0665, "step": 92600 }, { "epoch": 4.32, - "learning_rate": 1.1411091838169801e-05, - "loss": 0.0944, + "learning_rate": 2.1424492534286116e-05, + "loss": 0.0502, "step": 92605 }, { "epoch": 4.32, - "learning_rate": 1.1410623036894661e-05, - "loss": 0.0962, + "learning_rate": 2.1424024464450096e-05, + "loss": 0.1122, "step": 92610 }, { "epoch": 4.32, - "learning_rate": 1.1410154235619521e-05, - "loss": 0.1155, + "learning_rate": 2.142355639461408e-05, + "loss": 0.1283, "step": 92615 }, { "epoch": 4.32, - "learning_rate": 1.1409685434344381e-05, - "loss": 0.1644, + "learning_rate": 2.142308832477806e-05, + "loss": 0.157, "step": 92620 }, { "epoch": 4.32, - "learning_rate": 1.1409216633069243e-05, - "loss": 0.3262, + "learning_rate": 2.142262025494204e-05, + "loss": 0.2141, "step": 92625 }, { "epoch": 4.32, - "learning_rate": 1.1408747831794103e-05, - "loss": 0.0713, + "learning_rate": 2.1422152185106018e-05, + "loss": 0.0668, "step": 92630 }, { "epoch": 4.32, - "learning_rate": 1.1408279030518964e-05, - "loss": 0.029, + "learning_rate": 2.1421684115269998e-05, + "loss": 0.0297, "step": 92635 }, { "epoch": 4.32, - "learning_rate": 1.1407810229243826e-05, - "loss": 0.0507, + "learning_rate": 2.1421216045433978e-05, + "loss": 0.0414, "step": 92640 }, { "epoch": 4.32, - "learning_rate": 1.1407341427968686e-05, - "loss": 0.0661, + "learning_rate": 2.1420747975597958e-05, + "loss": 0.0297, "step": 92645 }, { "epoch": 4.32, - "learning_rate": 1.1406872626693546e-05, - "loss": 0.0631, + "learning_rate": 2.142027990576194e-05, + "loss": 0.0548, "step": 92650 }, { "epoch": 4.32, - "learning_rate": 1.1406403825418406e-05, - "loss": 0.0711, + "learning_rate": 2.141981183592592e-05, + "loss": 0.036, "step": 92655 }, { "epoch": 4.32, - "learning_rate": 1.1405935024143266e-05, - "loss": 0.1305, + "learning_rate": 2.14193437660899e-05, + "loss": 0.0771, "step": 92660 }, { "epoch": 4.32, - "learning_rate": 1.1405466222868127e-05, - "loss": 0.1408, + "learning_rate": 2.141887569625388e-05, + "loss": 0.0628, "step": 92665 }, { "epoch": 4.32, - "learning_rate": 1.1404997421592987e-05, - "loss": 0.2081, + "learning_rate": 2.1418407626417864e-05, + "loss": 0.1362, "step": 92670 }, { "epoch": 4.32, - "learning_rate": 1.1404528620317847e-05, - "loss": 0.2834, + "learning_rate": 2.1417939556581843e-05, + "loss": 0.3234, "step": 92675 }, { "epoch": 4.32, - "learning_rate": 1.1404059819042707e-05, - "loss": 0.0993, + "learning_rate": 2.1417471486745823e-05, + "loss": 0.1041, "step": 92680 }, { "epoch": 4.32, - "learning_rate": 1.140359101776757e-05, - "loss": 0.0104, + "learning_rate": 2.1417003416909806e-05, + "loss": 0.0509, "step": 92685 }, { "epoch": 4.33, - "learning_rate": 1.140312221649243e-05, - "loss": 0.0497, + "learning_rate": 2.1416535347073786e-05, + "loss": 0.0407, "step": 92690 }, { "epoch": 4.33, - "learning_rate": 1.140265341521729e-05, - "loss": 0.0498, + "learning_rate": 2.1416067277237763e-05, + "loss": 0.032, "step": 92695 }, { "epoch": 4.33, - "learning_rate": 1.140218461394215e-05, - "loss": 0.0869, + "learning_rate": 2.1415599207401742e-05, + "loss": 0.063, "step": 92700 }, { "epoch": 4.33, - "learning_rate": 1.1401715812667012e-05, - "loss": 0.0429, + "learning_rate": 2.1415131137565726e-05, + "loss": 0.0557, "step": 92705 }, { "epoch": 4.33, - "learning_rate": 1.1401247011391872e-05, - "loss": 0.0689, + "learning_rate": 2.1414663067729705e-05, + "loss": 0.0625, "step": 92710 }, { "epoch": 4.33, - "learning_rate": 1.1400778210116732e-05, - "loss": 0.156, + "learning_rate": 2.1414194997893685e-05, + "loss": 0.1, "step": 92715 }, { "epoch": 4.33, - "learning_rate": 1.1400309408841592e-05, - "loss": 0.1198, + "learning_rate": 2.1413726928057665e-05, + "loss": 0.1588, "step": 92720 }, { "epoch": 4.33, - "learning_rate": 1.1399840607566452e-05, - "loss": 0.3197, + "learning_rate": 2.1413258858221648e-05, + "loss": 0.169, "step": 92725 }, { "epoch": 4.33, - "learning_rate": 1.1399371806291315e-05, - "loss": 0.0663, + "learning_rate": 2.1412790788385628e-05, + "loss": 0.0872, "step": 92730 }, { "epoch": 4.33, - "learning_rate": 1.1398903005016175e-05, - "loss": 0.0414, + "learning_rate": 2.1412322718549608e-05, + "loss": 0.0163, "step": 92735 }, { "epoch": 4.33, - "learning_rate": 1.1398434203741035e-05, - "loss": 0.039, + "learning_rate": 2.141185464871359e-05, + "loss": 0.0162, "step": 92740 }, { "epoch": 4.33, - "learning_rate": 1.1397965402465897e-05, - "loss": 0.0337, + "learning_rate": 2.141138657887757e-05, + "loss": 0.0387, "step": 92745 }, { "epoch": 4.33, - "learning_rate": 1.1397496601190756e-05, - "loss": 0.0979, + "learning_rate": 2.141091850904155e-05, + "loss": 0.0324, "step": 92750 }, { "epoch": 4.33, - "learning_rate": 1.1397027799915616e-05, - "loss": 0.0924, + "learning_rate": 2.141045043920553e-05, + "loss": 0.0578, "step": 92755 }, { "epoch": 4.33, - "learning_rate": 1.1396558998640476e-05, - "loss": 0.1765, + "learning_rate": 2.140998236936951e-05, + "loss": 0.0921, "step": 92760 }, { "epoch": 4.33, - "learning_rate": 1.1396090197365336e-05, - "loss": 0.0846, + "learning_rate": 2.140951429953349e-05, + "loss": 0.0672, "step": 92765 }, { "epoch": 4.33, - "learning_rate": 1.1395621396090198e-05, - "loss": 0.1251, + "learning_rate": 2.140904622969747e-05, + "loss": 0.1351, "step": 92770 }, { "epoch": 4.33, - "learning_rate": 1.139515259481506e-05, - "loss": 0.2777, + "learning_rate": 2.140857815986145e-05, + "loss": 0.174, "step": 92775 }, { "epoch": 4.33, - "learning_rate": 1.139468379353992e-05, - "loss": 0.0719, + "learning_rate": 2.1408110090025433e-05, + "loss": 0.0985, "step": 92780 }, { "epoch": 4.33, - "learning_rate": 1.1394214992264781e-05, - "loss": 0.0334, + "learning_rate": 2.1407642020189413e-05, + "loss": 0.0473, "step": 92785 }, { "epoch": 4.33, - "learning_rate": 1.1393746190989641e-05, - "loss": 0.0253, + "learning_rate": 2.1407173950353393e-05, + "loss": 0.0437, "step": 92790 }, { "epoch": 4.33, - "learning_rate": 1.1393277389714501e-05, - "loss": 0.0627, + "learning_rate": 2.1406705880517372e-05, + "loss": 0.0103, "step": 92795 }, { "epoch": 4.33, - "learning_rate": 1.1392808588439361e-05, - "loss": 0.0869, + "learning_rate": 2.1406237810681356e-05, + "loss": 0.0353, "step": 92800 }, { "epoch": 4.33, - "learning_rate": 1.139233978716422e-05, - "loss": 0.0711, + "learning_rate": 2.1405769740845336e-05, + "loss": 0.0738, "step": 92805 }, { "epoch": 4.33, - "learning_rate": 1.1391870985889082e-05, - "loss": 0.1426, + "learning_rate": 2.1405301671009315e-05, + "loss": 0.015, "step": 92810 }, { "epoch": 4.33, - "learning_rate": 1.1391402184613942e-05, - "loss": 0.0801, + "learning_rate": 2.14048336011733e-05, + "loss": 0.1657, "step": 92815 }, { "epoch": 4.33, - "learning_rate": 1.1390933383338804e-05, - "loss": 0.1832, + "learning_rate": 2.1404365531337275e-05, + "loss": 0.1682, "step": 92820 }, { "epoch": 4.33, - "learning_rate": 1.1390464582063666e-05, - "loss": 0.1939, + "learning_rate": 2.1403897461501255e-05, + "loss": 0.2142, "step": 92825 }, { "epoch": 4.33, - "learning_rate": 1.1389995780788526e-05, - "loss": 0.081, + "learning_rate": 2.1403429391665235e-05, + "loss": 0.0851, "step": 92830 }, { "epoch": 4.33, - "learning_rate": 1.1389526979513386e-05, - "loss": 0.0177, + "learning_rate": 2.1402961321829218e-05, + "loss": 0.0034, "step": 92835 }, { "epoch": 4.33, - "learning_rate": 1.1389058178238245e-05, - "loss": 0.0086, + "learning_rate": 2.1402493251993198e-05, + "loss": 0.033, "step": 92840 }, { "epoch": 4.33, - "learning_rate": 1.1388589376963105e-05, - "loss": 0.0447, + "learning_rate": 2.1402025182157177e-05, + "loss": 0.052, "step": 92845 }, { "epoch": 4.33, - "learning_rate": 1.1388120575687967e-05, - "loss": 0.1108, + "learning_rate": 2.1401557112321157e-05, + "loss": 0.0673, "step": 92850 }, { "epoch": 4.33, - "learning_rate": 1.1387651774412827e-05, - "loss": 0.0754, + "learning_rate": 2.140108904248514e-05, + "loss": 0.0683, "step": 92855 }, { "epoch": 4.33, - "learning_rate": 1.1387182973137687e-05, - "loss": 0.1089, + "learning_rate": 2.140062097264912e-05, + "loss": 0.1242, "step": 92860 }, { "epoch": 4.33, - "learning_rate": 1.1386714171862547e-05, - "loss": 0.0973, + "learning_rate": 2.14001529028131e-05, + "loss": 0.1497, "step": 92865 }, { "epoch": 4.33, - "learning_rate": 1.138624537058741e-05, - "loss": 0.2828, + "learning_rate": 2.1399684832977083e-05, + "loss": 0.119, "step": 92870 }, { "epoch": 4.33, - "learning_rate": 1.138577656931227e-05, - "loss": 0.3095, + "learning_rate": 2.1399216763141063e-05, + "loss": 0.3771, "step": 92875 }, { "epoch": 4.33, - "learning_rate": 1.138530776803713e-05, - "loss": 0.092, + "learning_rate": 2.1398748693305043e-05, + "loss": 0.1316, "step": 92880 }, { "epoch": 4.33, - "learning_rate": 1.138483896676199e-05, - "loss": 0.0204, + "learning_rate": 2.139828062346902e-05, + "loss": 0.0331, "step": 92885 }, { "epoch": 4.33, - "learning_rate": 1.1384370165486852e-05, - "loss": 0.0619, + "learning_rate": 2.1397812553633003e-05, + "loss": 0.0276, "step": 92890 }, { "epoch": 4.33, - "learning_rate": 1.1383901364211711e-05, - "loss": 0.057, + "learning_rate": 2.1397344483796982e-05, + "loss": 0.0399, "step": 92895 }, { "epoch": 4.33, - "learning_rate": 1.1383432562936571e-05, - "loss": 0.121, + "learning_rate": 2.1396876413960962e-05, + "loss": 0.0438, "step": 92900 }, { "epoch": 4.34, - "learning_rate": 1.1382963761661431e-05, - "loss": 0.0572, + "learning_rate": 2.1396408344124942e-05, + "loss": 0.0301, "step": 92905 }, { "epoch": 4.34, - "learning_rate": 1.1382494960386293e-05, - "loss": 0.0798, + "learning_rate": 2.1395940274288925e-05, + "loss": 0.1457, "step": 92910 }, { "epoch": 4.34, - "learning_rate": 1.1382026159111155e-05, - "loss": 0.0695, + "learning_rate": 2.1395472204452905e-05, + "loss": 0.04, "step": 92915 }, { "epoch": 4.34, - "learning_rate": 1.1381557357836015e-05, - "loss": 0.1602, + "learning_rate": 2.1395004134616885e-05, + "loss": 0.197, "step": 92920 }, { "epoch": 4.34, - "learning_rate": 1.1381088556560874e-05, - "loss": 0.2295, + "learning_rate": 2.1394536064780868e-05, + "loss": 0.2136, "step": 92925 }, { "epoch": 4.34, - "learning_rate": 1.1380619755285736e-05, - "loss": 0.0473, + "learning_rate": 2.1394067994944848e-05, + "loss": 0.121, "step": 92930 }, { "epoch": 4.34, - "learning_rate": 1.1380150954010596e-05, - "loss": 0.1084, + "learning_rate": 2.1393599925108828e-05, + "loss": 0.042, "step": 92935 }, { "epoch": 4.34, - "learning_rate": 1.1379682152735456e-05, - "loss": 0.0252, + "learning_rate": 2.1393131855272808e-05, + "loss": 0.045, "step": 92940 }, { "epoch": 4.34, - "learning_rate": 1.1379213351460316e-05, - "loss": 0.0559, + "learning_rate": 2.1392663785436787e-05, + "loss": 0.029, "step": 92945 }, { "epoch": 4.34, - "learning_rate": 1.1378744550185178e-05, - "loss": 0.0836, + "learning_rate": 2.1392195715600767e-05, + "loss": 0.0473, "step": 92950 }, { "epoch": 4.34, - "learning_rate": 1.1378275748910037e-05, - "loss": 0.0414, + "learning_rate": 2.1391727645764747e-05, + "loss": 0.0264, "step": 92955 }, { "epoch": 4.34, - "learning_rate": 1.1377806947634899e-05, - "loss": 0.081, + "learning_rate": 2.1391259575928727e-05, + "loss": 0.0565, "step": 92960 }, { "epoch": 4.34, - "learning_rate": 1.1377338146359759e-05, - "loss": 0.0432, + "learning_rate": 2.139079150609271e-05, + "loss": 0.1025, "step": 92965 }, { "epoch": 4.34, - "learning_rate": 1.137686934508462e-05, - "loss": 0.1134, + "learning_rate": 2.139032343625669e-05, + "loss": 0.1965, "step": 92970 }, { "epoch": 4.34, - "learning_rate": 1.137640054380948e-05, - "loss": 0.1596, + "learning_rate": 2.138985536642067e-05, + "loss": 0.2978, "step": 92975 }, { "epoch": 4.34, - "learning_rate": 1.137593174253434e-05, - "loss": 0.0731, + "learning_rate": 2.138938729658465e-05, + "loss": 0.1531, "step": 92980 }, { "epoch": 4.34, - "learning_rate": 1.13754629412592e-05, - "loss": 0.0099, + "learning_rate": 2.1388919226748633e-05, + "loss": 0.0455, "step": 92985 }, { "epoch": 4.34, - "learning_rate": 1.1374994139984062e-05, - "loss": 0.0077, + "learning_rate": 2.1388451156912612e-05, + "loss": 0.0122, "step": 92990 }, { "epoch": 4.34, - "learning_rate": 1.1374525338708922e-05, - "loss": 0.031, + "learning_rate": 2.1387983087076592e-05, + "loss": 0.0447, "step": 92995 }, { "epoch": 4.34, - "learning_rate": 1.1374056537433782e-05, - "loss": 0.0629, + "learning_rate": 2.1387515017240576e-05, + "loss": 0.053, "step": 93000 }, { "epoch": 4.34, - "learning_rate": 1.1373587736158642e-05, - "loss": 0.0635, + "learning_rate": 2.1387046947404555e-05, + "loss": 0.0432, "step": 93005 }, { "epoch": 4.34, - "learning_rate": 1.1373118934883505e-05, - "loss": 0.0805, + "learning_rate": 2.1386578877568532e-05, + "loss": 0.1126, "step": 93010 }, { "epoch": 4.34, - "learning_rate": 1.1372650133608365e-05, - "loss": 0.2164, + "learning_rate": 2.138611080773251e-05, + "loss": 0.1052, "step": 93015 }, { "epoch": 4.34, - "learning_rate": 1.1372181332333225e-05, - "loss": 0.2023, + "learning_rate": 2.1385642737896495e-05, + "loss": 0.2292, "step": 93020 }, { "epoch": 4.34, - "learning_rate": 1.1371712531058085e-05, - "loss": 0.2579, + "learning_rate": 2.1385174668060475e-05, + "loss": 0.2942, "step": 93025 }, { "epoch": 4.34, - "learning_rate": 1.1371243729782947e-05, - "loss": 0.0653, + "learning_rate": 2.1384706598224454e-05, + "loss": 0.103, "step": 93030 }, { "epoch": 4.34, - "learning_rate": 1.1370774928507807e-05, - "loss": 0.0023, + "learning_rate": 2.1384238528388434e-05, + "loss": 0.0345, "step": 93035 }, { "epoch": 4.34, - "learning_rate": 1.1370306127232667e-05, - "loss": 0.0256, + "learning_rate": 2.1383770458552417e-05, + "loss": 0.0482, "step": 93040 }, { "epoch": 4.34, - "learning_rate": 1.1369837325957526e-05, - "loss": 0.0836, + "learning_rate": 2.1383302388716397e-05, + "loss": 0.1101, "step": 93045 }, { "epoch": 4.34, - "learning_rate": 1.1369368524682386e-05, - "loss": 0.0599, + "learning_rate": 2.1382834318880377e-05, + "loss": 0.0809, "step": 93050 }, { "epoch": 4.34, - "learning_rate": 1.136889972340725e-05, - "loss": 0.0774, + "learning_rate": 2.138236624904436e-05, + "loss": 0.0547, "step": 93055 }, { "epoch": 4.34, - "learning_rate": 1.136843092213211e-05, - "loss": 0.1233, + "learning_rate": 2.138189817920834e-05, + "loss": 0.1605, "step": 93060 }, { "epoch": 4.34, - "learning_rate": 1.136796212085697e-05, - "loss": 0.0889, + "learning_rate": 2.138143010937232e-05, + "loss": 0.1163, "step": 93065 }, { "epoch": 4.34, - "learning_rate": 1.1367493319581831e-05, - "loss": 0.0753, + "learning_rate": 2.13809620395363e-05, + "loss": 0.1737, "step": 93070 }, { "epoch": 4.34, - "learning_rate": 1.1367024518306691e-05, - "loss": 0.2387, + "learning_rate": 2.138049396970028e-05, + "loss": 0.258, "step": 93075 }, { "epoch": 4.34, - "learning_rate": 1.1366555717031551e-05, - "loss": 0.0767, + "learning_rate": 2.138002589986426e-05, + "loss": 0.1017, "step": 93080 }, { "epoch": 4.34, - "learning_rate": 1.1366086915756411e-05, - "loss": 0.0199, + "learning_rate": 2.137955783002824e-05, + "loss": 0.0202, "step": 93085 }, { "epoch": 4.34, - "learning_rate": 1.1365618114481271e-05, - "loss": 0.0746, + "learning_rate": 2.137908976019222e-05, + "loss": 0.0393, "step": 93090 }, { "epoch": 4.34, - "learning_rate": 1.1365149313206133e-05, - "loss": 0.0233, + "learning_rate": 2.1378621690356202e-05, + "loss": 0.0335, "step": 93095 }, { "epoch": 4.34, - "learning_rate": 1.1364680511930994e-05, - "loss": 0.0937, + "learning_rate": 2.1378153620520182e-05, + "loss": 0.0356, "step": 93100 }, { "epoch": 4.34, - "learning_rate": 1.1364211710655854e-05, - "loss": 0.2074, + "learning_rate": 2.1377685550684162e-05, + "loss": 0.0845, "step": 93105 }, { "epoch": 4.34, - "learning_rate": 1.1363742909380716e-05, - "loss": 0.0425, + "learning_rate": 2.1377217480848145e-05, + "loss": 0.1507, "step": 93110 }, { "epoch": 4.34, - "learning_rate": 1.1363274108105576e-05, - "loss": 0.0882, + "learning_rate": 2.1376749411012125e-05, + "loss": 0.1019, "step": 93115 }, { "epoch": 4.35, - "learning_rate": 1.1362805306830436e-05, - "loss": 0.0885, + "learning_rate": 2.1376281341176105e-05, + "loss": 0.2084, "step": 93120 }, { "epoch": 4.35, - "learning_rate": 1.1362336505555296e-05, - "loss": 0.2915, + "learning_rate": 2.1375813271340085e-05, + "loss": 0.151, "step": 93125 }, { "epoch": 4.35, - "learning_rate": 1.1361867704280155e-05, - "loss": 0.1091, + "learning_rate": 2.1375345201504068e-05, + "loss": 0.0917, "step": 93130 }, { "epoch": 4.35, - "learning_rate": 1.1361398903005017e-05, - "loss": 0.0368, + "learning_rate": 2.1374877131668044e-05, + "loss": 0.0146, "step": 93135 }, { "epoch": 4.35, - "learning_rate": 1.1360930101729877e-05, - "loss": 0.0263, + "learning_rate": 2.1374409061832024e-05, + "loss": 0.0441, "step": 93140 }, { "epoch": 4.35, - "learning_rate": 1.1360461300454739e-05, - "loss": 0.0319, + "learning_rate": 2.1373940991996004e-05, + "loss": 0.0181, "step": 93145 }, { "epoch": 4.35, - "learning_rate": 1.13599924991796e-05, - "loss": 0.0369, + "learning_rate": 2.1373472922159987e-05, + "loss": 0.0553, "step": 93150 }, { "epoch": 4.35, - "learning_rate": 1.135952369790446e-05, - "loss": 0.0442, + "learning_rate": 2.1373004852323967e-05, + "loss": 0.0235, "step": 93155 }, { "epoch": 4.35, - "learning_rate": 1.135905489662932e-05, - "loss": 0.0669, + "learning_rate": 2.1372536782487947e-05, + "loss": 0.0748, "step": 93160 }, { "epoch": 4.35, - "learning_rate": 1.135858609535418e-05, - "loss": 0.0986, + "learning_rate": 2.137206871265193e-05, + "loss": 0.2056, "step": 93165 }, { "epoch": 4.35, - "learning_rate": 1.135811729407904e-05, - "loss": 0.0718, + "learning_rate": 2.137160064281591e-05, + "loss": 0.1418, "step": 93170 }, { "epoch": 4.35, - "learning_rate": 1.1357648492803902e-05, - "loss": 0.2575, + "learning_rate": 2.137113257297989e-05, + "loss": 0.1906, "step": 93175 }, { "epoch": 4.35, - "learning_rate": 1.1357179691528762e-05, - "loss": 0.0897, + "learning_rate": 2.137066450314387e-05, + "loss": 0.0758, "step": 93180 }, { "epoch": 4.35, - "learning_rate": 1.1356710890253622e-05, - "loss": 0.0338, + "learning_rate": 2.1370196433307852e-05, + "loss": 0.0292, "step": 93185 }, { "epoch": 4.35, - "learning_rate": 1.1356242088978481e-05, - "loss": 0.0242, + "learning_rate": 2.1369728363471832e-05, + "loss": 0.02, "step": 93190 }, { "epoch": 4.35, - "learning_rate": 1.1355773287703345e-05, - "loss": 0.0657, + "learning_rate": 2.1369260293635812e-05, + "loss": 0.0543, "step": 93195 }, { "epoch": 4.35, - "learning_rate": 1.1355304486428205e-05, - "loss": 0.0481, + "learning_rate": 2.136879222379979e-05, + "loss": 0.055, "step": 93200 }, { "epoch": 4.35, - "learning_rate": 1.1354835685153065e-05, - "loss": 0.0743, + "learning_rate": 2.1368324153963772e-05, + "loss": 0.1028, "step": 93205 }, { "epoch": 4.35, - "learning_rate": 1.1354366883877925e-05, - "loss": 0.0767, + "learning_rate": 2.136785608412775e-05, + "loss": 0.111, "step": 93210 }, { "epoch": 4.35, - "learning_rate": 1.1353898082602786e-05, - "loss": 0.1051, + "learning_rate": 2.136738801429173e-05, + "loss": 0.0826, "step": 93215 }, { "epoch": 4.35, - "learning_rate": 1.1353429281327646e-05, - "loss": 0.1902, + "learning_rate": 2.136691994445571e-05, + "loss": 0.0883, "step": 93220 }, { "epoch": 4.35, - "learning_rate": 1.1352960480052506e-05, - "loss": 0.2896, + "learning_rate": 2.1366451874619694e-05, + "loss": 0.2776, "step": 93225 }, { "epoch": 4.35, - "learning_rate": 1.1352491678777366e-05, - "loss": 0.0998, + "learning_rate": 2.1365983804783674e-05, + "loss": 0.1039, "step": 93230 }, { "epoch": 4.35, - "learning_rate": 1.1352022877502226e-05, - "loss": 0.064, + "learning_rate": 2.1365515734947654e-05, + "loss": 0.0334, "step": 93235 }, { "epoch": 4.35, - "learning_rate": 1.135155407622709e-05, - "loss": 0.0245, + "learning_rate": 2.1365047665111637e-05, + "loss": 0.0278, "step": 93240 }, { "epoch": 4.35, - "learning_rate": 1.135108527495195e-05, - "loss": 0.0712, + "learning_rate": 2.1364579595275617e-05, + "loss": 0.0467, "step": 93245 }, { "epoch": 4.35, - "learning_rate": 1.1350616473676809e-05, - "loss": 0.0593, + "learning_rate": 2.1364111525439597e-05, + "loss": 0.0094, "step": 93250 }, { "epoch": 4.35, - "learning_rate": 1.135014767240167e-05, - "loss": 0.0652, + "learning_rate": 2.1363643455603577e-05, + "loss": 0.0705, "step": 93255 }, { "epoch": 4.35, - "learning_rate": 1.134967887112653e-05, - "loss": 0.1075, + "learning_rate": 2.136317538576756e-05, + "loss": 0.0254, "step": 93260 }, { "epoch": 4.35, - "learning_rate": 1.134921006985139e-05, - "loss": 0.1495, + "learning_rate": 2.1362707315931536e-05, + "loss": 0.1317, "step": 93265 }, { "epoch": 4.35, - "learning_rate": 1.134874126857625e-05, - "loss": 0.1475, + "learning_rate": 2.1362239246095516e-05, + "loss": 0.1069, "step": 93270 }, { "epoch": 4.35, - "learning_rate": 1.134827246730111e-05, - "loss": 0.3304, + "learning_rate": 2.1361771176259496e-05, + "loss": 0.2367, "step": 93275 }, { "epoch": 4.35, - "learning_rate": 1.1347803666025972e-05, - "loss": 0.0534, + "learning_rate": 2.136130310642348e-05, + "loss": 0.0789, "step": 93280 }, { "epoch": 4.35, - "learning_rate": 1.1347334864750834e-05, - "loss": 0.0177, + "learning_rate": 2.136083503658746e-05, + "loss": 0.0231, "step": 93285 }, { "epoch": 4.35, - "learning_rate": 1.1346866063475694e-05, - "loss": 0.0147, + "learning_rate": 2.136036696675144e-05, + "loss": 0.0046, "step": 93290 }, { "epoch": 4.35, - "learning_rate": 1.1346397262200555e-05, - "loss": 0.0469, + "learning_rate": 2.1359898896915422e-05, + "loss": 0.0582, "step": 93295 }, { "epoch": 4.35, - "learning_rate": 1.1345928460925415e-05, - "loss": 0.0652, + "learning_rate": 2.1359430827079402e-05, + "loss": 0.0284, "step": 93300 }, { "epoch": 4.35, - "learning_rate": 1.1345459659650275e-05, - "loss": 0.0227, + "learning_rate": 2.135896275724338e-05, + "loss": 0.0605, "step": 93305 }, { "epoch": 4.35, - "learning_rate": 1.1344990858375135e-05, - "loss": 0.0823, + "learning_rate": 2.135849468740736e-05, + "loss": 0.12, "step": 93310 }, { "epoch": 4.35, - "learning_rate": 1.1344522057099995e-05, - "loss": 0.0464, + "learning_rate": 2.1358026617571345e-05, + "loss": 0.0557, "step": 93315 }, { "epoch": 4.35, - "learning_rate": 1.1344053255824857e-05, - "loss": 0.1616, + "learning_rate": 2.1357558547735325e-05, + "loss": 0.1432, "step": 93320 }, { "epoch": 4.35, - "learning_rate": 1.1343584454549717e-05, - "loss": 0.2665, + "learning_rate": 2.13570904778993e-05, + "loss": 0.243, "step": 93325 }, { "epoch": 4.35, - "learning_rate": 1.1343115653274577e-05, - "loss": 0.174, + "learning_rate": 2.135662240806328e-05, + "loss": 0.0628, "step": 93330 }, { "epoch": 4.36, - "learning_rate": 1.134264685199944e-05, - "loss": 0.0116, + "learning_rate": 2.1356154338227264e-05, + "loss": 0.0325, "step": 93335 }, { "epoch": 4.36, - "learning_rate": 1.13421780507243e-05, - "loss": 0.0323, + "learning_rate": 2.1355686268391244e-05, + "loss": 0.0621, "step": 93340 }, { "epoch": 4.36, - "learning_rate": 1.134170924944916e-05, - "loss": 0.0394, + "learning_rate": 2.1355218198555224e-05, + "loss": 0.0644, "step": 93345 }, { "epoch": 4.36, - "learning_rate": 1.134124044817402e-05, - "loss": 0.029, + "learning_rate": 2.1354750128719207e-05, + "loss": 0.0682, "step": 93350 }, { "epoch": 4.36, - "learning_rate": 1.1340771646898881e-05, - "loss": 0.0564, + "learning_rate": 2.1354282058883187e-05, + "loss": 0.1353, "step": 93355 }, { "epoch": 4.36, - "learning_rate": 1.1340302845623741e-05, - "loss": 0.1097, + "learning_rate": 2.1353813989047166e-05, + "loss": 0.1662, "step": 93360 }, { "epoch": 4.36, - "learning_rate": 1.1339834044348601e-05, - "loss": 0.1954, + "learning_rate": 2.1353345919211146e-05, + "loss": 0.0795, "step": 93365 }, { "epoch": 4.36, - "learning_rate": 1.1339365243073461e-05, - "loss": 0.1325, + "learning_rate": 2.135287784937513e-05, + "loss": 0.2169, "step": 93370 }, { "epoch": 4.36, - "learning_rate": 1.1338896441798321e-05, - "loss": 0.2073, + "learning_rate": 2.135240977953911e-05, + "loss": 0.2537, "step": 93375 }, { "epoch": 4.36, - "learning_rate": 1.1338427640523184e-05, - "loss": 0.0924, + "learning_rate": 2.135194170970309e-05, + "loss": 0.0804, "step": 93380 }, { "epoch": 4.36, - "learning_rate": 1.1337958839248044e-05, - "loss": 0.0208, + "learning_rate": 2.135147363986707e-05, + "loss": 0.0479, "step": 93385 }, { "epoch": 4.36, - "learning_rate": 1.1337490037972904e-05, - "loss": 0.0388, + "learning_rate": 2.135100557003105e-05, + "loss": 0.023, "step": 93390 }, { "epoch": 4.36, - "learning_rate": 1.1337021236697766e-05, - "loss": 0.0524, + "learning_rate": 2.135053750019503e-05, + "loss": 0.0171, "step": 93395 }, { "epoch": 4.36, - "learning_rate": 1.1336552435422626e-05, - "loss": 0.0209, + "learning_rate": 2.135006943035901e-05, + "loss": 0.0308, "step": 93400 }, { "epoch": 4.36, - "learning_rate": 1.1336083634147486e-05, - "loss": 0.0644, + "learning_rate": 2.1349601360522988e-05, + "loss": 0.0554, "step": 93405 }, { "epoch": 4.36, - "learning_rate": 1.1335614832872346e-05, - "loss": 0.1235, + "learning_rate": 2.134913329068697e-05, + "loss": 0.1531, "step": 93410 }, { "epoch": 4.36, - "learning_rate": 1.1335146031597206e-05, - "loss": 0.101, + "learning_rate": 2.134866522085095e-05, + "loss": 0.0881, "step": 93415 }, { "epoch": 4.36, - "learning_rate": 1.1334677230322067e-05, - "loss": 0.1767, + "learning_rate": 2.134819715101493e-05, + "loss": 0.1753, "step": 93420 }, { "epoch": 4.36, - "learning_rate": 1.1334208429046929e-05, - "loss": 0.314, + "learning_rate": 2.1347729081178914e-05, + "loss": 0.1874, "step": 93425 }, { "epoch": 4.36, - "learning_rate": 1.1333739627771789e-05, - "loss": 0.1279, + "learning_rate": 2.1347261011342894e-05, + "loss": 0.0824, "step": 93430 }, { "epoch": 4.36, - "learning_rate": 1.133327082649665e-05, - "loss": 0.0459, + "learning_rate": 2.1346792941506874e-05, + "loss": 0.0136, "step": 93435 }, { "epoch": 4.36, - "learning_rate": 1.133280202522151e-05, - "loss": 0.0312, + "learning_rate": 2.1346324871670854e-05, + "loss": 0.0558, "step": 93440 }, { "epoch": 4.36, - "learning_rate": 1.133233322394637e-05, - "loss": 0.071, + "learning_rate": 2.1345856801834837e-05, + "loss": 0.0513, "step": 93445 }, { "epoch": 4.36, - "learning_rate": 1.133186442267123e-05, - "loss": 0.0254, + "learning_rate": 2.1345388731998817e-05, + "loss": 0.0497, "step": 93450 }, { "epoch": 4.36, - "learning_rate": 1.133139562139609e-05, - "loss": 0.051, + "learning_rate": 2.1344920662162793e-05, + "loss": 0.115, "step": 93455 }, { "epoch": 4.36, - "learning_rate": 1.1330926820120952e-05, - "loss": 0.0682, + "learning_rate": 2.1344452592326773e-05, + "loss": 0.1568, "step": 93460 }, { "epoch": 4.36, - "learning_rate": 1.1330458018845812e-05, - "loss": 0.0462, + "learning_rate": 2.1343984522490756e-05, + "loss": 0.0793, "step": 93465 }, { "epoch": 4.36, - "learning_rate": 1.1329989217570673e-05, - "loss": 0.1674, + "learning_rate": 2.1343516452654736e-05, + "loss": 0.1928, "step": 93470 }, { "epoch": 4.36, - "learning_rate": 1.1329520416295535e-05, - "loss": 0.2021, + "learning_rate": 2.1343048382818716e-05, + "loss": 0.2192, "step": 93475 }, { "epoch": 4.36, - "learning_rate": 1.1329051615020395e-05, - "loss": 0.0761, + "learning_rate": 2.13425803129827e-05, + "loss": 0.0681, "step": 93480 }, { "epoch": 4.36, - "learning_rate": 1.1328582813745255e-05, - "loss": 0.026, + "learning_rate": 2.134211224314668e-05, + "loss": 0.0148, "step": 93485 }, { "epoch": 4.36, - "learning_rate": 1.1328114012470115e-05, - "loss": 0.0264, + "learning_rate": 2.134164417331066e-05, + "loss": 0.026, "step": 93490 }, { "epoch": 4.36, - "learning_rate": 1.1327645211194975e-05, - "loss": 0.0651, + "learning_rate": 2.134117610347464e-05, + "loss": 0.0443, "step": 93495 }, { "epoch": 4.36, - "learning_rate": 1.1327176409919836e-05, - "loss": 0.0882, + "learning_rate": 2.134070803363862e-05, + "loss": 0.0323, "step": 93500 }, { "epoch": 4.36, - "learning_rate": 1.1326707608644696e-05, - "loss": 0.0469, + "learning_rate": 2.13402399638026e-05, + "loss": 0.1371, "step": 93505 }, { "epoch": 4.36, - "learning_rate": 1.1326238807369556e-05, - "loss": 0.0392, + "learning_rate": 2.133977189396658e-05, + "loss": 0.0752, "step": 93510 }, { "epoch": 4.36, - "learning_rate": 1.1325770006094416e-05, - "loss": 0.0775, + "learning_rate": 2.1339303824130558e-05, + "loss": 0.1134, "step": 93515 }, { "epoch": 4.36, - "learning_rate": 1.132530120481928e-05, - "loss": 0.1573, + "learning_rate": 2.133883575429454e-05, + "loss": 0.1922, "step": 93520 }, { "epoch": 4.36, - "learning_rate": 1.132483240354414e-05, - "loss": 0.2538, + "learning_rate": 2.133836768445852e-05, + "loss": 0.2471, "step": 93525 }, { "epoch": 4.36, - "learning_rate": 1.1324363602269e-05, - "loss": 0.1064, + "learning_rate": 2.13378996146225e-05, + "loss": 0.0772, "step": 93530 }, { "epoch": 4.36, - "learning_rate": 1.132389480099386e-05, - "loss": 0.0241, + "learning_rate": 2.1337431544786484e-05, + "loss": 0.0527, "step": 93535 }, { "epoch": 4.36, - "learning_rate": 1.1323425999718721e-05, - "loss": 0.0369, + "learning_rate": 2.1336963474950464e-05, + "loss": 0.0206, "step": 93540 }, { "epoch": 4.36, - "learning_rate": 1.132295719844358e-05, - "loss": 0.0454, + "learning_rate": 2.1336495405114443e-05, + "loss": 0.0118, "step": 93545 }, { "epoch": 4.37, - "learning_rate": 1.132248839716844e-05, - "loss": 0.0286, + "learning_rate": 2.1336027335278423e-05, + "loss": 0.0599, "step": 93550 }, { "epoch": 4.37, - "learning_rate": 1.13220195958933e-05, - "loss": 0.0409, + "learning_rate": 2.1335559265442406e-05, + "loss": 0.0406, "step": 93555 }, { "epoch": 4.37, - "learning_rate": 1.132155079461816e-05, - "loss": 0.147, + "learning_rate": 2.1335091195606386e-05, + "loss": 0.2206, "step": 93560 }, { "epoch": 4.37, - "learning_rate": 1.1321081993343024e-05, - "loss": 0.1259, + "learning_rate": 2.1334623125770366e-05, + "loss": 0.0447, "step": 93565 }, { "epoch": 4.37, - "learning_rate": 1.1320613192067884e-05, - "loss": 0.1534, + "learning_rate": 2.1334155055934346e-05, + "loss": 0.09, "step": 93570 }, { "epoch": 4.37, - "learning_rate": 1.1320144390792744e-05, - "loss": 0.2241, + "learning_rate": 2.133368698609833e-05, + "loss": 0.2716, "step": 93575 }, { "epoch": 4.37, - "learning_rate": 1.1319675589517605e-05, - "loss": 0.0898, + "learning_rate": 2.1333218916262306e-05, + "loss": 0.0594, "step": 93580 }, { "epoch": 4.37, - "learning_rate": 1.1319206788242465e-05, - "loss": 0.038, + "learning_rate": 2.1332750846426285e-05, + "loss": 0.0088, "step": 93585 }, { "epoch": 4.37, - "learning_rate": 1.1318737986967325e-05, - "loss": 0.0325, + "learning_rate": 2.1332282776590265e-05, + "loss": 0.039, "step": 93590 }, { "epoch": 4.37, - "learning_rate": 1.1318269185692185e-05, - "loss": 0.0524, + "learning_rate": 2.133181470675425e-05, + "loss": 0.0587, "step": 93595 }, { "epoch": 4.37, - "learning_rate": 1.1317800384417045e-05, - "loss": 0.0863, + "learning_rate": 2.1331346636918228e-05, + "loss": 0.0448, "step": 93600 }, { "epoch": 4.37, - "learning_rate": 1.1317331583141907e-05, - "loss": 0.0932, + "learning_rate": 2.1330878567082208e-05, + "loss": 0.1639, "step": 93605 }, { "epoch": 4.37, - "learning_rate": 1.1316862781866768e-05, - "loss": 0.065, + "learning_rate": 2.133041049724619e-05, + "loss": 0.1584, "step": 93610 }, { "epoch": 4.37, - "learning_rate": 1.1316393980591628e-05, - "loss": 0.1651, + "learning_rate": 2.132994242741017e-05, + "loss": 0.1308, "step": 93615 }, { "epoch": 4.37, - "learning_rate": 1.131592517931649e-05, - "loss": 0.1685, + "learning_rate": 2.132947435757415e-05, + "loss": 0.2, "step": 93620 }, { "epoch": 4.37, - "learning_rate": 1.131545637804135e-05, - "loss": 0.3206, + "learning_rate": 2.132900628773813e-05, + "loss": 0.172, "step": 93625 }, { "epoch": 4.37, - "learning_rate": 1.131498757676621e-05, - "loss": 0.1088, + "learning_rate": 2.1328538217902114e-05, + "loss": 0.0635, "step": 93630 }, { "epoch": 4.37, - "learning_rate": 1.131451877549107e-05, - "loss": 0.0235, + "learning_rate": 2.1328070148066094e-05, + "loss": 0.0383, "step": 93635 }, { "epoch": 4.37, - "learning_rate": 1.131404997421593e-05, - "loss": 0.055, + "learning_rate": 2.1327602078230073e-05, + "loss": 0.0336, "step": 93640 }, { "epoch": 4.37, - "learning_rate": 1.1313581172940791e-05, - "loss": 0.023, + "learning_rate": 2.132713400839405e-05, + "loss": 0.0295, "step": 93645 }, { "epoch": 4.37, - "learning_rate": 1.1313112371665651e-05, - "loss": 0.0641, + "learning_rate": 2.1326665938558033e-05, + "loss": 0.0761, "step": 93650 }, { "epoch": 4.37, - "learning_rate": 1.1312643570390511e-05, - "loss": 0.0778, + "learning_rate": 2.1326197868722013e-05, + "loss": 0.0609, "step": 93655 }, { "epoch": 4.37, - "learning_rate": 1.1312174769115375e-05, - "loss": 0.0932, + "learning_rate": 2.1325729798885993e-05, + "loss": 0.1297, "step": 93660 }, { "epoch": 4.37, - "learning_rate": 1.1311705967840234e-05, - "loss": 0.0475, + "learning_rate": 2.1325261729049976e-05, + "loss": 0.1761, "step": 93665 }, { "epoch": 4.37, - "learning_rate": 1.1311237166565094e-05, - "loss": 0.2037, + "learning_rate": 2.1324793659213956e-05, + "loss": 0.1425, "step": 93670 }, { "epoch": 4.37, - "learning_rate": 1.1310768365289954e-05, - "loss": 0.2547, + "learning_rate": 2.1324325589377936e-05, + "loss": 0.2784, "step": 93675 }, { "epoch": 4.37, - "learning_rate": 1.1310299564014814e-05, - "loss": 0.1241, + "learning_rate": 2.1323857519541915e-05, + "loss": 0.0634, "step": 93680 }, { "epoch": 4.37, - "learning_rate": 1.1309830762739676e-05, - "loss": 0.014, + "learning_rate": 2.13233894497059e-05, + "loss": 0.0232, "step": 93685 }, { "epoch": 4.37, - "learning_rate": 1.1309361961464536e-05, - "loss": 0.0262, + "learning_rate": 2.132292137986988e-05, + "loss": 0.0581, "step": 93690 }, { "epoch": 4.37, - "learning_rate": 1.1308893160189396e-05, - "loss": 0.0372, + "learning_rate": 2.1322453310033858e-05, + "loss": 0.0401, "step": 93695 }, { "epoch": 4.37, - "learning_rate": 1.1308424358914256e-05, - "loss": 0.0808, + "learning_rate": 2.1321985240197838e-05, + "loss": 0.0769, "step": 93700 }, { "epoch": 4.37, - "learning_rate": 1.1307955557639119e-05, - "loss": 0.0937, + "learning_rate": 2.1321517170361818e-05, + "loss": 0.0676, "step": 93705 }, { "epoch": 4.37, - "learning_rate": 1.1307486756363979e-05, - "loss": 0.1413, + "learning_rate": 2.1321049100525798e-05, + "loss": 0.0787, "step": 93710 }, { "epoch": 4.37, - "learning_rate": 1.1307017955088839e-05, - "loss": 0.1658, + "learning_rate": 2.1320581030689778e-05, + "loss": 0.0774, "step": 93715 }, { "epoch": 4.37, - "learning_rate": 1.1306549153813699e-05, - "loss": 0.1432, + "learning_rate": 2.132011296085376e-05, + "loss": 0.1567, "step": 93720 }, { "epoch": 4.37, - "learning_rate": 1.130608035253856e-05, - "loss": 0.2572, + "learning_rate": 2.131964489101774e-05, + "loss": 0.3283, "step": 93725 }, { "epoch": 4.37, - "learning_rate": 1.130561155126342e-05, - "loss": 0.0823, + "learning_rate": 2.131917682118172e-05, + "loss": 0.0623, "step": 93730 }, { "epoch": 4.37, - "learning_rate": 1.130514274998828e-05, - "loss": 0.0146, + "learning_rate": 2.13187087513457e-05, + "loss": 0.0509, "step": 93735 }, { "epoch": 4.37, - "learning_rate": 1.130467394871314e-05, - "loss": 0.0098, + "learning_rate": 2.1318240681509683e-05, + "loss": 0.0076, "step": 93740 }, { "epoch": 4.37, - "learning_rate": 1.1304205147438002e-05, - "loss": 0.0271, + "learning_rate": 2.1317772611673663e-05, + "loss": 0.0478, "step": 93745 }, { "epoch": 4.37, - "learning_rate": 1.1303736346162863e-05, - "loss": 0.0164, + "learning_rate": 2.1317304541837643e-05, + "loss": 0.0565, "step": 93750 }, { "epoch": 4.37, - "learning_rate": 1.1303267544887723e-05, - "loss": 0.0325, + "learning_rate": 2.1316836472001623e-05, + "loss": 0.0474, "step": 93755 }, { "epoch": 4.37, - "learning_rate": 1.1302798743612583e-05, - "loss": 0.1518, + "learning_rate": 2.1316368402165606e-05, + "loss": 0.0605, "step": 93760 }, { "epoch": 4.38, - "learning_rate": 1.1302329942337445e-05, - "loss": 0.0947, + "learning_rate": 2.1315900332329586e-05, + "loss": 0.0642, "step": 93765 }, { "epoch": 4.38, - "learning_rate": 1.1301861141062305e-05, - "loss": 0.1963, + "learning_rate": 2.1315432262493562e-05, + "loss": 0.1465, "step": 93770 }, { "epoch": 4.38, - "learning_rate": 1.1301392339787165e-05, - "loss": 0.3213, + "learning_rate": 2.1314964192657546e-05, + "loss": 0.2117, "step": 93775 }, { "epoch": 4.38, - "learning_rate": 1.1300923538512025e-05, - "loss": 0.1066, + "learning_rate": 2.1314496122821525e-05, + "loss": 0.0776, "step": 93780 }, { "epoch": 4.38, - "learning_rate": 1.1300454737236886e-05, - "loss": 0.0301, + "learning_rate": 2.1314028052985505e-05, + "loss": 0.0802, "step": 93785 }, { "epoch": 4.38, - "learning_rate": 1.1299985935961746e-05, - "loss": 0.0105, + "learning_rate": 2.1313559983149485e-05, + "loss": 0.0585, "step": 93790 }, { "epoch": 4.38, - "learning_rate": 1.1299517134686608e-05, - "loss": 0.0451, + "learning_rate": 2.1313091913313468e-05, + "loss": 0.0401, "step": 93795 }, { "epoch": 4.38, - "learning_rate": 1.1299048333411468e-05, - "loss": 0.0651, + "learning_rate": 2.1312623843477448e-05, + "loss": 0.0574, "step": 93800 }, { "epoch": 4.38, - "learning_rate": 1.129857953213633e-05, - "loss": 0.0617, + "learning_rate": 2.1312155773641428e-05, + "loss": 0.0269, "step": 93805 }, { "epoch": 4.38, - "learning_rate": 1.129811073086119e-05, - "loss": 0.0659, + "learning_rate": 2.1311687703805408e-05, + "loss": 0.0507, "step": 93810 }, { "epoch": 4.38, - "learning_rate": 1.129764192958605e-05, - "loss": 0.121, + "learning_rate": 2.131121963396939e-05, + "loss": 0.1283, "step": 93815 }, { "epoch": 4.38, - "learning_rate": 1.129717312831091e-05, - "loss": 0.1004, + "learning_rate": 2.131075156413337e-05, + "loss": 0.2316, "step": 93820 }, { "epoch": 4.38, - "learning_rate": 1.1296704327035771e-05, - "loss": 0.3267, + "learning_rate": 2.131028349429735e-05, + "loss": 0.224, "step": 93825 }, { "epoch": 4.38, - "learning_rate": 1.1296235525760631e-05, - "loss": 0.1201, + "learning_rate": 2.1309815424461327e-05, + "loss": 0.072, "step": 93830 }, { "epoch": 4.38, - "learning_rate": 1.129576672448549e-05, - "loss": 0.0243, + "learning_rate": 2.130934735462531e-05, + "loss": 0.0039, "step": 93835 }, { "epoch": 4.38, - "learning_rate": 1.129529792321035e-05, - "loss": 0.0153, + "learning_rate": 2.130887928478929e-05, + "loss": 0.0413, "step": 93840 }, { "epoch": 4.38, - "learning_rate": 1.1294829121935214e-05, - "loss": 0.0669, + "learning_rate": 2.130841121495327e-05, + "loss": 0.0194, "step": 93845 }, { "epoch": 4.38, - "learning_rate": 1.1294360320660074e-05, - "loss": 0.0889, + "learning_rate": 2.1307943145117253e-05, + "loss": 0.0738, "step": 93850 }, { "epoch": 4.38, - "learning_rate": 1.1293891519384934e-05, - "loss": 0.047, + "learning_rate": 2.1307475075281233e-05, + "loss": 0.0921, "step": 93855 }, { "epoch": 4.38, - "learning_rate": 1.1293422718109794e-05, - "loss": 0.0768, + "learning_rate": 2.1307007005445213e-05, + "loss": 0.0632, "step": 93860 }, { "epoch": 4.38, - "learning_rate": 1.1292953916834656e-05, - "loss": 0.1047, + "learning_rate": 2.1306538935609192e-05, + "loss": 0.131, "step": 93865 }, { "epoch": 4.38, - "learning_rate": 1.1292485115559515e-05, - "loss": 0.1511, + "learning_rate": 2.1306070865773176e-05, + "loss": 0.1789, "step": 93870 }, { "epoch": 4.38, - "learning_rate": 1.1292016314284375e-05, - "loss": 0.1909, + "learning_rate": 2.1305602795937155e-05, + "loss": 0.1455, "step": 93875 }, { "epoch": 4.38, - "learning_rate": 1.1291547513009235e-05, - "loss": 0.0859, + "learning_rate": 2.1305134726101135e-05, + "loss": 0.095, "step": 93880 }, { "epoch": 4.38, - "learning_rate": 1.1291078711734095e-05, - "loss": 0.0099, + "learning_rate": 2.1304666656265115e-05, + "loss": 0.0528, "step": 93885 }, { "epoch": 4.38, - "learning_rate": 1.1290609910458959e-05, - "loss": 0.0474, + "learning_rate": 2.1304198586429098e-05, + "loss": 0.0175, "step": 93890 }, { "epoch": 4.38, - "learning_rate": 1.1290141109183818e-05, - "loss": 0.0195, + "learning_rate": 2.1303730516593075e-05, + "loss": 0.0262, "step": 93895 }, { "epoch": 4.38, - "learning_rate": 1.1289672307908678e-05, - "loss": 0.0113, + "learning_rate": 2.1303262446757055e-05, + "loss": 0.0145, "step": 93900 }, { "epoch": 4.38, - "learning_rate": 1.128920350663354e-05, - "loss": 0.0707, + "learning_rate": 2.1302794376921038e-05, + "loss": 0.066, "step": 93905 }, { "epoch": 4.38, - "learning_rate": 1.12887347053584e-05, - "loss": 0.0706, + "learning_rate": 2.1302326307085018e-05, + "loss": 0.0155, "step": 93910 }, { "epoch": 4.38, - "learning_rate": 1.128826590408326e-05, - "loss": 0.0743, + "learning_rate": 2.1301858237248997e-05, + "loss": 0.1581, "step": 93915 }, { "epoch": 4.38, - "learning_rate": 1.128779710280812e-05, - "loss": 0.0829, + "learning_rate": 2.1301390167412977e-05, + "loss": 0.1781, "step": 93920 }, { "epoch": 4.38, - "learning_rate": 1.128732830153298e-05, - "loss": 0.1584, + "learning_rate": 2.130092209757696e-05, + "loss": 0.1762, "step": 93925 }, { "epoch": 4.38, - "learning_rate": 1.1286859500257841e-05, - "loss": 0.1059, + "learning_rate": 2.130045402774094e-05, + "loss": 0.1417, "step": 93930 }, { "epoch": 4.38, - "learning_rate": 1.1286390698982703e-05, - "loss": 0.029, + "learning_rate": 2.129998595790492e-05, + "loss": 0.0121, "step": 93935 }, { "epoch": 4.38, - "learning_rate": 1.1285921897707563e-05, - "loss": 0.0266, + "learning_rate": 2.12995178880689e-05, + "loss": 0.0594, "step": 93940 }, { "epoch": 4.38, - "learning_rate": 1.1285453096432425e-05, - "loss": 0.0672, + "learning_rate": 2.1299049818232883e-05, + "loss": 0.0541, "step": 93945 }, { "epoch": 4.38, - "learning_rate": 1.1284984295157285e-05, - "loss": 0.0235, + "learning_rate": 2.1298581748396863e-05, + "loss": 0.0433, "step": 93950 }, { "epoch": 4.38, - "learning_rate": 1.1284515493882144e-05, - "loss": 0.0488, + "learning_rate": 2.1298113678560843e-05, + "loss": 0.064, "step": 93955 }, { "epoch": 4.38, - "learning_rate": 1.1284046692607004e-05, - "loss": 0.1255, + "learning_rate": 2.1297645608724822e-05, + "loss": 0.0684, "step": 93960 }, { "epoch": 4.38, - "learning_rate": 1.1283577891331864e-05, - "loss": 0.0715, + "learning_rate": 2.1297177538888802e-05, + "loss": 0.0935, "step": 93965 }, { "epoch": 4.38, - "learning_rate": 1.1283109090056726e-05, - "loss": 0.1523, + "learning_rate": 2.1296709469052782e-05, + "loss": 0.0551, "step": 93970 }, { "epoch": 4.39, - "learning_rate": 1.1282640288781586e-05, - "loss": 0.3252, + "learning_rate": 2.1296241399216762e-05, + "loss": 0.2005, "step": 93975 }, { "epoch": 4.39, - "learning_rate": 1.1282171487506446e-05, - "loss": 0.1029, + "learning_rate": 2.1295773329380745e-05, + "loss": 0.041, "step": 93980 }, { "epoch": 4.39, - "learning_rate": 1.128170268623131e-05, - "loss": 0.1262, + "learning_rate": 2.1295305259544725e-05, + "loss": 0.0213, "step": 93985 }, { "epoch": 4.39, - "learning_rate": 1.1281233884956169e-05, - "loss": 0.0488, + "learning_rate": 2.1294837189708705e-05, + "loss": 0.0258, "step": 93990 }, { "epoch": 4.39, - "learning_rate": 1.1280765083681029e-05, - "loss": 0.0067, + "learning_rate": 2.1294369119872685e-05, + "loss": 0.0958, "step": 93995 }, { "epoch": 4.39, - "learning_rate": 1.1280296282405889e-05, - "loss": 0.023, + "learning_rate": 2.1293901050036668e-05, + "loss": 0.0894, "step": 94000 }, { "epoch": 4.39, - "learning_rate": 1.1279827481130749e-05, - "loss": 0.0867, + "learning_rate": 2.1293432980200648e-05, + "loss": 0.0773, "step": 94005 }, { "epoch": 4.39, - "learning_rate": 1.127935867985561e-05, - "loss": 0.0932, + "learning_rate": 2.1292964910364627e-05, + "loss": 0.0814, "step": 94010 }, { "epoch": 4.39, - "learning_rate": 1.127888987858047e-05, - "loss": 0.0649, + "learning_rate": 2.1292496840528607e-05, + "loss": 0.1146, "step": 94015 }, { "epoch": 4.39, - "learning_rate": 1.127842107730533e-05, - "loss": 0.1785, + "learning_rate": 2.1292028770692587e-05, + "loss": 0.1824, "step": 94020 }, { "epoch": 4.39, - "learning_rate": 1.127795227603019e-05, - "loss": 0.2187, + "learning_rate": 2.1291560700856567e-05, + "loss": 0.2757, "step": 94025 }, { "epoch": 4.39, - "learning_rate": 1.1277483474755054e-05, - "loss": 0.0831, + "learning_rate": 2.1291092631020547e-05, + "loss": 0.0782, "step": 94030 }, { "epoch": 4.39, - "learning_rate": 1.1277014673479914e-05, - "loss": 0.0091, + "learning_rate": 2.129062456118453e-05, + "loss": 0.0263, "step": 94035 }, { "epoch": 4.39, - "learning_rate": 1.1276545872204774e-05, - "loss": 0.0288, + "learning_rate": 2.129015649134851e-05, + "loss": 0.0392, "step": 94040 }, { "epoch": 4.39, - "learning_rate": 1.1276077070929633e-05, - "loss": 0.0356, + "learning_rate": 2.128968842151249e-05, + "loss": 0.0691, "step": 94045 }, { "epoch": 4.39, - "learning_rate": 1.1275608269654495e-05, - "loss": 0.0559, + "learning_rate": 2.128922035167647e-05, + "loss": 0.0788, "step": 94050 }, { "epoch": 4.39, - "learning_rate": 1.1275139468379355e-05, - "loss": 0.0913, + "learning_rate": 2.1288752281840453e-05, + "loss": 0.063, "step": 94055 }, { "epoch": 4.39, - "learning_rate": 1.1274670667104215e-05, - "loss": 0.0628, + "learning_rate": 2.1288284212004432e-05, + "loss": 0.0777, "step": 94060 }, { "epoch": 4.39, - "learning_rate": 1.1274201865829075e-05, - "loss": 0.1708, + "learning_rate": 2.1287816142168412e-05, + "loss": 0.054, "step": 94065 }, { "epoch": 4.39, - "learning_rate": 1.1273733064553935e-05, - "loss": 0.1671, + "learning_rate": 2.1287348072332392e-05, + "loss": 0.1587, "step": 94070 }, { "epoch": 4.39, - "learning_rate": 1.1273264263278798e-05, - "loss": 0.2832, + "learning_rate": 2.1286880002496375e-05, + "loss": 0.2849, "step": 94075 }, { "epoch": 4.39, - "learning_rate": 1.1272795462003658e-05, - "loss": 0.087, + "learning_rate": 2.1286411932660355e-05, + "loss": 0.0443, "step": 94080 }, { "epoch": 4.39, - "learning_rate": 1.1272326660728518e-05, - "loss": 0.0399, + "learning_rate": 2.128594386282433e-05, + "loss": 0.0254, "step": 94085 }, { "epoch": 4.39, - "learning_rate": 1.127185785945338e-05, - "loss": 0.053, + "learning_rate": 2.1285475792988315e-05, + "loss": 0.0246, "step": 94090 }, { "epoch": 4.39, - "learning_rate": 1.127138905817824e-05, - "loss": 0.0415, + "learning_rate": 2.1285007723152294e-05, + "loss": 0.0435, "step": 94095 }, { "epoch": 4.39, - "learning_rate": 1.12709202569031e-05, - "loss": 0.1127, + "learning_rate": 2.1284539653316274e-05, + "loss": 0.0487, "step": 94100 }, { "epoch": 4.39, - "learning_rate": 1.127045145562796e-05, - "loss": 0.0561, + "learning_rate": 2.1284071583480254e-05, + "loss": 0.0453, "step": 94105 }, { "epoch": 4.39, - "learning_rate": 1.126998265435282e-05, - "loss": 0.0568, + "learning_rate": 2.1283603513644237e-05, + "loss": 0.0702, "step": 94110 }, { "epoch": 4.39, - "learning_rate": 1.1269513853077681e-05, - "loss": 0.1383, + "learning_rate": 2.1283135443808217e-05, + "loss": 0.0848, "step": 94115 }, { "epoch": 4.39, - "learning_rate": 1.1269045051802543e-05, - "loss": 0.1745, + "learning_rate": 2.1282667373972197e-05, + "loss": 0.1402, "step": 94120 }, { "epoch": 4.39, - "learning_rate": 1.1268576250527403e-05, - "loss": 0.2155, + "learning_rate": 2.1282199304136177e-05, + "loss": 0.3395, "step": 94125 }, { "epoch": 4.39, - "learning_rate": 1.1268107449252264e-05, - "loss": 0.0674, + "learning_rate": 2.128173123430016e-05, + "loss": 0.0917, "step": 94130 }, { "epoch": 4.39, - "learning_rate": 1.1267638647977124e-05, - "loss": 0.0514, + "learning_rate": 2.128126316446414e-05, + "loss": 0.057, "step": 94135 }, { "epoch": 4.39, - "learning_rate": 1.1267169846701984e-05, - "loss": 0.0352, + "learning_rate": 2.128079509462812e-05, + "loss": 0.0486, "step": 94140 }, { "epoch": 4.39, - "learning_rate": 1.1266701045426844e-05, - "loss": 0.0417, + "learning_rate": 2.12803270247921e-05, + "loss": 0.0435, "step": 94145 }, { "epoch": 4.39, - "learning_rate": 1.1266232244151704e-05, - "loss": 0.0777, + "learning_rate": 2.127985895495608e-05, + "loss": 0.035, "step": 94150 }, { "epoch": 4.39, - "learning_rate": 1.1265763442876566e-05, - "loss": 0.0277, + "learning_rate": 2.127939088512006e-05, + "loss": 0.0711, "step": 94155 }, { "epoch": 4.39, - "learning_rate": 1.1265294641601425e-05, - "loss": 0.0597, + "learning_rate": 2.127892281528404e-05, + "loss": 0.0735, "step": 94160 }, { "epoch": 4.39, - "learning_rate": 1.1264825840326285e-05, - "loss": 0.0858, + "learning_rate": 2.1278454745448022e-05, + "loss": 0.1244, "step": 94165 }, { "epoch": 4.39, - "learning_rate": 1.1264357039051149e-05, - "loss": 0.1687, + "learning_rate": 2.1277986675612002e-05, + "loss": 0.1703, "step": 94170 }, { "epoch": 4.39, - "learning_rate": 1.1263888237776009e-05, - "loss": 0.3233, + "learning_rate": 2.1277518605775982e-05, + "loss": 0.351, "step": 94175 }, { "epoch": 4.39, - "learning_rate": 1.1263419436500869e-05, - "loss": 0.1246, + "learning_rate": 2.127705053593996e-05, + "loss": 0.0806, "step": 94180 }, { "epoch": 4.39, - "learning_rate": 1.1262950635225729e-05, - "loss": 0.0243, + "learning_rate": 2.1276582466103945e-05, + "loss": 0.0425, "step": 94185 }, { "epoch": 4.4, - "learning_rate": 1.1262481833950588e-05, - "loss": 0.054, + "learning_rate": 2.1276114396267925e-05, + "loss": 0.0104, "step": 94190 }, { "epoch": 4.4, - "learning_rate": 1.126201303267545e-05, - "loss": 0.0775, + "learning_rate": 2.1275646326431904e-05, + "loss": 0.0765, "step": 94195 }, { "epoch": 4.4, - "learning_rate": 1.126154423140031e-05, - "loss": 0.0309, + "learning_rate": 2.1275178256595884e-05, + "loss": 0.0362, "step": 94200 }, { "epoch": 4.4, - "learning_rate": 1.126107543012517e-05, - "loss": 0.1172, + "learning_rate": 2.1274710186759867e-05, + "loss": 0.0668, "step": 94205 }, { "epoch": 4.4, - "learning_rate": 1.126060662885003e-05, - "loss": 0.079, + "learning_rate": 2.1274242116923844e-05, + "loss": 0.1609, "step": 94210 }, { "epoch": 4.4, - "learning_rate": 1.1260137827574893e-05, - "loss": 0.0634, + "learning_rate": 2.1273774047087824e-05, + "loss": 0.1283, "step": 94215 }, { "epoch": 4.4, - "learning_rate": 1.1259669026299753e-05, - "loss": 0.1968, + "learning_rate": 2.1273305977251807e-05, + "loss": 0.1217, "step": 94220 }, { "epoch": 4.4, - "learning_rate": 1.1259200225024613e-05, - "loss": 0.3465, + "learning_rate": 2.1272837907415787e-05, + "loss": 0.2536, "step": 94225 }, { "epoch": 4.4, - "learning_rate": 1.1258731423749473e-05, - "loss": 0.0498, + "learning_rate": 2.1272369837579767e-05, + "loss": 0.0831, "step": 94230 }, { "epoch": 4.4, - "learning_rate": 1.1258262622474335e-05, - "loss": 0.0276, + "learning_rate": 2.1271901767743746e-05, + "loss": 0.004, "step": 94235 }, { "epoch": 4.4, - "learning_rate": 1.1257793821199195e-05, - "loss": 0.0654, + "learning_rate": 2.127143369790773e-05, + "loss": 0.0546, "step": 94240 }, { "epoch": 4.4, - "learning_rate": 1.1257325019924055e-05, - "loss": 0.0597, + "learning_rate": 2.127096562807171e-05, + "loss": 0.0367, "step": 94245 }, { "epoch": 4.4, - "learning_rate": 1.1256856218648914e-05, - "loss": 0.0425, + "learning_rate": 2.127049755823569e-05, + "loss": 0.0323, "step": 94250 }, { "epoch": 4.4, - "learning_rate": 1.1256387417373776e-05, - "loss": 0.1243, + "learning_rate": 2.127002948839967e-05, + "loss": 0.0759, "step": 94255 }, { "epoch": 4.4, - "learning_rate": 1.1255918616098638e-05, - "loss": 0.0957, + "learning_rate": 2.1269561418563652e-05, + "loss": 0.0962, "step": 94260 }, { "epoch": 4.4, - "learning_rate": 1.1255449814823498e-05, - "loss": 0.1223, + "learning_rate": 2.1269093348727632e-05, + "loss": 0.0669, "step": 94265 }, { "epoch": 4.4, - "learning_rate": 1.1254981013548358e-05, - "loss": 0.1075, + "learning_rate": 2.1268625278891612e-05, + "loss": 0.1467, "step": 94270 }, { "epoch": 4.4, - "learning_rate": 1.125451221227322e-05, - "loss": 0.2353, + "learning_rate": 2.126815720905559e-05, + "loss": 0.21, "step": 94275 }, { "epoch": 4.4, - "learning_rate": 1.1254043410998079e-05, - "loss": 0.0526, + "learning_rate": 2.126768913921957e-05, + "loss": 0.0645, "step": 94280 }, { "epoch": 4.4, - "learning_rate": 1.1253574609722939e-05, - "loss": 0.0169, + "learning_rate": 2.126722106938355e-05, + "loss": 0.0206, "step": 94285 }, { "epoch": 4.4, - "learning_rate": 1.1253105808447799e-05, - "loss": 0.0166, + "learning_rate": 2.126675299954753e-05, + "loss": 0.0027, "step": 94290 }, { "epoch": 4.4, - "learning_rate": 1.125263700717266e-05, - "loss": 0.0217, + "learning_rate": 2.1266284929711514e-05, + "loss": 0.0144, "step": 94295 }, { "epoch": 4.4, - "learning_rate": 1.125216820589752e-05, - "loss": 0.044, + "learning_rate": 2.1265816859875494e-05, + "loss": 0.0262, "step": 94300 }, { "epoch": 4.4, - "learning_rate": 1.125169940462238e-05, - "loss": 0.0456, + "learning_rate": 2.1265348790039474e-05, + "loss": 0.0247, "step": 94305 }, { "epoch": 4.4, - "learning_rate": 1.1251230603347242e-05, - "loss": 0.0509, + "learning_rate": 2.1264880720203454e-05, + "loss": 0.0641, "step": 94310 }, { "epoch": 4.4, - "learning_rate": 1.1250761802072104e-05, - "loss": 0.1222, + "learning_rate": 2.1264412650367437e-05, + "loss": 0.1935, "step": 94315 }, { "epoch": 4.4, - "learning_rate": 1.1250293000796964e-05, - "loss": 0.276, + "learning_rate": 2.1263944580531417e-05, + "loss": 0.2883, "step": 94320 }, { "epoch": 4.4, - "learning_rate": 1.1249824199521824e-05, - "loss": 0.2786, + "learning_rate": 2.1263476510695397e-05, + "loss": 0.1403, "step": 94325 }, { "epoch": 4.4, - "learning_rate": 1.1249355398246684e-05, - "loss": 0.0699, + "learning_rate": 2.126300844085938e-05, + "loss": 0.0786, "step": 94330 }, { "epoch": 4.4, - "learning_rate": 1.1248886596971545e-05, - "loss": 0.0244, + "learning_rate": 2.1262540371023356e-05, + "loss": 0.0144, "step": 94335 }, { "epoch": 4.4, - "learning_rate": 1.1248417795696405e-05, - "loss": 0.0295, + "learning_rate": 2.1262072301187336e-05, + "loss": 0.0328, "step": 94340 }, { "epoch": 4.4, - "learning_rate": 1.1247948994421265e-05, - "loss": 0.0276, + "learning_rate": 2.1261604231351316e-05, + "loss": 0.0423, "step": 94345 }, { "epoch": 4.4, - "learning_rate": 1.1247480193146125e-05, - "loss": 0.1131, + "learning_rate": 2.12611361615153e-05, + "loss": 0.0305, "step": 94350 }, { "epoch": 4.4, - "learning_rate": 1.1247011391870988e-05, - "loss": 0.0372, + "learning_rate": 2.126066809167928e-05, + "loss": 0.0494, "step": 94355 }, { "epoch": 4.4, - "learning_rate": 1.1246542590595848e-05, - "loss": 0.0917, + "learning_rate": 2.126020002184326e-05, + "loss": 0.1664, "step": 94360 }, { "epoch": 4.4, - "learning_rate": 1.1246073789320708e-05, - "loss": 0.0983, + "learning_rate": 2.125973195200724e-05, + "loss": 0.0613, "step": 94365 }, { "epoch": 4.4, - "learning_rate": 1.1245604988045568e-05, - "loss": 0.1681, + "learning_rate": 2.1259263882171222e-05, + "loss": 0.0771, "step": 94370 }, { "epoch": 4.4, - "learning_rate": 1.124513618677043e-05, - "loss": 0.2011, + "learning_rate": 2.12587958123352e-05, + "loss": 0.3053, "step": 94375 }, { "epoch": 4.4, - "learning_rate": 1.124466738549529e-05, - "loss": 0.0976, + "learning_rate": 2.125832774249918e-05, + "loss": 0.0661, "step": 94380 }, { "epoch": 4.4, - "learning_rate": 1.124419858422015e-05, - "loss": 0.0407, + "learning_rate": 2.1257859672663165e-05, + "loss": 0.0551, "step": 94385 }, { "epoch": 4.4, - "learning_rate": 1.124372978294501e-05, - "loss": 0.0502, + "learning_rate": 2.1257391602827144e-05, + "loss": 0.0336, "step": 94390 }, { "epoch": 4.4, - "learning_rate": 1.124326098166987e-05, - "loss": 0.0199, + "learning_rate": 2.1256923532991124e-05, + "loss": 0.0138, "step": 94395 }, { "epoch": 4.4, - "learning_rate": 1.1242792180394733e-05, - "loss": 0.0882, + "learning_rate": 2.12564554631551e-05, + "loss": 0.039, "step": 94400 }, { "epoch": 4.41, - "learning_rate": 1.1242323379119593e-05, - "loss": 0.0577, + "learning_rate": 2.1255987393319084e-05, + "loss": 0.0809, "step": 94405 }, { "epoch": 4.41, - "learning_rate": 1.1241854577844453e-05, - "loss": 0.0946, + "learning_rate": 2.1255519323483064e-05, + "loss": 0.0719, "step": 94410 }, { "epoch": 4.41, - "learning_rate": 1.1241385776569314e-05, - "loss": 0.0738, + "learning_rate": 2.1255051253647043e-05, + "loss": 0.122, "step": 94415 }, { "epoch": 4.41, - "learning_rate": 1.1240916975294174e-05, - "loss": 0.085, + "learning_rate": 2.1254583183811023e-05, + "loss": 0.1085, "step": 94420 }, { "epoch": 4.41, - "learning_rate": 1.1240448174019034e-05, - "loss": 0.235, + "learning_rate": 2.1254115113975007e-05, + "loss": 0.2945, "step": 94425 }, { "epoch": 4.41, - "learning_rate": 1.1239979372743894e-05, - "loss": 0.1043, + "learning_rate": 2.1253647044138986e-05, + "loss": 0.0727, "step": 94430 }, { "epoch": 4.41, - "learning_rate": 1.1239510571468754e-05, - "loss": 0.0541, + "learning_rate": 2.1253178974302966e-05, + "loss": 0.0045, "step": 94435 }, { "epoch": 4.41, - "learning_rate": 1.1239041770193616e-05, - "loss": 0.0314, + "learning_rate": 2.1252710904466946e-05, + "loss": 0.0711, "step": 94440 }, { "epoch": 4.41, - "learning_rate": 1.1238572968918476e-05, - "loss": 0.0914, + "learning_rate": 2.125224283463093e-05, + "loss": 0.0771, "step": 94445 }, { "epoch": 4.41, - "learning_rate": 1.1238104167643337e-05, - "loss": 0.0338, + "learning_rate": 2.125177476479491e-05, + "loss": 0.061, "step": 94450 }, { "epoch": 4.41, - "learning_rate": 1.1237635366368199e-05, - "loss": 0.0784, + "learning_rate": 2.125130669495889e-05, + "loss": 0.0425, "step": 94455 }, { "epoch": 4.41, - "learning_rate": 1.1237166565093059e-05, - "loss": 0.1103, + "learning_rate": 2.1250838625122872e-05, + "loss": 0.1146, "step": 94460 }, { "epoch": 4.41, - "learning_rate": 1.1236697763817919e-05, - "loss": 0.1155, + "learning_rate": 2.125037055528685e-05, + "loss": 0.0617, "step": 94465 }, { "epoch": 4.41, - "learning_rate": 1.1236228962542779e-05, - "loss": 0.1427, + "learning_rate": 2.1249902485450828e-05, + "loss": 0.1188, "step": 94470 }, { "epoch": 4.41, - "learning_rate": 1.1235760161267639e-05, - "loss": 0.179, + "learning_rate": 2.1249434415614808e-05, + "loss": 0.3091, "step": 94475 }, { "epoch": 4.41, - "learning_rate": 1.12352913599925e-05, - "loss": 0.0425, + "learning_rate": 2.124896634577879e-05, + "loss": 0.0746, "step": 94480 }, { "epoch": 4.41, - "learning_rate": 1.123482255871736e-05, - "loss": 0.0324, + "learning_rate": 2.124849827594277e-05, + "loss": 0.0523, "step": 94485 }, { "epoch": 4.41, - "learning_rate": 1.123435375744222e-05, - "loss": 0.0479, + "learning_rate": 2.124803020610675e-05, + "loss": 0.0257, "step": 94490 }, { "epoch": 4.41, - "learning_rate": 1.1233884956167083e-05, - "loss": 0.021, + "learning_rate": 2.124756213627073e-05, + "loss": 0.1152, "step": 94495 }, { "epoch": 4.41, - "learning_rate": 1.1233416154891943e-05, - "loss": 0.0872, + "learning_rate": 2.1247094066434714e-05, + "loss": 0.0752, "step": 94500 }, { "epoch": 4.41, - "learning_rate": 1.1232947353616803e-05, - "loss": 0.0825, + "learning_rate": 2.1246625996598694e-05, + "loss": 0.1015, "step": 94505 }, { "epoch": 4.41, - "learning_rate": 1.1232478552341663e-05, - "loss": 0.0618, + "learning_rate": 2.1246157926762674e-05, + "loss": 0.0779, "step": 94510 }, { "epoch": 4.41, - "learning_rate": 1.1232009751066523e-05, - "loss": 0.0927, + "learning_rate": 2.1245689856926657e-05, + "loss": 0.074, "step": 94515 }, { "epoch": 4.41, - "learning_rate": 1.1231540949791385e-05, - "loss": 0.1971, + "learning_rate": 2.1245221787090637e-05, + "loss": 0.1611, "step": 94520 }, { "epoch": 4.41, - "learning_rate": 1.1231072148516245e-05, - "loss": 0.224, + "learning_rate": 2.1244753717254613e-05, + "loss": 0.3667, "step": 94525 }, { "epoch": 4.41, - "learning_rate": 1.1230603347241105e-05, - "loss": 0.0707, + "learning_rate": 2.1244285647418593e-05, + "loss": 0.067, "step": 94530 }, { "epoch": 4.41, - "learning_rate": 1.1230134545965965e-05, - "loss": 0.0205, + "learning_rate": 2.1243817577582576e-05, + "loss": 0.0051, "step": 94535 }, { "epoch": 4.41, - "learning_rate": 1.1229665744690828e-05, - "loss": 0.0229, + "learning_rate": 2.1243349507746556e-05, + "loss": 0.0369, "step": 94540 }, { "epoch": 4.41, - "learning_rate": 1.1229196943415688e-05, - "loss": 0.0709, + "learning_rate": 2.1242881437910536e-05, + "loss": 0.0365, "step": 94545 }, { "epoch": 4.41, - "learning_rate": 1.1228728142140548e-05, - "loss": 0.1253, + "learning_rate": 2.1242413368074516e-05, + "loss": 0.0823, "step": 94550 }, { "epoch": 4.41, - "learning_rate": 1.1228259340865408e-05, - "loss": 0.0335, + "learning_rate": 2.12419452982385e-05, + "loss": 0.0555, "step": 94555 }, { "epoch": 4.41, - "learning_rate": 1.122779053959027e-05, - "loss": 0.0418, + "learning_rate": 2.124147722840248e-05, + "loss": 0.1436, "step": 94560 }, { "epoch": 4.41, - "learning_rate": 1.122732173831513e-05, - "loss": 0.1633, + "learning_rate": 2.124100915856646e-05, + "loss": 0.1021, "step": 94565 }, { "epoch": 4.41, - "learning_rate": 1.122685293703999e-05, - "loss": 0.0939, + "learning_rate": 2.124054108873044e-05, + "loss": 0.1648, "step": 94570 }, { "epoch": 4.41, - "learning_rate": 1.1226384135764849e-05, - "loss": 0.2821, + "learning_rate": 2.124007301889442e-05, + "loss": 0.2886, "step": 94575 }, { "epoch": 4.41, - "learning_rate": 1.1225915334489709e-05, - "loss": 0.0763, + "learning_rate": 2.12396049490584e-05, + "loss": 0.0906, "step": 94580 }, { "epoch": 4.41, - "learning_rate": 1.1225446533214572e-05, - "loss": 0.0498, + "learning_rate": 2.123913687922238e-05, + "loss": 0.0368, "step": 94585 }, { "epoch": 4.41, - "learning_rate": 1.1224977731939432e-05, - "loss": 0.0285, + "learning_rate": 2.123866880938636e-05, + "loss": 0.058, "step": 94590 }, { "epoch": 4.41, - "learning_rate": 1.1224508930664292e-05, - "loss": 0.0782, + "learning_rate": 2.123820073955034e-05, + "loss": 0.0206, "step": 94595 }, { "epoch": 4.41, - "learning_rate": 1.1224040129389154e-05, - "loss": 0.0302, + "learning_rate": 2.123773266971432e-05, + "loss": 0.0499, "step": 94600 }, { "epoch": 4.41, - "learning_rate": 1.1223571328114014e-05, - "loss": 0.0348, + "learning_rate": 2.12372645998783e-05, + "loss": 0.0175, "step": 94605 }, { "epoch": 4.41, - "learning_rate": 1.1223102526838874e-05, - "loss": 0.0701, + "learning_rate": 2.1236796530042283e-05, + "loss": 0.0815, "step": 94610 }, { "epoch": 4.41, - "learning_rate": 1.1222633725563734e-05, - "loss": 0.1278, + "learning_rate": 2.1236328460206263e-05, + "loss": 0.0352, "step": 94615 }, { "epoch": 4.42, - "learning_rate": 1.1222164924288594e-05, - "loss": 0.0813, + "learning_rate": 2.1235860390370243e-05, + "loss": 0.0682, "step": 94620 }, { "epoch": 4.42, - "learning_rate": 1.1221696123013455e-05, - "loss": 0.37, + "learning_rate": 2.1235392320534223e-05, + "loss": 0.2272, "step": 94625 }, { "epoch": 4.42, - "learning_rate": 1.1221227321738315e-05, - "loss": 0.0652, + "learning_rate": 2.1234924250698206e-05, + "loss": 0.113, "step": 94630 }, { "epoch": 4.42, - "learning_rate": 1.1220758520463177e-05, - "loss": 0.0244, + "learning_rate": 2.1234456180862186e-05, + "loss": 0.0264, "step": 94635 }, { "epoch": 4.42, - "learning_rate": 1.1220289719188038e-05, - "loss": 0.0597, + "learning_rate": 2.1233988111026166e-05, + "loss": 0.0693, "step": 94640 }, { "epoch": 4.42, - "learning_rate": 1.1219820917912898e-05, - "loss": 0.0138, + "learning_rate": 2.123352004119015e-05, + "loss": 0.0168, "step": 94645 }, { "epoch": 4.42, - "learning_rate": 1.1219352116637758e-05, - "loss": 0.0577, + "learning_rate": 2.123305197135413e-05, + "loss": 0.0447, "step": 94650 }, { "epoch": 4.42, - "learning_rate": 1.1218883315362618e-05, - "loss": 0.0518, + "learning_rate": 2.1232583901518105e-05, + "loss": 0.0648, "step": 94655 }, { "epoch": 4.42, - "learning_rate": 1.1218414514087478e-05, - "loss": 0.1242, + "learning_rate": 2.1232115831682085e-05, + "loss": 0.0462, "step": 94660 }, { "epoch": 4.42, - "learning_rate": 1.121794571281234e-05, - "loss": 0.1233, + "learning_rate": 2.1231647761846068e-05, + "loss": 0.0635, "step": 94665 }, { "epoch": 4.42, - "learning_rate": 1.12174769115372e-05, - "loss": 0.0707, + "learning_rate": 2.1231179692010048e-05, + "loss": 0.1457, "step": 94670 }, { "epoch": 4.42, - "learning_rate": 1.121700811026206e-05, - "loss": 0.2971, + "learning_rate": 2.1230711622174028e-05, + "loss": 0.248, "step": 94675 }, { "epoch": 4.42, - "learning_rate": 1.1216539308986923e-05, - "loss": 0.0634, + "learning_rate": 2.1230243552338008e-05, + "loss": 0.0549, "step": 94680 }, { "epoch": 4.42, - "learning_rate": 1.1216070507711783e-05, - "loss": 0.0151, + "learning_rate": 2.122977548250199e-05, + "loss": 0.0901, "step": 94685 }, { "epoch": 4.42, - "learning_rate": 1.1215601706436643e-05, - "loss": 0.0477, + "learning_rate": 2.122930741266597e-05, + "loss": 0.0454, "step": 94690 }, { "epoch": 4.42, - "learning_rate": 1.1215132905161503e-05, - "loss": 0.0449, + "learning_rate": 2.122883934282995e-05, + "loss": 0.058, "step": 94695 }, { "epoch": 4.42, - "learning_rate": 1.1214664103886363e-05, - "loss": 0.0705, + "learning_rate": 2.1228371272993934e-05, + "loss": 0.0655, "step": 94700 }, { "epoch": 4.42, - "learning_rate": 1.1214195302611224e-05, - "loss": 0.0546, + "learning_rate": 2.1227903203157914e-05, + "loss": 0.0595, "step": 94705 }, { "epoch": 4.42, - "learning_rate": 1.1213726501336084e-05, - "loss": 0.0878, + "learning_rate": 2.1227435133321893e-05, + "loss": 0.0598, "step": 94710 }, { "epoch": 4.42, - "learning_rate": 1.1213257700060944e-05, - "loss": 0.0852, + "learning_rate": 2.122696706348587e-05, + "loss": 0.1163, "step": 94715 }, { "epoch": 4.42, - "learning_rate": 1.1212788898785804e-05, - "loss": 0.0628, + "learning_rate": 2.1226498993649853e-05, + "loss": 0.0577, "step": 94720 }, { "epoch": 4.42, - "learning_rate": 1.1212320097510667e-05, - "loss": 0.2429, + "learning_rate": 2.1226030923813833e-05, + "loss": 0.4204, "step": 94725 }, { "epoch": 4.42, - "learning_rate": 1.1211851296235527e-05, - "loss": 0.0501, + "learning_rate": 2.1225562853977813e-05, + "loss": 0.0557, "step": 94730 }, { "epoch": 4.42, - "learning_rate": 1.1211382494960387e-05, - "loss": 0.0319, + "learning_rate": 2.1225094784141792e-05, + "loss": 0.0173, "step": 94735 }, { "epoch": 4.42, - "learning_rate": 1.1210913693685247e-05, - "loss": 0.0394, + "learning_rate": 2.1224626714305776e-05, + "loss": 0.0256, "step": 94740 }, { "epoch": 4.42, - "learning_rate": 1.1210444892410109e-05, - "loss": 0.0397, + "learning_rate": 2.1224158644469755e-05, + "loss": 0.014, "step": 94745 }, { "epoch": 4.42, - "learning_rate": 1.1209976091134969e-05, - "loss": 0.0548, + "learning_rate": 2.1223690574633735e-05, + "loss": 0.0441, "step": 94750 }, { "epoch": 4.42, - "learning_rate": 1.1209507289859829e-05, - "loss": 0.15, + "learning_rate": 2.122322250479772e-05, + "loss": 0.0326, "step": 94755 }, { "epoch": 4.42, - "learning_rate": 1.1209038488584689e-05, - "loss": 0.0634, + "learning_rate": 2.12227544349617e-05, + "loss": 0.0824, "step": 94760 }, { "epoch": 4.42, - "learning_rate": 1.120856968730955e-05, - "loss": 0.0811, + "learning_rate": 2.1222286365125678e-05, + "loss": 0.0643, "step": 94765 }, { "epoch": 4.42, - "learning_rate": 1.120810088603441e-05, - "loss": 0.1546, + "learning_rate": 2.1221818295289658e-05, + "loss": 0.1485, "step": 94770 }, { "epoch": 4.42, - "learning_rate": 1.1207632084759272e-05, - "loss": 0.245, + "learning_rate": 2.122135022545364e-05, + "loss": 0.2404, "step": 94775 }, { "epoch": 4.42, - "learning_rate": 1.1207163283484133e-05, - "loss": 0.0774, + "learning_rate": 2.1220882155617618e-05, + "loss": 0.0901, "step": 94780 }, { "epoch": 4.42, - "learning_rate": 1.1206694482208993e-05, - "loss": 0.0428, + "learning_rate": 2.1220414085781597e-05, + "loss": 0.0131, "step": 94785 }, { "epoch": 4.42, - "learning_rate": 1.1206225680933853e-05, - "loss": 0.045, + "learning_rate": 2.1219946015945577e-05, + "loss": 0.0122, "step": 94790 }, { "epoch": 4.42, - "learning_rate": 1.1205756879658713e-05, - "loss": 0.0884, + "learning_rate": 2.121947794610956e-05, + "loss": 0.0153, "step": 94795 }, { "epoch": 4.42, - "learning_rate": 1.1205288078383573e-05, - "loss": 0.0341, + "learning_rate": 2.121900987627354e-05, + "loss": 0.0379, "step": 94800 }, { "epoch": 4.42, - "learning_rate": 1.1204819277108435e-05, - "loss": 0.0694, + "learning_rate": 2.121854180643752e-05, + "loss": 0.1011, "step": 94805 }, { "epoch": 4.42, - "learning_rate": 1.1204350475833295e-05, - "loss": 0.0746, + "learning_rate": 2.12180737366015e-05, + "loss": 0.0922, "step": 94810 }, { "epoch": 4.42, - "learning_rate": 1.1203881674558155e-05, - "loss": 0.1387, + "learning_rate": 2.1217605666765483e-05, + "loss": 0.1386, "step": 94815 }, { "epoch": 4.42, - "learning_rate": 1.1203412873283018e-05, - "loss": 0.1527, + "learning_rate": 2.1217137596929463e-05, + "loss": 0.2042, "step": 94820 }, { "epoch": 4.42, - "learning_rate": 1.1202944072007878e-05, - "loss": 0.2188, + "learning_rate": 2.1216669527093443e-05, + "loss": 0.345, "step": 94825 }, { "epoch": 4.42, - "learning_rate": 1.1202475270732738e-05, - "loss": 0.0657, + "learning_rate": 2.1216201457257426e-05, + "loss": 0.0835, "step": 94830 }, { "epoch": 4.43, - "learning_rate": 1.1202006469457598e-05, - "loss": 0.0172, + "learning_rate": 2.1215733387421406e-05, + "loss": 0.0243, "step": 94835 }, { "epoch": 4.43, - "learning_rate": 1.1201537668182458e-05, - "loss": 0.034, + "learning_rate": 2.1215265317585386e-05, + "loss": 0.0108, "step": 94840 }, { "epoch": 4.43, - "learning_rate": 1.120106886690732e-05, - "loss": 0.0754, + "learning_rate": 2.1214797247749362e-05, + "loss": 0.043, "step": 94845 }, { "epoch": 4.43, - "learning_rate": 1.120060006563218e-05, - "loss": 0.0494, + "learning_rate": 2.1214329177913345e-05, + "loss": 0.0537, "step": 94850 }, { "epoch": 4.43, - "learning_rate": 1.120013126435704e-05, - "loss": 0.0505, + "learning_rate": 2.1213861108077325e-05, + "loss": 0.0692, "step": 94855 }, { "epoch": 4.43, - "learning_rate": 1.11996624630819e-05, - "loss": 0.0729, + "learning_rate": 2.1213393038241305e-05, + "loss": 0.0712, "step": 94860 }, { "epoch": 4.43, - "learning_rate": 1.1199193661806763e-05, - "loss": 0.11, + "learning_rate": 2.1212924968405285e-05, + "loss": 0.0643, "step": 94865 }, { "epoch": 4.43, - "learning_rate": 1.1198724860531622e-05, - "loss": 0.1641, + "learning_rate": 2.1212456898569268e-05, + "loss": 0.1645, "step": 94870 }, { "epoch": 4.43, - "learning_rate": 1.1198256059256482e-05, - "loss": 0.1853, + "learning_rate": 2.1211988828733248e-05, + "loss": 0.2834, "step": 94875 }, { "epoch": 4.43, - "learning_rate": 1.1197787257981342e-05, - "loss": 0.0978, + "learning_rate": 2.1211520758897228e-05, + "loss": 0.0986, "step": 94880 }, { "epoch": 4.43, - "learning_rate": 1.1197318456706204e-05, - "loss": 0.012, + "learning_rate": 2.121105268906121e-05, + "loss": 0.0302, "step": 94885 }, { "epoch": 4.43, - "learning_rate": 1.1196849655431064e-05, - "loss": 0.0485, + "learning_rate": 2.121058461922519e-05, + "loss": 0.0272, "step": 94890 }, { "epoch": 4.43, - "learning_rate": 1.1196380854155924e-05, - "loss": 0.0729, + "learning_rate": 2.121011654938917e-05, + "loss": 0.0396, "step": 94895 }, { "epoch": 4.43, - "learning_rate": 1.1195912052880784e-05, - "loss": 0.0622, + "learning_rate": 2.120964847955315e-05, + "loss": 0.0452, "step": 94900 }, { "epoch": 4.43, - "learning_rate": 1.1195443251605644e-05, - "loss": 0.0556, + "learning_rate": 2.120918040971713e-05, + "loss": 0.0323, "step": 94905 }, { "epoch": 4.43, - "learning_rate": 1.1194974450330507e-05, - "loss": 0.0774, + "learning_rate": 2.120871233988111e-05, + "loss": 0.0672, "step": 94910 }, { "epoch": 4.43, - "learning_rate": 1.1194505649055367e-05, - "loss": 0.0702, + "learning_rate": 2.120824427004509e-05, + "loss": 0.1655, "step": 94915 }, { "epoch": 4.43, - "learning_rate": 1.1194036847780227e-05, - "loss": 0.2151, + "learning_rate": 2.120777620020907e-05, + "loss": 0.1278, "step": 94920 }, { "epoch": 4.43, - "learning_rate": 1.1193568046505088e-05, - "loss": 0.1308, + "learning_rate": 2.1207308130373053e-05, + "loss": 0.209, "step": 94925 }, { "epoch": 4.43, - "learning_rate": 1.1193099245229948e-05, - "loss": 0.0543, + "learning_rate": 2.1206840060537032e-05, + "loss": 0.0686, "step": 94930 }, { "epoch": 4.43, - "learning_rate": 1.1192630443954808e-05, - "loss": 0.0192, + "learning_rate": 2.1206371990701012e-05, + "loss": 0.0164, "step": 94935 }, { "epoch": 4.43, - "learning_rate": 1.1192161642679668e-05, - "loss": 0.066, + "learning_rate": 2.1205903920864995e-05, + "loss": 0.0373, "step": 94940 }, { "epoch": 4.43, - "learning_rate": 1.1191692841404528e-05, - "loss": 0.0214, + "learning_rate": 2.1205435851028975e-05, + "loss": 0.0439, "step": 94945 }, { "epoch": 4.43, - "learning_rate": 1.119122404012939e-05, - "loss": 0.0609, + "learning_rate": 2.1204967781192955e-05, + "loss": 0.0818, "step": 94950 }, { "epoch": 4.43, - "learning_rate": 1.119075523885425e-05, - "loss": 0.0662, + "learning_rate": 2.1204499711356935e-05, + "loss": 0.0513, "step": 94955 }, { "epoch": 4.43, - "learning_rate": 1.1190286437579111e-05, - "loss": 0.0625, + "learning_rate": 2.1204031641520918e-05, + "loss": 0.0533, "step": 94960 }, { "epoch": 4.43, - "learning_rate": 1.1189817636303973e-05, - "loss": 0.0978, + "learning_rate": 2.1203563571684898e-05, + "loss": 0.0768, "step": 94965 }, { "epoch": 4.43, - "learning_rate": 1.1189348835028833e-05, - "loss": 0.1464, + "learning_rate": 2.1203095501848874e-05, + "loss": 0.114, "step": 94970 }, { "epoch": 4.43, - "learning_rate": 1.1188880033753693e-05, - "loss": 0.2409, + "learning_rate": 2.1202627432012854e-05, + "loss": 0.1452, "step": 94975 }, { "epoch": 4.43, - "learning_rate": 1.1188411232478553e-05, - "loss": 0.0647, + "learning_rate": 2.1202159362176837e-05, + "loss": 0.0858, "step": 94980 }, { "epoch": 4.43, - "learning_rate": 1.1187942431203413e-05, - "loss": 0.0181, + "learning_rate": 2.1201691292340817e-05, + "loss": 0.0207, "step": 94985 }, { "epoch": 4.43, - "learning_rate": 1.1187473629928274e-05, - "loss": 0.0826, + "learning_rate": 2.1201223222504797e-05, + "loss": 0.0302, "step": 94990 }, { "epoch": 4.43, - "learning_rate": 1.1187004828653134e-05, - "loss": 0.0638, + "learning_rate": 2.1200755152668777e-05, + "loss": 0.0201, "step": 94995 }, { "epoch": 4.43, - "learning_rate": 1.1186536027377994e-05, - "loss": 0.0776, + "learning_rate": 2.120028708283276e-05, + "loss": 0.0465, "step": 95000 }, { "epoch": 4.43, - "learning_rate": 1.1186067226102858e-05, - "loss": 0.0641, + "learning_rate": 2.119981901299674e-05, + "loss": 0.0242, "step": 95005 }, { "epoch": 4.43, - "learning_rate": 1.1185598424827718e-05, - "loss": 0.0684, + "learning_rate": 2.119935094316072e-05, + "loss": 0.0417, "step": 95010 }, { "epoch": 4.43, - "learning_rate": 1.1185129623552577e-05, - "loss": 0.1146, + "learning_rate": 2.1198882873324703e-05, + "loss": 0.1135, "step": 95015 }, { "epoch": 4.43, - "learning_rate": 1.1184660822277437e-05, - "loss": 0.1459, + "learning_rate": 2.1198414803488683e-05, + "loss": 0.1604, "step": 95020 }, { "epoch": 4.43, - "learning_rate": 1.1184192021002297e-05, - "loss": 0.3169, + "learning_rate": 2.1197946733652663e-05, + "loss": 0.2643, "step": 95025 }, { "epoch": 4.43, - "learning_rate": 1.1183723219727159e-05, - "loss": 0.0857, + "learning_rate": 2.1197478663816642e-05, + "loss": 0.073, "step": 95030 }, { "epoch": 4.43, - "learning_rate": 1.1183254418452019e-05, - "loss": 0.0188, + "learning_rate": 2.1197010593980622e-05, + "loss": 0.0154, "step": 95035 }, { "epoch": 4.43, - "learning_rate": 1.1182785617176879e-05, - "loss": 0.0165, + "learning_rate": 2.1196542524144602e-05, + "loss": 0.0549, "step": 95040 }, { "epoch": 4.43, - "learning_rate": 1.1182316815901739e-05, - "loss": 0.0268, + "learning_rate": 2.1196074454308582e-05, + "loss": 0.0323, "step": 95045 }, { "epoch": 4.44, - "learning_rate": 1.1181848014626602e-05, - "loss": 0.0907, + "learning_rate": 2.119560638447256e-05, + "loss": 0.0188, "step": 95050 }, { "epoch": 4.44, - "learning_rate": 1.1181379213351462e-05, - "loss": 0.0898, + "learning_rate": 2.1195138314636545e-05, + "loss": 0.0825, "step": 95055 }, { "epoch": 4.44, - "learning_rate": 1.1180910412076322e-05, - "loss": 0.0895, + "learning_rate": 2.1194670244800525e-05, + "loss": 0.118, "step": 95060 }, { "epoch": 4.44, - "learning_rate": 1.1180441610801182e-05, - "loss": 0.1027, + "learning_rate": 2.1194202174964504e-05, + "loss": 0.1401, "step": 95065 }, { "epoch": 4.44, - "learning_rate": 1.1179972809526044e-05, - "loss": 0.1274, + "learning_rate": 2.1193734105128488e-05, + "loss": 0.1273, "step": 95070 }, { "epoch": 4.44, - "learning_rate": 1.1179504008250903e-05, - "loss": 0.277, + "learning_rate": 2.1193266035292468e-05, + "loss": 0.2309, "step": 95075 }, { "epoch": 4.44, - "learning_rate": 1.1179035206975763e-05, - "loss": 0.0948, + "learning_rate": 2.1192797965456447e-05, + "loss": 0.0446, "step": 95080 }, { "epoch": 4.44, - "learning_rate": 1.1178566405700623e-05, - "loss": 0.0314, + "learning_rate": 2.1192329895620427e-05, + "loss": 0.0226, "step": 95085 }, { "epoch": 4.44, - "learning_rate": 1.1178097604425483e-05, - "loss": 0.0511, + "learning_rate": 2.119186182578441e-05, + "loss": 0.0246, "step": 95090 }, { "epoch": 4.44, - "learning_rate": 1.1177628803150345e-05, - "loss": 0.0339, + "learning_rate": 2.1191393755948387e-05, + "loss": 0.0304, "step": 95095 }, { "epoch": 4.44, - "learning_rate": 1.1177160001875206e-05, - "loss": 0.0502, + "learning_rate": 2.1190925686112367e-05, + "loss": 0.0452, "step": 95100 }, { "epoch": 4.44, - "learning_rate": 1.1176691200600066e-05, - "loss": 0.1113, + "learning_rate": 2.1190457616276346e-05, + "loss": 0.0635, "step": 95105 }, { "epoch": 4.44, - "learning_rate": 1.1176222399324928e-05, - "loss": 0.139, + "learning_rate": 2.118998954644033e-05, + "loss": 0.0829, "step": 95110 }, { "epoch": 4.44, - "learning_rate": 1.1175753598049788e-05, - "loss": 0.1397, + "learning_rate": 2.118952147660431e-05, + "loss": 0.0767, "step": 95115 }, { "epoch": 4.44, - "learning_rate": 1.1175284796774648e-05, - "loss": 0.2609, + "learning_rate": 2.118905340676829e-05, + "loss": 0.1304, "step": 95120 }, { "epoch": 4.44, - "learning_rate": 1.1174815995499508e-05, - "loss": 0.1834, + "learning_rate": 2.1188585336932272e-05, + "loss": 0.1772, "step": 95125 }, { "epoch": 4.44, - "learning_rate": 1.117434719422437e-05, - "loss": 0.0553, + "learning_rate": 2.1188117267096252e-05, + "loss": 0.0753, "step": 95130 }, { "epoch": 4.44, - "learning_rate": 1.117387839294923e-05, - "loss": 0.043, + "learning_rate": 2.1187649197260232e-05, + "loss": 0.1066, "step": 95135 }, { "epoch": 4.44, - "learning_rate": 1.117340959167409e-05, - "loss": 0.03, + "learning_rate": 2.1187181127424212e-05, + "loss": 0.0365, "step": 95140 }, { "epoch": 4.44, - "learning_rate": 1.1172940790398951e-05, - "loss": 0.0375, + "learning_rate": 2.1186713057588195e-05, + "loss": 0.053, "step": 95145 }, { "epoch": 4.44, - "learning_rate": 1.1172471989123813e-05, - "loss": 0.0403, + "learning_rate": 2.1186244987752175e-05, + "loss": 0.0124, "step": 95150 }, { "epoch": 4.44, - "learning_rate": 1.1172003187848673e-05, - "loss": 0.0278, + "learning_rate": 2.1185776917916155e-05, + "loss": 0.0435, "step": 95155 }, { "epoch": 4.44, - "learning_rate": 1.1171534386573532e-05, - "loss": 0.0922, + "learning_rate": 2.118530884808013e-05, + "loss": 0.0879, "step": 95160 }, { "epoch": 4.44, - "learning_rate": 1.1171065585298392e-05, - "loss": 0.0873, + "learning_rate": 2.1184840778244114e-05, + "loss": 0.0377, "step": 95165 }, { "epoch": 4.44, - "learning_rate": 1.1170596784023254e-05, - "loss": 0.1153, + "learning_rate": 2.1184372708408094e-05, + "loss": 0.1405, "step": 95170 }, { "epoch": 4.44, - "learning_rate": 1.1170127982748114e-05, - "loss": 0.1616, + "learning_rate": 2.1183904638572074e-05, + "loss": 0.3279, "step": 95175 }, { "epoch": 4.44, - "learning_rate": 1.1169659181472974e-05, - "loss": 0.1208, + "learning_rate": 2.1183436568736057e-05, + "loss": 0.0894, "step": 95180 }, { "epoch": 4.44, - "learning_rate": 1.1169190380197834e-05, - "loss": 0.0132, + "learning_rate": 2.1182968498900037e-05, + "loss": 0.0144, "step": 95185 }, { "epoch": 4.44, - "learning_rate": 1.1168721578922697e-05, - "loss": 0.0177, + "learning_rate": 2.1182500429064017e-05, + "loss": 0.0228, "step": 95190 }, { "epoch": 4.44, - "learning_rate": 1.1168252777647557e-05, - "loss": 0.0466, + "learning_rate": 2.1182032359227997e-05, + "loss": 0.0252, "step": 95195 }, { "epoch": 4.44, - "learning_rate": 1.1167783976372417e-05, - "loss": 0.0432, + "learning_rate": 2.118156428939198e-05, + "loss": 0.0793, "step": 95200 }, { "epoch": 4.44, - "learning_rate": 1.1167315175097277e-05, - "loss": 0.0164, + "learning_rate": 2.118109621955596e-05, + "loss": 0.0901, "step": 95205 }, { "epoch": 4.44, - "learning_rate": 1.1166846373822139e-05, - "loss": 0.0618, + "learning_rate": 2.118062814971994e-05, + "loss": 0.0524, "step": 95210 }, { "epoch": 4.44, - "learning_rate": 1.1166377572546999e-05, - "loss": 0.0556, + "learning_rate": 2.118016007988392e-05, + "loss": 0.2202, "step": 95215 }, { "epoch": 4.44, - "learning_rate": 1.1165908771271858e-05, - "loss": 0.1505, + "learning_rate": 2.11796920100479e-05, + "loss": 0.1563, "step": 95220 }, { "epoch": 4.44, - "learning_rate": 1.1165439969996718e-05, - "loss": 0.2956, + "learning_rate": 2.117922394021188e-05, + "loss": 0.2027, "step": 95225 }, { "epoch": 4.44, - "learning_rate": 1.1164971168721578e-05, - "loss": 0.095, + "learning_rate": 2.117875587037586e-05, + "loss": 0.0537, "step": 95230 }, { "epoch": 4.44, - "learning_rate": 1.1164502367446442e-05, - "loss": 0.045, + "learning_rate": 2.117828780053984e-05, + "loss": 0.0477, "step": 95235 }, { "epoch": 4.44, - "learning_rate": 1.1164033566171302e-05, - "loss": 0.0196, + "learning_rate": 2.1177819730703822e-05, + "loss": 0.037, "step": 95240 }, { "epoch": 4.44, - "learning_rate": 1.1163564764896162e-05, - "loss": 0.0183, + "learning_rate": 2.11773516608678e-05, + "loss": 0.0498, "step": 95245 }, { "epoch": 4.44, - "learning_rate": 1.1163095963621023e-05, - "loss": 0.0611, + "learning_rate": 2.117688359103178e-05, + "loss": 0.1358, "step": 95250 }, { "epoch": 4.44, - "learning_rate": 1.1162627162345883e-05, - "loss": 0.0417, + "learning_rate": 2.1176415521195765e-05, + "loss": 0.0495, "step": 95255 }, { "epoch": 4.44, - "learning_rate": 1.1162158361070743e-05, - "loss": 0.1163, + "learning_rate": 2.1175947451359744e-05, + "loss": 0.2006, "step": 95260 }, { "epoch": 4.45, - "learning_rate": 1.1161689559795603e-05, - "loss": 0.1493, + "learning_rate": 2.1175479381523724e-05, + "loss": 0.0889, "step": 95265 }, { "epoch": 4.45, - "learning_rate": 1.1161220758520463e-05, - "loss": 0.0971, + "learning_rate": 2.1175011311687704e-05, + "loss": 0.1427, "step": 95270 }, { "epoch": 4.45, - "learning_rate": 1.1160751957245325e-05, - "loss": 0.2363, + "learning_rate": 2.1174543241851687e-05, + "loss": 0.3559, "step": 95275 }, { "epoch": 4.45, - "learning_rate": 1.1160283155970184e-05, - "loss": 0.0752, + "learning_rate": 2.1174075172015667e-05, + "loss": 0.071, "step": 95280 }, { "epoch": 4.45, - "learning_rate": 1.1159814354695046e-05, - "loss": 0.0119, + "learning_rate": 2.1173607102179644e-05, + "loss": 0.0041, "step": 95285 }, { "epoch": 4.45, - "learning_rate": 1.1159345553419908e-05, - "loss": 0.0405, + "learning_rate": 2.1173139032343623e-05, + "loss": 0.0136, "step": 95290 }, { "epoch": 4.45, - "learning_rate": 1.1158876752144768e-05, - "loss": 0.038, + "learning_rate": 2.1172670962507607e-05, + "loss": 0.0247, "step": 95295 }, { "epoch": 4.45, - "learning_rate": 1.1158407950869628e-05, - "loss": 0.0414, + "learning_rate": 2.1172202892671586e-05, + "loss": 0.0652, "step": 95300 }, { "epoch": 4.45, - "learning_rate": 1.1157939149594487e-05, - "loss": 0.0728, + "learning_rate": 2.1171734822835566e-05, + "loss": 0.0472, "step": 95305 }, { "epoch": 4.45, - "learning_rate": 1.1157470348319347e-05, - "loss": 0.0689, + "learning_rate": 2.117126675299955e-05, + "loss": 0.0703, "step": 95310 }, { "epoch": 4.45, - "learning_rate": 1.1157001547044209e-05, - "loss": 0.0786, + "learning_rate": 2.117079868316353e-05, + "loss": 0.0663, "step": 95315 }, { "epoch": 4.45, - "learning_rate": 1.1156532745769069e-05, - "loss": 0.0901, + "learning_rate": 2.117033061332751e-05, + "loss": 0.1421, "step": 95320 }, { "epoch": 4.45, - "learning_rate": 1.1156063944493929e-05, - "loss": 0.3152, + "learning_rate": 2.116986254349149e-05, + "loss": 0.2283, "step": 95325 }, { "epoch": 4.45, - "learning_rate": 1.1155595143218792e-05, - "loss": 0.0653, + "learning_rate": 2.1169394473655472e-05, + "loss": 0.0899, "step": 95330 }, { "epoch": 4.45, - "learning_rate": 1.1155126341943652e-05, - "loss": 0.0816, + "learning_rate": 2.1168926403819452e-05, + "loss": 0.0363, "step": 95335 }, { "epoch": 4.45, - "learning_rate": 1.1154657540668512e-05, - "loss": 0.0327, + "learning_rate": 2.1168458333983432e-05, + "loss": 0.0265, "step": 95340 }, { "epoch": 4.45, - "learning_rate": 1.1154188739393372e-05, - "loss": 0.0665, + "learning_rate": 2.116799026414741e-05, + "loss": 0.0595, "step": 95345 }, { "epoch": 4.45, - "learning_rate": 1.1153719938118232e-05, - "loss": 0.0672, + "learning_rate": 2.116752219431139e-05, + "loss": 0.0459, "step": 95350 }, { "epoch": 4.45, - "learning_rate": 1.1153251136843094e-05, - "loss": 0.0251, + "learning_rate": 2.116705412447537e-05, + "loss": 0.0515, "step": 95355 }, { "epoch": 4.45, - "learning_rate": 1.1152782335567954e-05, - "loss": 0.1739, + "learning_rate": 2.116658605463935e-05, + "loss": 0.0634, "step": 95360 }, { "epoch": 4.45, - "learning_rate": 1.1152313534292813e-05, - "loss": 0.0909, + "learning_rate": 2.1166117984803334e-05, + "loss": 0.181, "step": 95365 }, { "epoch": 4.45, - "learning_rate": 1.1151844733017673e-05, - "loss": 0.1172, + "learning_rate": 2.1165649914967314e-05, + "loss": 0.209, "step": 95370 }, { "epoch": 4.45, - "learning_rate": 1.1151375931742537e-05, - "loss": 0.334, + "learning_rate": 2.1165181845131294e-05, + "loss": 0.303, "step": 95375 }, { "epoch": 4.45, - "learning_rate": 1.1150907130467397e-05, - "loss": 0.0837, + "learning_rate": 2.1164713775295274e-05, + "loss": 0.1158, "step": 95380 }, { "epoch": 4.45, - "learning_rate": 1.1150438329192257e-05, - "loss": 0.0251, + "learning_rate": 2.1164245705459257e-05, + "loss": 0.023, "step": 95385 }, { "epoch": 4.45, - "learning_rate": 1.1149969527917117e-05, - "loss": 0.0642, + "learning_rate": 2.1163777635623237e-05, + "loss": 0.0322, "step": 95390 }, { "epoch": 4.45, - "learning_rate": 1.1149500726641978e-05, - "loss": 0.0333, + "learning_rate": 2.1163309565787216e-05, + "loss": 0.0418, "step": 95395 }, { "epoch": 4.45, - "learning_rate": 1.1149031925366838e-05, - "loss": 0.0664, + "learning_rate": 2.1162841495951196e-05, + "loss": 0.0495, "step": 95400 }, { "epoch": 4.45, - "learning_rate": 1.1148563124091698e-05, - "loss": 0.0883, + "learning_rate": 2.116237342611518e-05, + "loss": 0.0306, "step": 95405 }, { "epoch": 4.45, - "learning_rate": 1.1148094322816558e-05, - "loss": 0.0742, + "learning_rate": 2.1161905356279156e-05, + "loss": 0.1101, "step": 95410 }, { "epoch": 4.45, - "learning_rate": 1.1147625521541418e-05, - "loss": 0.1999, + "learning_rate": 2.1161437286443136e-05, + "loss": 0.1311, "step": 95415 }, { "epoch": 4.45, - "learning_rate": 1.114715672026628e-05, - "loss": 0.1842, + "learning_rate": 2.1160969216607116e-05, + "loss": 0.2207, "step": 95420 }, { "epoch": 4.45, - "learning_rate": 1.1146687918991141e-05, - "loss": 0.2251, + "learning_rate": 2.11605011467711e-05, + "loss": 0.2852, "step": 95425 }, { "epoch": 4.45, - "learning_rate": 1.1146219117716001e-05, - "loss": 0.0577, + "learning_rate": 2.116003307693508e-05, + "loss": 0.0969, "step": 95430 }, { "epoch": 4.45, - "learning_rate": 1.1145750316440863e-05, - "loss": 0.0651, + "learning_rate": 2.115956500709906e-05, + "loss": 0.0041, "step": 95435 }, { "epoch": 4.45, - "learning_rate": 1.1145281515165723e-05, - "loss": 0.0185, + "learning_rate": 2.115909693726304e-05, + "loss": 0.0266, "step": 95440 }, { "epoch": 4.45, - "learning_rate": 1.1144812713890583e-05, - "loss": 0.0541, + "learning_rate": 2.115862886742702e-05, + "loss": 0.0525, "step": 95445 }, { "epoch": 4.45, - "learning_rate": 1.1144343912615443e-05, - "loss": 0.0319, + "learning_rate": 2.1158160797591e-05, + "loss": 0.0257, "step": 95450 }, { "epoch": 4.45, - "learning_rate": 1.1143875111340302e-05, - "loss": 0.0499, + "learning_rate": 2.115769272775498e-05, + "loss": 0.049, "step": 95455 }, { "epoch": 4.45, - "learning_rate": 1.1143406310065164e-05, - "loss": 0.117, + "learning_rate": 2.1157224657918964e-05, + "loss": 0.0839, "step": 95460 }, { "epoch": 4.45, - "learning_rate": 1.1142937508790024e-05, - "loss": 0.0796, + "learning_rate": 2.1156756588082944e-05, + "loss": 0.1076, "step": 95465 }, { "epoch": 4.45, - "learning_rate": 1.1142468707514886e-05, - "loss": 0.1977, + "learning_rate": 2.1156288518246924e-05, + "loss": 0.0808, "step": 95470 }, { "epoch": 4.45, - "learning_rate": 1.1141999906239747e-05, - "loss": 0.2544, + "learning_rate": 2.11558204484109e-05, + "loss": 0.3204, "step": 95475 }, { "epoch": 4.46, - "learning_rate": 1.1141531104964607e-05, - "loss": 0.0535, + "learning_rate": 2.1155352378574884e-05, + "loss": 0.0828, "step": 95480 }, { "epoch": 4.46, - "learning_rate": 1.1141062303689467e-05, - "loss": 0.0592, + "learning_rate": 2.1154884308738863e-05, + "loss": 0.0021, "step": 95485 }, { "epoch": 4.46, - "learning_rate": 1.1140593502414327e-05, - "loss": 0.0436, + "learning_rate": 2.1154416238902843e-05, + "loss": 0.0496, "step": 95490 }, { "epoch": 4.46, - "learning_rate": 1.1140124701139187e-05, - "loss": 0.0454, + "learning_rate": 2.1153948169066826e-05, + "loss": 0.122, "step": 95495 }, { "epoch": 4.46, - "learning_rate": 1.1139655899864049e-05, - "loss": 0.0685, + "learning_rate": 2.1153480099230806e-05, + "loss": 0.0334, "step": 95500 }, { "epoch": 4.46, - "learning_rate": 1.1139187098588909e-05, - "loss": 0.0472, + "learning_rate": 2.1153012029394786e-05, + "loss": 0.0268, "step": 95505 }, { "epoch": 4.46, - "learning_rate": 1.1138718297313768e-05, - "loss": 0.0682, + "learning_rate": 2.1152543959558766e-05, + "loss": 0.1741, "step": 95510 }, { "epoch": 4.46, - "learning_rate": 1.1138249496038632e-05, - "loss": 0.0703, + "learning_rate": 2.115207588972275e-05, + "loss": 0.0494, "step": 95515 }, { "epoch": 4.46, - "learning_rate": 1.1137780694763492e-05, - "loss": 0.1434, + "learning_rate": 2.115160781988673e-05, + "loss": 0.1062, "step": 95520 }, { "epoch": 4.46, - "learning_rate": 1.1137311893488352e-05, - "loss": 0.3051, + "learning_rate": 2.115113975005071e-05, + "loss": 0.2674, "step": 95525 }, { "epoch": 4.46, - "learning_rate": 1.1136843092213212e-05, - "loss": 0.0487, + "learning_rate": 2.115067168021469e-05, + "loss": 0.0684, "step": 95530 }, { "epoch": 4.46, - "learning_rate": 1.1136374290938072e-05, - "loss": 0.0317, + "learning_rate": 2.115020361037867e-05, + "loss": 0.0196, "step": 95535 }, { "epoch": 4.46, - "learning_rate": 1.1135905489662933e-05, - "loss": 0.0255, + "learning_rate": 2.1149735540542648e-05, + "loss": 0.0299, "step": 95540 }, { "epoch": 4.46, - "learning_rate": 1.1135436688387793e-05, - "loss": 0.0182, + "learning_rate": 2.1149267470706628e-05, + "loss": 0.0357, "step": 95545 }, { "epoch": 4.46, - "learning_rate": 1.1134967887112653e-05, - "loss": 0.0671, + "learning_rate": 2.114879940087061e-05, + "loss": 0.0307, "step": 95550 }, { "epoch": 4.46, - "learning_rate": 1.1134499085837513e-05, - "loss": 0.0563, + "learning_rate": 2.114833133103459e-05, + "loss": 0.0787, "step": 95555 }, { "epoch": 4.46, - "learning_rate": 1.1134030284562376e-05, - "loss": 0.1338, + "learning_rate": 2.114786326119857e-05, + "loss": 0.1283, "step": 95560 }, { "epoch": 4.46, - "learning_rate": 1.1133561483287236e-05, - "loss": 0.069, + "learning_rate": 2.114739519136255e-05, + "loss": 0.1014, "step": 95565 }, { "epoch": 4.46, - "learning_rate": 1.1133092682012096e-05, - "loss": 0.1806, + "learning_rate": 2.1146927121526534e-05, + "loss": 0.1713, "step": 95570 }, { "epoch": 4.46, - "learning_rate": 1.1132623880736956e-05, - "loss": 0.2525, + "learning_rate": 2.1146459051690514e-05, + "loss": 0.3141, "step": 95575 }, { "epoch": 4.46, - "learning_rate": 1.1132155079461818e-05, - "loss": 0.0892, + "learning_rate": 2.1145990981854493e-05, + "loss": 0.0964, "step": 95580 }, { "epoch": 4.46, - "learning_rate": 1.1131686278186678e-05, - "loss": 0.0073, + "learning_rate": 2.1145522912018473e-05, + "loss": 0.0178, "step": 95585 }, { "epoch": 4.46, - "learning_rate": 1.1131217476911538e-05, - "loss": 0.0148, + "learning_rate": 2.1145054842182456e-05, + "loss": 0.0106, "step": 95590 }, { "epoch": 4.46, - "learning_rate": 1.1130748675636398e-05, - "loss": 0.0387, + "learning_rate": 2.1144586772346436e-05, + "loss": 0.0123, "step": 95595 }, { "epoch": 4.46, - "learning_rate": 1.1130279874361259e-05, - "loss": 0.0584, + "learning_rate": 2.1144118702510413e-05, + "loss": 0.0542, "step": 95600 }, { "epoch": 4.46, - "learning_rate": 1.1129811073086119e-05, - "loss": 0.0672, + "learning_rate": 2.1143650632674393e-05, + "loss": 0.0903, "step": 95605 }, { "epoch": 4.46, - "learning_rate": 1.112934227181098e-05, - "loss": 0.0871, + "learning_rate": 2.1143182562838376e-05, + "loss": 0.0654, "step": 95610 }, { "epoch": 4.46, - "learning_rate": 1.112887347053584e-05, - "loss": 0.0725, + "learning_rate": 2.1142714493002356e-05, + "loss": 0.0832, "step": 95615 }, { "epoch": 4.46, - "learning_rate": 1.1128404669260702e-05, - "loss": 0.1412, + "learning_rate": 2.1142246423166335e-05, + "loss": 0.1504, "step": 95620 }, { "epoch": 4.46, - "learning_rate": 1.1127935867985562e-05, - "loss": 0.2711, + "learning_rate": 2.114177835333032e-05, + "loss": 0.1159, "step": 95625 }, { "epoch": 4.46, - "learning_rate": 1.1127467066710422e-05, - "loss": 0.0837, + "learning_rate": 2.11413102834943e-05, + "loss": 0.0668, "step": 95630 }, { "epoch": 4.46, - "learning_rate": 1.1126998265435282e-05, - "loss": 0.0292, + "learning_rate": 2.1140842213658278e-05, + "loss": 0.0233, "step": 95635 }, { "epoch": 4.46, - "learning_rate": 1.1126529464160144e-05, - "loss": 0.0107, + "learning_rate": 2.1140374143822258e-05, + "loss": 0.0345, "step": 95640 }, { "epoch": 4.46, - "learning_rate": 1.1126060662885004e-05, - "loss": 0.0795, + "learning_rate": 2.113990607398624e-05, + "loss": 0.0547, "step": 95645 }, { "epoch": 4.46, - "learning_rate": 1.1125591861609864e-05, - "loss": 0.0506, + "learning_rate": 2.113943800415022e-05, + "loss": 0.0761, "step": 95650 }, { "epoch": 4.46, - "learning_rate": 1.1125123060334725e-05, - "loss": 0.0403, + "learning_rate": 2.11389699343142e-05, + "loss": 0.0952, "step": 95655 }, { "epoch": 4.46, - "learning_rate": 1.1124654259059587e-05, - "loss": 0.1757, + "learning_rate": 2.113850186447818e-05, + "loss": 0.1041, "step": 95660 }, { "epoch": 4.46, - "learning_rate": 1.1124185457784447e-05, - "loss": 0.0437, + "learning_rate": 2.113803379464216e-05, + "loss": 0.1276, "step": 95665 }, { "epoch": 4.46, - "learning_rate": 1.1123716656509307e-05, - "loss": 0.1074, + "learning_rate": 2.113756572480614e-05, + "loss": 0.1325, "step": 95670 }, { "epoch": 4.46, - "learning_rate": 1.1123247855234167e-05, - "loss": 0.1198, + "learning_rate": 2.113709765497012e-05, + "loss": 0.3882, "step": 95675 }, { "epoch": 4.46, - "learning_rate": 1.1122779053959028e-05, - "loss": 0.053, + "learning_rate": 2.1136629585134103e-05, + "loss": 0.1372, "step": 95680 }, { "epoch": 4.46, - "learning_rate": 1.1122310252683888e-05, - "loss": 0.0107, + "learning_rate": 2.1136161515298083e-05, + "loss": 0.042, "step": 95685 }, { "epoch": 4.47, - "learning_rate": 1.1121841451408748e-05, - "loss": 0.0362, + "learning_rate": 2.1135693445462063e-05, + "loss": 0.0208, "step": 95690 }, { "epoch": 4.47, - "learning_rate": 1.1121372650133608e-05, - "loss": 0.0381, + "learning_rate": 2.1135225375626043e-05, + "loss": 0.0529, "step": 95695 }, { "epoch": 4.47, - "learning_rate": 1.1120903848858471e-05, - "loss": 0.0542, + "learning_rate": 2.1134757305790026e-05, + "loss": 0.0741, "step": 95700 }, { "epoch": 4.47, - "learning_rate": 1.1120435047583331e-05, - "loss": 0.0464, + "learning_rate": 2.1134289235954006e-05, + "loss": 0.0648, "step": 95705 }, { "epoch": 4.47, - "learning_rate": 1.1119966246308191e-05, - "loss": 0.0754, + "learning_rate": 2.1133821166117986e-05, + "loss": 0.127, "step": 95710 }, { "epoch": 4.47, - "learning_rate": 1.1119497445033051e-05, - "loss": 0.1106, + "learning_rate": 2.1133353096281965e-05, + "loss": 0.0887, "step": 95715 }, { "epoch": 4.47, - "learning_rate": 1.1119028643757913e-05, - "loss": 0.1349, + "learning_rate": 2.113288502644595e-05, + "loss": 0.0724, "step": 95720 }, { "epoch": 4.47, - "learning_rate": 1.1118559842482773e-05, - "loss": 0.2704, + "learning_rate": 2.1132416956609925e-05, + "loss": 0.3174, "step": 95725 }, { "epoch": 4.47, - "learning_rate": 1.1118091041207633e-05, - "loss": 0.0986, + "learning_rate": 2.1131948886773905e-05, + "loss": 0.0329, "step": 95730 }, { "epoch": 4.47, - "learning_rate": 1.1117622239932493e-05, - "loss": 0.0458, + "learning_rate": 2.1131480816937888e-05, + "loss": 0.0155, "step": 95735 }, { "epoch": 4.47, - "learning_rate": 1.1117153438657353e-05, - "loss": 0.0116, + "learning_rate": 2.1131012747101868e-05, + "loss": 0.0124, "step": 95740 }, { "epoch": 4.47, - "learning_rate": 1.1116684637382214e-05, - "loss": 0.0146, + "learning_rate": 2.1130544677265848e-05, + "loss": 0.0556, "step": 95745 }, { "epoch": 4.47, - "learning_rate": 1.1116215836107076e-05, - "loss": 0.058, + "learning_rate": 2.1130076607429828e-05, + "loss": 0.114, "step": 95750 }, { "epoch": 4.47, - "learning_rate": 1.1115747034831936e-05, - "loss": 0.0346, + "learning_rate": 2.112960853759381e-05, + "loss": 0.0663, "step": 95755 }, { "epoch": 4.47, - "learning_rate": 1.1115278233556797e-05, - "loss": 0.1073, + "learning_rate": 2.112914046775779e-05, + "loss": 0.0392, "step": 95760 }, { "epoch": 4.47, - "learning_rate": 1.1114809432281657e-05, - "loss": 0.1048, + "learning_rate": 2.112867239792177e-05, + "loss": 0.1601, "step": 95765 }, { "epoch": 4.47, - "learning_rate": 1.1114340631006517e-05, - "loss": 0.1791, + "learning_rate": 2.112820432808575e-05, + "loss": 0.1717, "step": 95770 }, { "epoch": 4.47, - "learning_rate": 1.1113871829731377e-05, - "loss": 0.2328, + "learning_rate": 2.1127736258249733e-05, + "loss": 0.258, "step": 95775 }, { "epoch": 4.47, - "learning_rate": 1.1113403028456237e-05, - "loss": 0.0619, + "learning_rate": 2.1127268188413713e-05, + "loss": 0.0403, "step": 95780 }, { "epoch": 4.47, - "learning_rate": 1.1112934227181099e-05, - "loss": 0.046, + "learning_rate": 2.1126800118577693e-05, + "loss": 0.0047, "step": 95785 }, { "epoch": 4.47, - "learning_rate": 1.1112465425905959e-05, - "loss": 0.0233, + "learning_rate": 2.1126332048741673e-05, + "loss": 0.0794, "step": 95790 }, { "epoch": 4.47, - "learning_rate": 1.111199662463082e-05, - "loss": 0.037, + "learning_rate": 2.1125863978905653e-05, + "loss": 0.0556, "step": 95795 }, { "epoch": 4.47, - "learning_rate": 1.1111527823355682e-05, - "loss": 0.0482, + "learning_rate": 2.1125395909069633e-05, + "loss": 0.0385, "step": 95800 }, { "epoch": 4.47, - "learning_rate": 1.1111059022080542e-05, - "loss": 0.0661, + "learning_rate": 2.1124927839233612e-05, + "loss": 0.0516, "step": 95805 }, { "epoch": 4.47, - "learning_rate": 1.1110590220805402e-05, - "loss": 0.0709, + "learning_rate": 2.1124459769397596e-05, + "loss": 0.0407, "step": 95810 }, { "epoch": 4.47, - "learning_rate": 1.1110121419530262e-05, - "loss": 0.1305, + "learning_rate": 2.1123991699561575e-05, + "loss": 0.1151, "step": 95815 }, { "epoch": 4.47, - "learning_rate": 1.1109652618255122e-05, - "loss": 0.1817, + "learning_rate": 2.1123523629725555e-05, + "loss": 0.1281, "step": 95820 }, { "epoch": 4.47, - "learning_rate": 1.1109183816979983e-05, - "loss": 0.1981, + "learning_rate": 2.1123055559889535e-05, + "loss": 0.3811, "step": 95825 }, { "epoch": 4.47, - "learning_rate": 1.1108715015704843e-05, - "loss": 0.0654, + "learning_rate": 2.1122587490053518e-05, + "loss": 0.0524, "step": 95830 }, { "epoch": 4.47, - "learning_rate": 1.1108246214429703e-05, - "loss": 0.0232, + "learning_rate": 2.1122119420217498e-05, + "loss": 0.0117, "step": 95835 }, { "epoch": 4.47, - "learning_rate": 1.1107777413154566e-05, - "loss": 0.0285, + "learning_rate": 2.1121651350381478e-05, + "loss": 0.0471, "step": 95840 }, { "epoch": 4.47, - "learning_rate": 1.1107308611879426e-05, - "loss": 0.0306, + "learning_rate": 2.1121183280545458e-05, + "loss": 0.0134, "step": 95845 }, { "epoch": 4.47, - "learning_rate": 1.1106839810604286e-05, - "loss": 0.0335, + "learning_rate": 2.112071521070944e-05, + "loss": 0.048, "step": 95850 }, { "epoch": 4.47, - "learning_rate": 1.1106371009329146e-05, - "loss": 0.0693, + "learning_rate": 2.1120247140873417e-05, + "loss": 0.1223, "step": 95855 }, { "epoch": 4.47, - "learning_rate": 1.1105902208054006e-05, - "loss": 0.0439, + "learning_rate": 2.1119779071037397e-05, + "loss": 0.0255, "step": 95860 }, { "epoch": 4.47, - "learning_rate": 1.1105433406778868e-05, - "loss": 0.0739, + "learning_rate": 2.111931100120138e-05, + "loss": 0.0968, "step": 95865 }, { "epoch": 4.47, - "learning_rate": 1.1104964605503728e-05, - "loss": 0.175, + "learning_rate": 2.111884293136536e-05, + "loss": 0.1293, "step": 95870 }, { "epoch": 4.47, - "learning_rate": 1.1104495804228588e-05, - "loss": 0.1857, + "learning_rate": 2.111837486152934e-05, + "loss": 0.2229, "step": 95875 }, { "epoch": 4.47, - "learning_rate": 1.1104027002953448e-05, - "loss": 0.0797, + "learning_rate": 2.111790679169332e-05, + "loss": 0.0788, "step": 95880 }, { "epoch": 4.47, - "learning_rate": 1.1103558201678311e-05, - "loss": 0.0092, + "learning_rate": 2.1117438721857303e-05, + "loss": 0.0173, "step": 95885 }, { "epoch": 4.47, - "learning_rate": 1.1103089400403171e-05, - "loss": 0.0372, + "learning_rate": 2.1116970652021283e-05, + "loss": 0.0329, "step": 95890 }, { "epoch": 4.47, - "learning_rate": 1.110262059912803e-05, - "loss": 0.036, + "learning_rate": 2.1116502582185263e-05, + "loss": 0.0262, "step": 95895 }, { "epoch": 4.47, - "learning_rate": 1.110215179785289e-05, - "loss": 0.0757, + "learning_rate": 2.1116034512349242e-05, + "loss": 0.0685, "step": 95900 }, { "epoch": 4.48, - "learning_rate": 1.1101682996577752e-05, - "loss": 0.0829, + "learning_rate": 2.1115566442513226e-05, + "loss": 0.0641, "step": 95905 }, { "epoch": 4.48, - "learning_rate": 1.1101214195302612e-05, - "loss": 0.105, + "learning_rate": 2.1115098372677205e-05, + "loss": 0.1011, "step": 95910 }, { "epoch": 4.48, - "learning_rate": 1.1100745394027472e-05, - "loss": 0.0873, + "learning_rate": 2.1114630302841182e-05, + "loss": 0.082, "step": 95915 }, { "epoch": 4.48, - "learning_rate": 1.1100276592752332e-05, - "loss": 0.1886, + "learning_rate": 2.1114162233005165e-05, + "loss": 0.1379, "step": 95920 }, { "epoch": 4.48, - "learning_rate": 1.1099807791477192e-05, - "loss": 0.2514, + "learning_rate": 2.1113694163169145e-05, + "loss": 0.2035, "step": 95925 }, { "epoch": 4.48, - "learning_rate": 1.1099338990202054e-05, - "loss": 0.0861, + "learning_rate": 2.1113226093333125e-05, + "loss": 0.0799, "step": 95930 }, { "epoch": 4.48, - "learning_rate": 1.1098870188926915e-05, - "loss": 0.0121, + "learning_rate": 2.1112758023497105e-05, + "loss": 0.016, "step": 95935 }, { "epoch": 4.48, - "learning_rate": 1.1098401387651775e-05, - "loss": 0.047, + "learning_rate": 2.1112289953661088e-05, + "loss": 0.0242, "step": 95940 }, { "epoch": 4.48, - "learning_rate": 1.1097932586376637e-05, - "loss": 0.0204, + "learning_rate": 2.1111821883825068e-05, + "loss": 0.0979, "step": 95945 }, { "epoch": 4.48, - "learning_rate": 1.1097463785101497e-05, - "loss": 0.0529, + "learning_rate": 2.1111353813989047e-05, + "loss": 0.0844, "step": 95950 }, { "epoch": 4.48, - "learning_rate": 1.1096994983826357e-05, - "loss": 0.0537, + "learning_rate": 2.1110885744153027e-05, + "loss": 0.1343, "step": 95955 }, { "epoch": 4.48, - "learning_rate": 1.1096526182551217e-05, - "loss": 0.0243, + "learning_rate": 2.111041767431701e-05, + "loss": 0.1042, "step": 95960 }, { "epoch": 4.48, - "learning_rate": 1.1096057381276077e-05, - "loss": 0.1391, + "learning_rate": 2.110994960448099e-05, + "loss": 0.1154, "step": 95965 }, { "epoch": 4.48, - "learning_rate": 1.1095588580000938e-05, - "loss": 0.1364, + "learning_rate": 2.110948153464497e-05, + "loss": 0.1396, "step": 95970 }, { "epoch": 4.48, - "learning_rate": 1.1095119778725798e-05, - "loss": 0.1301, + "learning_rate": 2.1109013464808953e-05, + "loss": 0.2786, "step": 95975 }, { "epoch": 4.48, - "learning_rate": 1.109465097745066e-05, - "loss": 0.0858, + "learning_rate": 2.110854539497293e-05, + "loss": 0.0785, "step": 95980 }, { "epoch": 4.48, - "learning_rate": 1.1094182176175521e-05, - "loss": 0.0424, + "learning_rate": 2.110807732513691e-05, + "loss": 0.0512, "step": 95985 }, { "epoch": 4.48, - "learning_rate": 1.1093713374900381e-05, - "loss": 0.0076, + "learning_rate": 2.110760925530089e-05, + "loss": 0.0254, "step": 95990 }, { "epoch": 4.48, - "learning_rate": 1.1093244573625241e-05, - "loss": 0.0142, + "learning_rate": 2.1107141185464873e-05, + "loss": 0.0078, "step": 95995 }, { "epoch": 4.48, - "learning_rate": 1.1092775772350101e-05, - "loss": 0.0197, + "learning_rate": 2.1106673115628852e-05, + "loss": 0.0671, "step": 96000 }, { "epoch": 4.48, - "learning_rate": 1.1092306971074961e-05, - "loss": 0.0696, + "learning_rate": 2.1106205045792832e-05, + "loss": 0.0608, "step": 96005 }, { "epoch": 4.48, - "learning_rate": 1.1091838169799823e-05, - "loss": 0.0602, + "learning_rate": 2.1105736975956812e-05, + "loss": 0.0839, "step": 96010 }, { "epoch": 4.48, - "learning_rate": 1.1091369368524683e-05, - "loss": 0.0852, + "learning_rate": 2.1105268906120795e-05, + "loss": 0.1326, "step": 96015 }, { "epoch": 4.48, - "learning_rate": 1.1090900567249543e-05, - "loss": 0.1463, + "learning_rate": 2.1104800836284775e-05, + "loss": 0.4545, "step": 96020 }, { "epoch": 4.48, - "learning_rate": 1.1090431765974406e-05, - "loss": 0.1661, + "learning_rate": 2.1104332766448755e-05, + "loss": 0.2576, "step": 96025 }, { "epoch": 4.48, - "learning_rate": 1.1089962964699266e-05, - "loss": 0.1079, + "learning_rate": 2.1103864696612735e-05, + "loss": 0.073, "step": 96030 }, { "epoch": 4.48, - "learning_rate": 1.1089494163424126e-05, - "loss": 0.2677, + "learning_rate": 2.1103396626776718e-05, + "loss": 0.0133, "step": 96035 }, { "epoch": 4.48, - "learning_rate": 1.1089025362148986e-05, - "loss": 0.061, + "learning_rate": 2.1102928556940698e-05, + "loss": 0.0246, "step": 96040 }, { "epoch": 4.48, - "learning_rate": 1.1088556560873846e-05, - "loss": 0.0189, + "learning_rate": 2.1102460487104674e-05, + "loss": 0.0606, "step": 96045 }, { "epoch": 4.48, - "learning_rate": 1.1088087759598707e-05, - "loss": 0.027, + "learning_rate": 2.1101992417268657e-05, + "loss": 0.0783, "step": 96050 }, { "epoch": 4.48, - "learning_rate": 1.1087618958323567e-05, - "loss": 0.1223, + "learning_rate": 2.1101524347432637e-05, + "loss": 0.0408, "step": 96055 }, { "epoch": 4.48, - "learning_rate": 1.1087150157048427e-05, - "loss": 0.1034, + "learning_rate": 2.1101056277596617e-05, + "loss": 0.0493, "step": 96060 }, { "epoch": 4.48, - "learning_rate": 1.1086681355773287e-05, - "loss": 0.2181, + "learning_rate": 2.1100588207760597e-05, + "loss": 0.08, "step": 96065 }, { "epoch": 4.48, - "learning_rate": 1.1086212554498149e-05, - "loss": 0.2042, + "learning_rate": 2.110012013792458e-05, + "loss": 0.1235, "step": 96070 }, { "epoch": 4.48, - "learning_rate": 1.108574375322301e-05, - "loss": 0.255, + "learning_rate": 2.109965206808856e-05, + "loss": 0.2048, "step": 96075 }, { "epoch": 4.48, - "learning_rate": 1.108527495194787e-05, - "loss": 0.0895, + "learning_rate": 2.109918399825254e-05, + "loss": 0.1219, "step": 96080 }, { "epoch": 4.48, - "learning_rate": 1.108480615067273e-05, - "loss": 0.0154, + "learning_rate": 2.109871592841652e-05, + "loss": 0.008, "step": 96085 }, { "epoch": 4.48, - "learning_rate": 1.1084337349397592e-05, - "loss": 0.0624, + "learning_rate": 2.1098247858580503e-05, + "loss": 0.0375, "step": 96090 }, { "epoch": 4.48, - "learning_rate": 1.1083868548122452e-05, - "loss": 0.015, + "learning_rate": 2.1097779788744482e-05, + "loss": 0.0519, "step": 96095 }, { "epoch": 4.48, - "learning_rate": 1.1083399746847312e-05, - "loss": 0.1369, + "learning_rate": 2.1097311718908462e-05, + "loss": 0.0886, "step": 96100 }, { "epoch": 4.48, - "learning_rate": 1.1082930945572172e-05, - "loss": 0.1338, + "learning_rate": 2.1096843649072442e-05, + "loss": 0.0403, "step": 96105 }, { "epoch": 4.48, - "learning_rate": 1.1082462144297033e-05, - "loss": 0.0547, + "learning_rate": 2.1096375579236422e-05, + "loss": 0.0586, "step": 96110 }, { "epoch": 4.48, - "learning_rate": 1.1081993343021893e-05, - "loss": 0.0968, + "learning_rate": 2.1095907509400402e-05, + "loss": 0.0708, "step": 96115 }, { "epoch": 4.49, - "learning_rate": 1.1081524541746755e-05, - "loss": 0.1239, + "learning_rate": 2.109543943956438e-05, + "loss": 0.1319, "step": 96120 }, { "epoch": 4.49, - "learning_rate": 1.1081055740471615e-05, - "loss": 0.185, + "learning_rate": 2.1094971369728365e-05, + "loss": 0.3419, "step": 96125 }, { "epoch": 4.49, - "learning_rate": 1.1080586939196476e-05, - "loss": 0.1027, + "learning_rate": 2.1094503299892345e-05, + "loss": 0.0283, "step": 96130 }, { "epoch": 4.49, - "learning_rate": 1.1080118137921336e-05, - "loss": 0.0296, + "learning_rate": 2.1094035230056324e-05, + "loss": 0.0139, "step": 96135 }, { "epoch": 4.49, - "learning_rate": 1.1079649336646196e-05, - "loss": 0.0374, + "learning_rate": 2.1093567160220304e-05, + "loss": 0.0144, "step": 96140 }, { "epoch": 4.49, - "learning_rate": 1.1079180535371056e-05, - "loss": 0.0256, + "learning_rate": 2.1093099090384287e-05, + "loss": 0.0153, "step": 96145 }, { "epoch": 4.49, - "learning_rate": 1.1078711734095918e-05, - "loss": 0.1339, + "learning_rate": 2.1092631020548267e-05, + "loss": 0.0316, "step": 96150 }, { "epoch": 4.49, - "learning_rate": 1.1078242932820778e-05, - "loss": 0.0594, + "learning_rate": 2.1092162950712247e-05, + "loss": 0.0527, "step": 96155 }, { "epoch": 4.49, - "learning_rate": 1.1077774131545638e-05, - "loss": 0.1316, + "learning_rate": 2.109169488087623e-05, + "loss": 0.1038, "step": 96160 }, { "epoch": 4.49, - "learning_rate": 1.10773053302705e-05, - "loss": 0.0907, + "learning_rate": 2.109122681104021e-05, + "loss": 0.0608, "step": 96165 }, { "epoch": 4.49, - "learning_rate": 1.1076836528995361e-05, - "loss": 0.2559, + "learning_rate": 2.1090758741204186e-05, + "loss": 0.1792, "step": 96170 }, { "epoch": 4.49, - "learning_rate": 1.1076367727720221e-05, - "loss": 0.1893, + "learning_rate": 2.1090290671368166e-05, + "loss": 0.3586, "step": 96175 }, { "epoch": 4.49, - "learning_rate": 1.1075898926445081e-05, - "loss": 0.0744, + "learning_rate": 2.108982260153215e-05, + "loss": 0.1457, "step": 96180 }, { "epoch": 4.49, - "learning_rate": 1.107543012516994e-05, - "loss": 0.0304, + "learning_rate": 2.108935453169613e-05, + "loss": 0.022, "step": 96185 }, { "epoch": 4.49, - "learning_rate": 1.1074961323894802e-05, - "loss": 0.0477, + "learning_rate": 2.108888646186011e-05, + "loss": 0.0316, "step": 96190 }, { "epoch": 4.49, - "learning_rate": 1.1074492522619662e-05, - "loss": 0.0658, + "learning_rate": 2.108841839202409e-05, + "loss": 0.012, "step": 96195 }, { "epoch": 4.49, - "learning_rate": 1.1074023721344522e-05, - "loss": 0.0421, + "learning_rate": 2.1087950322188072e-05, + "loss": 0.0156, "step": 96200 }, { "epoch": 4.49, - "learning_rate": 1.1073554920069382e-05, - "loss": 0.0284, + "learning_rate": 2.1087482252352052e-05, + "loss": 0.0866, "step": 96205 }, { "epoch": 4.49, - "learning_rate": 1.1073086118794246e-05, - "loss": 0.0383, + "learning_rate": 2.1087014182516032e-05, + "loss": 0.1091, "step": 96210 }, { "epoch": 4.49, - "learning_rate": 1.1072617317519106e-05, - "loss": 0.0802, + "learning_rate": 2.108654611268001e-05, + "loss": 0.0803, "step": 96215 }, { "epoch": 4.49, - "learning_rate": 1.1072148516243965e-05, - "loss": 0.145, + "learning_rate": 2.1086078042843995e-05, + "loss": 0.1144, "step": 96220 }, { "epoch": 4.49, - "learning_rate": 1.1071679714968825e-05, - "loss": 0.227, + "learning_rate": 2.1085609973007975e-05, + "loss": 0.1716, "step": 96225 }, { "epoch": 4.49, - "learning_rate": 1.1071210913693687e-05, - "loss": 0.0797, + "learning_rate": 2.1085141903171954e-05, + "loss": 0.0724, "step": 96230 }, { "epoch": 4.49, - "learning_rate": 1.1070742112418547e-05, - "loss": 0.029, + "learning_rate": 2.1084673833335934e-05, + "loss": 0.0153, "step": 96235 }, { "epoch": 4.49, - "learning_rate": 1.1070273311143407e-05, - "loss": 0.0172, + "learning_rate": 2.1084205763499914e-05, + "loss": 0.0502, "step": 96240 }, { "epoch": 4.49, - "learning_rate": 1.1069804509868267e-05, - "loss": 0.0487, + "learning_rate": 2.1083737693663894e-05, + "loss": 0.0481, "step": 96245 }, { "epoch": 4.49, - "learning_rate": 1.1069335708593127e-05, - "loss": 0.0531, + "learning_rate": 2.1083269623827874e-05, + "loss": 0.0438, "step": 96250 }, { "epoch": 4.49, - "learning_rate": 1.1068866907317988e-05, - "loss": 0.0498, + "learning_rate": 2.1082801553991857e-05, + "loss": 0.082, "step": 96255 }, { "epoch": 4.49, - "learning_rate": 1.106839810604285e-05, - "loss": 0.1319, + "learning_rate": 2.1082333484155837e-05, + "loss": 0.0782, "step": 96260 }, { "epoch": 4.49, - "learning_rate": 1.106792930476771e-05, - "loss": 0.0511, + "learning_rate": 2.1081865414319817e-05, + "loss": 0.0947, "step": 96265 }, { "epoch": 4.49, - "learning_rate": 1.1067460503492572e-05, - "loss": 0.1645, + "learning_rate": 2.1081397344483796e-05, + "loss": 0.1366, "step": 96270 }, { "epoch": 4.49, - "learning_rate": 1.1066991702217432e-05, - "loss": 0.2393, + "learning_rate": 2.108092927464778e-05, + "loss": 0.165, "step": 96275 }, { "epoch": 4.49, - "learning_rate": 1.1066522900942291e-05, - "loss": 0.0761, + "learning_rate": 2.108046120481176e-05, + "loss": 0.0784, "step": 96280 }, { "epoch": 4.49, - "learning_rate": 1.1066054099667151e-05, - "loss": 0.0396, + "learning_rate": 2.107999313497574e-05, + "loss": 0.0334, "step": 96285 }, { "epoch": 4.49, - "learning_rate": 1.1065585298392011e-05, - "loss": 0.0337, + "learning_rate": 2.1079525065139722e-05, + "loss": 0.0168, "step": 96290 }, { "epoch": 4.49, - "learning_rate": 1.1065116497116873e-05, - "loss": 0.0499, + "learning_rate": 2.10790569953037e-05, + "loss": 0.1069, "step": 96295 }, { "epoch": 4.49, - "learning_rate": 1.1064647695841733e-05, - "loss": 0.0542, + "learning_rate": 2.107858892546768e-05, + "loss": 0.3672, "step": 96300 }, { "epoch": 4.49, - "learning_rate": 1.1064178894566594e-05, - "loss": 0.1207, + "learning_rate": 2.107812085563166e-05, + "loss": 0.0667, "step": 96305 }, { "epoch": 4.49, - "learning_rate": 1.1063710093291456e-05, - "loss": 0.1424, + "learning_rate": 2.1077652785795642e-05, + "loss": 0.1218, "step": 96310 }, { "epoch": 4.49, - "learning_rate": 1.1063241292016316e-05, - "loss": 0.1092, + "learning_rate": 2.107718471595962e-05, + "loss": 0.0972, "step": 96315 }, { "epoch": 4.49, - "learning_rate": 1.1062772490741176e-05, - "loss": 0.207, + "learning_rate": 2.10767166461236e-05, + "loss": 0.0931, "step": 96320 }, { "epoch": 4.49, - "learning_rate": 1.1062303689466036e-05, - "loss": 0.24, + "learning_rate": 2.107624857628758e-05, + "loss": 0.1208, "step": 96325 }, { "epoch": 4.49, - "learning_rate": 1.1061834888190896e-05, - "loss": 0.0939, + "learning_rate": 2.1075780506451564e-05, + "loss": 0.0994, "step": 96330 }, { "epoch": 4.5, - "learning_rate": 1.1061366086915757e-05, - "loss": 0.0077, + "learning_rate": 2.1075312436615544e-05, + "loss": 0.0161, "step": 96335 }, { "epoch": 4.5, - "learning_rate": 1.1060897285640617e-05, - "loss": 0.0183, + "learning_rate": 2.1074844366779524e-05, + "loss": 0.0081, "step": 96340 }, { "epoch": 4.5, - "learning_rate": 1.1060428484365477e-05, - "loss": 0.0182, + "learning_rate": 2.1074376296943507e-05, + "loss": 0.0219, "step": 96345 }, { "epoch": 4.5, - "learning_rate": 1.105995968309034e-05, - "loss": 0.0428, + "learning_rate": 2.1073908227107487e-05, + "loss": 0.0506, "step": 96350 }, { "epoch": 4.5, - "learning_rate": 1.10594908818152e-05, - "loss": 0.0747, + "learning_rate": 2.1073440157271467e-05, + "loss": 0.0868, "step": 96355 }, { "epoch": 4.5, - "learning_rate": 1.105902208054006e-05, - "loss": 0.0367, + "learning_rate": 2.1072972087435443e-05, + "loss": 0.0412, "step": 96360 }, { "epoch": 4.5, - "learning_rate": 1.105855327926492e-05, - "loss": 0.133, + "learning_rate": 2.1072504017599426e-05, + "loss": 0.086, "step": 96365 }, { "epoch": 4.5, - "learning_rate": 1.105808447798978e-05, - "loss": 0.1218, + "learning_rate": 2.1072035947763406e-05, + "loss": 0.0529, "step": 96370 }, { "epoch": 4.5, - "learning_rate": 1.1057615676714642e-05, - "loss": 0.2208, + "learning_rate": 2.1071567877927386e-05, + "loss": 0.2459, "step": 96375 }, { "epoch": 4.5, - "learning_rate": 1.1057146875439502e-05, - "loss": 0.0698, + "learning_rate": 2.1071099808091366e-05, + "loss": 0.0976, "step": 96380 }, { "epoch": 4.5, - "learning_rate": 1.1056678074164362e-05, - "loss": 0.0295, + "learning_rate": 2.107063173825535e-05, + "loss": 0.0229, "step": 96385 }, { "epoch": 4.5, - "learning_rate": 1.1056209272889222e-05, - "loss": 0.035, + "learning_rate": 2.107016366841933e-05, + "loss": 0.0431, "step": 96390 }, { "epoch": 4.5, - "learning_rate": 1.1055740471614082e-05, - "loss": 0.0509, + "learning_rate": 2.106969559858331e-05, + "loss": 0.0082, "step": 96395 }, { "epoch": 4.5, - "learning_rate": 1.1055271670338945e-05, - "loss": 0.0959, + "learning_rate": 2.106922752874729e-05, + "loss": 0.0328, "step": 96400 }, { "epoch": 4.5, - "learning_rate": 1.1054802869063805e-05, - "loss": 0.0893, + "learning_rate": 2.1068759458911272e-05, + "loss": 0.1202, "step": 96405 }, { "epoch": 4.5, - "learning_rate": 1.1054334067788665e-05, - "loss": 0.0758, + "learning_rate": 2.106829138907525e-05, + "loss": 0.1237, "step": 96410 }, { "epoch": 4.5, - "learning_rate": 1.1053865266513527e-05, - "loss": 0.1056, + "learning_rate": 2.106782331923923e-05, + "loss": 0.1777, "step": 96415 }, { "epoch": 4.5, - "learning_rate": 1.1053396465238387e-05, - "loss": 0.1086, + "learning_rate": 2.106735524940321e-05, + "loss": 0.0516, "step": 96420 }, { "epoch": 4.5, - "learning_rate": 1.1052927663963246e-05, - "loss": 0.3389, + "learning_rate": 2.106688717956719e-05, + "loss": 0.1586, "step": 96425 }, { "epoch": 4.5, - "learning_rate": 1.1052458862688106e-05, - "loss": 0.0704, + "learning_rate": 2.106641910973117e-05, + "loss": 0.0802, "step": 96430 }, { "epoch": 4.5, - "learning_rate": 1.1051990061412966e-05, - "loss": 0.0478, + "learning_rate": 2.106595103989515e-05, + "loss": 0.0059, "step": 96435 }, { "epoch": 4.5, - "learning_rate": 1.1051521260137828e-05, - "loss": 0.0457, + "learning_rate": 2.1065482970059134e-05, + "loss": 0.009, "step": 96440 }, { "epoch": 4.5, - "learning_rate": 1.105105245886269e-05, - "loss": 0.0443, + "learning_rate": 2.1065014900223114e-05, + "loss": 0.0281, "step": 96445 }, { "epoch": 4.5, - "learning_rate": 1.105058365758755e-05, - "loss": 0.0494, + "learning_rate": 2.1064546830387094e-05, + "loss": 0.0808, "step": 96450 }, { "epoch": 4.5, - "learning_rate": 1.1050114856312411e-05, - "loss": 0.0535, + "learning_rate": 2.1064078760551073e-05, + "loss": 0.0604, "step": 96455 }, { "epoch": 4.5, - "learning_rate": 1.1049646055037271e-05, - "loss": 0.1155, + "learning_rate": 2.1063610690715057e-05, + "loss": 0.0453, "step": 96460 }, { "epoch": 4.5, - "learning_rate": 1.1049177253762131e-05, - "loss": 0.0679, + "learning_rate": 2.1063142620879036e-05, + "loss": 0.1497, "step": 96465 }, { "epoch": 4.5, - "learning_rate": 1.1048708452486991e-05, - "loss": 0.1323, + "learning_rate": 2.1062674551043016e-05, + "loss": 0.1803, "step": 96470 }, { "epoch": 4.5, - "learning_rate": 1.1048239651211851e-05, - "loss": 0.2943, + "learning_rate": 2.1062206481207e-05, + "loss": 0.335, "step": 96475 }, { "epoch": 4.5, - "learning_rate": 1.1047770849936713e-05, - "loss": 0.1359, + "learning_rate": 2.106173841137098e-05, + "loss": 0.1236, "step": 96480 }, { "epoch": 4.5, - "learning_rate": 1.1047302048661572e-05, - "loss": 0.0188, + "learning_rate": 2.1061270341534956e-05, + "loss": 0.0074, "step": 96485 }, { "epoch": 4.5, - "learning_rate": 1.1046833247386434e-05, - "loss": 0.0058, + "learning_rate": 2.1060802271698935e-05, + "loss": 0.0237, "step": 96490 }, { "epoch": 4.5, - "learning_rate": 1.1046364446111296e-05, - "loss": 0.1123, + "learning_rate": 2.106033420186292e-05, + "loss": 0.0265, "step": 96495 }, { "epoch": 4.5, - "learning_rate": 1.1045895644836156e-05, - "loss": 0.092, + "learning_rate": 2.10598661320269e-05, + "loss": 0.0472, "step": 96500 }, { "epoch": 4.5, - "learning_rate": 1.1045426843561016e-05, - "loss": 0.0793, + "learning_rate": 2.105939806219088e-05, + "loss": 0.0394, "step": 96505 }, { "epoch": 4.5, - "learning_rate": 1.1044958042285875e-05, - "loss": 0.0414, + "learning_rate": 2.1058929992354858e-05, + "loss": 0.0737, "step": 96510 }, { "epoch": 4.5, - "learning_rate": 1.1044489241010735e-05, - "loss": 0.0444, + "learning_rate": 2.105846192251884e-05, + "loss": 0.1595, "step": 96515 }, { "epoch": 4.5, - "learning_rate": 1.1044020439735597e-05, - "loss": 0.1698, + "learning_rate": 2.105799385268282e-05, + "loss": 0.173, "step": 96520 }, { "epoch": 4.5, - "learning_rate": 1.1043551638460457e-05, - "loss": 0.2276, + "learning_rate": 2.10575257828468e-05, + "loss": 0.2484, "step": 96525 }, { "epoch": 4.5, - "learning_rate": 1.1043082837185317e-05, - "loss": 0.0498, + "learning_rate": 2.1057057713010784e-05, + "loss": 0.0687, "step": 96530 }, { "epoch": 4.5, - "learning_rate": 1.104261403591018e-05, - "loss": 0.0123, + "learning_rate": 2.1056589643174764e-05, + "loss": 0.0236, "step": 96535 }, { "epoch": 4.5, - "learning_rate": 1.104214523463504e-05, - "loss": 0.0793, + "learning_rate": 2.1056121573338744e-05, + "loss": 0.0118, "step": 96540 }, { "epoch": 4.5, - "learning_rate": 1.10416764333599e-05, - "loss": 0.0751, + "learning_rate": 2.1055653503502724e-05, + "loss": 0.0463, "step": 96545 }, { "epoch": 4.51, - "learning_rate": 1.104120763208476e-05, - "loss": 0.044, + "learning_rate": 2.1055185433666703e-05, + "loss": 0.0696, "step": 96550 }, { "epoch": 4.51, - "learning_rate": 1.1040738830809622e-05, - "loss": 0.0604, + "learning_rate": 2.1054717363830683e-05, + "loss": 0.0483, "step": 96555 }, { "epoch": 4.51, - "learning_rate": 1.1040270029534482e-05, - "loss": 0.0385, + "learning_rate": 2.1054249293994663e-05, + "loss": 0.0617, "step": 96560 }, { "epoch": 4.51, - "learning_rate": 1.1039801228259342e-05, - "loss": 0.0994, + "learning_rate": 2.1053781224158643e-05, + "loss": 0.2466, "step": 96565 }, { "epoch": 4.51, - "learning_rate": 1.1039332426984201e-05, - "loss": 0.1932, + "learning_rate": 2.1053313154322626e-05, + "loss": 0.2343, "step": 96570 }, { "epoch": 4.51, - "learning_rate": 1.1038863625709061e-05, - "loss": 0.2327, + "learning_rate": 2.1052845084486606e-05, + "loss": 0.4494, "step": 96575 }, { "epoch": 4.51, - "learning_rate": 1.1038394824433923e-05, - "loss": 0.0545, + "learning_rate": 2.1052377014650586e-05, + "loss": 0.0933, "step": 96580 }, { "epoch": 4.51, - "learning_rate": 1.1037926023158785e-05, - "loss": 0.2896, + "learning_rate": 2.105190894481457e-05, + "loss": 0.044, "step": 96585 }, { "epoch": 4.51, - "learning_rate": 1.1037457221883645e-05, - "loss": 0.0264, + "learning_rate": 2.105144087497855e-05, + "loss": 0.0224, "step": 96590 }, { "epoch": 4.51, - "learning_rate": 1.1036988420608506e-05, - "loss": 0.0415, + "learning_rate": 2.105097280514253e-05, + "loss": 0.0259, "step": 96595 }, { "epoch": 4.51, - "learning_rate": 1.1036519619333366e-05, - "loss": 0.0908, + "learning_rate": 2.105050473530651e-05, + "loss": 0.0542, "step": 96600 }, { "epoch": 4.51, - "learning_rate": 1.1036050818058226e-05, - "loss": 0.0537, + "learning_rate": 2.105003666547049e-05, + "loss": 0.0405, "step": 96605 }, { "epoch": 4.51, - "learning_rate": 1.1035582016783086e-05, - "loss": 0.0708, + "learning_rate": 2.1049568595634468e-05, + "loss": 0.0324, "step": 96610 }, { "epoch": 4.51, - "learning_rate": 1.1035113215507946e-05, - "loss": 0.0977, + "learning_rate": 2.1049100525798448e-05, + "loss": 0.0842, "step": 96615 }, { "epoch": 4.51, - "learning_rate": 1.1034644414232808e-05, - "loss": 0.1554, + "learning_rate": 2.1048632455962428e-05, + "loss": 0.091, "step": 96620 }, { "epoch": 4.51, - "learning_rate": 1.1034175612957668e-05, - "loss": 0.3359, + "learning_rate": 2.104816438612641e-05, + "loss": 0.3264, "step": 96625 }, { "epoch": 4.51, - "learning_rate": 1.1033706811682529e-05, - "loss": 0.095, + "learning_rate": 2.104769631629039e-05, + "loss": 0.0759, "step": 96630 }, { "epoch": 4.51, - "learning_rate": 1.103323801040739e-05, - "loss": 0.0484, + "learning_rate": 2.104722824645437e-05, + "loss": 0.0197, "step": 96635 }, { "epoch": 4.51, - "learning_rate": 1.103276920913225e-05, - "loss": 0.0477, + "learning_rate": 2.104676017661835e-05, + "loss": 0.0449, "step": 96640 }, { "epoch": 4.51, - "learning_rate": 1.103230040785711e-05, - "loss": 0.1291, + "learning_rate": 2.1046292106782334e-05, + "loss": 0.0578, "step": 96645 }, { "epoch": 4.51, - "learning_rate": 1.103183160658197e-05, - "loss": 0.0511, + "learning_rate": 2.1045824036946313e-05, + "loss": 0.0754, "step": 96650 }, { "epoch": 4.51, - "learning_rate": 1.103136280530683e-05, - "loss": 0.0412, + "learning_rate": 2.1045355967110293e-05, + "loss": 0.0415, "step": 96655 }, { "epoch": 4.51, - "learning_rate": 1.1030894004031692e-05, - "loss": 0.0718, + "learning_rate": 2.1044887897274276e-05, + "loss": 0.0649, "step": 96660 }, { "epoch": 4.51, - "learning_rate": 1.1030425202756552e-05, - "loss": 0.1408, + "learning_rate": 2.1044419827438256e-05, + "loss": 0.0603, "step": 96665 }, { "epoch": 4.51, - "learning_rate": 1.1029956401481412e-05, - "loss": 0.1417, + "learning_rate": 2.1043951757602236e-05, + "loss": 0.1624, "step": 96670 }, { "epoch": 4.51, - "learning_rate": 1.1029487600206275e-05, - "loss": 0.3219, + "learning_rate": 2.1043483687766212e-05, + "loss": 0.6251, "step": 96675 }, { "epoch": 4.51, - "learning_rate": 1.1029018798931135e-05, - "loss": 0.0997, + "learning_rate": 2.1043015617930196e-05, + "loss": 0.0823, "step": 96680 }, { "epoch": 4.51, - "learning_rate": 1.1028549997655995e-05, - "loss": 0.0185, + "learning_rate": 2.1042547548094175e-05, + "loss": 0.0107, "step": 96685 }, { "epoch": 4.51, - "learning_rate": 1.1028081196380855e-05, - "loss": 0.0311, + "learning_rate": 2.1042079478258155e-05, + "loss": 0.0368, "step": 96690 }, { "epoch": 4.51, - "learning_rate": 1.1027612395105715e-05, - "loss": 0.0496, + "learning_rate": 2.1041611408422135e-05, + "loss": 0.0419, "step": 96695 }, { "epoch": 4.51, - "learning_rate": 1.1027143593830577e-05, - "loss": 0.0159, + "learning_rate": 2.104114333858612e-05, + "loss": 0.0728, "step": 96700 }, { "epoch": 4.51, - "learning_rate": 1.1026674792555437e-05, - "loss": 0.0784, + "learning_rate": 2.1040675268750098e-05, + "loss": 0.0535, "step": 96705 }, { "epoch": 4.51, - "learning_rate": 1.1026205991280297e-05, - "loss": 0.1331, + "learning_rate": 2.1040207198914078e-05, + "loss": 0.068, "step": 96710 }, { "epoch": 4.51, - "learning_rate": 1.1025737190005156e-05, - "loss": 0.1623, + "learning_rate": 2.103973912907806e-05, + "loss": 0.0941, "step": 96715 }, { "epoch": 4.51, - "learning_rate": 1.1025268388730016e-05, - "loss": 0.1472, + "learning_rate": 2.103927105924204e-05, + "loss": 0.1881, "step": 96720 }, { "epoch": 4.51, - "learning_rate": 1.102479958745488e-05, - "loss": 0.2449, + "learning_rate": 2.103880298940602e-05, + "loss": 0.2628, "step": 96725 }, { "epoch": 4.51, - "learning_rate": 1.102433078617974e-05, - "loss": 0.0831, + "learning_rate": 2.103833491957e-05, + "loss": 0.0618, "step": 96730 }, { "epoch": 4.51, - "learning_rate": 1.10238619849046e-05, - "loss": 0.0175, + "learning_rate": 2.103786684973398e-05, + "loss": 0.0235, "step": 96735 }, { "epoch": 4.51, - "learning_rate": 1.1023393183629461e-05, - "loss": 0.0469, + "learning_rate": 2.103739877989796e-05, + "loss": 0.0222, "step": 96740 }, { "epoch": 4.51, - "learning_rate": 1.1022924382354321e-05, - "loss": 0.0333, + "learning_rate": 2.103693071006194e-05, + "loss": 0.0154, "step": 96745 }, { "epoch": 4.51, - "learning_rate": 1.1022455581079181e-05, - "loss": 0.0576, + "learning_rate": 2.103646264022592e-05, + "loss": 0.018, "step": 96750 }, { "epoch": 4.51, - "learning_rate": 1.1021986779804041e-05, - "loss": 0.0975, + "learning_rate": 2.1035994570389903e-05, + "loss": 0.029, "step": 96755 }, { "epoch": 4.51, - "learning_rate": 1.1021517978528901e-05, - "loss": 0.1719, + "learning_rate": 2.1035526500553883e-05, + "loss": 0.0789, "step": 96760 }, { "epoch": 4.52, - "learning_rate": 1.1021049177253763e-05, - "loss": 0.1369, + "learning_rate": 2.1035058430717863e-05, + "loss": 0.1202, "step": 96765 }, { "epoch": 4.52, - "learning_rate": 1.1020580375978624e-05, - "loss": 0.1846, + "learning_rate": 2.1034590360881846e-05, + "loss": 0.1718, "step": 96770 }, { "epoch": 4.52, - "learning_rate": 1.1020111574703484e-05, - "loss": 0.1975, + "learning_rate": 2.1034122291045826e-05, + "loss": 0.2168, "step": 96775 }, { "epoch": 4.52, - "learning_rate": 1.1019642773428346e-05, - "loss": 0.0786, + "learning_rate": 2.1033654221209806e-05, + "loss": 0.0463, "step": 96780 }, { "epoch": 4.52, - "learning_rate": 1.1019173972153206e-05, - "loss": 0.0112, + "learning_rate": 2.1033186151373785e-05, + "loss": 0.0311, "step": 96785 }, { "epoch": 4.52, - "learning_rate": 1.1018705170878066e-05, - "loss": 0.0262, + "learning_rate": 2.103271808153777e-05, + "loss": 0.0213, "step": 96790 }, { "epoch": 4.52, - "learning_rate": 1.1018236369602926e-05, - "loss": 0.0202, + "learning_rate": 2.103225001170175e-05, + "loss": 0.0388, "step": 96795 }, { "epoch": 4.52, - "learning_rate": 1.1017767568327786e-05, - "loss": 0.0724, + "learning_rate": 2.1031781941865725e-05, + "loss": 0.0403, "step": 96800 }, { "epoch": 4.52, - "learning_rate": 1.1017298767052647e-05, - "loss": 0.0506, + "learning_rate": 2.1031313872029705e-05, + "loss": 0.0893, "step": 96805 }, { "epoch": 4.52, - "learning_rate": 1.1016829965777507e-05, - "loss": 0.0837, + "learning_rate": 2.1030845802193688e-05, + "loss": 0.0326, "step": 96810 }, { "epoch": 4.52, - "learning_rate": 1.1016361164502369e-05, - "loss": 0.1212, + "learning_rate": 2.1030377732357668e-05, + "loss": 0.1007, "step": 96815 }, { "epoch": 4.52, - "learning_rate": 1.101589236322723e-05, - "loss": 0.1663, + "learning_rate": 2.1029909662521647e-05, + "loss": 0.1811, "step": 96820 }, { "epoch": 4.52, - "learning_rate": 1.101542356195209e-05, - "loss": 0.298, + "learning_rate": 2.1029441592685627e-05, + "loss": 0.2788, "step": 96825 }, { "epoch": 4.52, - "learning_rate": 1.101495476067695e-05, - "loss": 0.0688, + "learning_rate": 2.102897352284961e-05, + "loss": 0.0979, "step": 96830 }, { "epoch": 4.52, - "learning_rate": 1.101448595940181e-05, - "loss": 0.0154, + "learning_rate": 2.102850545301359e-05, + "loss": 0.0234, "step": 96835 }, { "epoch": 4.52, - "learning_rate": 1.101401715812667e-05, - "loss": 0.0637, + "learning_rate": 2.102803738317757e-05, + "loss": 0.0459, "step": 96840 }, { "epoch": 4.52, - "learning_rate": 1.1013548356851532e-05, - "loss": 0.0434, + "learning_rate": 2.1027569313341553e-05, + "loss": 0.0211, "step": 96845 }, { "epoch": 4.52, - "learning_rate": 1.1013079555576392e-05, - "loss": 0.0713, + "learning_rate": 2.1027101243505533e-05, + "loss": 0.0447, "step": 96850 }, { "epoch": 4.52, - "learning_rate": 1.1012610754301252e-05, - "loss": 0.0701, + "learning_rate": 2.1026633173669513e-05, + "loss": 0.0724, "step": 96855 }, { "epoch": 4.52, - "learning_rate": 1.1012141953026115e-05, - "loss": 0.0347, + "learning_rate": 2.1026165103833493e-05, + "loss": 0.0289, "step": 96860 }, { "epoch": 4.52, - "learning_rate": 1.1011673151750975e-05, - "loss": 0.0871, + "learning_rate": 2.1025697033997473e-05, + "loss": 0.0293, "step": 96865 }, { "epoch": 4.52, - "learning_rate": 1.1011204350475835e-05, - "loss": 0.1951, + "learning_rate": 2.1025228964161452e-05, + "loss": 0.1463, "step": 96870 }, { "epoch": 4.52, - "learning_rate": 1.1010735549200695e-05, - "loss": 0.2353, + "learning_rate": 2.1024760894325432e-05, + "loss": 0.1827, "step": 96875 }, { "epoch": 4.52, - "learning_rate": 1.1010266747925555e-05, - "loss": 0.0605, + "learning_rate": 2.1024292824489412e-05, + "loss": 0.0594, "step": 96880 }, { "epoch": 4.52, - "learning_rate": 1.1009797946650416e-05, - "loss": 0.0668, + "learning_rate": 2.1023824754653395e-05, + "loss": 0.0114, "step": 96885 }, { "epoch": 4.52, - "learning_rate": 1.1009329145375276e-05, - "loss": 0.0718, + "learning_rate": 2.1023356684817375e-05, + "loss": 0.0089, "step": 96890 }, { "epoch": 4.52, - "learning_rate": 1.1008860344100136e-05, - "loss": 0.0293, + "learning_rate": 2.1022888614981355e-05, + "loss": 0.0183, "step": 96895 }, { "epoch": 4.52, - "learning_rate": 1.1008391542824996e-05, - "loss": 0.0657, + "learning_rate": 2.1022420545145338e-05, + "loss": 0.0467, "step": 96900 }, { "epoch": 4.52, - "learning_rate": 1.1007922741549858e-05, - "loss": 0.0703, + "learning_rate": 2.1021952475309318e-05, + "loss": 0.0482, "step": 96905 }, { "epoch": 4.52, - "learning_rate": 1.100745394027472e-05, - "loss": 0.1935, + "learning_rate": 2.1021484405473298e-05, + "loss": 0.0979, "step": 96910 }, { "epoch": 4.52, - "learning_rate": 1.100698513899958e-05, - "loss": 0.0927, + "learning_rate": 2.1021016335637278e-05, + "loss": 0.0979, "step": 96915 }, { "epoch": 4.52, - "learning_rate": 1.100651633772444e-05, - "loss": 0.1122, + "learning_rate": 2.102054826580126e-05, + "loss": 0.1551, "step": 96920 }, { "epoch": 4.52, - "learning_rate": 1.10060475364493e-05, - "loss": 0.2593, + "learning_rate": 2.1020080195965237e-05, + "loss": 0.3301, "step": 96925 }, { "epoch": 4.52, - "learning_rate": 1.100557873517416e-05, - "loss": 0.0908, + "learning_rate": 2.1019612126129217e-05, + "loss": 0.0489, "step": 96930 }, { "epoch": 4.52, - "learning_rate": 1.100510993389902e-05, - "loss": 0.0327, + "learning_rate": 2.1019144056293197e-05, + "loss": 0.0154, "step": 96935 }, { "epoch": 4.52, - "learning_rate": 1.100464113262388e-05, - "loss": 0.0196, + "learning_rate": 2.101867598645718e-05, + "loss": 0.062, "step": 96940 }, { "epoch": 4.52, - "learning_rate": 1.1004172331348742e-05, - "loss": 0.0489, + "learning_rate": 2.101820791662116e-05, + "loss": 0.0796, "step": 96945 }, { "epoch": 4.52, - "learning_rate": 1.1003703530073602e-05, - "loss": 0.0452, + "learning_rate": 2.101773984678514e-05, + "loss": 0.0386, "step": 96950 }, { "epoch": 4.52, - "learning_rate": 1.1003234728798464e-05, - "loss": 0.0649, + "learning_rate": 2.1017271776949123e-05, + "loss": 0.0633, "step": 96955 }, { "epoch": 4.52, - "learning_rate": 1.1002765927523324e-05, - "loss": 0.0449, + "learning_rate": 2.1016803707113103e-05, + "loss": 0.0719, "step": 96960 }, { "epoch": 4.52, - "learning_rate": 1.1002297126248185e-05, - "loss": 0.1372, + "learning_rate": 2.1016335637277083e-05, + "loss": 0.1411, "step": 96965 }, { "epoch": 4.52, - "learning_rate": 1.1001828324973045e-05, - "loss": 0.0901, + "learning_rate": 2.1015867567441062e-05, + "loss": 0.1299, "step": 96970 }, { "epoch": 4.52, - "learning_rate": 1.1001359523697905e-05, - "loss": 0.3421, + "learning_rate": 2.1015399497605046e-05, + "loss": 0.3784, "step": 96975 }, { "epoch": 4.53, - "learning_rate": 1.1000890722422765e-05, - "loss": 0.1052, + "learning_rate": 2.1014931427769025e-05, + "loss": 0.0757, "step": 96980 }, { "epoch": 4.53, - "learning_rate": 1.1000421921147627e-05, - "loss": 0.014, + "learning_rate": 2.1014463357933005e-05, + "loss": 0.0273, "step": 96985 }, { "epoch": 4.53, - "learning_rate": 1.0999953119872487e-05, - "loss": 0.065, + "learning_rate": 2.101399528809698e-05, + "loss": 0.073, "step": 96990 }, { "epoch": 4.53, - "learning_rate": 1.0999484318597347e-05, - "loss": 0.0455, + "learning_rate": 2.1013527218260965e-05, + "loss": 0.0179, "step": 96995 }, { "epoch": 4.53, - "learning_rate": 1.0999015517322208e-05, - "loss": 0.1408, + "learning_rate": 2.1013059148424945e-05, + "loss": 0.0181, "step": 97000 }, { "epoch": 4.53, - "learning_rate": 1.099854671604707e-05, - "loss": 0.0374, + "learning_rate": 2.1012591078588924e-05, + "loss": 0.0736, "step": 97005 }, { "epoch": 4.53, - "learning_rate": 1.099807791477193e-05, - "loss": 0.1551, + "learning_rate": 2.1012123008752904e-05, + "loss": 0.0721, "step": 97010 }, { "epoch": 4.53, - "learning_rate": 1.099760911349679e-05, - "loss": 0.1214, + "learning_rate": 2.1011654938916887e-05, + "loss": 0.0637, "step": 97015 }, { "epoch": 4.53, - "learning_rate": 1.099714031222165e-05, - "loss": 0.1428, + "learning_rate": 2.1011186869080867e-05, + "loss": 0.1807, "step": 97020 }, { "epoch": 4.53, - "learning_rate": 1.0996671510946511e-05, - "loss": 0.4012, + "learning_rate": 2.1010718799244847e-05, + "loss": 0.1821, "step": 97025 }, { "epoch": 4.53, - "learning_rate": 1.0996202709671371e-05, - "loss": 0.087, + "learning_rate": 2.101025072940883e-05, + "loss": 0.08, "step": 97030 }, { "epoch": 4.53, - "learning_rate": 1.0995733908396231e-05, - "loss": 0.0144, + "learning_rate": 2.100978265957281e-05, + "loss": 0.0292, "step": 97035 }, { "epoch": 4.53, - "learning_rate": 1.0995265107121091e-05, - "loss": 0.0311, + "learning_rate": 2.100931458973679e-05, + "loss": 0.023, "step": 97040 }, { "epoch": 4.53, - "learning_rate": 1.0994796305845951e-05, - "loss": 0.0335, + "learning_rate": 2.100884651990077e-05, + "loss": 0.0323, "step": 97045 }, { "epoch": 4.53, - "learning_rate": 1.0994327504570814e-05, - "loss": 0.0715, + "learning_rate": 2.1008378450064753e-05, + "loss": 0.045, "step": 97050 }, { "epoch": 4.53, - "learning_rate": 1.0993858703295674e-05, - "loss": 0.0725, + "learning_rate": 2.100791038022873e-05, + "loss": 0.0494, "step": 97055 }, { "epoch": 4.53, - "learning_rate": 1.0993389902020534e-05, - "loss": 0.1107, + "learning_rate": 2.100744231039271e-05, + "loss": 0.1249, "step": 97060 }, { "epoch": 4.53, - "learning_rate": 1.0992921100745396e-05, - "loss": 0.1399, + "learning_rate": 2.100697424055669e-05, + "loss": 0.1015, "step": 97065 }, { "epoch": 4.53, - "learning_rate": 1.0992452299470256e-05, - "loss": 0.2294, + "learning_rate": 2.1006506170720672e-05, + "loss": 0.264, "step": 97070 }, { "epoch": 4.53, - "learning_rate": 1.0991983498195116e-05, - "loss": 0.2816, + "learning_rate": 2.1006038100884652e-05, + "loss": 0.1161, "step": 97075 }, { "epoch": 4.53, - "learning_rate": 1.0991514696919976e-05, - "loss": 0.0701, + "learning_rate": 2.1005570031048632e-05, + "loss": 0.0534, "step": 97080 }, { "epoch": 4.53, - "learning_rate": 1.0991045895644836e-05, - "loss": 0.0097, + "learning_rate": 2.1005101961212615e-05, + "loss": 0.0641, "step": 97085 }, { "epoch": 4.53, - "learning_rate": 1.0990577094369697e-05, - "loss": 0.0163, + "learning_rate": 2.1004633891376595e-05, + "loss": 0.0161, "step": 97090 }, { "epoch": 4.53, - "learning_rate": 1.0990108293094559e-05, - "loss": 0.0571, + "learning_rate": 2.1004165821540575e-05, + "loss": 0.0435, "step": 97095 }, { "epoch": 4.53, - "learning_rate": 1.0989639491819419e-05, - "loss": 0.0786, + "learning_rate": 2.1003697751704555e-05, + "loss": 0.0698, "step": 97100 }, { "epoch": 4.53, - "learning_rate": 1.098917069054428e-05, - "loss": 0.0738, + "learning_rate": 2.1003229681868538e-05, + "loss": 0.0774, "step": 97105 }, { "epoch": 4.53, - "learning_rate": 1.098870188926914e-05, - "loss": 0.0865, + "learning_rate": 2.1002761612032518e-05, + "loss": 0.0486, "step": 97110 }, { "epoch": 4.53, - "learning_rate": 1.0988233087994e-05, - "loss": 0.088, + "learning_rate": 2.1002293542196494e-05, + "loss": 0.1204, "step": 97115 }, { "epoch": 4.53, - "learning_rate": 1.098776428671886e-05, - "loss": 0.1743, + "learning_rate": 2.1001825472360474e-05, + "loss": 0.2374, "step": 97120 }, { "epoch": 4.53, - "learning_rate": 1.098729548544372e-05, - "loss": 0.2513, + "learning_rate": 2.1001357402524457e-05, + "loss": 0.2785, "step": 97125 }, { "epoch": 4.53, - "learning_rate": 1.0986826684168582e-05, - "loss": 0.069, + "learning_rate": 2.1000889332688437e-05, + "loss": 0.0814, "step": 97130 }, { "epoch": 4.53, - "learning_rate": 1.0986357882893442e-05, - "loss": 0.0248, + "learning_rate": 2.1000421262852417e-05, + "loss": 0.0239, "step": 97135 }, { "epoch": 4.53, - "learning_rate": 1.0985889081618303e-05, - "loss": 0.0325, + "learning_rate": 2.09999531930164e-05, + "loss": 0.055, "step": 97140 }, { "epoch": 4.53, - "learning_rate": 1.0985420280343165e-05, - "loss": 0.0395, + "learning_rate": 2.099948512318038e-05, + "loss": 0.0741, "step": 97145 }, { "epoch": 4.53, - "learning_rate": 1.0984951479068025e-05, - "loss": 0.0381, + "learning_rate": 2.099901705334436e-05, + "loss": 0.042, "step": 97150 }, { "epoch": 4.53, - "learning_rate": 1.0984482677792885e-05, - "loss": 0.0915, + "learning_rate": 2.099854898350834e-05, + "loss": 0.0392, "step": 97155 }, { "epoch": 4.53, - "learning_rate": 1.0984013876517745e-05, - "loss": 0.0811, + "learning_rate": 2.0998080913672323e-05, + "loss": 0.0959, "step": 97160 }, { "epoch": 4.53, - "learning_rate": 1.0983545075242605e-05, - "loss": 0.1434, + "learning_rate": 2.0997612843836302e-05, + "loss": 0.0883, "step": 97165 }, { "epoch": 4.53, - "learning_rate": 1.0983076273967466e-05, - "loss": 0.2167, + "learning_rate": 2.0997144774000282e-05, + "loss": 0.0943, "step": 97170 }, { "epoch": 4.53, - "learning_rate": 1.0982607472692326e-05, - "loss": 0.3037, + "learning_rate": 2.0996676704164262e-05, + "loss": 0.2075, "step": 97175 }, { "epoch": 4.53, - "learning_rate": 1.0982138671417186e-05, - "loss": 0.0661, + "learning_rate": 2.0996208634328242e-05, + "loss": 0.0236, "step": 97180 }, { "epoch": 4.53, - "learning_rate": 1.098166987014205e-05, - "loss": 0.043, + "learning_rate": 2.099574056449222e-05, + "loss": 0.0504, "step": 97185 }, { "epoch": 4.54, - "learning_rate": 1.098120106886691e-05, - "loss": 0.051, + "learning_rate": 2.09952724946562e-05, + "loss": 0.0656, "step": 97190 }, { "epoch": 4.54, - "learning_rate": 1.098073226759177e-05, - "loss": 0.012, + "learning_rate": 2.0994804424820185e-05, + "loss": 0.0327, "step": 97195 }, { "epoch": 4.54, - "learning_rate": 1.098026346631663e-05, - "loss": 0.0995, + "learning_rate": 2.0994336354984164e-05, + "loss": 0.1101, "step": 97200 }, { "epoch": 4.54, - "learning_rate": 1.097979466504149e-05, - "loss": 0.0614, + "learning_rate": 2.0993868285148144e-05, + "loss": 0.0357, "step": 97205 }, { "epoch": 4.54, - "learning_rate": 1.0979325863766351e-05, - "loss": 0.0419, + "learning_rate": 2.0993400215312124e-05, + "loss": 0.0755, "step": 97210 }, { "epoch": 4.54, - "learning_rate": 1.097885706249121e-05, - "loss": 0.1193, + "learning_rate": 2.0992932145476107e-05, + "loss": 0.1216, "step": 97215 }, { "epoch": 4.54, - "learning_rate": 1.097838826121607e-05, - "loss": 0.0944, + "learning_rate": 2.0992464075640087e-05, + "loss": 0.1381, "step": 97220 }, { "epoch": 4.54, - "learning_rate": 1.097791945994093e-05, - "loss": 0.3821, + "learning_rate": 2.0991996005804067e-05, + "loss": 0.2631, "step": 97225 }, { "epoch": 4.54, - "learning_rate": 1.097745065866579e-05, - "loss": 0.0986, + "learning_rate": 2.0991527935968047e-05, + "loss": 0.0583, "step": 97230 }, { "epoch": 4.54, - "learning_rate": 1.0976981857390654e-05, - "loss": 0.0231, + "learning_rate": 2.099105986613203e-05, + "loss": 0.0249, "step": 97235 }, { "epoch": 4.54, - "learning_rate": 1.0976513056115514e-05, - "loss": 0.0322, + "learning_rate": 2.099059179629601e-05, + "loss": 0.0249, "step": 97240 }, { "epoch": 4.54, - "learning_rate": 1.0976044254840374e-05, - "loss": 0.0597, + "learning_rate": 2.0990123726459986e-05, + "loss": 0.052, "step": 97245 }, { "epoch": 4.54, - "learning_rate": 1.0975575453565235e-05, + "learning_rate": 2.0989655656623966e-05, "loss": 0.0502, "step": 97250 }, { "epoch": 4.54, - "learning_rate": 1.0975106652290095e-05, - "loss": 0.0478, + "learning_rate": 2.098918758678795e-05, + "loss": 0.0953, "step": 97255 }, { "epoch": 4.54, - "learning_rate": 1.0974637851014955e-05, - "loss": 0.1236, + "learning_rate": 2.098871951695193e-05, + "loss": 0.0977, "step": 97260 }, { "epoch": 4.54, - "learning_rate": 1.0974169049739815e-05, - "loss": 0.1108, + "learning_rate": 2.098825144711591e-05, + "loss": 0.0375, "step": 97265 }, { "epoch": 4.54, - "learning_rate": 1.0973700248464675e-05, - "loss": 0.1377, + "learning_rate": 2.0987783377279892e-05, + "loss": 0.2199, "step": 97270 }, { "epoch": 4.54, - "learning_rate": 1.0973231447189537e-05, - "loss": 0.2356, + "learning_rate": 2.0987315307443872e-05, + "loss": 0.2748, "step": 97275 }, { "epoch": 4.54, - "learning_rate": 1.0972762645914398e-05, - "loss": 0.0937, + "learning_rate": 2.0986847237607852e-05, + "loss": 0.0995, "step": 97280 }, { "epoch": 4.54, - "learning_rate": 1.0972293844639258e-05, - "loss": 0.0699, + "learning_rate": 2.098637916777183e-05, + "loss": 0.0088, "step": 97285 }, { "epoch": 4.54, - "learning_rate": 1.097182504336412e-05, - "loss": 0.0228, + "learning_rate": 2.0985911097935815e-05, + "loss": 0.0101, "step": 97290 }, { "epoch": 4.54, - "learning_rate": 1.097135624208898e-05, - "loss": 0.0322, + "learning_rate": 2.0985443028099795e-05, + "loss": 0.0485, "step": 97295 }, { "epoch": 4.54, - "learning_rate": 1.097088744081384e-05, - "loss": 0.0416, + "learning_rate": 2.0984974958263774e-05, + "loss": 0.0527, "step": 97300 }, { "epoch": 4.54, - "learning_rate": 1.09704186395387e-05, - "loss": 0.0185, + "learning_rate": 2.098450688842775e-05, + "loss": 0.0274, "step": 97305 }, { "epoch": 4.54, - "learning_rate": 1.096994983826356e-05, - "loss": 0.081, + "learning_rate": 2.0984038818591734e-05, + "loss": 0.0815, "step": 97310 }, { "epoch": 4.54, - "learning_rate": 1.0969481036988421e-05, - "loss": 0.0595, + "learning_rate": 2.0983570748755714e-05, + "loss": 0.1094, "step": 97315 }, { "epoch": 4.54, - "learning_rate": 1.0969012235713281e-05, - "loss": 0.2023, + "learning_rate": 2.0983102678919694e-05, + "loss": 0.09, "step": 97320 }, { "epoch": 4.54, - "learning_rate": 1.0968543434438143e-05, - "loss": 0.2479, + "learning_rate": 2.0982634609083677e-05, + "loss": 0.3576, "step": 97325 }, { "epoch": 4.54, - "learning_rate": 1.0968074633163005e-05, - "loss": 0.0563, + "learning_rate": 2.0982166539247657e-05, + "loss": 0.0465, "step": 97330 }, { "epoch": 4.54, - "learning_rate": 1.0967605831887864e-05, - "loss": 0.0366, + "learning_rate": 2.0981698469411636e-05, + "loss": 0.0243, "step": 97335 }, { "epoch": 4.54, - "learning_rate": 1.0967137030612724e-05, - "loss": 0.0332, + "learning_rate": 2.0981230399575616e-05, + "loss": 0.0545, "step": 97340 }, { "epoch": 4.54, - "learning_rate": 1.0966668229337584e-05, - "loss": 0.1006, + "learning_rate": 2.09807623297396e-05, + "loss": 0.0595, "step": 97345 }, { "epoch": 4.54, - "learning_rate": 1.0966199428062444e-05, - "loss": 0.0936, + "learning_rate": 2.098029425990358e-05, + "loss": 0.0586, "step": 97350 }, { "epoch": 4.54, - "learning_rate": 1.0965730626787306e-05, - "loss": 0.0536, + "learning_rate": 2.097982619006756e-05, + "loss": 0.0654, "step": 97355 }, { "epoch": 4.54, - "learning_rate": 1.0965261825512166e-05, - "loss": 0.1128, + "learning_rate": 2.097935812023154e-05, + "loss": 0.0334, "step": 97360 }, { "epoch": 4.54, - "learning_rate": 1.0964793024237026e-05, - "loss": 0.0861, + "learning_rate": 2.0978890050395522e-05, + "loss": 0.0662, "step": 97365 }, { "epoch": 4.54, - "learning_rate": 1.0964324222961886e-05, - "loss": 0.1503, + "learning_rate": 2.09784219805595e-05, + "loss": 0.1354, "step": 97370 }, { "epoch": 4.54, - "learning_rate": 1.0963855421686749e-05, - "loss": 0.2551, + "learning_rate": 2.097795391072348e-05, + "loss": 0.1373, "step": 97375 }, { "epoch": 4.54, - "learning_rate": 1.0963386620411609e-05, - "loss": 0.0587, + "learning_rate": 2.097748584088746e-05, + "loss": 0.0762, "step": 97380 }, { "epoch": 4.54, - "learning_rate": 1.0962917819136469e-05, - "loss": 0.007, + "learning_rate": 2.097701777105144e-05, + "loss": 0.0078, "step": 97385 }, { "epoch": 4.54, - "learning_rate": 1.0962449017861329e-05, - "loss": 0.0194, + "learning_rate": 2.097654970121542e-05, + "loss": 0.0218, "step": 97390 }, { "epoch": 4.54, - "learning_rate": 1.096198021658619e-05, - "loss": 0.0409, + "learning_rate": 2.09760816313794e-05, + "loss": 0.0481, "step": 97395 }, { "epoch": 4.54, - "learning_rate": 1.096151141531105e-05, - "loss": 0.0656, + "learning_rate": 2.0975613561543384e-05, + "loss": 0.0639, "step": 97400 }, { "epoch": 4.55, - "learning_rate": 1.096104261403591e-05, - "loss": 0.0702, + "learning_rate": 2.0975145491707364e-05, + "loss": 0.0183, "step": 97405 }, { "epoch": 4.55, - "learning_rate": 1.096057381276077e-05, - "loss": 0.0919, + "learning_rate": 2.0974677421871344e-05, + "loss": 0.0961, "step": 97410 }, { "epoch": 4.55, - "learning_rate": 1.0960105011485632e-05, - "loss": 0.0922, + "learning_rate": 2.0974209352035324e-05, + "loss": 0.0809, "step": 97415 }, { "epoch": 4.55, - "learning_rate": 1.0959636210210494e-05, - "loss": 0.1553, + "learning_rate": 2.0973741282199307e-05, + "loss": 0.089, "step": 97420 }, { "epoch": 4.55, - "learning_rate": 1.0959167408935353e-05, - "loss": 0.1833, + "learning_rate": 2.0973273212363287e-05, + "loss": 0.3088, "step": 97425 }, { "epoch": 4.55, - "learning_rate": 1.0958698607660213e-05, - "loss": 0.0837, + "learning_rate": 2.0972805142527267e-05, + "loss": 0.0613, "step": 97430 }, { "epoch": 4.55, - "learning_rate": 1.0958229806385075e-05, - "loss": 0.0326, + "learning_rate": 2.0972337072691243e-05, + "loss": 0.0359, "step": 97435 }, { "epoch": 4.55, - "learning_rate": 1.0957761005109935e-05, - "loss": 0.0537, + "learning_rate": 2.0971869002855226e-05, + "loss": 0.0426, "step": 97440 }, { "epoch": 4.55, - "learning_rate": 1.0957292203834795e-05, - "loss": 0.0605, + "learning_rate": 2.0971400933019206e-05, + "loss": 0.0139, "step": 97445 }, { "epoch": 4.55, - "learning_rate": 1.0956823402559655e-05, - "loss": 0.0339, + "learning_rate": 2.0970932863183186e-05, + "loss": 0.0417, "step": 97450 }, { "epoch": 4.55, - "learning_rate": 1.0956354601284516e-05, - "loss": 0.0426, + "learning_rate": 2.097046479334717e-05, + "loss": 0.0634, "step": 97455 }, { "epoch": 4.55, - "learning_rate": 1.0955885800009376e-05, - "loss": 0.0434, + "learning_rate": 2.096999672351115e-05, + "loss": 0.0711, "step": 97460 }, { "epoch": 4.55, - "learning_rate": 1.0955416998734238e-05, - "loss": 0.1599, + "learning_rate": 2.096952865367513e-05, + "loss": 0.1053, "step": 97465 }, { "epoch": 4.55, - "learning_rate": 1.0954948197459098e-05, - "loss": 0.1171, + "learning_rate": 2.096906058383911e-05, + "loss": 0.1856, "step": 97470 }, { "epoch": 4.55, - "learning_rate": 1.095447939618396e-05, - "loss": 0.3212, + "learning_rate": 2.0968592514003092e-05, + "loss": 0.2587, "step": 97475 }, { "epoch": 4.55, - "learning_rate": 1.095401059490882e-05, - "loss": 0.0729, + "learning_rate": 2.096812444416707e-05, + "loss": 0.0621, "step": 97480 }, { "epoch": 4.55, - "learning_rate": 1.095354179363368e-05, - "loss": 0.0176, + "learning_rate": 2.096765637433105e-05, + "loss": 0.0091, "step": 97485 }, { "epoch": 4.55, - "learning_rate": 1.095307299235854e-05, - "loss": 0.0497, + "learning_rate": 2.096718830449503e-05, + "loss": 0.0304, "step": 97490 }, { "epoch": 4.55, - "learning_rate": 1.0952604191083401e-05, - "loss": 0.0452, + "learning_rate": 2.096672023465901e-05, + "loss": 0.0136, "step": 97495 }, { "epoch": 4.55, - "learning_rate": 1.0952135389808261e-05, - "loss": 0.0537, + "learning_rate": 2.096625216482299e-05, + "loss": 0.0453, "step": 97500 }, { "epoch": 4.55, - "learning_rate": 1.0951666588533121e-05, - "loss": 0.0738, + "learning_rate": 2.096578409498697e-05, + "loss": 0.0709, "step": 97505 }, { "epoch": 4.55, - "learning_rate": 1.0951197787257983e-05, - "loss": 0.0585, + "learning_rate": 2.0965316025150954e-05, + "loss": 0.0918, "step": 97510 }, { "epoch": 4.55, - "learning_rate": 1.0950728985982844e-05, - "loss": 0.0748, + "learning_rate": 2.0964847955314934e-05, + "loss": 0.1289, "step": 97515 }, { "epoch": 4.55, - "learning_rate": 1.0950260184707704e-05, - "loss": 0.113, + "learning_rate": 2.0964379885478913e-05, + "loss": 0.1695, "step": 97520 }, { "epoch": 4.55, - "learning_rate": 1.0949791383432564e-05, - "loss": 0.1528, + "learning_rate": 2.0963911815642893e-05, + "loss": 0.3825, "step": 97525 }, { "epoch": 4.55, - "learning_rate": 1.0949322582157424e-05, - "loss": 0.1041, + "learning_rate": 2.0963443745806876e-05, + "loss": 0.0915, "step": 97530 }, { "epoch": 4.55, - "learning_rate": 1.0948853780882286e-05, - "loss": 0.012, + "learning_rate": 2.0962975675970856e-05, + "loss": 0.018, "step": 97535 }, { "epoch": 4.55, - "learning_rate": 1.0948384979607145e-05, - "loss": 0.0394, + "learning_rate": 2.0962507606134836e-05, + "loss": 0.0233, "step": 97540 }, { "epoch": 4.55, - "learning_rate": 1.0947916178332005e-05, - "loss": 0.0728, + "learning_rate": 2.0962039536298816e-05, + "loss": 0.0214, "step": 97545 }, { "epoch": 4.55, - "learning_rate": 1.0947447377056865e-05, - "loss": 0.0417, + "learning_rate": 2.09615714664628e-05, + "loss": 0.1078, "step": 97550 }, { "epoch": 4.55, - "learning_rate": 1.0946978575781725e-05, - "loss": 0.0681, + "learning_rate": 2.096110339662678e-05, + "loss": 0.0543, "step": 97555 }, { "epoch": 4.55, - "learning_rate": 1.0946509774506589e-05, - "loss": 0.0422, + "learning_rate": 2.0960635326790755e-05, + "loss": 0.0576, "step": 97560 }, { "epoch": 4.55, - "learning_rate": 1.0946040973231449e-05, - "loss": 0.0566, + "learning_rate": 2.096016725695474e-05, + "loss": 0.1305, "step": 97565 }, { "epoch": 4.55, - "learning_rate": 1.0945572171956308e-05, - "loss": 0.1136, + "learning_rate": 2.095969918711872e-05, + "loss": 0.0968, "step": 97570 }, { "epoch": 4.55, - "learning_rate": 1.094510337068117e-05, - "loss": 0.3118, + "learning_rate": 2.0959231117282698e-05, + "loss": 0.2824, "step": 97575 }, { "epoch": 4.55, - "learning_rate": 1.094463456940603e-05, - "loss": 0.0719, + "learning_rate": 2.0958763047446678e-05, + "loss": 0.07, "step": 97580 }, { "epoch": 4.55, - "learning_rate": 1.094416576813089e-05, - "loss": 0.0333, + "learning_rate": 2.095829497761066e-05, + "loss": 0.025, "step": 97585 }, { "epoch": 4.55, - "learning_rate": 1.094369696685575e-05, - "loss": 0.0595, + "learning_rate": 2.095782690777464e-05, + "loss": 0.0873, "step": 97590 }, { "epoch": 4.55, - "learning_rate": 1.094322816558061e-05, - "loss": 0.0278, + "learning_rate": 2.095735883793862e-05, + "loss": 0.0467, "step": 97595 }, { "epoch": 4.55, - "learning_rate": 1.0942759364305471e-05, - "loss": 0.0473, + "learning_rate": 2.09568907681026e-05, + "loss": 0.0814, "step": 97600 }, { "epoch": 4.55, - "learning_rate": 1.0942290563030333e-05, - "loss": 0.0964, + "learning_rate": 2.0956422698266584e-05, + "loss": 0.0798, "step": 97605 }, { "epoch": 4.55, - "learning_rate": 1.0941821761755193e-05, - "loss": 0.1222, + "learning_rate": 2.0955954628430564e-05, + "loss": 0.1046, "step": 97610 }, { "epoch": 4.55, - "learning_rate": 1.0941352960480055e-05, - "loss": 0.1608, + "learning_rate": 2.0955486558594544e-05, + "loss": 0.0886, "step": 97615 }, { "epoch": 4.56, - "learning_rate": 1.0940884159204915e-05, - "loss": 0.1823, + "learning_rate": 2.0955018488758523e-05, + "loss": 0.1292, "step": 97620 }, { "epoch": 4.56, - "learning_rate": 1.0940415357929775e-05, - "loss": 0.4506, + "learning_rate": 2.0954550418922503e-05, + "loss": 0.2294, "step": 97625 }, { "epoch": 4.56, - "learning_rate": 1.0939946556654634e-05, - "loss": 0.1061, + "learning_rate": 2.0954082349086483e-05, + "loss": 0.0984, "step": 97630 }, { "epoch": 4.56, - "learning_rate": 1.0939477755379494e-05, - "loss": 0.0073, + "learning_rate": 2.0953614279250463e-05, + "loss": 0.0274, "step": 97635 }, { "epoch": 4.56, - "learning_rate": 1.0939008954104356e-05, - "loss": 0.044, + "learning_rate": 2.0953146209414446e-05, + "loss": 0.0137, "step": 97640 }, { "epoch": 4.56, - "learning_rate": 1.0938540152829216e-05, - "loss": 0.0332, + "learning_rate": 2.0952678139578426e-05, + "loss": 0.0302, "step": 97645 }, { "epoch": 4.56, - "learning_rate": 1.0938071351554078e-05, - "loss": 0.0764, + "learning_rate": 2.0952210069742406e-05, + "loss": 0.0428, "step": 97650 }, { "epoch": 4.56, - "learning_rate": 1.093760255027894e-05, - "loss": 0.0479, + "learning_rate": 2.0951741999906385e-05, + "loss": 0.0857, "step": 97655 }, { "epoch": 4.56, - "learning_rate": 1.0937133749003799e-05, - "loss": 0.0683, + "learning_rate": 2.095127393007037e-05, + "loss": 0.0873, "step": 97660 }, { "epoch": 4.56, - "learning_rate": 1.0936664947728659e-05, - "loss": 0.0851, + "learning_rate": 2.095080586023435e-05, + "loss": 0.084, "step": 97665 }, { "epoch": 4.56, - "learning_rate": 1.0936196146453519e-05, - "loss": 0.1891, + "learning_rate": 2.0950337790398328e-05, + "loss": 0.1788, "step": 97670 }, { "epoch": 4.56, - "learning_rate": 1.0935727345178379e-05, - "loss": 0.149, + "learning_rate": 2.0949869720562308e-05, + "loss": 0.2485, "step": 97675 }, { "epoch": 4.56, - "learning_rate": 1.093525854390324e-05, - "loss": 0.0344, + "learning_rate": 2.094940165072629e-05, + "loss": 0.0679, "step": 97680 }, { "epoch": 4.56, - "learning_rate": 1.09347897426281e-05, - "loss": 0.0037, + "learning_rate": 2.0948933580890268e-05, + "loss": 0.0198, "step": 97685 }, { "epoch": 4.56, - "learning_rate": 1.093432094135296e-05, - "loss": 0.0173, + "learning_rate": 2.0948465511054248e-05, + "loss": 0.0297, "step": 97690 }, { "epoch": 4.56, - "learning_rate": 1.093385214007782e-05, - "loss": 0.0239, + "learning_rate": 2.094799744121823e-05, + "loss": 0.0608, "step": 97695 }, { "epoch": 4.56, - "learning_rate": 1.0933383338802684e-05, - "loss": 0.0625, + "learning_rate": 2.094752937138221e-05, + "loss": 0.0573, "step": 97700 }, { "epoch": 4.56, - "learning_rate": 1.0932914537527544e-05, - "loss": 0.0484, + "learning_rate": 2.094706130154619e-05, + "loss": 0.0442, "step": 97705 }, { "epoch": 4.56, - "learning_rate": 1.0932445736252404e-05, - "loss": 0.028, + "learning_rate": 2.094659323171017e-05, + "loss": 0.0958, "step": 97710 }, { "epoch": 4.56, - "learning_rate": 1.0931976934977263e-05, - "loss": 0.0654, + "learning_rate": 2.0946125161874153e-05, + "loss": 0.1107, "step": 97715 }, { "epoch": 4.56, - "learning_rate": 1.0931508133702125e-05, - "loss": 0.1062, + "learning_rate": 2.0945657092038133e-05, + "loss": 0.1718, "step": 97720 }, { "epoch": 4.56, - "learning_rate": 1.0931039332426985e-05, - "loss": 0.2395, + "learning_rate": 2.0945189022202113e-05, + "loss": 0.2188, "step": 97725 }, { "epoch": 4.56, - "learning_rate": 1.0930570531151845e-05, - "loss": 0.0758, + "learning_rate": 2.0944720952366093e-05, + "loss": 0.0558, "step": 97730 }, { "epoch": 4.56, - "learning_rate": 1.0930101729876705e-05, - "loss": 0.0345, + "learning_rate": 2.0944252882530076e-05, + "loss": 0.0074, "step": 97735 }, { "epoch": 4.56, - "learning_rate": 1.0929632928601565e-05, - "loss": 0.0282, + "learning_rate": 2.0943784812694056e-05, + "loss": 0.0668, "step": 97740 }, { "epoch": 4.56, - "learning_rate": 1.0929164127326428e-05, - "loss": 0.03, + "learning_rate": 2.0943316742858036e-05, + "loss": 0.0489, "step": 97745 }, { "epoch": 4.56, - "learning_rate": 1.0928695326051288e-05, - "loss": 0.0638, + "learning_rate": 2.0942848673022016e-05, + "loss": 0.0683, "step": 97750 }, { "epoch": 4.56, - "learning_rate": 1.0928226524776148e-05, - "loss": 0.0724, + "learning_rate": 2.0942380603185995e-05, + "loss": 0.0763, "step": 97755 }, { "epoch": 4.56, - "learning_rate": 1.092775772350101e-05, - "loss": 0.1126, + "learning_rate": 2.0941912533349975e-05, + "loss": 0.0632, "step": 97760 }, { "epoch": 4.56, - "learning_rate": 1.092728892222587e-05, - "loss": 0.1215, + "learning_rate": 2.0941444463513955e-05, + "loss": 0.1464, "step": 97765 }, { "epoch": 4.56, - "learning_rate": 1.092682012095073e-05, - "loss": 0.1396, + "learning_rate": 2.0940976393677938e-05, + "loss": 0.1141, "step": 97770 }, { "epoch": 4.56, - "learning_rate": 1.092635131967559e-05, - "loss": 0.3429, + "learning_rate": 2.0940508323841918e-05, + "loss": 0.2393, "step": 97775 }, { "epoch": 4.56, - "learning_rate": 1.092588251840045e-05, - "loss": 0.0768, + "learning_rate": 2.0940040254005898e-05, + "loss": 0.0838, "step": 97780 }, { "epoch": 4.56, - "learning_rate": 1.0925413717125311e-05, - "loss": 0.0162, + "learning_rate": 2.0939572184169878e-05, + "loss": 0.019, "step": 97785 }, { "epoch": 4.56, - "learning_rate": 1.0924944915850173e-05, - "loss": 0.0282, + "learning_rate": 2.093910411433386e-05, + "loss": 0.0219, "step": 97790 }, { "epoch": 4.56, - "learning_rate": 1.0924476114575033e-05, - "loss": 0.0279, + "learning_rate": 2.093863604449784e-05, + "loss": 0.0268, "step": 97795 }, { "epoch": 4.56, - "learning_rate": 1.0924007313299894e-05, - "loss": 0.0783, + "learning_rate": 2.093816797466182e-05, + "loss": 0.0198, "step": 97800 }, { "epoch": 4.56, - "learning_rate": 1.0923538512024754e-05, - "loss": 0.0671, + "learning_rate": 2.09376999048258e-05, + "loss": 0.0491, "step": 97805 }, { "epoch": 4.56, - "learning_rate": 1.0923069710749614e-05, - "loss": 0.0647, + "learning_rate": 2.093723183498978e-05, + "loss": 0.1577, "step": 97810 }, { "epoch": 4.56, - "learning_rate": 1.0922600909474474e-05, - "loss": 0.1167, + "learning_rate": 2.093676376515376e-05, + "loss": 0.0551, "step": 97815 }, { "epoch": 4.56, - "learning_rate": 1.0922132108199334e-05, - "loss": 0.0914, + "learning_rate": 2.093629569531774e-05, + "loss": 0.192, "step": 97820 }, { "epoch": 4.56, - "learning_rate": 1.0921663306924196e-05, - "loss": 0.2441, + "learning_rate": 2.0935827625481723e-05, + "loss": 0.1859, "step": 97825 }, { "epoch": 4.56, - "learning_rate": 1.0921194505649056e-05, - "loss": 0.0606, + "learning_rate": 2.0935359555645703e-05, + "loss": 0.1078, "step": 97830 }, { "epoch": 4.57, - "learning_rate": 1.0920725704373917e-05, - "loss": 0.0078, + "learning_rate": 2.0934891485809683e-05, + "loss": 0.0654, "step": 97835 }, { "epoch": 4.57, - "learning_rate": 1.0920256903098779e-05, - "loss": 0.0232, + "learning_rate": 2.0934423415973662e-05, + "loss": 0.0584, "step": 97840 }, { "epoch": 4.57, - "learning_rate": 1.0919788101823639e-05, - "loss": 0.0427, + "learning_rate": 2.0933955346137646e-05, + "loss": 0.04, "step": 97845 }, { "epoch": 4.57, - "learning_rate": 1.0919319300548499e-05, - "loss": 0.0964, + "learning_rate": 2.0933487276301625e-05, + "loss": 0.0196, "step": 97850 }, { "epoch": 4.57, - "learning_rate": 1.0918850499273359e-05, - "loss": 0.0811, + "learning_rate": 2.0933019206465605e-05, + "loss": 0.1002, "step": 97855 }, { "epoch": 4.57, - "learning_rate": 1.0918381697998219e-05, - "loss": 0.1582, + "learning_rate": 2.0932551136629585e-05, + "loss": 0.041, "step": 97860 }, { "epoch": 4.57, - "learning_rate": 1.091791289672308e-05, - "loss": 0.1579, + "learning_rate": 2.0932083066793568e-05, + "loss": 0.0852, "step": 97865 }, { "epoch": 4.57, - "learning_rate": 1.091744409544794e-05, - "loss": 0.1226, + "learning_rate": 2.0931614996957548e-05, + "loss": 0.1922, "step": 97870 }, { "epoch": 4.57, - "learning_rate": 1.09169752941728e-05, - "loss": 0.1606, + "learning_rate": 2.0931146927121525e-05, + "loss": 0.3232, "step": 97875 }, { "epoch": 4.57, - "learning_rate": 1.091650649289766e-05, - "loss": 0.0293, + "learning_rate": 2.0930678857285508e-05, + "loss": 0.1016, "step": 97880 }, { "epoch": 4.57, - "learning_rate": 1.0916037691622523e-05, - "loss": 0.0428, + "learning_rate": 2.0930210787449488e-05, + "loss": 0.0076, "step": 97885 }, { "epoch": 4.57, - "learning_rate": 1.0915568890347383e-05, - "loss": 0.0083, + "learning_rate": 2.0929742717613467e-05, + "loss": 0.066, "step": 97890 }, { "epoch": 4.57, - "learning_rate": 1.0915100089072243e-05, - "loss": 0.0634, + "learning_rate": 2.0929274647777447e-05, + "loss": 0.0353, "step": 97895 }, { "epoch": 4.57, - "learning_rate": 1.0914631287797103e-05, - "loss": 0.0564, + "learning_rate": 2.092880657794143e-05, + "loss": 0.0904, "step": 97900 }, { "epoch": 4.57, - "learning_rate": 1.0914162486521965e-05, - "loss": 0.0286, + "learning_rate": 2.092833850810541e-05, + "loss": 0.0695, "step": 97905 }, { "epoch": 4.57, - "learning_rate": 1.0913693685246825e-05, - "loss": 0.0439, + "learning_rate": 2.092787043826939e-05, + "loss": 0.1509, "step": 97910 }, { "epoch": 4.57, - "learning_rate": 1.0913224883971685e-05, - "loss": 0.1408, + "learning_rate": 2.092740236843337e-05, + "loss": 0.0951, "step": 97915 }, { "epoch": 4.57, - "learning_rate": 1.0912756082696544e-05, - "loss": 0.1428, + "learning_rate": 2.0926934298597353e-05, + "loss": 0.1418, "step": 97920 }, { "epoch": 4.57, - "learning_rate": 1.0912287281421406e-05, - "loss": 0.2285, + "learning_rate": 2.0926466228761333e-05, + "loss": 0.2848, "step": 97925 }, { "epoch": 4.57, - "learning_rate": 1.0911818480146268e-05, - "loss": 0.0525, + "learning_rate": 2.0925998158925313e-05, + "loss": 0.0698, "step": 97930 }, { "epoch": 4.57, - "learning_rate": 1.0911349678871128e-05, - "loss": 0.034, + "learning_rate": 2.0925530089089293e-05, + "loss": 0.0582, "step": 97935 }, { "epoch": 4.57, - "learning_rate": 1.0910880877595988e-05, - "loss": 0.0188, + "learning_rate": 2.0925062019253272e-05, + "loss": 0.0141, "step": 97940 }, { "epoch": 4.57, - "learning_rate": 1.091041207632085e-05, - "loss": 0.0551, + "learning_rate": 2.0924593949417252e-05, + "loss": 0.0258, "step": 97945 }, { "epoch": 4.57, - "learning_rate": 1.090994327504571e-05, - "loss": 0.1143, + "learning_rate": 2.0924125879581232e-05, + "loss": 0.0857, "step": 97950 }, { "epoch": 4.57, - "learning_rate": 1.0909474473770569e-05, - "loss": 0.0686, + "learning_rate": 2.0923657809745215e-05, + "loss": 0.1039, "step": 97955 }, { "epoch": 4.57, - "learning_rate": 1.0909005672495429e-05, - "loss": 0.0711, + "learning_rate": 2.0923189739909195e-05, + "loss": 0.1305, "step": 97960 }, { "epoch": 4.57, - "learning_rate": 1.090853687122029e-05, - "loss": 0.0532, + "learning_rate": 2.0922721670073175e-05, + "loss": 0.1226, "step": 97965 }, { "epoch": 4.57, - "learning_rate": 1.090806806994515e-05, - "loss": 0.1223, + "learning_rate": 2.0922253600237155e-05, + "loss": 0.1142, "step": 97970 }, { "epoch": 4.57, - "learning_rate": 1.0907599268670012e-05, - "loss": 0.4614, + "learning_rate": 2.0921785530401138e-05, + "loss": 0.2917, "step": 97975 }, { "epoch": 4.57, - "learning_rate": 1.0907130467394874e-05, - "loss": 0.0924, + "learning_rate": 2.0921317460565118e-05, + "loss": 0.0902, "step": 97980 }, { "epoch": 4.57, - "learning_rate": 1.0906661666119734e-05, - "loss": 0.044, + "learning_rate": 2.0920849390729097e-05, + "loss": 0.014, "step": 97985 }, { "epoch": 4.57, - "learning_rate": 1.0906192864844594e-05, - "loss": 0.0362, + "learning_rate": 2.092038132089308e-05, + "loss": 0.0896, "step": 97990 }, { "epoch": 4.57, - "learning_rate": 1.0905724063569454e-05, - "loss": 0.0519, + "learning_rate": 2.091991325105706e-05, + "loss": 0.0305, "step": 97995 }, { "epoch": 4.57, - "learning_rate": 1.0905255262294314e-05, - "loss": 0.1229, + "learning_rate": 2.0919445181221037e-05, + "loss": 0.0184, "step": 98000 }, { "epoch": 4.57, - "learning_rate": 1.0904786461019175e-05, - "loss": 0.0834, + "learning_rate": 2.0918977111385017e-05, + "loss": 0.0585, "step": 98005 }, { "epoch": 4.57, - "learning_rate": 1.0904317659744035e-05, - "loss": 0.0484, + "learning_rate": 2.0918509041549e-05, + "loss": 0.082, "step": 98010 }, { "epoch": 4.57, - "learning_rate": 1.0903848858468895e-05, - "loss": 0.0684, + "learning_rate": 2.091804097171298e-05, + "loss": 0.0995, "step": 98015 }, { "epoch": 4.57, - "learning_rate": 1.0903380057193755e-05, - "loss": 0.1274, + "learning_rate": 2.091757290187696e-05, + "loss": 0.1579, "step": 98020 }, { "epoch": 4.57, - "learning_rate": 1.0902911255918618e-05, - "loss": 0.2913, + "learning_rate": 2.091710483204094e-05, + "loss": 0.1199, "step": 98025 }, { "epoch": 4.57, - "learning_rate": 1.0902442454643478e-05, - "loss": 0.0674, + "learning_rate": 2.0916636762204923e-05, + "loss": 0.0369, "step": 98030 }, { "epoch": 4.57, - "learning_rate": 1.0901973653368338e-05, - "loss": 0.0507, + "learning_rate": 2.0916168692368902e-05, + "loss": 0.0297, "step": 98035 }, { "epoch": 4.57, - "learning_rate": 1.0901504852093198e-05, - "loss": 0.0237, + "learning_rate": 2.0915700622532882e-05, + "loss": 0.0191, "step": 98040 }, { "epoch": 4.57, - "learning_rate": 1.090103605081806e-05, - "loss": 0.0243, + "learning_rate": 2.0915232552696862e-05, + "loss": 0.0806, "step": 98045 }, { "epoch": 4.58, - "learning_rate": 1.090056724954292e-05, - "loss": 0.0373, + "learning_rate": 2.0914764482860845e-05, + "loss": 0.069, "step": 98050 }, { "epoch": 4.58, - "learning_rate": 1.090009844826778e-05, - "loss": 0.0581, + "learning_rate": 2.0914296413024825e-05, + "loss": 0.0304, "step": 98055 }, { "epoch": 4.58, - "learning_rate": 1.089962964699264e-05, - "loss": 0.0991, + "learning_rate": 2.0913828343188805e-05, + "loss": 0.1036, "step": 98060 }, { "epoch": 4.58, - "learning_rate": 1.08991608457175e-05, - "loss": 0.0774, + "learning_rate": 2.0913360273352785e-05, + "loss": 0.1182, "step": 98065 }, { "epoch": 4.58, - "learning_rate": 1.0898692044442363e-05, - "loss": 0.1783, + "learning_rate": 2.0912892203516765e-05, + "loss": 0.073, "step": 98070 }, { "epoch": 4.58, - "learning_rate": 1.0898223243167223e-05, - "loss": 0.1901, + "learning_rate": 2.0912424133680744e-05, + "loss": 0.3434, "step": 98075 }, { "epoch": 4.58, - "learning_rate": 1.0897754441892083e-05, - "loss": 0.065, + "learning_rate": 2.0911956063844724e-05, + "loss": 0.0741, "step": 98080 }, { "epoch": 4.58, - "learning_rate": 1.0897285640616944e-05, - "loss": 0.0115, + "learning_rate": 2.0911487994008707e-05, + "loss": 0.0131, "step": 98085 }, { "epoch": 4.58, - "learning_rate": 1.0896816839341804e-05, - "loss": 0.0487, + "learning_rate": 2.0911019924172687e-05, + "loss": 0.0095, "step": 98090 }, { "epoch": 4.58, - "learning_rate": 1.0896348038066664e-05, - "loss": 0.0425, + "learning_rate": 2.0910551854336667e-05, + "loss": 0.0274, "step": 98095 }, { "epoch": 4.58, - "learning_rate": 1.0895879236791524e-05, - "loss": 0.0566, + "learning_rate": 2.0910083784500647e-05, + "loss": 0.0364, "step": 98100 }, { "epoch": 4.58, - "learning_rate": 1.0895410435516384e-05, - "loss": 0.0341, + "learning_rate": 2.090961571466463e-05, + "loss": 0.0153, "step": 98105 }, { "epoch": 4.58, - "learning_rate": 1.0894941634241246e-05, - "loss": 0.0749, + "learning_rate": 2.090914764482861e-05, + "loss": 0.1289, "step": 98110 }, { "epoch": 4.58, - "learning_rate": 1.0894472832966107e-05, - "loss": 0.0787, + "learning_rate": 2.090867957499259e-05, + "loss": 0.175, "step": 98115 }, { "epoch": 4.58, - "learning_rate": 1.0894004031690967e-05, - "loss": 0.0786, + "learning_rate": 2.0908211505156573e-05, + "loss": 0.1068, "step": 98120 }, { "epoch": 4.58, - "learning_rate": 1.0893535230415829e-05, - "loss": 0.1973, + "learning_rate": 2.090774343532055e-05, + "loss": 0.186, "step": 98125 }, { "epoch": 4.58, - "learning_rate": 1.0893066429140689e-05, - "loss": 0.1024, + "learning_rate": 2.090727536548453e-05, + "loss": 0.0854, "step": 98130 }, { "epoch": 4.58, - "learning_rate": 1.0892597627865549e-05, - "loss": 0.0435, + "learning_rate": 2.090680729564851e-05, + "loss": 0.03, "step": 98135 }, { "epoch": 4.58, - "learning_rate": 1.0892128826590409e-05, - "loss": 0.0598, + "learning_rate": 2.0906339225812492e-05, + "loss": 0.026, "step": 98140 }, { "epoch": 4.58, - "learning_rate": 1.0891660025315269e-05, - "loss": 0.0788, + "learning_rate": 2.0905871155976472e-05, + "loss": 0.0184, "step": 98145 }, { "epoch": 4.58, - "learning_rate": 1.089119122404013e-05, - "loss": 0.0257, + "learning_rate": 2.0905403086140452e-05, + "loss": 0.0162, "step": 98150 }, { "epoch": 4.58, - "learning_rate": 1.089072242276499e-05, - "loss": 0.0886, + "learning_rate": 2.090493501630443e-05, + "loss": 0.0702, "step": 98155 }, { "epoch": 4.58, - "learning_rate": 1.0890253621489852e-05, - "loss": 0.0717, + "learning_rate": 2.0904466946468415e-05, + "loss": 0.0821, "step": 98160 }, { "epoch": 4.58, - "learning_rate": 1.0889784820214713e-05, - "loss": 0.1966, + "learning_rate": 2.0903998876632395e-05, + "loss": 0.0926, "step": 98165 }, { "epoch": 4.58, - "learning_rate": 1.0889316018939573e-05, - "loss": 0.1449, + "learning_rate": 2.0903530806796374e-05, + "loss": 0.0871, "step": 98170 }, { "epoch": 4.58, - "learning_rate": 1.0888847217664433e-05, - "loss": 0.1456, + "learning_rate": 2.0903062736960358e-05, + "loss": 0.3495, "step": 98175 }, { "epoch": 4.58, - "learning_rate": 1.0888378416389293e-05, - "loss": 0.0836, + "learning_rate": 2.0902594667124337e-05, + "loss": 0.0807, "step": 98180 }, { "epoch": 4.58, - "learning_rate": 1.0887909615114153e-05, - "loss": 0.0204, + "learning_rate": 2.0902126597288317e-05, + "loss": 0.0098, "step": 98185 }, { "epoch": 4.58, - "learning_rate": 1.0887440813839015e-05, - "loss": 0.0212, + "learning_rate": 2.0901658527452294e-05, + "loss": 0.0199, "step": 98190 }, { "epoch": 4.58, - "learning_rate": 1.0886972012563875e-05, - "loss": 0.0514, + "learning_rate": 2.0901190457616277e-05, + "loss": 0.019, "step": 98195 }, { "epoch": 4.58, - "learning_rate": 1.0886503211288735e-05, - "loss": 0.0795, + "learning_rate": 2.0900722387780257e-05, + "loss": 0.056, "step": 98200 }, { "epoch": 4.58, - "learning_rate": 1.0886034410013595e-05, - "loss": 0.0567, + "learning_rate": 2.0900254317944237e-05, + "loss": 0.0589, "step": 98205 }, { "epoch": 4.58, - "learning_rate": 1.0885565608738458e-05, - "loss": 0.0893, + "learning_rate": 2.0899786248108216e-05, + "loss": 0.0745, "step": 98210 }, { "epoch": 4.58, - "learning_rate": 1.0885096807463318e-05, - "loss": 0.1304, + "learning_rate": 2.08993181782722e-05, + "loss": 0.0798, "step": 98215 }, { "epoch": 4.58, - "learning_rate": 1.0884628006188178e-05, - "loss": 0.2232, + "learning_rate": 2.089885010843618e-05, + "loss": 0.0656, "step": 98220 }, { "epoch": 4.58, - "learning_rate": 1.0884159204913038e-05, - "loss": 0.3183, + "learning_rate": 2.089838203860016e-05, + "loss": 0.3903, "step": 98225 }, { "epoch": 4.58, - "learning_rate": 1.08836904036379e-05, - "loss": 0.0753, + "learning_rate": 2.089791396876414e-05, + "loss": 0.0913, "step": 98230 }, { "epoch": 4.58, - "learning_rate": 1.088322160236276e-05, - "loss": 0.025, + "learning_rate": 2.0897445898928122e-05, + "loss": 0.0431, "step": 98235 }, { "epoch": 4.58, - "learning_rate": 1.088275280108762e-05, - "loss": 0.018, + "learning_rate": 2.0896977829092102e-05, + "loss": 0.0301, "step": 98240 }, { "epoch": 4.58, - "learning_rate": 1.0882283999812479e-05, - "loss": 0.0605, + "learning_rate": 2.0896509759256082e-05, + "loss": 0.1145, "step": 98245 }, { "epoch": 4.58, - "learning_rate": 1.0881815198537339e-05, - "loss": 0.0564, + "learning_rate": 2.0896041689420065e-05, + "loss": 0.1385, "step": 98250 }, { "epoch": 4.58, - "learning_rate": 1.0881346397262202e-05, - "loss": 0.1028, + "learning_rate": 2.089557361958404e-05, + "loss": 0.0555, "step": 98255 }, { "epoch": 4.58, - "learning_rate": 1.0880877595987062e-05, - "loss": 0.0965, + "learning_rate": 2.089510554974802e-05, + "loss": 0.1079, "step": 98260 }, { "epoch": 4.59, - "learning_rate": 1.0880408794711922e-05, - "loss": 0.0909, + "learning_rate": 2.0894637479912e-05, + "loss": 0.1355, "step": 98265 }, { "epoch": 4.59, - "learning_rate": 1.0879939993436784e-05, - "loss": 0.1554, + "learning_rate": 2.0894169410075984e-05, + "loss": 0.1333, "step": 98270 }, { "epoch": 4.59, - "learning_rate": 1.0879471192161644e-05, - "loss": 0.2125, + "learning_rate": 2.0893701340239964e-05, + "loss": 0.2761, "step": 98275 }, { "epoch": 4.59, - "learning_rate": 1.0879002390886504e-05, - "loss": 0.0855, + "learning_rate": 2.0893233270403944e-05, + "loss": 0.061, "step": 98280 }, { "epoch": 4.59, - "learning_rate": 1.0878533589611364e-05, - "loss": 0.0064, + "learning_rate": 2.0892765200567924e-05, + "loss": 0.0221, "step": 98285 }, { "epoch": 4.59, - "learning_rate": 1.0878064788336225e-05, - "loss": 0.0419, + "learning_rate": 2.0892297130731907e-05, + "loss": 0.0316, "step": 98290 }, { "epoch": 4.59, - "learning_rate": 1.0877595987061085e-05, - "loss": 0.0686, + "learning_rate": 2.0891829060895887e-05, + "loss": 0.0482, "step": 98295 }, { "epoch": 4.59, - "learning_rate": 1.0877127185785947e-05, - "loss": 0.0698, + "learning_rate": 2.0891360991059867e-05, + "loss": 0.0629, "step": 98300 }, { "epoch": 4.59, - "learning_rate": 1.0876658384510807e-05, - "loss": 0.071, + "learning_rate": 2.089089292122385e-05, + "loss": 0.1529, "step": 98305 }, { "epoch": 4.59, - "learning_rate": 1.0876189583235668e-05, - "loss": 0.041, + "learning_rate": 2.089042485138783e-05, + "loss": 0.1144, "step": 98310 }, { "epoch": 4.59, - "learning_rate": 1.0875720781960528e-05, - "loss": 0.1351, + "learning_rate": 2.0889956781551806e-05, + "loss": 0.1029, "step": 98315 }, { "epoch": 4.59, - "learning_rate": 1.0875251980685388e-05, - "loss": 0.0993, + "learning_rate": 2.0889488711715786e-05, + "loss": 0.1392, "step": 98320 }, { "epoch": 4.59, - "learning_rate": 1.0874783179410248e-05, - "loss": 0.2124, + "learning_rate": 2.088902064187977e-05, + "loss": 0.2247, "step": 98325 }, { "epoch": 4.59, - "learning_rate": 1.087431437813511e-05, - "loss": 0.1036, + "learning_rate": 2.088855257204375e-05, + "loss": 0.1048, "step": 98330 }, { "epoch": 4.59, - "learning_rate": 1.087384557685997e-05, - "loss": 0.0395, + "learning_rate": 2.088808450220773e-05, + "loss": 0.3264, "step": 98335 }, { "epoch": 4.59, - "learning_rate": 1.087337677558483e-05, - "loss": 0.0865, + "learning_rate": 2.088761643237171e-05, + "loss": 0.0375, "step": 98340 }, { "epoch": 4.59, - "learning_rate": 1.087290797430969e-05, - "loss": 0.0526, + "learning_rate": 2.0887148362535692e-05, + "loss": 0.0682, "step": 98345 }, { "epoch": 4.59, - "learning_rate": 1.0872439173034553e-05, - "loss": 0.0604, + "learning_rate": 2.088668029269967e-05, + "loss": 0.0422, "step": 98350 }, { "epoch": 4.59, - "learning_rate": 1.0871970371759413e-05, - "loss": 0.0464, + "learning_rate": 2.088621222286365e-05, + "loss": 0.0516, "step": 98355 }, { "epoch": 4.59, - "learning_rate": 1.0871501570484273e-05, - "loss": 0.1454, + "learning_rate": 2.0885744153027635e-05, + "loss": 0.0741, "step": 98360 }, { "epoch": 4.59, - "learning_rate": 1.0871032769209133e-05, - "loss": 0.1192, + "learning_rate": 2.0885276083191614e-05, + "loss": 0.1034, "step": 98365 }, { "epoch": 4.59, - "learning_rate": 1.0870563967933994e-05, - "loss": 0.1233, + "learning_rate": 2.0884808013355594e-05, + "loss": 0.1312, "step": 98370 }, { "epoch": 4.59, - "learning_rate": 1.0870095166658854e-05, - "loss": 0.2417, + "learning_rate": 2.0884339943519574e-05, + "loss": 0.1713, "step": 98375 }, { "epoch": 4.59, - "learning_rate": 1.0869626365383714e-05, - "loss": 0.0797, + "learning_rate": 2.0883871873683554e-05, + "loss": 0.0827, "step": 98380 }, { "epoch": 4.59, - "learning_rate": 1.0869157564108574e-05, - "loss": 0.0076, + "learning_rate": 2.0883403803847534e-05, + "loss": 0.0397, "step": 98385 }, { "epoch": 4.59, - "learning_rate": 1.0868688762833434e-05, - "loss": 0.0633, + "learning_rate": 2.0882935734011514e-05, + "loss": 0.0463, "step": 98390 }, { "epoch": 4.59, - "learning_rate": 1.0868219961558297e-05, - "loss": 0.0569, + "learning_rate": 2.0882467664175493e-05, + "loss": 0.0207, "step": 98395 }, { "epoch": 4.59, - "learning_rate": 1.0867751160283157e-05, - "loss": 0.0983, + "learning_rate": 2.0881999594339477e-05, + "loss": 0.0304, "step": 98400 }, { "epoch": 4.59, - "learning_rate": 1.0867282359008017e-05, - "loss": 0.0639, + "learning_rate": 2.0881531524503456e-05, + "loss": 0.0588, "step": 98405 }, { "epoch": 4.59, - "learning_rate": 1.0866813557732879e-05, - "loss": 0.0683, + "learning_rate": 2.0881063454667436e-05, + "loss": 0.0982, "step": 98410 }, { "epoch": 4.59, - "learning_rate": 1.0866344756457739e-05, - "loss": 0.207, + "learning_rate": 2.0880595384831416e-05, + "loss": 0.1873, "step": 98415 }, { "epoch": 4.59, - "learning_rate": 1.0865875955182599e-05, - "loss": 0.2157, + "learning_rate": 2.08801273149954e-05, + "loss": 0.1312, "step": 98420 }, { "epoch": 4.59, - "learning_rate": 1.0865407153907459e-05, - "loss": 0.1907, + "learning_rate": 2.087965924515938e-05, + "loss": 0.2277, "step": 98425 }, { "epoch": 4.59, - "learning_rate": 1.0864938352632319e-05, - "loss": 0.0587, + "learning_rate": 2.087919117532336e-05, + "loss": 0.0675, "step": 98430 }, { "epoch": 4.59, - "learning_rate": 1.086446955135718e-05, - "loss": 0.0251, + "learning_rate": 2.0878723105487342e-05, + "loss": 0.0431, "step": 98435 }, { "epoch": 4.59, - "learning_rate": 1.0864000750082042e-05, - "loss": 0.0553, + "learning_rate": 2.0878255035651322e-05, + "loss": 0.0308, "step": 98440 }, { "epoch": 4.59, - "learning_rate": 1.0863531948806902e-05, - "loss": 0.0419, + "learning_rate": 2.0877786965815298e-05, + "loss": 0.0769, "step": 98445 }, { "epoch": 4.59, - "learning_rate": 1.0863063147531764e-05, - "loss": 0.0544, + "learning_rate": 2.0877318895979278e-05, + "loss": 0.0526, "step": 98450 }, { "epoch": 4.59, - "learning_rate": 1.0862594346256623e-05, - "loss": 0.0795, + "learning_rate": 2.087685082614326e-05, + "loss": 0.0824, "step": 98455 }, { "epoch": 4.59, - "learning_rate": 1.0862125544981483e-05, - "loss": 0.1192, + "learning_rate": 2.087638275630724e-05, + "loss": 0.0558, "step": 98460 }, { "epoch": 4.59, - "learning_rate": 1.0861656743706343e-05, - "loss": 0.0826, + "learning_rate": 2.087591468647122e-05, + "loss": 0.1311, "step": 98465 }, { "epoch": 4.59, - "learning_rate": 1.0861187942431203e-05, - "loss": 0.2061, + "learning_rate": 2.08754466166352e-05, + "loss": 0.1184, "step": 98470 }, { "epoch": 4.59, - "learning_rate": 1.0860719141156065e-05, - "loss": 0.2154, + "learning_rate": 2.0874978546799184e-05, + "loss": 0.1942, "step": 98475 }, { "epoch": 4.6, - "learning_rate": 1.0860250339880925e-05, - "loss": 0.0606, + "learning_rate": 2.0874510476963164e-05, + "loss": 0.0894, "step": 98480 }, { "epoch": 4.6, - "learning_rate": 1.0859781538605786e-05, - "loss": 0.0192, + "learning_rate": 2.0874042407127144e-05, + "loss": 0.0538, "step": 98485 }, { "epoch": 4.6, - "learning_rate": 1.0859312737330648e-05, - "loss": 0.0225, + "learning_rate": 2.0873574337291127e-05, + "loss": 0.0534, "step": 98490 }, { "epoch": 4.6, - "learning_rate": 1.0858843936055508e-05, - "loss": 0.0393, + "learning_rate": 2.0873106267455107e-05, + "loss": 0.0961, "step": 98495 }, { "epoch": 4.6, - "learning_rate": 1.0858375134780368e-05, + "learning_rate": 2.0872638197619086e-05, "loss": 0.0352, "step": 98500 }, { "epoch": 4.6, - "learning_rate": 1.0857906333505228e-05, - "loss": 0.0757, + "learning_rate": 2.0872170127783063e-05, + "loss": 0.044, "step": 98505 }, { "epoch": 4.6, - "learning_rate": 1.0857437532230088e-05, - "loss": 0.0837, + "learning_rate": 2.0871702057947046e-05, + "loss": 0.0946, "step": 98510 }, { "epoch": 4.6, - "learning_rate": 1.085696873095495e-05, - "loss": 0.1346, + "learning_rate": 2.0871233988111026e-05, + "loss": 0.0603, "step": 98515 }, { "epoch": 4.6, - "learning_rate": 1.085649992967981e-05, - "loss": 0.1277, + "learning_rate": 2.0870765918275006e-05, + "loss": 0.1384, "step": 98520 }, { "epoch": 4.6, - "learning_rate": 1.085603112840467e-05, - "loss": 0.1872, + "learning_rate": 2.0870297848438986e-05, + "loss": 0.1736, "step": 98525 }, { "epoch": 4.6, - "learning_rate": 1.085556232712953e-05, - "loss": 0.0699, + "learning_rate": 2.086982977860297e-05, + "loss": 0.0964, "step": 98530 }, { "epoch": 4.6, - "learning_rate": 1.0855093525854393e-05, - "loss": 0.0043, + "learning_rate": 2.086936170876695e-05, + "loss": 0.0032, "step": 98535 }, { "epoch": 4.6, - "learning_rate": 1.0854624724579253e-05, - "loss": 0.0165, + "learning_rate": 2.086889363893093e-05, + "loss": 0.0356, "step": 98540 }, { "epoch": 4.6, - "learning_rate": 1.0854155923304112e-05, - "loss": 0.0389, + "learning_rate": 2.086842556909491e-05, + "loss": 0.0226, "step": 98545 }, { "epoch": 4.6, - "learning_rate": 1.0853687122028972e-05, - "loss": 0.0459, + "learning_rate": 2.086795749925889e-05, + "loss": 0.0291, "step": 98550 }, { "epoch": 4.6, - "learning_rate": 1.0853218320753834e-05, - "loss": 0.0789, + "learning_rate": 2.086748942942287e-05, + "loss": 0.0203, "step": 98555 }, { "epoch": 4.6, - "learning_rate": 1.0852749519478694e-05, - "loss": 0.0961, + "learning_rate": 2.086702135958685e-05, + "loss": 0.1087, "step": 98560 }, { "epoch": 4.6, - "learning_rate": 1.0852280718203554e-05, - "loss": 0.0978, + "learning_rate": 2.0866553289750834e-05, + "loss": 0.0847, "step": 98565 }, { "epoch": 4.6, - "learning_rate": 1.0851811916928414e-05, - "loss": 0.1127, + "learning_rate": 2.086608521991481e-05, + "loss": 0.1294, "step": 98570 }, { "epoch": 4.6, - "learning_rate": 1.0851343115653274e-05, - "loss": 0.2313, + "learning_rate": 2.086561715007879e-05, + "loss": 0.0785, "step": 98575 }, { "epoch": 4.6, - "learning_rate": 1.0850874314378137e-05, - "loss": 0.0617, + "learning_rate": 2.086514908024277e-05, + "loss": 0.1081, "step": 98580 }, { "epoch": 4.6, - "learning_rate": 1.0850405513102997e-05, - "loss": 0.0073, + "learning_rate": 2.0864681010406754e-05, + "loss": 0.0345, "step": 98585 }, { "epoch": 4.6, - "learning_rate": 1.0849936711827857e-05, - "loss": 0.0523, + "learning_rate": 2.0864212940570733e-05, + "loss": 0.0465, "step": 98590 }, { "epoch": 4.6, - "learning_rate": 1.0849467910552719e-05, - "loss": 0.0491, + "learning_rate": 2.0863744870734713e-05, + "loss": 0.0303, "step": 98595 }, { "epoch": 4.6, - "learning_rate": 1.0848999109277578e-05, - "loss": 0.0915, + "learning_rate": 2.0863276800898696e-05, + "loss": 0.0424, "step": 98600 }, { "epoch": 4.6, - "learning_rate": 1.0848530308002438e-05, - "loss": 0.1004, + "learning_rate": 2.0862808731062676e-05, + "loss": 0.0265, "step": 98605 }, { "epoch": 4.6, - "learning_rate": 1.0848061506727298e-05, - "loss": 0.0599, + "learning_rate": 2.0862340661226656e-05, + "loss": 0.0638, "step": 98610 }, { "epoch": 4.6, - "learning_rate": 1.0847592705452158e-05, - "loss": 0.1543, + "learning_rate": 2.0861872591390636e-05, + "loss": 0.1111, "step": 98615 }, { "epoch": 4.6, - "learning_rate": 1.084712390417702e-05, - "loss": 0.1006, + "learning_rate": 2.086140452155462e-05, + "loss": 0.2167, "step": 98620 }, { "epoch": 4.6, - "learning_rate": 1.0846655102901882e-05, - "loss": 0.3397, + "learning_rate": 2.08609364517186e-05, + "loss": 0.2568, "step": 98625 }, { "epoch": 4.6, - "learning_rate": 1.0846186301626741e-05, - "loss": 0.0878, + "learning_rate": 2.086046838188258e-05, + "loss": 0.0598, "step": 98630 }, { "epoch": 4.6, - "learning_rate": 1.0845717500351603e-05, - "loss": 0.0588, + "learning_rate": 2.0860000312046555e-05, + "loss": 0.0151, "step": 98635 }, { "epoch": 4.6, - "learning_rate": 1.0845248699076463e-05, - "loss": 0.0212, + "learning_rate": 2.0859532242210538e-05, + "loss": 0.0752, "step": 98640 }, { "epoch": 4.6, - "learning_rate": 1.0844779897801323e-05, - "loss": 0.0454, + "learning_rate": 2.0859064172374518e-05, + "loss": 0.0299, "step": 98645 }, { "epoch": 4.6, - "learning_rate": 1.0844311096526183e-05, - "loss": 0.0405, + "learning_rate": 2.0858596102538498e-05, + "loss": 0.0447, "step": 98650 }, { "epoch": 4.6, - "learning_rate": 1.0843842295251043e-05, - "loss": 0.0378, + "learning_rate": 2.0858128032702478e-05, + "loss": 0.038, "step": 98655 }, { "epoch": 4.6, - "learning_rate": 1.0843373493975904e-05, - "loss": 0.1456, + "learning_rate": 2.085765996286646e-05, + "loss": 0.0746, "step": 98660 }, { "epoch": 4.6, - "learning_rate": 1.0842904692700764e-05, - "loss": 0.1298, + "learning_rate": 2.085719189303044e-05, + "loss": 0.0865, "step": 98665 }, { "epoch": 4.6, - "learning_rate": 1.0842435891425624e-05, - "loss": 0.1485, + "learning_rate": 2.085672382319442e-05, + "loss": 0.1562, "step": 98670 }, { "epoch": 4.6, - "learning_rate": 1.0841967090150488e-05, - "loss": 0.302, + "learning_rate": 2.0856255753358404e-05, + "loss": 0.2224, "step": 98675 }, { "epoch": 4.6, - "learning_rate": 1.0841498288875348e-05, - "loss": 0.0655, + "learning_rate": 2.0855787683522384e-05, + "loss": 0.1086, "step": 98680 }, { "epoch": 4.6, - "learning_rate": 1.0841029487600208e-05, - "loss": 0.0255, + "learning_rate": 2.0855319613686363e-05, + "loss": 0.0242, "step": 98685 }, { "epoch": 4.61, - "learning_rate": 1.0840560686325067e-05, - "loss": 0.0089, + "learning_rate": 2.0854851543850343e-05, + "loss": 0.0348, "step": 98690 }, { "epoch": 4.61, - "learning_rate": 1.0840091885049927e-05, - "loss": 0.0543, + "learning_rate": 2.0854383474014323e-05, + "loss": 0.0112, "step": 98695 }, { "epoch": 4.61, - "learning_rate": 1.0839623083774789e-05, - "loss": 0.0458, + "learning_rate": 2.0853915404178303e-05, + "loss": 0.0405, "step": 98700 }, { "epoch": 4.61, - "learning_rate": 1.0839154282499649e-05, - "loss": 0.039, + "learning_rate": 2.0853447334342283e-05, + "loss": 0.0831, "step": 98705 }, { "epoch": 4.61, - "learning_rate": 1.0838685481224509e-05, - "loss": 0.0698, + "learning_rate": 2.0852979264506263e-05, + "loss": 0.0762, "step": 98710 }, { "epoch": 4.61, - "learning_rate": 1.0838216679949369e-05, - "loss": 0.1055, + "learning_rate": 2.0852511194670246e-05, + "loss": 0.1317, "step": 98715 }, { "epoch": 4.61, - "learning_rate": 1.0837747878674232e-05, - "loss": 0.3573, + "learning_rate": 2.0852043124834226e-05, + "loss": 0.2114, "step": 98720 }, { "epoch": 4.61, - "learning_rate": 1.0837279077399092e-05, - "loss": 0.3102, + "learning_rate": 2.0851575054998205e-05, + "loss": 0.3936, "step": 98725 }, { "epoch": 4.61, - "learning_rate": 1.0836810276123952e-05, - "loss": 0.0689, + "learning_rate": 2.085110698516219e-05, + "loss": 0.0721, "step": 98730 }, { "epoch": 4.61, - "learning_rate": 1.0836341474848812e-05, - "loss": 0.0161, + "learning_rate": 2.085063891532617e-05, + "loss": 0.0333, "step": 98735 }, { "epoch": 4.61, - "learning_rate": 1.0835872673573674e-05, - "loss": 0.0524, + "learning_rate": 2.0850170845490148e-05, + "loss": 0.0335, "step": 98740 }, { "epoch": 4.61, - "learning_rate": 1.0835403872298533e-05, - "loss": 0.0708, + "learning_rate": 2.0849702775654128e-05, + "loss": 0.0636, "step": 98745 }, { "epoch": 4.61, - "learning_rate": 1.0834935071023393e-05, - "loss": 0.0438, + "learning_rate": 2.084923470581811e-05, + "loss": 0.0259, "step": 98750 }, { "epoch": 4.61, - "learning_rate": 1.0834466269748253e-05, - "loss": 0.1151, + "learning_rate": 2.084876663598209e-05, + "loss": 0.0551, "step": 98755 }, { "epoch": 4.61, - "learning_rate": 1.0833997468473115e-05, - "loss": 0.0805, + "learning_rate": 2.0848298566146067e-05, + "loss": 0.0766, "step": 98760 }, { "epoch": 4.61, - "learning_rate": 1.0833528667197977e-05, - "loss": 0.1929, + "learning_rate": 2.0847830496310047e-05, + "loss": 0.1142, "step": 98765 }, { "epoch": 4.61, - "learning_rate": 1.0833059865922837e-05, - "loss": 0.1317, + "learning_rate": 2.084736242647403e-05, + "loss": 0.1535, "step": 98770 }, { "epoch": 4.61, - "learning_rate": 1.0832591064647696e-05, - "loss": 0.2985, + "learning_rate": 2.084689435663801e-05, + "loss": 0.1984, "step": 98775 }, { "epoch": 4.61, - "learning_rate": 1.0832122263372558e-05, - "loss": 0.0765, + "learning_rate": 2.084642628680199e-05, + "loss": 0.0875, "step": 98780 }, { "epoch": 4.61, - "learning_rate": 1.0831653462097418e-05, - "loss": 0.0017, + "learning_rate": 2.0845958216965973e-05, + "loss": 0.0134, "step": 98785 }, { "epoch": 4.61, - "learning_rate": 1.0831184660822278e-05, - "loss": 0.0453, + "learning_rate": 2.0845490147129953e-05, + "loss": 0.0271, "step": 98790 }, { "epoch": 4.61, - "learning_rate": 1.0830715859547138e-05, - "loss": 0.037, + "learning_rate": 2.0845022077293933e-05, + "loss": 0.058, "step": 98795 }, { "epoch": 4.61, - "learning_rate": 1.0830247058272e-05, - "loss": 0.0671, + "learning_rate": 2.0844554007457913e-05, + "loss": 0.0435, "step": 98800 }, { "epoch": 4.61, - "learning_rate": 1.082977825699686e-05, - "loss": 0.057, + "learning_rate": 2.0844085937621896e-05, + "loss": 0.1078, "step": 98805 }, { "epoch": 4.61, - "learning_rate": 1.0829309455721721e-05, - "loss": 0.1384, + "learning_rate": 2.0843617867785876e-05, + "loss": 0.1175, "step": 98810 }, { "epoch": 4.61, - "learning_rate": 1.0828840654446581e-05, - "loss": 0.1596, + "learning_rate": 2.0843149797949856e-05, + "loss": 0.0787, "step": 98815 }, { "epoch": 4.61, - "learning_rate": 1.0828371853171443e-05, - "loss": 0.286, + "learning_rate": 2.0842681728113835e-05, + "loss": 0.1522, "step": 98820 }, { "epoch": 4.61, - "learning_rate": 1.0827903051896303e-05, - "loss": 0.2032, + "learning_rate": 2.0842213658277815e-05, + "loss": 0.1767, "step": 98825 }, { "epoch": 4.61, - "learning_rate": 1.0827434250621163e-05, - "loss": 0.0679, + "learning_rate": 2.0841745588441795e-05, + "loss": 0.0421, "step": 98830 }, { "epoch": 4.61, - "learning_rate": 1.0826965449346022e-05, - "loss": 0.0172, + "learning_rate": 2.0841277518605775e-05, + "loss": 0.1056, "step": 98835 }, { "epoch": 4.61, - "learning_rate": 1.0826496648070884e-05, - "loss": 0.035, + "learning_rate": 2.0840809448769755e-05, + "loss": 0.0369, "step": 98840 }, { "epoch": 4.61, - "learning_rate": 1.0826027846795744e-05, - "loss": 0.049, + "learning_rate": 2.0840341378933738e-05, + "loss": 0.0254, "step": 98845 }, { "epoch": 4.61, - "learning_rate": 1.0825559045520604e-05, - "loss": 0.0568, + "learning_rate": 2.0839873309097718e-05, + "loss": 0.0736, "step": 98850 }, { "epoch": 4.61, - "learning_rate": 1.0825090244245464e-05, - "loss": 0.0966, + "learning_rate": 2.0839405239261698e-05, + "loss": 0.0842, "step": 98855 }, { "epoch": 4.61, - "learning_rate": 1.0824621442970327e-05, - "loss": 0.0621, + "learning_rate": 2.083893716942568e-05, + "loss": 0.0983, "step": 98860 }, { "epoch": 4.61, - "learning_rate": 1.0824152641695187e-05, - "loss": 0.0731, + "learning_rate": 2.083846909958966e-05, + "loss": 0.1, "step": 98865 }, { "epoch": 4.61, - "learning_rate": 1.0823683840420047e-05, - "loss": 0.1169, + "learning_rate": 2.083800102975364e-05, + "loss": 0.1182, "step": 98870 }, { "epoch": 4.61, - "learning_rate": 1.0823215039144907e-05, - "loss": 0.1449, + "learning_rate": 2.083753295991762e-05, + "loss": 0.127, "step": 98875 }, { "epoch": 4.61, - "learning_rate": 1.0822746237869769e-05, - "loss": 0.0552, + "learning_rate": 2.0837064890081603e-05, + "loss": 0.061, "step": 98880 }, { "epoch": 4.61, - "learning_rate": 1.0822277436594629e-05, - "loss": 0.0256, + "learning_rate": 2.083659682024558e-05, + "loss": 0.0253, "step": 98885 }, { "epoch": 4.61, - "learning_rate": 1.0821808635319489e-05, - "loss": 0.0353, + "learning_rate": 2.083612875040956e-05, + "loss": 0.0153, "step": 98890 }, { "epoch": 4.61, - "learning_rate": 1.0821339834044348e-05, - "loss": 0.0843, + "learning_rate": 2.083566068057354e-05, + "loss": 0.079, "step": 98895 }, { "epoch": 4.61, - "learning_rate": 1.0820871032769208e-05, - "loss": 0.0545, + "learning_rate": 2.0835192610737523e-05, + "loss": 0.0493, "step": 98900 }, { "epoch": 4.62, - "learning_rate": 1.0820402231494072e-05, - "loss": 0.0478, + "learning_rate": 2.0834724540901503e-05, + "loss": 0.0791, "step": 98905 }, { "epoch": 4.62, - "learning_rate": 1.0819933430218932e-05, - "loss": 0.033, + "learning_rate": 2.0834256471065482e-05, + "loss": 0.0651, "step": 98910 }, { "epoch": 4.62, - "learning_rate": 1.0819464628943792e-05, - "loss": 0.0588, + "learning_rate": 2.0833788401229466e-05, + "loss": 0.0534, "step": 98915 }, { "epoch": 4.62, - "learning_rate": 1.0818995827668653e-05, - "loss": 0.1776, + "learning_rate": 2.0833320331393445e-05, + "loss": 0.1466, "step": 98920 }, { "epoch": 4.62, - "learning_rate": 1.0818527026393513e-05, - "loss": 0.3594, + "learning_rate": 2.0832852261557425e-05, + "loss": 0.1365, "step": 98925 }, { "epoch": 4.62, - "learning_rate": 1.0818058225118373e-05, - "loss": 0.0635, + "learning_rate": 2.0832384191721405e-05, + "loss": 0.0628, "step": 98930 }, { "epoch": 4.62, - "learning_rate": 1.0817589423843233e-05, - "loss": 0.0541, + "learning_rate": 2.0831916121885388e-05, + "loss": 0.011, "step": 98935 }, { "epoch": 4.62, - "learning_rate": 1.0817120622568093e-05, - "loss": 0.0135, + "learning_rate": 2.0831448052049368e-05, + "loss": 0.0061, "step": 98940 }, { "epoch": 4.62, - "learning_rate": 1.0816651821292955e-05, - "loss": 0.044, + "learning_rate": 2.0830979982213348e-05, + "loss": 0.031, "step": 98945 }, { "epoch": 4.62, - "learning_rate": 1.0816183020017816e-05, - "loss": 0.039, + "learning_rate": 2.0830511912377324e-05, + "loss": 0.048, "step": 98950 }, { "epoch": 4.62, - "learning_rate": 1.0815714218742676e-05, - "loss": 0.0487, + "learning_rate": 2.0830043842541307e-05, + "loss": 0.0175, "step": 98955 }, { "epoch": 4.62, - "learning_rate": 1.0815245417467538e-05, - "loss": 0.0814, + "learning_rate": 2.0829575772705287e-05, + "loss": 0.1233, "step": 98960 }, { "epoch": 4.62, - "learning_rate": 1.0814776616192398e-05, - "loss": 0.1315, + "learning_rate": 2.0829107702869267e-05, + "loss": 0.1211, "step": 98965 }, { "epoch": 4.62, - "learning_rate": 1.0814307814917258e-05, - "loss": 0.1204, + "learning_rate": 2.082863963303325e-05, + "loss": 0.1557, "step": 98970 }, { "epoch": 4.62, - "learning_rate": 1.0813839013642118e-05, - "loss": 0.2536, + "learning_rate": 2.082817156319723e-05, + "loss": 0.2416, "step": 98975 }, { "epoch": 4.62, - "learning_rate": 1.0813370212366977e-05, - "loss": 0.0815, + "learning_rate": 2.082770349336121e-05, + "loss": 0.058, "step": 98980 }, { "epoch": 4.62, - "learning_rate": 1.0812901411091839e-05, - "loss": 0.0141, + "learning_rate": 2.082723542352519e-05, + "loss": 0.0111, "step": 98985 }, { "epoch": 4.62, - "learning_rate": 1.0812432609816699e-05, - "loss": 0.0449, + "learning_rate": 2.0826767353689173e-05, + "loss": 0.0553, "step": 98990 }, { "epoch": 4.62, - "learning_rate": 1.0811963808541559e-05, - "loss": 0.0351, + "learning_rate": 2.0826299283853153e-05, + "loss": 0.0513, "step": 98995 }, { "epoch": 4.62, - "learning_rate": 1.0811495007266422e-05, - "loss": 0.0248, + "learning_rate": 2.0825831214017133e-05, + "loss": 0.0733, "step": 99000 }, { "epoch": 4.62, - "learning_rate": 1.0811026205991282e-05, - "loss": 0.0664, + "learning_rate": 2.0825363144181112e-05, + "loss": 0.0499, "step": 99005 }, { "epoch": 4.62, - "learning_rate": 1.0810557404716142e-05, - "loss": 0.0684, + "learning_rate": 2.0824895074345092e-05, + "loss": 0.0937, "step": 99010 }, { "epoch": 4.62, - "learning_rate": 1.0810088603441002e-05, - "loss": 0.0888, + "learning_rate": 2.0824427004509072e-05, + "loss": 0.0363, "step": 99015 }, { "epoch": 4.62, - "learning_rate": 1.0809619802165862e-05, - "loss": 0.1889, + "learning_rate": 2.0823958934673052e-05, + "loss": 0.2106, "step": 99020 }, { "epoch": 4.62, - "learning_rate": 1.0809151000890724e-05, - "loss": 0.1735, + "learning_rate": 2.082349086483703e-05, + "loss": 0.2605, "step": 99025 }, { "epoch": 4.62, - "learning_rate": 1.0808682199615584e-05, - "loss": 0.0884, + "learning_rate": 2.0823022795001015e-05, + "loss": 0.0686, "step": 99030 }, { "epoch": 4.62, - "learning_rate": 1.0808213398340444e-05, - "loss": 0.0138, + "learning_rate": 2.0822554725164995e-05, + "loss": 0.0306, "step": 99035 }, { "epoch": 4.62, - "learning_rate": 1.0807744597065303e-05, - "loss": 0.039, + "learning_rate": 2.0822086655328975e-05, + "loss": 0.0419, "step": 99040 }, { "epoch": 4.62, - "learning_rate": 1.0807275795790167e-05, - "loss": 0.0216, + "learning_rate": 2.0821618585492958e-05, + "loss": 0.0661, "step": 99045 }, { "epoch": 4.62, - "learning_rate": 1.0806806994515027e-05, - "loss": 0.0492, + "learning_rate": 2.0821150515656938e-05, + "loss": 0.0757, "step": 99050 }, { "epoch": 4.62, - "learning_rate": 1.0806338193239887e-05, - "loss": 0.1089, + "learning_rate": 2.0820682445820917e-05, + "loss": 0.0327, "step": 99055 }, { "epoch": 4.62, - "learning_rate": 1.0805869391964747e-05, - "loss": 0.0727, + "learning_rate": 2.0820214375984897e-05, + "loss": 0.0576, "step": 99060 }, { "epoch": 4.62, - "learning_rate": 1.0805400590689608e-05, - "loss": 0.0978, + "learning_rate": 2.081974630614888e-05, + "loss": 0.1259, "step": 99065 }, { "epoch": 4.62, - "learning_rate": 1.0804931789414468e-05, - "loss": 0.137, + "learning_rate": 2.081927823631286e-05, + "loss": 0.1176, "step": 99070 }, { "epoch": 4.62, - "learning_rate": 1.0804462988139328e-05, - "loss": 0.2223, + "learning_rate": 2.0818810166476837e-05, + "loss": 0.1595, "step": 99075 }, { "epoch": 4.62, - "learning_rate": 1.0803994186864188e-05, - "loss": 0.0674, + "learning_rate": 2.0818342096640816e-05, + "loss": 0.0557, "step": 99080 }, { "epoch": 4.62, - "learning_rate": 1.0803525385589048e-05, - "loss": 0.0336, + "learning_rate": 2.08178740268048e-05, + "loss": 0.0077, "step": 99085 }, { "epoch": 4.62, - "learning_rate": 1.0803056584313911e-05, - "loss": 0.0316, + "learning_rate": 2.081740595696878e-05, + "loss": 0.0514, "step": 99090 }, { "epoch": 4.62, - "learning_rate": 1.0802587783038771e-05, - "loss": 0.0132, + "learning_rate": 2.081693788713276e-05, + "loss": 0.0434, "step": 99095 }, { "epoch": 4.62, - "learning_rate": 1.0802118981763631e-05, - "loss": 0.0319, + "learning_rate": 2.0816469817296743e-05, + "loss": 0.0567, "step": 99100 }, { "epoch": 4.62, - "learning_rate": 1.0801650180488493e-05, - "loss": 0.0532, + "learning_rate": 2.0816001747460722e-05, + "loss": 0.0497, "step": 99105 }, { "epoch": 4.62, - "learning_rate": 1.0801181379213353e-05, - "loss": 0.0389, + "learning_rate": 2.0815533677624702e-05, + "loss": 0.0984, "step": 99110 }, { "epoch": 4.62, - "learning_rate": 1.0800712577938213e-05, - "loss": 0.0504, + "learning_rate": 2.0815065607788682e-05, + "loss": 0.0676, "step": 99115 }, { "epoch": 4.63, - "learning_rate": 1.0800243776663073e-05, - "loss": 0.2022, + "learning_rate": 2.0814597537952665e-05, + "loss": 0.2505, "step": 99120 }, { "epoch": 4.63, - "learning_rate": 1.0799774975387932e-05, - "loss": 0.2379, + "learning_rate": 2.0814129468116645e-05, + "loss": 0.2113, "step": 99125 }, { "epoch": 4.63, - "learning_rate": 1.0799306174112794e-05, - "loss": 0.078, + "learning_rate": 2.0813661398280625e-05, + "loss": 0.0517, "step": 99130 }, { "epoch": 4.63, - "learning_rate": 1.0798837372837656e-05, - "loss": 0.0242, + "learning_rate": 2.0813193328444605e-05, + "loss": 0.0017, "step": 99135 }, { "epoch": 4.63, - "learning_rate": 1.0798368571562516e-05, - "loss": 0.0157, + "learning_rate": 2.0812725258608584e-05, + "loss": 0.0121, "step": 99140 }, { "epoch": 4.63, - "learning_rate": 1.0797899770287377e-05, - "loss": 0.0853, + "learning_rate": 2.0812257188772564e-05, + "loss": 0.0231, "step": 99145 }, { "epoch": 4.63, - "learning_rate": 1.0797430969012237e-05, - "loss": 0.0341, + "learning_rate": 2.0811789118936544e-05, + "loss": 0.0668, "step": 99150 }, { "epoch": 4.63, - "learning_rate": 1.0796962167737097e-05, - "loss": 0.0882, + "learning_rate": 2.0811321049100527e-05, + "loss": 0.0576, "step": 99155 }, { "epoch": 4.63, - "learning_rate": 1.0796493366461957e-05, - "loss": 0.103, + "learning_rate": 2.0810852979264507e-05, + "loss": 0.0693, "step": 99160 }, { "epoch": 4.63, - "learning_rate": 1.0796024565186817e-05, - "loss": 0.0606, + "learning_rate": 2.0810384909428487e-05, + "loss": 0.1418, "step": 99165 }, { "epoch": 4.63, - "learning_rate": 1.0795555763911679e-05, - "loss": 0.1645, + "learning_rate": 2.0809916839592467e-05, + "loss": 0.147, "step": 99170 }, { "epoch": 4.63, - "learning_rate": 1.0795086962636539e-05, - "loss": 0.1672, + "learning_rate": 2.080944876975645e-05, + "loss": 0.341, "step": 99175 }, { "epoch": 4.63, - "learning_rate": 1.0794618161361399e-05, - "loss": 0.092, + "learning_rate": 2.080898069992043e-05, + "loss": 0.0869, "step": 99180 }, { "epoch": 4.63, - "learning_rate": 1.0794149360086262e-05, - "loss": 0.0337, + "learning_rate": 2.080851263008441e-05, + "loss": 0.0388, "step": 99185 }, { "epoch": 4.63, - "learning_rate": 1.0793680558811122e-05, - "loss": 0.0625, + "learning_rate": 2.080804456024839e-05, + "loss": 0.0651, "step": 99190 }, { "epoch": 4.63, - "learning_rate": 1.0793211757535982e-05, - "loss": 0.0462, + "learning_rate": 2.0807576490412373e-05, + "loss": 0.0243, "step": 99195 }, { "epoch": 4.63, - "learning_rate": 1.0792742956260842e-05, - "loss": 0.0298, + "learning_rate": 2.080710842057635e-05, + "loss": 0.0432, "step": 99200 }, { "epoch": 4.63, - "learning_rate": 1.0792274154985702e-05, - "loss": 0.0773, + "learning_rate": 2.080664035074033e-05, + "loss": 0.068, "step": 99205 }, { "epoch": 4.63, - "learning_rate": 1.0791805353710563e-05, - "loss": 0.1238, + "learning_rate": 2.0806172280904312e-05, + "loss": 0.0644, "step": 99210 }, { "epoch": 4.63, - "learning_rate": 1.0791336552435423e-05, - "loss": 0.0865, + "learning_rate": 2.0805704211068292e-05, + "loss": 0.1305, "step": 99215 }, { "epoch": 4.63, - "learning_rate": 1.0790867751160283e-05, - "loss": 0.1429, + "learning_rate": 2.080523614123227e-05, + "loss": 0.2019, "step": 99220 }, { "epoch": 4.63, - "learning_rate": 1.0790398949885143e-05, - "loss": 0.2869, + "learning_rate": 2.080476807139625e-05, + "loss": 0.3147, "step": 99225 }, { "epoch": 4.63, - "learning_rate": 1.0789930148610006e-05, - "loss": 0.067, + "learning_rate": 2.0804300001560235e-05, + "loss": 0.0994, "step": 99230 }, { "epoch": 4.63, - "learning_rate": 1.0789461347334866e-05, - "loss": 0.0572, + "learning_rate": 2.0803831931724215e-05, + "loss": 0.0307, "step": 99235 }, { "epoch": 4.63, - "learning_rate": 1.0788992546059726e-05, - "loss": 0.0255, + "learning_rate": 2.0803363861888194e-05, + "loss": 0.0565, "step": 99240 }, { "epoch": 4.63, - "learning_rate": 1.0788523744784586e-05, - "loss": 0.0248, + "learning_rate": 2.0802895792052174e-05, + "loss": 0.0207, "step": 99245 }, { "epoch": 4.63, - "learning_rate": 1.0788054943509448e-05, - "loss": 0.0834, + "learning_rate": 2.0802427722216157e-05, + "loss": 0.0506, "step": 99250 }, { "epoch": 4.63, - "learning_rate": 1.0787586142234308e-05, - "loss": 0.0635, + "learning_rate": 2.0801959652380137e-05, + "loss": 0.0953, "step": 99255 }, { "epoch": 4.63, - "learning_rate": 1.0787117340959168e-05, - "loss": 0.0725, + "learning_rate": 2.0801491582544117e-05, + "loss": 0.1174, "step": 99260 }, { "epoch": 4.63, - "learning_rate": 1.0786648539684028e-05, - "loss": 0.1468, + "learning_rate": 2.0801023512708093e-05, + "loss": 0.2273, "step": 99265 }, { "epoch": 4.63, - "learning_rate": 1.078617973840889e-05, - "loss": 0.1187, + "learning_rate": 2.0800555442872077e-05, + "loss": 0.1449, "step": 99270 }, { "epoch": 4.63, - "learning_rate": 1.078571093713375e-05, - "loss": 0.2333, + "learning_rate": 2.0800087373036056e-05, + "loss": 0.2228, "step": 99275 }, { "epoch": 4.63, - "learning_rate": 1.078524213585861e-05, - "loss": 0.1005, + "learning_rate": 2.0799619303200036e-05, + "loss": 0.0756, "step": 99280 }, { "epoch": 4.63, - "learning_rate": 1.078477333458347e-05, - "loss": 0.0119, + "learning_rate": 2.079915123336402e-05, + "loss": 0.0319, "step": 99285 }, { "epoch": 4.63, - "learning_rate": 1.0784304533308332e-05, - "loss": 0.0043, + "learning_rate": 2.0798683163528e-05, + "loss": 0.0501, "step": 99290 }, { "epoch": 4.63, - "learning_rate": 1.0783835732033192e-05, - "loss": 0.038, + "learning_rate": 2.079821509369198e-05, + "loss": 0.0457, "step": 99295 }, { "epoch": 4.63, - "learning_rate": 1.0783366930758052e-05, - "loss": 0.071, + "learning_rate": 2.079774702385596e-05, + "loss": 0.0288, "step": 99300 }, { "epoch": 4.63, - "learning_rate": 1.0782898129482912e-05, - "loss": 0.0802, + "learning_rate": 2.0797278954019942e-05, + "loss": 0.0841, "step": 99305 }, { "epoch": 4.63, - "learning_rate": 1.0782429328207774e-05, - "loss": 0.0974, + "learning_rate": 2.0796810884183922e-05, + "loss": 0.0969, "step": 99310 }, { "epoch": 4.63, - "learning_rate": 1.0781960526932634e-05, - "loss": 0.1049, + "learning_rate": 2.0796342814347902e-05, + "loss": 0.1221, "step": 99315 }, { "epoch": 4.63, - "learning_rate": 1.0781491725657494e-05, - "loss": 0.2026, + "learning_rate": 2.079587474451188e-05, + "loss": 0.2344, "step": 99320 }, { "epoch": 4.63, - "learning_rate": 1.0781022924382355e-05, - "loss": 0.3167, + "learning_rate": 2.079540667467586e-05, + "loss": 0.2156, "step": 99325 }, { "epoch": 4.63, - "learning_rate": 1.0780554123107217e-05, - "loss": 0.1143, + "learning_rate": 2.079493860483984e-05, + "loss": 0.0586, "step": 99330 }, { "epoch": 4.64, - "learning_rate": 1.0780085321832077e-05, - "loss": 0.0335, + "learning_rate": 2.079447053500382e-05, + "loss": 0.0024, "step": 99335 }, { "epoch": 4.64, - "learning_rate": 1.0779616520556937e-05, - "loss": 0.0593, + "learning_rate": 2.0794002465167804e-05, + "loss": 0.0393, "step": 99340 }, { "epoch": 4.64, - "learning_rate": 1.0779147719281797e-05, - "loss": 0.0417, + "learning_rate": 2.0793534395331784e-05, + "loss": 0.0554, "step": 99345 }, { "epoch": 4.64, - "learning_rate": 1.0778678918006658e-05, - "loss": 0.0406, + "learning_rate": 2.0793066325495764e-05, + "loss": 0.045, "step": 99350 }, { "epoch": 4.64, - "learning_rate": 1.0778210116731518e-05, - "loss": 0.0409, + "learning_rate": 2.0792598255659744e-05, + "loss": 0.0882, "step": 99355 }, { "epoch": 4.64, - "learning_rate": 1.0777741315456378e-05, - "loss": 0.0867, + "learning_rate": 2.0792130185823727e-05, + "loss": 0.0938, "step": 99360 }, { "epoch": 4.64, - "learning_rate": 1.0777272514181238e-05, - "loss": 0.1873, + "learning_rate": 2.0791662115987707e-05, + "loss": 0.0862, "step": 99365 }, { "epoch": 4.64, - "learning_rate": 1.0776803712906101e-05, - "loss": 0.0758, + "learning_rate": 2.0791194046151687e-05, + "loss": 0.0807, "step": 99370 }, { "epoch": 4.64, - "learning_rate": 1.0776334911630961e-05, - "loss": 0.2972, + "learning_rate": 2.0790725976315666e-05, + "loss": 0.34, "step": 99375 }, { "epoch": 4.64, - "learning_rate": 1.0775866110355821e-05, - "loss": 0.0862, + "learning_rate": 2.079025790647965e-05, + "loss": 0.0568, "step": 99380 }, { "epoch": 4.64, - "learning_rate": 1.0775397309080681e-05, - "loss": 0.0447, + "learning_rate": 2.078978983664363e-05, + "loss": 0.014, "step": 99385 }, { "epoch": 4.64, - "learning_rate": 1.0774928507805543e-05, - "loss": 0.02, + "learning_rate": 2.0789321766807606e-05, + "loss": 0.0557, "step": 99390 }, { "epoch": 4.64, - "learning_rate": 1.0774459706530403e-05, - "loss": 0.016, + "learning_rate": 2.078885369697159e-05, + "loss": 0.08, "step": 99395 }, { "epoch": 4.64, - "learning_rate": 1.0773990905255263e-05, - "loss": 0.0336, + "learning_rate": 2.078838562713557e-05, + "loss": 0.0228, "step": 99400 }, { "epoch": 4.64, - "learning_rate": 1.0773522103980123e-05, - "loss": 0.0406, + "learning_rate": 2.078791755729955e-05, + "loss": 0.0461, "step": 99405 }, { "epoch": 4.64, - "learning_rate": 1.0773053302704983e-05, - "loss": 0.1362, + "learning_rate": 2.078744948746353e-05, + "loss": 0.0669, "step": 99410 }, { "epoch": 4.64, - "learning_rate": 1.0772584501429846e-05, - "loss": 0.1222, + "learning_rate": 2.078698141762751e-05, + "loss": 0.1543, "step": 99415 }, { "epoch": 4.64, - "learning_rate": 1.0772115700154706e-05, - "loss": 0.0985, + "learning_rate": 2.078651334779149e-05, + "loss": 0.2086, "step": 99420 }, { "epoch": 4.64, - "learning_rate": 1.0771646898879566e-05, - "loss": 0.1979, + "learning_rate": 2.078604527795547e-05, + "loss": 0.2551, "step": 99425 }, { "epoch": 4.64, - "learning_rate": 1.0771178097604427e-05, - "loss": 0.1238, + "learning_rate": 2.078557720811945e-05, + "loss": 0.0489, "step": 99430 }, { "epoch": 4.64, - "learning_rate": 1.0770709296329287e-05, - "loss": 0.0166, + "learning_rate": 2.0785109138283434e-05, + "loss": 0.0363, "step": 99435 }, { "epoch": 4.64, - "learning_rate": 1.0770240495054147e-05, - "loss": 0.0284, + "learning_rate": 2.0784641068447414e-05, + "loss": 0.0246, "step": 99440 }, { "epoch": 4.64, - "learning_rate": 1.0769771693779007e-05, - "loss": 0.0267, + "learning_rate": 2.0784172998611394e-05, + "loss": 0.0411, "step": 99445 }, { "epoch": 4.64, - "learning_rate": 1.0769302892503867e-05, - "loss": 0.0786, + "learning_rate": 2.0783704928775374e-05, + "loss": 0.0573, "step": 99450 }, { "epoch": 4.64, - "learning_rate": 1.0768834091228729e-05, - "loss": 0.0815, + "learning_rate": 2.0783236858939354e-05, + "loss": 0.0495, "step": 99455 }, { "epoch": 4.64, - "learning_rate": 1.076836528995359e-05, - "loss": 0.1014, + "learning_rate": 2.0782768789103333e-05, + "loss": 0.1951, "step": 99460 }, { "epoch": 4.64, - "learning_rate": 1.076789648867845e-05, - "loss": 0.075, + "learning_rate": 2.0782300719267313e-05, + "loss": 0.036, "step": 99465 }, { "epoch": 4.64, - "learning_rate": 1.0767427687403312e-05, - "loss": 0.1781, + "learning_rate": 2.0781832649431296e-05, + "loss": 0.1828, "step": 99470 }, { "epoch": 4.64, - "learning_rate": 1.0766958886128172e-05, - "loss": 0.2461, + "learning_rate": 2.0781364579595276e-05, + "loss": 0.1341, "step": 99475 }, { "epoch": 4.64, - "learning_rate": 1.0766490084853032e-05, - "loss": 0.0696, + "learning_rate": 2.0780896509759256e-05, + "loss": 0.0735, "step": 99480 }, { "epoch": 4.64, - "learning_rate": 1.0766021283577892e-05, - "loss": 0.0542, + "learning_rate": 2.0780428439923236e-05, + "loss": 0.0225, "step": 99485 }, { "epoch": 4.64, - "learning_rate": 1.0765552482302752e-05, - "loss": 0.0734, + "learning_rate": 2.077996037008722e-05, + "loss": 0.0136, "step": 99490 }, { "epoch": 4.64, - "learning_rate": 1.0765083681027613e-05, - "loss": 0.0164, + "learning_rate": 2.07794923002512e-05, + "loss": 0.0494, "step": 99495 }, { "epoch": 4.64, - "learning_rate": 1.0764614879752473e-05, - "loss": 0.0494, + "learning_rate": 2.077902423041518e-05, + "loss": 0.0314, "step": 99500 }, { "epoch": 4.64, - "learning_rate": 1.0764146078477333e-05, - "loss": 0.1086, + "learning_rate": 2.077855616057916e-05, + "loss": 0.0275, "step": 99505 }, { "epoch": 4.64, - "learning_rate": 1.0763677277202197e-05, - "loss": 0.0644, + "learning_rate": 2.0778088090743142e-05, + "loss": 0.0576, "step": 99510 }, { "epoch": 4.64, - "learning_rate": 1.0763208475927056e-05, - "loss": 0.1442, + "learning_rate": 2.0777620020907118e-05, + "loss": 0.1932, "step": 99515 }, { "epoch": 4.64, - "learning_rate": 1.0762739674651916e-05, - "loss": 0.178, + "learning_rate": 2.0777151951071098e-05, + "loss": 0.0715, "step": 99520 }, { "epoch": 4.64, - "learning_rate": 1.0762270873376776e-05, - "loss": 0.2618, + "learning_rate": 2.077668388123508e-05, + "loss": 0.2016, "step": 99525 }, { "epoch": 4.64, - "learning_rate": 1.0761802072101636e-05, - "loss": 0.0942, + "learning_rate": 2.077621581139906e-05, + "loss": 0.0488, "step": 99530 }, { "epoch": 4.64, - "learning_rate": 1.0761333270826498e-05, - "loss": 0.0282, + "learning_rate": 2.077574774156304e-05, + "loss": 0.0311, "step": 99535 }, { "epoch": 4.64, - "learning_rate": 1.0760864469551358e-05, - "loss": 0.0709, + "learning_rate": 2.077527967172702e-05, + "loss": 0.0287, "step": 99540 }, { "epoch": 4.64, - "learning_rate": 1.0760395668276218e-05, - "loss": 0.0218, + "learning_rate": 2.0774811601891004e-05, + "loss": 0.0505, "step": 99545 }, { "epoch": 4.65, - "learning_rate": 1.0759926867001078e-05, - "loss": 0.0258, + "learning_rate": 2.0774343532054984e-05, + "loss": 0.0847, "step": 99550 }, { "epoch": 4.65, - "learning_rate": 1.0759458065725941e-05, - "loss": 0.0801, + "learning_rate": 2.0773875462218964e-05, + "loss": 0.0455, "step": 99555 }, { "epoch": 4.65, - "learning_rate": 1.0758989264450801e-05, - "loss": 0.0565, + "learning_rate": 2.0773407392382943e-05, + "loss": 0.0394, "step": 99560 }, { "epoch": 4.65, - "learning_rate": 1.0758520463175661e-05, - "loss": 0.1175, + "learning_rate": 2.0772939322546927e-05, + "loss": 0.0985, "step": 99565 }, { "epoch": 4.65, - "learning_rate": 1.075805166190052e-05, - "loss": 0.0922, + "learning_rate": 2.0772471252710906e-05, + "loss": 0.2197, "step": 99570 }, { "epoch": 4.65, - "learning_rate": 1.0757582860625382e-05, - "loss": 0.3132, + "learning_rate": 2.0772003182874886e-05, + "loss": 0.2316, "step": 99575 }, { "epoch": 4.65, - "learning_rate": 1.0757114059350242e-05, - "loss": 0.0812, + "learning_rate": 2.0771535113038866e-05, + "loss": 0.0543, "step": 99580 }, { "epoch": 4.65, - "learning_rate": 1.0756645258075102e-05, - "loss": 0.0387, + "learning_rate": 2.0771067043202846e-05, + "loss": 0.0135, "step": 99585 }, { "epoch": 4.65, - "learning_rate": 1.0756176456799962e-05, - "loss": 0.013, + "learning_rate": 2.0770598973366826e-05, + "loss": 0.027, "step": 99590 }, { "epoch": 4.65, - "learning_rate": 1.0755707655524822e-05, - "loss": 0.0419, + "learning_rate": 2.0770130903530805e-05, + "loss": 0.0744, "step": 99595 }, { "epoch": 4.65, - "learning_rate": 1.0755238854249685e-05, - "loss": 0.0777, + "learning_rate": 2.076966283369479e-05, + "loss": 0.0576, "step": 99600 }, { "epoch": 4.65, - "learning_rate": 1.0754770052974545e-05, - "loss": 0.0271, + "learning_rate": 2.076919476385877e-05, + "loss": 0.0583, "step": 99605 }, { "epoch": 4.65, - "learning_rate": 1.0754301251699405e-05, - "loss": 0.0288, + "learning_rate": 2.0768726694022748e-05, + "loss": 0.0541, "step": 99610 }, { "epoch": 4.65, - "learning_rate": 1.0753832450424267e-05, - "loss": 0.1725, + "learning_rate": 2.0768258624186728e-05, + "loss": 0.0742, "step": 99615 }, { "epoch": 4.65, - "learning_rate": 1.0753363649149127e-05, - "loss": 0.1753, + "learning_rate": 2.076779055435071e-05, + "loss": 0.1081, "step": 99620 }, { "epoch": 4.65, - "learning_rate": 1.0752894847873987e-05, - "loss": 0.272, + "learning_rate": 2.076732248451469e-05, + "loss": 0.198, "step": 99625 }, { "epoch": 4.65, - "learning_rate": 1.0752426046598847e-05, - "loss": 0.0909, + "learning_rate": 2.076685441467867e-05, + "loss": 0.0677, "step": 99630 }, { "epoch": 4.65, - "learning_rate": 1.0751957245323707e-05, - "loss": 0.0129, + "learning_rate": 2.076638634484265e-05, + "loss": 0.0404, "step": 99635 }, { "epoch": 4.65, - "learning_rate": 1.0751488444048568e-05, - "loss": 0.0199, + "learning_rate": 2.0765918275006634e-05, + "loss": 0.0163, "step": 99640 }, { "epoch": 4.65, - "learning_rate": 1.0751019642773428e-05, - "loss": 0.0234, + "learning_rate": 2.076545020517061e-05, + "loss": 0.1028, "step": 99645 }, { "epoch": 4.65, - "learning_rate": 1.075055084149829e-05, - "loss": 0.0879, + "learning_rate": 2.076498213533459e-05, + "loss": 0.0623, "step": 99650 }, { "epoch": 4.65, - "learning_rate": 1.0750082040223152e-05, - "loss": 0.0563, + "learning_rate": 2.0764514065498573e-05, + "loss": 0.068, "step": 99655 }, { "epoch": 4.65, - "learning_rate": 1.0749613238948011e-05, - "loss": 0.0947, + "learning_rate": 2.0764045995662553e-05, + "loss": 0.102, "step": 99660 }, { "epoch": 4.65, - "learning_rate": 1.0749144437672871e-05, - "loss": 0.1726, + "learning_rate": 2.0763577925826533e-05, + "loss": 0.1024, "step": 99665 }, { "epoch": 4.65, - "learning_rate": 1.0748675636397731e-05, - "loss": 0.1608, + "learning_rate": 2.0763109855990513e-05, + "loss": 0.0943, "step": 99670 }, { "epoch": 4.65, - "learning_rate": 1.0748206835122591e-05, - "loss": 0.2688, + "learning_rate": 2.0762641786154496e-05, + "loss": 0.1972, "step": 99675 }, { "epoch": 4.65, - "learning_rate": 1.0747738033847453e-05, - "loss": 0.0452, + "learning_rate": 2.0762173716318476e-05, + "loss": 0.0714, "step": 99680 }, { "epoch": 4.65, - "learning_rate": 1.0747269232572313e-05, - "loss": 0.011, + "learning_rate": 2.0761705646482456e-05, + "loss": 0.017, "step": 99685 }, { "epoch": 4.65, - "learning_rate": 1.0746800431297173e-05, - "loss": 0.0227, + "learning_rate": 2.0761237576646436e-05, + "loss": 0.0166, "step": 99690 }, { "epoch": 4.65, - "learning_rate": 1.0746331630022036e-05, - "loss": 0.0385, + "learning_rate": 2.076076950681042e-05, + "loss": 0.07, "step": 99695 }, { "epoch": 4.65, - "learning_rate": 1.0745862828746896e-05, - "loss": 0.089, + "learning_rate": 2.07603014369744e-05, + "loss": 0.0622, "step": 99700 }, { "epoch": 4.65, - "learning_rate": 1.0745394027471756e-05, - "loss": 0.08, + "learning_rate": 2.0759833367138375e-05, + "loss": 0.0615, "step": 99705 }, { "epoch": 4.65, - "learning_rate": 1.0744925226196616e-05, - "loss": 0.0665, + "learning_rate": 2.0759365297302358e-05, + "loss": 0.0693, "step": 99710 }, { "epoch": 4.65, - "learning_rate": 1.0744456424921478e-05, - "loss": 0.2098, + "learning_rate": 2.0758897227466338e-05, + "loss": 0.0909, "step": 99715 }, { "epoch": 4.65, - "learning_rate": 1.0743987623646337e-05, - "loss": 0.1671, + "learning_rate": 2.0758429157630318e-05, + "loss": 0.0635, "step": 99720 }, { "epoch": 4.65, - "learning_rate": 1.0743518822371197e-05, - "loss": 0.2883, + "learning_rate": 2.0757961087794298e-05, + "loss": 0.2148, "step": 99725 }, { "epoch": 4.65, - "learning_rate": 1.0743050021096057e-05, - "loss": 0.0748, + "learning_rate": 2.075749301795828e-05, + "loss": 0.0608, "step": 99730 }, { "epoch": 4.65, - "learning_rate": 1.0742581219820917e-05, - "loss": 0.0249, + "learning_rate": 2.075702494812226e-05, + "loss": 0.0114, "step": 99735 }, { "epoch": 4.65, - "learning_rate": 1.074211241854578e-05, - "loss": 0.0332, + "learning_rate": 2.075655687828624e-05, + "loss": 0.0203, "step": 99740 }, { "epoch": 4.65, - "learning_rate": 1.074164361727064e-05, - "loss": 0.0898, + "learning_rate": 2.075608880845022e-05, + "loss": 0.0107, "step": 99745 }, { "epoch": 4.65, - "learning_rate": 1.07411748159955e-05, - "loss": 0.0587, + "learning_rate": 2.0755620738614204e-05, + "loss": 0.0374, "step": 99750 }, { "epoch": 4.65, - "learning_rate": 1.0740706014720362e-05, - "loss": 0.0309, + "learning_rate": 2.0755152668778183e-05, + "loss": 0.0941, "step": 99755 }, { "epoch": 4.65, - "learning_rate": 1.0740237213445222e-05, - "loss": 0.0881, + "learning_rate": 2.0754684598942163e-05, + "loss": 0.1124, "step": 99760 }, { "epoch": 4.66, - "learning_rate": 1.0739768412170082e-05, - "loss": 0.1376, + "learning_rate": 2.0754216529106146e-05, + "loss": 0.0945, "step": 99765 }, { "epoch": 4.66, - "learning_rate": 1.0739299610894942e-05, - "loss": 0.2707, + "learning_rate": 2.0753748459270123e-05, + "loss": 0.1335, "step": 99770 }, { "epoch": 4.66, - "learning_rate": 1.0738830809619802e-05, - "loss": 0.2035, + "learning_rate": 2.0753280389434103e-05, + "loss": 0.1781, "step": 99775 }, { "epoch": 4.66, - "learning_rate": 1.0738362008344663e-05, - "loss": 0.0786, + "learning_rate": 2.0752812319598082e-05, + "loss": 0.06, "step": 99780 }, { "epoch": 4.66, - "learning_rate": 1.0737893207069525e-05, - "loss": 0.0299, + "learning_rate": 2.0752344249762066e-05, + "loss": 0.0022, "step": 99785 }, { "epoch": 4.66, - "learning_rate": 1.0737424405794385e-05, - "loss": 0.0207, + "learning_rate": 2.0751876179926045e-05, + "loss": 0.013, "step": 99790 }, { "epoch": 4.66, - "learning_rate": 1.0736955604519247e-05, - "loss": 0.0134, + "learning_rate": 2.0751408110090025e-05, + "loss": 0.1181, "step": 99795 }, { "epoch": 4.66, - "learning_rate": 1.0736486803244107e-05, - "loss": 0.0427, + "learning_rate": 2.0750940040254005e-05, + "loss": 0.0504, "step": 99800 }, { "epoch": 4.66, - "learning_rate": 1.0736018001968966e-05, - "loss": 0.0398, + "learning_rate": 2.0750471970417988e-05, + "loss": 0.0694, "step": 99805 }, { "epoch": 4.66, - "learning_rate": 1.0735549200693826e-05, - "loss": 0.1027, + "learning_rate": 2.0750003900581968e-05, + "loss": 0.0524, "step": 99810 }, { "epoch": 4.66, - "learning_rate": 1.0735080399418686e-05, - "loss": 0.0696, + "learning_rate": 2.0749535830745948e-05, + "loss": 0.0995, "step": 99815 }, { "epoch": 4.66, - "learning_rate": 1.0734611598143548e-05, - "loss": 0.1245, + "learning_rate": 2.0749067760909928e-05, + "loss": 0.156, "step": 99820 }, { "epoch": 4.66, - "learning_rate": 1.0734142796868408e-05, - "loss": 0.2651, + "learning_rate": 2.074859969107391e-05, + "loss": 0.2159, "step": 99825 }, { "epoch": 4.66, - "learning_rate": 1.0733673995593268e-05, - "loss": 0.0804, + "learning_rate": 2.074813162123789e-05, + "loss": 0.0476, "step": 99830 }, { "epoch": 4.66, - "learning_rate": 1.0733205194318131e-05, - "loss": 0.008, + "learning_rate": 2.0747663551401867e-05, + "loss": 0.0078, "step": 99835 }, { "epoch": 4.66, - "learning_rate": 1.0732736393042991e-05, - "loss": 0.0325, + "learning_rate": 2.074719548156585e-05, + "loss": 0.0263, "step": 99840 }, { "epoch": 4.66, - "learning_rate": 1.0732267591767851e-05, - "loss": 0.0206, + "learning_rate": 2.074672741172983e-05, + "loss": 0.0553, "step": 99845 }, { "epoch": 4.66, - "learning_rate": 1.0731798790492711e-05, - "loss": 0.0482, + "learning_rate": 2.074625934189381e-05, + "loss": 0.0591, "step": 99850 }, { "epoch": 4.66, - "learning_rate": 1.0731329989217571e-05, - "loss": 0.06, + "learning_rate": 2.074579127205779e-05, + "loss": 0.0492, "step": 99855 }, { "epoch": 4.66, - "learning_rate": 1.0730861187942433e-05, - "loss": 0.1655, + "learning_rate": 2.0745323202221773e-05, + "loss": 0.083, "step": 99860 }, { "epoch": 4.66, - "learning_rate": 1.0730392386667292e-05, - "loss": 0.0665, + "learning_rate": 2.0744855132385753e-05, + "loss": 0.1589, "step": 99865 }, { "epoch": 4.66, - "learning_rate": 1.0729923585392152e-05, - "loss": 0.1627, + "learning_rate": 2.0744387062549733e-05, + "loss": 0.1928, "step": 99870 }, { "epoch": 4.66, - "learning_rate": 1.0729454784117012e-05, - "loss": 0.1678, + "learning_rate": 2.0743918992713713e-05, + "loss": 0.3125, "step": 99875 }, { "epoch": 4.66, - "learning_rate": 1.0728985982841876e-05, - "loss": 0.0892, + "learning_rate": 2.0743450922877696e-05, + "loss": 0.0875, "step": 99880 }, { "epoch": 4.66, - "learning_rate": 1.0728517181566736e-05, - "loss": 0.0208, + "learning_rate": 2.0742982853041676e-05, + "loss": 0.0606, "step": 99885 }, { "epoch": 4.66, - "learning_rate": 1.0728048380291596e-05, - "loss": 0.0626, + "learning_rate": 2.0742514783205655e-05, + "loss": 0.0206, "step": 99890 }, { "epoch": 4.66, - "learning_rate": 1.0727579579016455e-05, - "loss": 0.0392, + "learning_rate": 2.0742046713369635e-05, + "loss": 0.0238, "step": 99895 }, { "epoch": 4.66, - "learning_rate": 1.0727110777741317e-05, - "loss": 0.0458, + "learning_rate": 2.0741578643533615e-05, + "loss": 0.0411, "step": 99900 }, { "epoch": 4.66, - "learning_rate": 1.0726641976466177e-05, - "loss": 0.0586, + "learning_rate": 2.0741110573697595e-05, + "loss": 0.0444, "step": 99905 }, { "epoch": 4.66, - "learning_rate": 1.0726173175191037e-05, - "loss": 0.0368, + "learning_rate": 2.0740642503861575e-05, + "loss": 0.0817, "step": 99910 }, { "epoch": 4.66, - "learning_rate": 1.0725704373915897e-05, - "loss": 0.1235, + "learning_rate": 2.0740174434025558e-05, + "loss": 0.115, "step": 99915 }, { "epoch": 4.66, - "learning_rate": 1.0725235572640757e-05, - "loss": 0.1358, + "learning_rate": 2.0739706364189538e-05, + "loss": 0.1006, "step": 99920 }, { "epoch": 4.66, - "learning_rate": 1.072476677136562e-05, - "loss": 0.2023, + "learning_rate": 2.0739238294353517e-05, + "loss": 0.3645, "step": 99925 }, { "epoch": 4.66, - "learning_rate": 1.072429797009048e-05, - "loss": 0.0468, + "learning_rate": 2.0738770224517497e-05, + "loss": 0.0537, "step": 99930 }, { "epoch": 4.66, - "learning_rate": 1.072382916881534e-05, - "loss": 0.018, + "learning_rate": 2.073830215468148e-05, + "loss": 0.0393, "step": 99935 }, { "epoch": 4.66, - "learning_rate": 1.0723360367540202e-05, - "loss": 0.0191, + "learning_rate": 2.073783408484546e-05, + "loss": 0.0198, "step": 99940 }, { "epoch": 4.66, - "learning_rate": 1.0722891566265062e-05, - "loss": 0.038, + "learning_rate": 2.073736601500944e-05, + "loss": 0.0174, "step": 99945 }, { "epoch": 4.66, - "learning_rate": 1.0722422764989922e-05, - "loss": 0.0758, + "learning_rate": 2.0736897945173423e-05, + "loss": 0.0109, "step": 99950 }, { "epoch": 4.66, - "learning_rate": 1.0721953963714781e-05, - "loss": 0.0867, + "learning_rate": 2.0736429875337403e-05, + "loss": 0.0591, "step": 99955 }, { "epoch": 4.66, - "learning_rate": 1.0721485162439641e-05, - "loss": 0.0558, + "learning_rate": 2.073596180550138e-05, + "loss": 0.1553, "step": 99960 }, { "epoch": 4.66, - "learning_rate": 1.0721016361164503e-05, - "loss": 0.1859, + "learning_rate": 2.073549373566536e-05, + "loss": 0.1976, "step": 99965 }, { "epoch": 4.66, - "learning_rate": 1.0720547559889363e-05, - "loss": 0.1605, + "learning_rate": 2.0735025665829343e-05, + "loss": 0.124, "step": 99970 }, { "epoch": 4.66, - "learning_rate": 1.0720078758614225e-05, - "loss": 0.1372, + "learning_rate": 2.0734557595993322e-05, + "loss": 0.1288, "step": 99975 }, { "epoch": 4.67, - "learning_rate": 1.0719609957339086e-05, - "loss": 0.1288, + "learning_rate": 2.0734089526157302e-05, + "loss": 0.1012, "step": 99980 }, { "epoch": 4.67, - "learning_rate": 1.0719141156063946e-05, - "loss": 0.0231, + "learning_rate": 2.0733621456321282e-05, + "loss": 0.014, "step": 99985 }, { "epoch": 4.67, - "learning_rate": 1.0718672354788806e-05, - "loss": 0.0178, + "learning_rate": 2.0733153386485265e-05, + "loss": 0.0613, "step": 99990 }, { "epoch": 4.67, - "learning_rate": 1.0718203553513666e-05, - "loss": 0.0467, + "learning_rate": 2.0732685316649245e-05, + "loss": 0.012, "step": 99995 }, { "epoch": 4.67, - "learning_rate": 1.0717734752238526e-05, - "loss": 0.0779, + "learning_rate": 2.0732217246813225e-05, + "loss": 0.0237, "step": 100000 }, { "epoch": 4.67, - "learning_rate": 1.0717265950963388e-05, - "loss": 0.0453, + "learning_rate": 2.0731749176977208e-05, + "loss": 0.0684, "step": 100005 }, { "epoch": 4.67, - "learning_rate": 1.0716797149688247e-05, - "loss": 0.0382, + "learning_rate": 2.0731281107141188e-05, + "loss": 0.1161, "step": 100010 }, { "epoch": 4.67, - "learning_rate": 1.0716328348413107e-05, - "loss": 0.1943, + "learning_rate": 2.0730813037305168e-05, + "loss": 0.073, "step": 100015 }, { "epoch": 4.67, - "learning_rate": 1.071585954713797e-05, - "loss": 0.1339, + "learning_rate": 2.0730344967469148e-05, + "loss": 0.1206, "step": 100020 }, { "epoch": 4.67, - "learning_rate": 1.071539074586283e-05, - "loss": 0.2929, + "learning_rate": 2.0729876897633127e-05, + "loss": 0.3225, "step": 100025 }, { "epoch": 4.67, - "learning_rate": 1.071492194458769e-05, - "loss": 0.0649, + "learning_rate": 2.0729408827797107e-05, + "loss": 0.067, "step": 100030 }, { "epoch": 4.67, - "learning_rate": 1.071445314331255e-05, - "loss": 0.0202, + "learning_rate": 2.0728940757961087e-05, + "loss": 0.026, "step": 100035 }, { "epoch": 4.67, - "learning_rate": 1.071398434203741e-05, - "loss": 0.0123, + "learning_rate": 2.0728472688125067e-05, + "loss": 0.0466, "step": 100040 }, { "epoch": 4.67, - "learning_rate": 1.0713515540762272e-05, - "loss": 0.0731, + "learning_rate": 2.072800461828905e-05, + "loss": 0.0375, "step": 100045 }, { "epoch": 4.67, - "learning_rate": 1.0713046739487132e-05, - "loss": 0.0526, + "learning_rate": 2.072753654845303e-05, + "loss": 0.0455, "step": 100050 }, { "epoch": 4.67, - "learning_rate": 1.0712577938211992e-05, - "loss": 0.0832, + "learning_rate": 2.072706847861701e-05, + "loss": 0.0689, "step": 100055 }, { "epoch": 4.67, - "learning_rate": 1.0712109136936852e-05, - "loss": 0.0807, + "learning_rate": 2.072660040878099e-05, + "loss": 0.0598, "step": 100060 }, { "epoch": 4.67, - "learning_rate": 1.0711640335661715e-05, - "loss": 0.0992, + "learning_rate": 2.0726132338944973e-05, + "loss": 0.1243, "step": 100065 }, { "epoch": 4.67, - "learning_rate": 1.0711171534386575e-05, - "loss": 0.2506, + "learning_rate": 2.0725664269108952e-05, + "loss": 0.1608, "step": 100070 }, { "epoch": 4.67, - "learning_rate": 1.0710702733111435e-05, - "loss": 0.2901, + "learning_rate": 2.0725196199272932e-05, + "loss": 0.2854, "step": 100075 }, { "epoch": 4.67, - "learning_rate": 1.0710233931836295e-05, - "loss": 0.1019, + "learning_rate": 2.0724728129436916e-05, + "loss": 0.0541, "step": 100080 }, { "epoch": 4.67, - "learning_rate": 1.0709765130561157e-05, - "loss": 0.0498, + "learning_rate": 2.0724260059600892e-05, + "loss": 0.0085, "step": 100085 }, { "epoch": 4.67, - "learning_rate": 1.0709296329286017e-05, - "loss": 0.0206, + "learning_rate": 2.0723791989764872e-05, + "loss": 0.0365, "step": 100090 }, { "epoch": 4.67, - "learning_rate": 1.0708827528010877e-05, - "loss": 0.0778, + "learning_rate": 2.072332391992885e-05, + "loss": 0.0586, "step": 100095 }, { "epoch": 4.67, - "learning_rate": 1.0708358726735736e-05, - "loss": 0.0532, + "learning_rate": 2.0722855850092835e-05, + "loss": 0.0363, "step": 100100 }, { "epoch": 4.67, - "learning_rate": 1.0707889925460598e-05, - "loss": 0.1336, + "learning_rate": 2.0722387780256815e-05, + "loss": 0.0495, "step": 100105 }, { "epoch": 4.67, - "learning_rate": 1.070742112418546e-05, - "loss": 0.0676, + "learning_rate": 2.0721919710420794e-05, + "loss": 0.0497, "step": 100110 }, { "epoch": 4.67, - "learning_rate": 1.070695232291032e-05, - "loss": 0.1286, + "learning_rate": 2.0721451640584774e-05, + "loss": 0.0868, "step": 100115 }, { "epoch": 4.67, - "learning_rate": 1.070648352163518e-05, - "loss": 0.2123, + "learning_rate": 2.0720983570748757e-05, + "loss": 0.1819, "step": 100120 }, { "epoch": 4.67, - "learning_rate": 1.0706014720360041e-05, - "loss": 0.315, + "learning_rate": 2.0720515500912737e-05, + "loss": 0.2221, "step": 100125 }, { "epoch": 4.67, - "learning_rate": 1.0705545919084901e-05, - "loss": 0.0815, + "learning_rate": 2.0720047431076717e-05, + "loss": 0.0539, "step": 100130 }, { "epoch": 4.67, - "learning_rate": 1.0705077117809761e-05, - "loss": 0.0161, + "learning_rate": 2.07195793612407e-05, + "loss": 0.0146, "step": 100135 }, { "epoch": 4.67, - "learning_rate": 1.0704608316534621e-05, - "loss": 0.0348, + "learning_rate": 2.071911129140468e-05, + "loss": 0.0156, "step": 100140 }, { "epoch": 4.67, - "learning_rate": 1.0704139515259483e-05, - "loss": 0.0347, + "learning_rate": 2.071864322156866e-05, + "loss": 0.0666, "step": 100145 }, { "epoch": 4.67, - "learning_rate": 1.0703670713984343e-05, - "loss": 0.0313, + "learning_rate": 2.0718175151732636e-05, + "loss": 0.0338, "step": 100150 }, { "epoch": 4.67, - "learning_rate": 1.0703201912709202e-05, - "loss": 0.0339, + "learning_rate": 2.071770708189662e-05, + "loss": 0.0434, "step": 100155 }, { "epoch": 4.67, - "learning_rate": 1.0702733111434064e-05, - "loss": 0.1409, + "learning_rate": 2.07172390120606e-05, + "loss": 0.1111, "step": 100160 }, { "epoch": 4.67, - "learning_rate": 1.0702264310158926e-05, - "loss": 0.144, + "learning_rate": 2.071677094222458e-05, + "loss": 0.0508, "step": 100165 }, { "epoch": 4.67, - "learning_rate": 1.0701795508883786e-05, - "loss": 0.1255, + "learning_rate": 2.071630287238856e-05, + "loss": 0.0869, "step": 100170 }, { "epoch": 4.67, - "learning_rate": 1.0701326707608646e-05, - "loss": 0.2538, + "learning_rate": 2.0715834802552542e-05, + "loss": 0.4634, "step": 100175 }, { "epoch": 4.67, - "learning_rate": 1.0700857906333506e-05, - "loss": 0.0603, + "learning_rate": 2.0715366732716522e-05, + "loss": 0.104, "step": 100180 }, { "epoch": 4.67, - "learning_rate": 1.0700389105058367e-05, - "loss": 0.0213, + "learning_rate": 2.0714898662880502e-05, + "loss": 0.013, "step": 100185 }, { "epoch": 4.68, - "learning_rate": 1.0699920303783227e-05, - "loss": 0.0201, + "learning_rate": 2.0714430593044485e-05, + "loss": 0.033, "step": 100190 }, { "epoch": 4.68, - "learning_rate": 1.0699451502508087e-05, - "loss": 0.0163, + "learning_rate": 2.0713962523208465e-05, + "loss": 0.0563, "step": 100195 }, { "epoch": 4.68, - "learning_rate": 1.0698982701232947e-05, - "loss": 0.0369, + "learning_rate": 2.0713494453372445e-05, + "loss": 0.0472, "step": 100200 }, { "epoch": 4.68, - "learning_rate": 1.069851389995781e-05, - "loss": 0.053, + "learning_rate": 2.0713026383536425e-05, + "loss": 0.0392, "step": 100205 }, { "epoch": 4.68, - "learning_rate": 1.069804509868267e-05, - "loss": 0.1032, + "learning_rate": 2.0712558313700404e-05, + "loss": 0.0618, "step": 100210 }, { "epoch": 4.68, - "learning_rate": 1.069757629740753e-05, - "loss": 0.1269, + "learning_rate": 2.0712090243864384e-05, + "loss": 0.1028, "step": 100215 }, { "epoch": 4.68, - "learning_rate": 1.069710749613239e-05, - "loss": 0.1276, + "learning_rate": 2.0711622174028364e-05, + "loss": 0.0937, "step": 100220 }, { "epoch": 4.68, - "learning_rate": 1.0696638694857252e-05, - "loss": 0.2841, + "learning_rate": 2.0711154104192344e-05, + "loss": 0.1686, "step": 100225 }, { "epoch": 4.68, - "learning_rate": 1.0696169893582112e-05, - "loss": 0.0599, + "learning_rate": 2.0710686034356327e-05, + "loss": 0.0949, "step": 100230 }, { "epoch": 4.68, - "learning_rate": 1.0695701092306972e-05, - "loss": 0.0166, + "learning_rate": 2.0710217964520307e-05, + "loss": 0.0559, "step": 100235 }, { "epoch": 4.68, - "learning_rate": 1.0695232291031832e-05, - "loss": 0.0124, + "learning_rate": 2.0709749894684287e-05, + "loss": 0.0194, "step": 100240 }, { "epoch": 4.68, - "learning_rate": 1.0694763489756691e-05, - "loss": 0.0527, + "learning_rate": 2.0709281824848266e-05, + "loss": 0.0855, "step": 100245 }, { "epoch": 4.68, - "learning_rate": 1.0694294688481555e-05, - "loss": 0.0974, + "learning_rate": 2.070881375501225e-05, + "loss": 0.0482, "step": 100250 }, { "epoch": 4.68, - "learning_rate": 1.0693825887206415e-05, - "loss": 0.0282, + "learning_rate": 2.070834568517623e-05, + "loss": 0.0688, "step": 100255 }, { "epoch": 4.68, - "learning_rate": 1.0693357085931275e-05, - "loss": 0.071, + "learning_rate": 2.070787761534021e-05, + "loss": 0.1219, "step": 100260 }, { "epoch": 4.68, - "learning_rate": 1.0692888284656136e-05, - "loss": 0.0878, + "learning_rate": 2.0707409545504192e-05, + "loss": 0.0509, "step": 100265 }, { "epoch": 4.68, - "learning_rate": 1.0692419483380996e-05, - "loss": 0.1519, + "learning_rate": 2.0706941475668172e-05, + "loss": 0.2666, "step": 100270 }, { "epoch": 4.68, - "learning_rate": 1.0691950682105856e-05, - "loss": 0.1982, + "learning_rate": 2.070647340583215e-05, + "loss": 0.1582, "step": 100275 }, { "epoch": 4.68, - "learning_rate": 1.0691481880830716e-05, - "loss": 0.0716, + "learning_rate": 2.070600533599613e-05, + "loss": 0.0736, "step": 100280 }, { "epoch": 4.68, - "learning_rate": 1.0691013079555576e-05, - "loss": 0.0221, + "learning_rate": 2.0705537266160112e-05, + "loss": 0.0201, "step": 100285 }, { "epoch": 4.68, - "learning_rate": 1.0690544278280438e-05, - "loss": 0.0543, + "learning_rate": 2.070506919632409e-05, + "loss": 0.0254, "step": 100290 }, { "epoch": 4.68, - "learning_rate": 1.0690075477005298e-05, - "loss": 0.0433, + "learning_rate": 2.070460112648807e-05, + "loss": 0.0126, "step": 100295 }, { "epoch": 4.68, - "learning_rate": 1.068960667573016e-05, - "loss": 0.0965, + "learning_rate": 2.070413305665205e-05, + "loss": 0.0658, "step": 100300 }, { "epoch": 4.68, - "learning_rate": 1.068913787445502e-05, - "loss": 0.033, + "learning_rate": 2.0703664986816034e-05, + "loss": 0.0517, "step": 100305 }, { "epoch": 4.68, - "learning_rate": 1.068866907317988e-05, - "loss": 0.0709, + "learning_rate": 2.0703196916980014e-05, + "loss": 0.0697, "step": 100310 }, { "epoch": 4.68, - "learning_rate": 1.068820027190474e-05, - "loss": 0.1506, + "learning_rate": 2.0702728847143994e-05, + "loss": 0.0617, "step": 100315 }, { "epoch": 4.68, - "learning_rate": 1.06877314706296e-05, - "loss": 0.081, + "learning_rate": 2.0702260777307977e-05, + "loss": 0.1101, "step": 100320 }, { "epoch": 4.68, - "learning_rate": 1.068726266935446e-05, - "loss": 0.2956, + "learning_rate": 2.0701792707471957e-05, + "loss": 0.2308, "step": 100325 }, { "epoch": 4.68, - "learning_rate": 1.0686793868079322e-05, - "loss": 0.0425, + "learning_rate": 2.0701324637635937e-05, + "loss": 0.114, "step": 100330 }, { "epoch": 4.68, - "learning_rate": 1.0686325066804182e-05, - "loss": 0.0386, + "learning_rate": 2.0700856567799917e-05, + "loss": 0.0307, "step": 100335 }, { "epoch": 4.68, - "learning_rate": 1.0685856265529042e-05, - "loss": 0.0421, + "learning_rate": 2.0700388497963897e-05, + "loss": 0.0229, "step": 100340 }, { "epoch": 4.68, - "learning_rate": 1.0685387464253905e-05, - "loss": 0.0551, + "learning_rate": 2.0699920428127876e-05, + "loss": 0.0374, "step": 100345 }, { "epoch": 4.68, - "learning_rate": 1.0684918662978765e-05, - "loss": 0.0615, + "learning_rate": 2.0699452358291856e-05, + "loss": 0.0563, "step": 100350 }, { "epoch": 4.68, - "learning_rate": 1.0684449861703625e-05, - "loss": 0.1055, + "learning_rate": 2.0698984288455836e-05, + "loss": 0.0798, "step": 100355 }, { "epoch": 4.68, - "learning_rate": 1.0683981060428485e-05, - "loss": 0.0648, + "learning_rate": 2.069851621861982e-05, + "loss": 0.0529, "step": 100360 }, { "epoch": 4.68, - "learning_rate": 1.0683512259153345e-05, - "loss": 0.1031, + "learning_rate": 2.06980481487838e-05, + "loss": 0.1305, "step": 100365 }, { "epoch": 4.68, - "learning_rate": 1.0683043457878207e-05, - "loss": 0.2118, + "learning_rate": 2.069758007894778e-05, + "loss": 0.1081, "step": 100370 }, { "epoch": 4.68, - "learning_rate": 1.0682574656603067e-05, - "loss": 0.2273, + "learning_rate": 2.0697112009111762e-05, + "loss": 0.1886, "step": 100375 }, { "epoch": 4.68, - "learning_rate": 1.0682105855327927e-05, - "loss": 0.0594, + "learning_rate": 2.0696643939275742e-05, + "loss": 0.0849, "step": 100380 }, { "epoch": 4.68, - "learning_rate": 1.0681637054052787e-05, - "loss": 0.0534, + "learning_rate": 2.069617586943972e-05, + "loss": 0.0355, "step": 100385 }, { "epoch": 4.68, - "learning_rate": 1.068116825277765e-05, - "loss": 0.0265, + "learning_rate": 2.06957077996037e-05, + "loss": 0.0361, "step": 100390 }, { "epoch": 4.68, - "learning_rate": 1.068069945150251e-05, - "loss": 0.0198, + "learning_rate": 2.0695239729767685e-05, + "loss": 0.0654, "step": 100395 }, { "epoch": 4.68, - "learning_rate": 1.068023065022737e-05, - "loss": 0.0553, + "learning_rate": 2.069477165993166e-05, + "loss": 0.0294, "step": 100400 }, { "epoch": 4.69, - "learning_rate": 1.067976184895223e-05, - "loss": 0.0506, + "learning_rate": 2.069430359009564e-05, + "loss": 0.0263, "step": 100405 }, { "epoch": 4.69, - "learning_rate": 1.0679293047677091e-05, - "loss": 0.0465, + "learning_rate": 2.069383552025962e-05, + "loss": 0.1162, "step": 100410 }, { "epoch": 4.69, - "learning_rate": 1.0678824246401951e-05, - "loss": 0.0542, + "learning_rate": 2.0693367450423604e-05, + "loss": 0.4251, "step": 100415 }, { "epoch": 4.69, - "learning_rate": 1.0678355445126811e-05, - "loss": 0.0767, + "learning_rate": 2.0692899380587584e-05, + "loss": 0.1266, "step": 100420 }, { "epoch": 4.69, - "learning_rate": 1.0677886643851671e-05, - "loss": 0.4844, + "learning_rate": 2.0692431310751564e-05, + "loss": 0.2186, "step": 100425 }, { "epoch": 4.69, - "learning_rate": 1.0677417842576531e-05, - "loss": 0.0618, + "learning_rate": 2.0691963240915543e-05, + "loss": 0.0723, "step": 100430 }, { "epoch": 4.69, - "learning_rate": 1.0676949041301394e-05, - "loss": 0.0378, + "learning_rate": 2.0691495171079527e-05, + "loss": 0.0205, "step": 100435 }, { "epoch": 4.69, - "learning_rate": 1.0676480240026254e-05, - "loss": 0.0389, + "learning_rate": 2.0691027101243506e-05, + "loss": 0.0332, "step": 100440 }, { "epoch": 4.69, - "learning_rate": 1.0676011438751114e-05, - "loss": 0.0847, + "learning_rate": 2.0690559031407486e-05, + "loss": 0.1131, "step": 100445 }, { "epoch": 4.69, - "learning_rate": 1.0675542637475976e-05, - "loss": 0.1025, + "learning_rate": 2.069009096157147e-05, + "loss": 0.0056, "step": 100450 }, { "epoch": 4.69, - "learning_rate": 1.0675073836200836e-05, - "loss": 0.0869, + "learning_rate": 2.068962289173545e-05, + "loss": 0.0388, "step": 100455 }, { "epoch": 4.69, - "learning_rate": 1.0674605034925696e-05, - "loss": 0.137, + "learning_rate": 2.068915482189943e-05, + "loss": 0.0913, "step": 100460 }, { "epoch": 4.69, - "learning_rate": 1.0674136233650556e-05, - "loss": 0.1748, + "learning_rate": 2.0688686752063406e-05, + "loss": 0.0771, "step": 100465 }, { "epoch": 4.69, - "learning_rate": 1.0673667432375416e-05, - "loss": 0.2259, + "learning_rate": 2.068821868222739e-05, + "loss": 0.1496, "step": 100470 }, { "epoch": 4.69, - "learning_rate": 1.0673198631100277e-05, - "loss": 0.3033, + "learning_rate": 2.068775061239137e-05, + "loss": 0.2272, "step": 100475 }, { "epoch": 4.69, - "learning_rate": 1.0672729829825137e-05, - "loss": 0.0709, + "learning_rate": 2.068728254255535e-05, + "loss": 0.0755, "step": 100480 }, { "epoch": 4.69, - "learning_rate": 1.0672261028549999e-05, - "loss": 0.0298, + "learning_rate": 2.0686814472719328e-05, + "loss": 0.2681, "step": 100485 }, { "epoch": 4.69, - "learning_rate": 1.067179222727486e-05, - "loss": 0.0149, + "learning_rate": 2.068634640288331e-05, + "loss": 0.0145, "step": 100490 }, { "epoch": 4.69, - "learning_rate": 1.067132342599972e-05, - "loss": 0.0639, + "learning_rate": 2.068587833304729e-05, + "loss": 0.0276, "step": 100495 }, { "epoch": 4.69, - "learning_rate": 1.067085462472458e-05, - "loss": 0.0498, + "learning_rate": 2.068541026321127e-05, + "loss": 0.0348, "step": 100500 }, { "epoch": 4.69, - "learning_rate": 1.067038582344944e-05, - "loss": 0.0721, + "learning_rate": 2.0684942193375254e-05, + "loss": 0.0344, "step": 100505 }, { "epoch": 4.69, - "learning_rate": 1.06699170221743e-05, - "loss": 0.1122, + "learning_rate": 2.0684474123539234e-05, + "loss": 0.1, "step": 100510 }, { "epoch": 4.69, - "learning_rate": 1.0669448220899162e-05, - "loss": 0.1297, + "learning_rate": 2.0684006053703214e-05, + "loss": 0.0771, "step": 100515 }, { "epoch": 4.69, - "learning_rate": 1.0668979419624022e-05, - "loss": 0.2213, + "learning_rate": 2.0683537983867194e-05, + "loss": 0.2195, "step": 100520 }, { "epoch": 4.69, - "learning_rate": 1.0668510618348882e-05, - "loss": 0.213, + "learning_rate": 2.0683069914031177e-05, + "loss": 0.1979, "step": 100525 }, { "epoch": 4.69, - "learning_rate": 1.0668041817073745e-05, - "loss": 0.0283, + "learning_rate": 2.0682601844195153e-05, + "loss": 0.0519, "step": 100530 }, { "epoch": 4.69, - "learning_rate": 1.0667573015798605e-05, - "loss": 0.0678, + "learning_rate": 2.0682133774359133e-05, + "loss": 0.0461, "step": 100535 }, { "epoch": 4.69, - "learning_rate": 1.0667104214523465e-05, - "loss": 0.0633, + "learning_rate": 2.0681665704523113e-05, + "loss": 0.0353, "step": 100540 }, { "epoch": 4.69, - "learning_rate": 1.0666635413248325e-05, - "loss": 0.0746, + "learning_rate": 2.0681197634687096e-05, + "loss": 0.0548, "step": 100545 }, { "epoch": 4.69, - "learning_rate": 1.0666166611973185e-05, - "loss": 0.0363, + "learning_rate": 2.0680729564851076e-05, + "loss": 0.045, "step": 100550 }, { "epoch": 4.69, - "learning_rate": 1.0665697810698046e-05, - "loss": 0.1217, + "learning_rate": 2.0680261495015056e-05, + "loss": 0.0737, "step": 100555 }, { "epoch": 4.69, - "learning_rate": 1.0665229009422906e-05, - "loss": 0.1239, + "learning_rate": 2.067979342517904e-05, + "loss": 0.1189, "step": 100560 }, { "epoch": 4.69, - "learning_rate": 1.0664760208147766e-05, - "loss": 0.0527, + "learning_rate": 2.067932535534302e-05, + "loss": 0.1149, "step": 100565 }, { "epoch": 4.69, - "learning_rate": 1.0664291406872626e-05, - "loss": 0.084, + "learning_rate": 2.0678857285507e-05, + "loss": 0.1223, "step": 100570 }, { "epoch": 4.69, - "learning_rate": 1.066382260559749e-05, - "loss": 0.2952, + "learning_rate": 2.067838921567098e-05, + "loss": 0.1488, "step": 100575 }, { "epoch": 4.69, - "learning_rate": 1.066335380432235e-05, - "loss": 0.076, + "learning_rate": 2.067792114583496e-05, + "loss": 0.0959, "step": 100580 }, { "epoch": 4.69, - "learning_rate": 1.066288500304721e-05, - "loss": 0.0167, + "learning_rate": 2.067745307599894e-05, + "loss": 0.0347, "step": 100585 }, { "epoch": 4.69, - "learning_rate": 1.066241620177207e-05, - "loss": 0.0179, + "learning_rate": 2.0676985006162918e-05, + "loss": 0.0316, "step": 100590 }, { "epoch": 4.69, - "learning_rate": 1.0661947400496931e-05, - "loss": 0.006, + "learning_rate": 2.0676516936326898e-05, + "loss": 0.0487, "step": 100595 }, { "epoch": 4.69, - "learning_rate": 1.066147859922179e-05, - "loss": 0.0712, + "learning_rate": 2.067604886649088e-05, + "loss": 0.0394, "step": 100600 }, { "epoch": 4.69, - "learning_rate": 1.066100979794665e-05, - "loss": 0.0385, + "learning_rate": 2.067558079665486e-05, + "loss": 0.0248, "step": 100605 }, { "epoch": 4.69, - "learning_rate": 1.066054099667151e-05, - "loss": 0.1344, + "learning_rate": 2.067511272681884e-05, + "loss": 0.0787, "step": 100610 }, { "epoch": 4.69, - "learning_rate": 1.0660072195396372e-05, - "loss": 0.1812, + "learning_rate": 2.0674644656982824e-05, + "loss": 0.0828, "step": 100615 }, { "epoch": 4.7, - "learning_rate": 1.0659603394121232e-05, - "loss": 0.1248, + "learning_rate": 2.0674176587146804e-05, + "loss": 0.1169, "step": 100620 }, { "epoch": 4.7, - "learning_rate": 1.0659134592846094e-05, - "loss": 0.2997, + "learning_rate": 2.0673708517310783e-05, + "loss": 0.2705, "step": 100625 }, { "epoch": 4.7, - "learning_rate": 1.0658665791570954e-05, - "loss": 0.1126, + "learning_rate": 2.0673240447474763e-05, + "loss": 0.0478, "step": 100630 }, { "epoch": 4.7, - "learning_rate": 1.0658196990295815e-05, - "loss": 0.0013, + "learning_rate": 2.0672772377638746e-05, + "loss": 0.0211, "step": 100635 }, { "epoch": 4.7, - "learning_rate": 1.0657728189020675e-05, - "loss": 0.0172, + "learning_rate": 2.0672304307802726e-05, + "loss": 0.0166, "step": 100640 }, { "epoch": 4.7, - "learning_rate": 1.0657259387745535e-05, - "loss": 0.0729, + "learning_rate": 2.0671836237966706e-05, + "loss": 0.0158, "step": 100645 }, { "epoch": 4.7, - "learning_rate": 1.0656790586470395e-05, - "loss": 0.0848, + "learning_rate": 2.0671368168130686e-05, + "loss": 0.0469, "step": 100650 }, { "epoch": 4.7, - "learning_rate": 1.0656321785195257e-05, - "loss": 0.0482, + "learning_rate": 2.0670900098294666e-05, + "loss": 0.0942, "step": 100655 }, { "epoch": 4.7, - "learning_rate": 1.0655852983920117e-05, - "loss": 0.08, + "learning_rate": 2.0670432028458646e-05, + "loss": 0.0858, "step": 100660 }, { "epoch": 4.7, - "learning_rate": 1.0655384182644977e-05, - "loss": 0.0696, + "learning_rate": 2.0669963958622625e-05, + "loss": 0.0527, "step": 100665 }, { "epoch": 4.7, - "learning_rate": 1.0654915381369838e-05, - "loss": 0.1357, + "learning_rate": 2.0669495888786605e-05, + "loss": 0.1849, "step": 100670 }, { "epoch": 4.7, - "learning_rate": 1.06544465800947e-05, - "loss": 0.2, + "learning_rate": 2.066902781895059e-05, + "loss": 0.3487, "step": 100675 }, { "epoch": 4.7, - "learning_rate": 1.065397777881956e-05, - "loss": 0.1173, + "learning_rate": 2.0668559749114568e-05, + "loss": 0.0969, "step": 100680 }, { "epoch": 4.7, - "learning_rate": 1.065350897754442e-05, - "loss": 0.0499, + "learning_rate": 2.0668091679278548e-05, + "loss": 0.0087, "step": 100685 }, { "epoch": 4.7, - "learning_rate": 1.065304017626928e-05, - "loss": 0.0274, + "learning_rate": 2.066762360944253e-05, + "loss": 0.0483, "step": 100690 }, { "epoch": 4.7, - "learning_rate": 1.0652571374994141e-05, - "loss": 0.0225, + "learning_rate": 2.066715553960651e-05, + "loss": 0.1106, "step": 100695 }, { "epoch": 4.7, - "learning_rate": 1.0652102573719001e-05, - "loss": 0.0368, + "learning_rate": 2.066668746977049e-05, + "loss": 0.0658, "step": 100700 }, { "epoch": 4.7, - "learning_rate": 1.0651633772443861e-05, - "loss": 0.039, + "learning_rate": 2.066621939993447e-05, + "loss": 0.0337, "step": 100705 }, { "epoch": 4.7, - "learning_rate": 1.0651164971168721e-05, - "loss": 0.0947, + "learning_rate": 2.0665751330098454e-05, + "loss": 0.0366, "step": 100710 }, { "epoch": 4.7, - "learning_rate": 1.0650696169893585e-05, - "loss": 0.1109, + "learning_rate": 2.066528326026243e-05, + "loss": 0.1248, "step": 100715 }, { "epoch": 4.7, - "learning_rate": 1.0650227368618444e-05, - "loss": 0.1628, + "learning_rate": 2.066481519042641e-05, + "loss": 0.1035, "step": 100720 }, { "epoch": 4.7, - "learning_rate": 1.0649758567343304e-05, - "loss": 0.2349, + "learning_rate": 2.066434712059039e-05, + "loss": 0.2199, "step": 100725 }, { "epoch": 4.7, - "learning_rate": 1.0649289766068164e-05, - "loss": 0.0657, + "learning_rate": 2.0663879050754373e-05, + "loss": 0.0786, "step": 100730 }, { "epoch": 4.7, - "learning_rate": 1.0648820964793026e-05, - "loss": 0.0673, + "learning_rate": 2.0663410980918353e-05, + "loss": 0.0695, "step": 100735 }, { "epoch": 4.7, - "learning_rate": 1.0648352163517886e-05, - "loss": 0.0122, + "learning_rate": 2.0662942911082333e-05, + "loss": 0.0219, "step": 100740 }, { "epoch": 4.7, - "learning_rate": 1.0647883362242746e-05, - "loss": 0.0133, + "learning_rate": 2.0662474841246316e-05, + "loss": 0.0378, "step": 100745 }, { "epoch": 4.7, - "learning_rate": 1.0647414560967606e-05, - "loss": 0.1202, + "learning_rate": 2.0662006771410296e-05, + "loss": 0.0678, "step": 100750 }, { "epoch": 4.7, - "learning_rate": 1.0646945759692466e-05, - "loss": 0.0651, + "learning_rate": 2.0661538701574276e-05, + "loss": 0.0694, "step": 100755 }, { "epoch": 4.7, - "learning_rate": 1.0646476958417329e-05, - "loss": 0.0506, + "learning_rate": 2.0661070631738255e-05, + "loss": 0.0967, "step": 100760 }, { "epoch": 4.7, - "learning_rate": 1.0646008157142189e-05, - "loss": 0.1803, + "learning_rate": 2.066060256190224e-05, + "loss": 0.1192, "step": 100765 }, { "epoch": 4.7, - "learning_rate": 1.0645539355867049e-05, - "loss": 0.1909, + "learning_rate": 2.066013449206622e-05, + "loss": 0.1448, "step": 100770 }, { "epoch": 4.7, - "learning_rate": 1.064507055459191e-05, - "loss": 0.4379, + "learning_rate": 2.0659666422230198e-05, + "loss": 0.2097, "step": 100775 }, { "epoch": 4.7, - "learning_rate": 1.064460175331677e-05, - "loss": 0.0823, + "learning_rate": 2.0659198352394175e-05, + "loss": 0.0534, "step": 100780 }, { "epoch": 4.7, - "learning_rate": 1.064413295204163e-05, - "loss": 0.016, + "learning_rate": 2.0658730282558158e-05, + "loss": 0.0156, "step": 100785 }, { "epoch": 4.7, - "learning_rate": 1.064366415076649e-05, - "loss": 0.0294, + "learning_rate": 2.0658262212722138e-05, + "loss": 0.0264, "step": 100790 }, { "epoch": 4.7, - "learning_rate": 1.064319534949135e-05, - "loss": 0.0207, + "learning_rate": 2.0657794142886118e-05, + "loss": 0.0857, "step": 100795 }, { "epoch": 4.7, - "learning_rate": 1.0642726548216212e-05, - "loss": 0.0547, + "learning_rate": 2.06573260730501e-05, + "loss": 0.064, "step": 100800 }, { "epoch": 4.7, - "learning_rate": 1.0642257746941072e-05, - "loss": 0.0596, + "learning_rate": 2.065685800321408e-05, + "loss": 0.0327, "step": 100805 }, { "epoch": 4.7, - "learning_rate": 1.0641788945665933e-05, - "loss": 0.038, + "learning_rate": 2.065638993337806e-05, + "loss": 0.0898, "step": 100810 }, { "epoch": 4.7, - "learning_rate": 1.0641320144390795e-05, - "loss": 0.0886, + "learning_rate": 2.065592186354204e-05, + "loss": 0.1231, "step": 100815 }, { "epoch": 4.7, - "learning_rate": 1.0640851343115655e-05, - "loss": 0.1261, + "learning_rate": 2.0655453793706023e-05, + "loss": 0.1501, "step": 100820 }, { "epoch": 4.7, - "learning_rate": 1.0640382541840515e-05, - "loss": 0.2122, + "learning_rate": 2.0654985723870003e-05, + "loss": 0.2063, "step": 100825 }, { "epoch": 4.7, - "learning_rate": 1.0639913740565375e-05, - "loss": 0.0946, + "learning_rate": 2.0654517654033983e-05, + "loss": 0.083, "step": 100830 }, { "epoch": 4.71, - "learning_rate": 1.0639444939290235e-05, - "loss": 0.0335, + "learning_rate": 2.0654049584197963e-05, + "loss": 0.0392, "step": 100835 }, { "epoch": 4.71, - "learning_rate": 1.0638976138015096e-05, - "loss": 0.029, + "learning_rate": 2.0653581514361946e-05, + "loss": 0.071, "step": 100840 }, { "epoch": 4.71, - "learning_rate": 1.0638507336739956e-05, - "loss": 0.113, + "learning_rate": 2.0653113444525922e-05, + "loss": 0.0321, "step": 100845 }, { "epoch": 4.71, - "learning_rate": 1.0638038535464816e-05, - "loss": 0.04, + "learning_rate": 2.0652645374689902e-05, + "loss": 0.0209, "step": 100850 }, { "epoch": 4.71, - "learning_rate": 1.063756973418968e-05, - "loss": 0.0688, + "learning_rate": 2.0652177304853882e-05, + "loss": 0.0636, "step": 100855 }, { "epoch": 4.71, - "learning_rate": 1.063710093291454e-05, - "loss": 0.1049, + "learning_rate": 2.0651709235017865e-05, + "loss": 0.123, "step": 100860 }, { "epoch": 4.71, - "learning_rate": 1.06366321316394e-05, - "loss": 0.1635, + "learning_rate": 2.0651241165181845e-05, + "loss": 0.1141, "step": 100865 }, { "epoch": 4.71, - "learning_rate": 1.063616333036426e-05, - "loss": 0.1323, + "learning_rate": 2.0650773095345825e-05, + "loss": 0.1212, "step": 100870 }, { "epoch": 4.71, - "learning_rate": 1.063569452908912e-05, - "loss": 0.1909, + "learning_rate": 2.0650305025509808e-05, + "loss": 0.347, "step": 100875 }, { "epoch": 4.71, - "learning_rate": 1.0635225727813981e-05, - "loss": 0.0875, + "learning_rate": 2.0649836955673788e-05, + "loss": 0.0341, "step": 100880 }, { "epoch": 4.71, - "learning_rate": 1.0634756926538841e-05, - "loss": 0.0142, + "learning_rate": 2.0649368885837768e-05, + "loss": 0.0392, "step": 100885 }, { "epoch": 4.71, - "learning_rate": 1.06342881252637e-05, - "loss": 0.019, + "learning_rate": 2.0648900816001748e-05, + "loss": 0.086, "step": 100890 }, { "epoch": 4.71, - "learning_rate": 1.063381932398856e-05, - "loss": 0.0893, + "learning_rate": 2.064843274616573e-05, + "loss": 0.0399, "step": 100895 }, { "epoch": 4.71, - "learning_rate": 1.0633350522713424e-05, - "loss": 0.0698, + "learning_rate": 2.064796467632971e-05, + "loss": 0.0689, "step": 100900 }, { "epoch": 4.71, - "learning_rate": 1.0632881721438284e-05, - "loss": 0.0431, + "learning_rate": 2.0647496606493687e-05, + "loss": 0.124, "step": 100905 }, { "epoch": 4.71, - "learning_rate": 1.0632412920163144e-05, - "loss": 0.1191, + "learning_rate": 2.0647028536657667e-05, + "loss": 0.0549, "step": 100910 }, { "epoch": 4.71, - "learning_rate": 1.0631944118888004e-05, - "loss": 0.0925, + "learning_rate": 2.064656046682165e-05, + "loss": 0.1014, "step": 100915 }, { "epoch": 4.71, - "learning_rate": 1.0631475317612866e-05, - "loss": 0.1974, + "learning_rate": 2.064609239698563e-05, + "loss": 0.1171, "step": 100920 }, { "epoch": 4.71, - "learning_rate": 1.0631006516337725e-05, - "loss": 0.204, + "learning_rate": 2.064562432714961e-05, + "loss": 0.3154, "step": 100925 }, { "epoch": 4.71, - "learning_rate": 1.0630537715062585e-05, - "loss": 0.0802, + "learning_rate": 2.0645156257313593e-05, + "loss": 0.1162, "step": 100930 }, { "epoch": 4.71, - "learning_rate": 1.0630068913787445e-05, - "loss": 0.0217, + "learning_rate": 2.0644688187477573e-05, + "loss": 0.0223, "step": 100935 }, { "epoch": 4.71, - "learning_rate": 1.0629600112512305e-05, - "loss": 0.0483, + "learning_rate": 2.0644220117641553e-05, + "loss": 0.0172, "step": 100940 }, { "epoch": 4.71, - "learning_rate": 1.0629131311237167e-05, - "loss": 0.042, + "learning_rate": 2.0643752047805532e-05, + "loss": 0.0388, "step": 100945 }, { "epoch": 4.71, - "learning_rate": 1.0628662509962029e-05, - "loss": 0.0397, + "learning_rate": 2.0643283977969516e-05, + "loss": 0.02, "step": 100950 }, { "epoch": 4.71, - "learning_rate": 1.0628193708686888e-05, - "loss": 0.0709, + "learning_rate": 2.0642815908133495e-05, + "loss": 0.0737, "step": 100955 }, { "epoch": 4.71, - "learning_rate": 1.062772490741175e-05, - "loss": 0.1507, + "learning_rate": 2.0642347838297475e-05, + "loss": 0.1195, "step": 100960 }, { "epoch": 4.71, - "learning_rate": 1.062725610613661e-05, - "loss": 0.1002, + "learning_rate": 2.0641879768461455e-05, + "loss": 0.1308, "step": 100965 }, { "epoch": 4.71, - "learning_rate": 1.062678730486147e-05, - "loss": 0.1177, + "learning_rate": 2.0641411698625435e-05, + "loss": 0.212, "step": 100970 }, { "epoch": 4.71, - "learning_rate": 1.062631850358633e-05, - "loss": 0.2481, + "learning_rate": 2.0640943628789415e-05, + "loss": 0.3065, "step": 100975 }, { "epoch": 4.71, - "learning_rate": 1.062584970231119e-05, - "loss": 0.0972, + "learning_rate": 2.0640475558953395e-05, + "loss": 0.0757, "step": 100980 }, { "epoch": 4.71, - "learning_rate": 1.0625380901036051e-05, - "loss": 0.0139, + "learning_rate": 2.0640007489117378e-05, + "loss": 0.0557, "step": 100985 }, { "epoch": 4.71, - "learning_rate": 1.0624912099760911e-05, - "loss": 0.0208, + "learning_rate": 2.0639539419281358e-05, + "loss": 0.0414, "step": 100990 }, { "epoch": 4.71, - "learning_rate": 1.0624443298485773e-05, - "loss": 0.0425, + "learning_rate": 2.0639071349445337e-05, + "loss": 0.1021, "step": 100995 }, { "epoch": 4.71, - "learning_rate": 1.0623974497210635e-05, - "loss": 0.0514, + "learning_rate": 2.0638603279609317e-05, + "loss": 0.0786, "step": 101000 }, { "epoch": 4.71, - "learning_rate": 1.0623505695935495e-05, - "loss": 0.0322, + "learning_rate": 2.06381352097733e-05, + "loss": 0.0696, "step": 101005 }, { "epoch": 4.71, - "learning_rate": 1.0623036894660354e-05, - "loss": 0.0461, + "learning_rate": 2.063766713993728e-05, + "loss": 0.1417, "step": 101010 }, { "epoch": 4.71, - "learning_rate": 1.0622568093385214e-05, - "loss": 0.093, + "learning_rate": 2.063719907010126e-05, + "loss": 0.1694, "step": 101015 }, { "epoch": 4.71, - "learning_rate": 1.0622099292110074e-05, - "loss": 0.1639, + "learning_rate": 2.063673100026524e-05, + "loss": 0.0709, "step": 101020 }, { "epoch": 4.71, - "learning_rate": 1.0621630490834936e-05, - "loss": 0.3203, + "learning_rate": 2.0636262930429223e-05, + "loss": 0.3299, "step": 101025 }, { "epoch": 4.71, - "learning_rate": 1.0621161689559796e-05, - "loss": 0.1225, + "learning_rate": 2.0635794860593203e-05, + "loss": 0.0644, "step": 101030 }, { "epoch": 4.71, - "learning_rate": 1.0620692888284656e-05, - "loss": 0.0396, + "learning_rate": 2.063532679075718e-05, + "loss": 0.0121, "step": 101035 }, { "epoch": 4.71, - "learning_rate": 1.062022408700952e-05, - "loss": 0.0563, + "learning_rate": 2.063485872092116e-05, + "loss": 0.033, "step": 101040 }, { "epoch": 4.71, - "learning_rate": 1.0619755285734379e-05, - "loss": 0.0214, + "learning_rate": 2.0634390651085142e-05, + "loss": 0.0136, "step": 101045 }, { "epoch": 4.72, - "learning_rate": 1.0619286484459239e-05, - "loss": 0.0621, + "learning_rate": 2.0633922581249122e-05, + "loss": 0.044, "step": 101050 }, { "epoch": 4.72, - "learning_rate": 1.0618817683184099e-05, - "loss": 0.0518, + "learning_rate": 2.0633454511413102e-05, + "loss": 0.0415, "step": 101055 }, { "epoch": 4.72, - "learning_rate": 1.0618348881908959e-05, - "loss": 0.0619, + "learning_rate": 2.0632986441577085e-05, + "loss": 0.0586, "step": 101060 }, { "epoch": 4.72, - "learning_rate": 1.061788008063382e-05, - "loss": 0.0415, + "learning_rate": 2.0632518371741065e-05, + "loss": 0.221, "step": 101065 }, { "epoch": 4.72, - "learning_rate": 1.061741127935868e-05, - "loss": 0.0964, + "learning_rate": 2.0632050301905045e-05, + "loss": 0.109, "step": 101070 }, { "epoch": 4.72, - "learning_rate": 1.061694247808354e-05, - "loss": 0.2579, + "learning_rate": 2.0631582232069025e-05, + "loss": 0.2865, "step": 101075 }, { "epoch": 4.72, - "learning_rate": 1.06164736768084e-05, - "loss": 0.1298, + "learning_rate": 2.0631114162233008e-05, + "loss": 0.0692, "step": 101080 }, { "epoch": 4.72, - "learning_rate": 1.0616004875533264e-05, - "loss": 0.007, + "learning_rate": 2.0630646092396988e-05, + "loss": 0.0471, "step": 101085 }, { "epoch": 4.72, - "learning_rate": 1.0615536074258124e-05, - "loss": 0.0423, + "learning_rate": 2.0630178022560967e-05, + "loss": 0.0315, "step": 101090 }, { "epoch": 4.72, - "learning_rate": 1.0615067272982984e-05, - "loss": 0.0619, + "learning_rate": 2.0629709952724944e-05, + "loss": 0.0927, "step": 101095 }, { "epoch": 4.72, - "learning_rate": 1.0614598471707843e-05, - "loss": 0.031, + "learning_rate": 2.0629241882888927e-05, + "loss": 0.016, "step": 101100 }, { "epoch": 4.72, - "learning_rate": 1.0614129670432705e-05, - "loss": 0.0469, + "learning_rate": 2.0628773813052907e-05, + "loss": 0.061, "step": 101105 }, { "epoch": 4.72, - "learning_rate": 1.0613660869157565e-05, - "loss": 0.1384, + "learning_rate": 2.0628305743216887e-05, + "loss": 0.0704, "step": 101110 }, { "epoch": 4.72, - "learning_rate": 1.0613192067882425e-05, - "loss": 0.0661, + "learning_rate": 2.062783767338087e-05, + "loss": 0.0994, "step": 101115 }, { "epoch": 4.72, - "learning_rate": 1.0612723266607285e-05, - "loss": 0.215, + "learning_rate": 2.062736960354485e-05, + "loss": 0.1452, "step": 101120 }, { "epoch": 4.72, - "learning_rate": 1.0612254465332147e-05, - "loss": 0.1955, + "learning_rate": 2.062690153370883e-05, + "loss": 0.2224, "step": 101125 }, { "epoch": 4.72, - "learning_rate": 1.0611785664057006e-05, - "loss": 0.0905, + "learning_rate": 2.062643346387281e-05, + "loss": 0.0776, "step": 101130 }, { "epoch": 4.72, - "learning_rate": 1.0611316862781868e-05, - "loss": 0.0382, + "learning_rate": 2.0625965394036793e-05, + "loss": 0.0283, "step": 101135 }, { "epoch": 4.72, - "learning_rate": 1.061084806150673e-05, - "loss": 0.026, + "learning_rate": 2.0625497324200772e-05, + "loss": 0.064, "step": 101140 }, { "epoch": 4.72, - "learning_rate": 1.061037926023159e-05, - "loss": 0.0418, + "learning_rate": 2.0625029254364752e-05, + "loss": 0.0243, "step": 101145 }, { "epoch": 4.72, - "learning_rate": 1.060991045895645e-05, - "loss": 0.032, + "learning_rate": 2.0624561184528732e-05, + "loss": 0.0305, "step": 101150 }, { "epoch": 4.72, - "learning_rate": 1.060944165768131e-05, - "loss": 0.0463, + "learning_rate": 2.0624093114692715e-05, + "loss": 0.0223, "step": 101155 }, { "epoch": 4.72, - "learning_rate": 1.060897285640617e-05, - "loss": 0.0731, + "learning_rate": 2.062362504485669e-05, + "loss": 0.0483, "step": 101160 }, { "epoch": 4.72, - "learning_rate": 1.0608504055131031e-05, - "loss": 0.1283, + "learning_rate": 2.062315697502067e-05, + "loss": 0.0335, "step": 101165 }, { "epoch": 4.72, - "learning_rate": 1.0608035253855891e-05, - "loss": 0.1381, + "learning_rate": 2.0622688905184655e-05, + "loss": 0.0764, "step": 101170 }, { "epoch": 4.72, - "learning_rate": 1.0607566452580751e-05, - "loss": 0.3131, + "learning_rate": 2.0622220835348635e-05, + "loss": 0.2842, "step": 101175 }, { "epoch": 4.72, - "learning_rate": 1.0607097651305614e-05, - "loss": 0.061, + "learning_rate": 2.0621752765512614e-05, + "loss": 0.0659, "step": 101180 }, { "epoch": 4.72, - "learning_rate": 1.0606628850030474e-05, - "loss": 0.0771, + "learning_rate": 2.0621284695676594e-05, + "loss": 0.0394, "step": 101185 }, { "epoch": 4.72, - "learning_rate": 1.0606160048755334e-05, - "loss": 0.0415, + "learning_rate": 2.0620816625840577e-05, + "loss": 0.0243, "step": 101190 }, { "epoch": 4.72, - "learning_rate": 1.0605691247480194e-05, - "loss": 0.0172, + "learning_rate": 2.0620348556004557e-05, + "loss": 0.0408, "step": 101195 }, { "epoch": 4.72, - "learning_rate": 1.0605222446205054e-05, - "loss": 0.029, + "learning_rate": 2.0619880486168537e-05, + "loss": 0.0439, "step": 101200 }, { "epoch": 4.72, - "learning_rate": 1.0604753644929916e-05, - "loss": 0.0524, + "learning_rate": 2.0619412416332517e-05, + "loss": 0.0523, "step": 101205 }, { "epoch": 4.72, - "learning_rate": 1.0604284843654776e-05, - "loss": 0.0929, + "learning_rate": 2.06189443464965e-05, + "loss": 0.086, "step": 101210 }, { "epoch": 4.72, - "learning_rate": 1.0603816042379635e-05, - "loss": 0.1215, + "learning_rate": 2.061847627666048e-05, + "loss": 0.0952, "step": 101215 }, { "epoch": 4.72, - "learning_rate": 1.0603347241104495e-05, - "loss": 0.1959, + "learning_rate": 2.061800820682446e-05, + "loss": 0.1019, "step": 101220 }, { "epoch": 4.72, - "learning_rate": 1.0602878439829359e-05, - "loss": 0.4005, + "learning_rate": 2.0617540136988436e-05, + "loss": 0.1951, "step": 101225 }, { "epoch": 4.72, - "learning_rate": 1.0602409638554219e-05, - "loss": 0.0942, + "learning_rate": 2.061707206715242e-05, + "loss": 0.1392, "step": 101230 }, { "epoch": 4.72, - "learning_rate": 1.0601940837279079e-05, - "loss": 0.0179, + "learning_rate": 2.06166039973164e-05, + "loss": 0.0209, "step": 101235 }, { "epoch": 4.72, - "learning_rate": 1.0601472036003939e-05, - "loss": 0.0323, + "learning_rate": 2.061613592748038e-05, + "loss": 0.0221, "step": 101240 }, { "epoch": 4.72, - "learning_rate": 1.06010032347288e-05, - "loss": 0.0408, + "learning_rate": 2.0615667857644362e-05, + "loss": 0.0249, "step": 101245 }, { "epoch": 4.72, - "learning_rate": 1.060053443345366e-05, - "loss": 0.0497, + "learning_rate": 2.0615199787808342e-05, + "loss": 0.056, "step": 101250 }, { "epoch": 4.72, - "learning_rate": 1.060006563217852e-05, - "loss": 0.0602, + "learning_rate": 2.0614731717972322e-05, + "loss": 0.0186, "step": 101255 }, { "epoch": 4.72, - "learning_rate": 1.059959683090338e-05, - "loss": 0.1121, + "learning_rate": 2.06142636481363e-05, + "loss": 0.0486, "step": 101260 }, { "epoch": 4.73, - "learning_rate": 1.059912802962824e-05, - "loss": 0.097, + "learning_rate": 2.0613795578300285e-05, + "loss": 0.059, "step": 101265 }, { "epoch": 4.73, - "learning_rate": 1.0598659228353102e-05, - "loss": 0.1617, + "learning_rate": 2.0613327508464265e-05, + "loss": 0.0867, "step": 101270 }, { "epoch": 4.73, - "learning_rate": 1.0598190427077963e-05, - "loss": 0.3105, + "learning_rate": 2.0612859438628244e-05, + "loss": 0.23, "step": 101275 }, { "epoch": 4.73, - "learning_rate": 1.0597721625802823e-05, - "loss": 0.0835, + "learning_rate": 2.0612391368792224e-05, + "loss": 0.084, "step": 101280 }, { "epoch": 4.73, - "learning_rate": 1.0597252824527685e-05, - "loss": 0.0294, + "learning_rate": 2.0611923298956204e-05, + "loss": 0.0173, "step": 101285 }, { "epoch": 4.73, - "learning_rate": 1.0596784023252545e-05, - "loss": 0.0231, + "learning_rate": 2.0611455229120184e-05, + "loss": 0.03, "step": 101290 }, { "epoch": 4.73, - "learning_rate": 1.0596315221977405e-05, - "loss": 0.0334, + "learning_rate": 2.0610987159284164e-05, + "loss": 0.0612, "step": 101295 }, { "epoch": 4.73, - "learning_rate": 1.0595846420702265e-05, - "loss": 0.0379, + "learning_rate": 2.0610519089448147e-05, + "loss": 0.0302, "step": 101300 }, { "epoch": 4.73, - "learning_rate": 1.0595377619427124e-05, - "loss": 0.1497, + "learning_rate": 2.0610051019612127e-05, + "loss": 0.0303, "step": 101305 }, { "epoch": 4.73, - "learning_rate": 1.0594908818151986e-05, - "loss": 0.0553, + "learning_rate": 2.0609582949776107e-05, + "loss": 0.0582, "step": 101310 }, { "epoch": 4.73, - "learning_rate": 1.0594440016876846e-05, - "loss": 0.1355, + "learning_rate": 2.0609114879940086e-05, + "loss": 0.0981, "step": 101315 }, { "epoch": 4.73, - "learning_rate": 1.0593971215601708e-05, - "loss": 0.1098, + "learning_rate": 2.060864681010407e-05, + "loss": 0.1056, "step": 101320 }, { "epoch": 4.73, - "learning_rate": 1.059350241432657e-05, - "loss": 0.3215, + "learning_rate": 2.060817874026805e-05, + "loss": 0.2134, "step": 101325 }, { "epoch": 4.73, - "learning_rate": 1.059303361305143e-05, - "loss": 0.0571, + "learning_rate": 2.060771067043203e-05, + "loss": 0.0733, "step": 101330 }, { "epoch": 4.73, - "learning_rate": 1.0592564811776289e-05, - "loss": 0.0312, + "learning_rate": 2.060724260059601e-05, + "loss": 0.0385, "step": 101335 }, { "epoch": 4.73, - "learning_rate": 1.0592096010501149e-05, - "loss": 0.0226, + "learning_rate": 2.0606774530759992e-05, + "loss": 0.0232, "step": 101340 }, { "epoch": 4.73, - "learning_rate": 1.0591627209226009e-05, - "loss": 0.023, + "learning_rate": 2.0606306460923972e-05, + "loss": 0.0324, "step": 101345 }, { "epoch": 4.73, - "learning_rate": 1.059115840795087e-05, - "loss": 0.0544, + "learning_rate": 2.060583839108795e-05, + "loss": 0.0429, "step": 101350 }, { "epoch": 4.73, - "learning_rate": 1.059068960667573e-05, - "loss": 0.0402, + "learning_rate": 2.060537032125193e-05, + "loss": 0.0529, "step": 101355 }, { "epoch": 4.73, - "learning_rate": 1.059022080540059e-05, - "loss": 0.0666, + "learning_rate": 2.060490225141591e-05, + "loss": 0.0396, "step": 101360 }, { "epoch": 4.73, - "learning_rate": 1.0589752004125454e-05, - "loss": 0.1517, + "learning_rate": 2.060443418157989e-05, + "loss": 0.1532, "step": 101365 }, { "epoch": 4.73, - "learning_rate": 1.0589283202850314e-05, - "loss": 0.2026, + "learning_rate": 2.060396611174387e-05, + "loss": 0.144, "step": 101370 }, { "epoch": 4.73, - "learning_rate": 1.0588814401575174e-05, - "loss": 0.2658, + "learning_rate": 2.0603498041907854e-05, + "loss": 0.2462, "step": 101375 }, { "epoch": 4.73, - "learning_rate": 1.0588345600300034e-05, - "loss": 0.068, + "learning_rate": 2.0603029972071834e-05, + "loss": 0.0599, "step": 101380 }, { "epoch": 4.73, - "learning_rate": 1.0587876799024894e-05, - "loss": 0.0279, + "learning_rate": 2.0602561902235814e-05, + "loss": 0.026, "step": 101385 }, { "epoch": 4.73, - "learning_rate": 1.0587407997749755e-05, - "loss": 0.035, + "learning_rate": 2.0602093832399794e-05, + "loss": 0.0138, "step": 101390 }, { "epoch": 4.73, - "learning_rate": 1.0586939196474615e-05, - "loss": 0.0521, + "learning_rate": 2.0601625762563777e-05, + "loss": 0.028, "step": 101395 }, { "epoch": 4.73, - "learning_rate": 1.0586470395199475e-05, - "loss": 0.0612, + "learning_rate": 2.0601157692727757e-05, + "loss": 0.0191, "step": 101400 }, { "epoch": 4.73, - "learning_rate": 1.0586001593924335e-05, - "loss": 0.0438, + "learning_rate": 2.0600689622891737e-05, + "loss": 0.0588, "step": 101405 }, { "epoch": 4.73, - "learning_rate": 1.0585532792649198e-05, - "loss": 0.0862, + "learning_rate": 2.0600221553055716e-05, + "loss": 0.1307, "step": 101410 }, { "epoch": 4.73, - "learning_rate": 1.0585063991374058e-05, - "loss": 0.1029, + "learning_rate": 2.0599753483219696e-05, + "loss": 0.0749, "step": 101415 }, { "epoch": 4.73, - "learning_rate": 1.0584595190098918e-05, - "loss": 0.2249, + "learning_rate": 2.0599285413383676e-05, + "loss": 0.0841, "step": 101420 }, { "epoch": 4.73, - "learning_rate": 1.0584126388823778e-05, - "loss": 0.3748, + "learning_rate": 2.0598817343547656e-05, + "loss": 0.2315, "step": 101425 }, { "epoch": 4.73, - "learning_rate": 1.058365758754864e-05, - "loss": 0.0915, + "learning_rate": 2.059834927371164e-05, + "loss": 0.1001, "step": 101430 }, { "epoch": 4.73, - "learning_rate": 1.05831887862735e-05, - "loss": 0.0284, + "learning_rate": 2.059788120387562e-05, + "loss": 0.0273, "step": 101435 }, { "epoch": 4.73, - "learning_rate": 1.058271998499836e-05, - "loss": 0.0441, + "learning_rate": 2.05974131340396e-05, + "loss": 0.0127, "step": 101440 }, { "epoch": 4.73, - "learning_rate": 1.058225118372322e-05, - "loss": 0.0797, + "learning_rate": 2.059694506420358e-05, + "loss": 0.0375, "step": 101445 }, { "epoch": 4.73, - "learning_rate": 1.058178238244808e-05, - "loss": 0.0886, + "learning_rate": 2.0596476994367562e-05, + "loss": 0.0445, "step": 101450 }, { "epoch": 4.73, - "learning_rate": 1.0581313581172941e-05, - "loss": 0.0483, + "learning_rate": 2.059600892453154e-05, + "loss": 0.0404, "step": 101455 }, { "epoch": 4.73, - "learning_rate": 1.0580844779897803e-05, - "loss": 0.0716, + "learning_rate": 2.059554085469552e-05, + "loss": 0.0877, "step": 101460 }, { "epoch": 4.73, - "learning_rate": 1.0580375978622663e-05, - "loss": 0.0923, + "learning_rate": 2.05950727848595e-05, + "loss": 0.0606, "step": 101465 }, { "epoch": 4.73, - "learning_rate": 1.0579907177347524e-05, - "loss": 0.1421, + "learning_rate": 2.0594604715023484e-05, + "loss": 0.1142, "step": 101470 }, { "epoch": 4.73, - "learning_rate": 1.0579438376072384e-05, - "loss": 0.1857, + "learning_rate": 2.059413664518746e-05, + "loss": 0.2121, "step": 101475 }, { "epoch": 4.74, - "learning_rate": 1.0578969574797244e-05, - "loss": 0.0826, + "learning_rate": 2.059366857535144e-05, + "loss": 0.0558, "step": 101480 }, { "epoch": 4.74, - "learning_rate": 1.0578500773522104e-05, - "loss": 0.0218, + "learning_rate": 2.0593200505515424e-05, + "loss": 0.0563, "step": 101485 }, { "epoch": 4.74, - "learning_rate": 1.0578031972246966e-05, - "loss": 0.0298, + "learning_rate": 2.0592732435679404e-05, + "loss": 0.0502, "step": 101490 }, { "epoch": 4.74, - "learning_rate": 1.0577563170971826e-05, - "loss": 0.0628, + "learning_rate": 2.0592264365843383e-05, + "loss": 0.0142, "step": 101495 }, { "epoch": 4.74, - "learning_rate": 1.0577094369696686e-05, - "loss": 0.0613, + "learning_rate": 2.0591796296007363e-05, + "loss": 0.0572, "step": 101500 }, { "epoch": 4.74, - "learning_rate": 1.0576625568421547e-05, - "loss": 0.0306, + "learning_rate": 2.0591328226171347e-05, + "loss": 0.0408, "step": 101505 }, { "epoch": 4.74, - "learning_rate": 1.0576156767146409e-05, - "loss": 0.0818, + "learning_rate": 2.0590860156335326e-05, + "loss": 0.1245, "step": 101510 }, { "epoch": 4.74, - "learning_rate": 1.0575687965871269e-05, - "loss": 0.0592, + "learning_rate": 2.0590392086499306e-05, + "loss": 0.1332, "step": 101515 }, { "epoch": 4.74, - "learning_rate": 1.0575219164596129e-05, - "loss": 0.1266, + "learning_rate": 2.0589924016663286e-05, + "loss": 0.1631, "step": 101520 }, { "epoch": 4.74, - "learning_rate": 1.0574750363320989e-05, - "loss": 0.217, + "learning_rate": 2.058945594682727e-05, + "loss": 0.2125, "step": 101525 }, { "epoch": 4.74, - "learning_rate": 1.057428156204585e-05, - "loss": 0.0769, + "learning_rate": 2.058898787699125e-05, + "loss": 0.0757, "step": 101530 }, { "epoch": 4.74, - "learning_rate": 1.057381276077071e-05, - "loss": 0.0268, + "learning_rate": 2.058851980715523e-05, + "loss": 0.0286, "step": 101535 }, { "epoch": 4.74, - "learning_rate": 1.057334395949557e-05, - "loss": 0.0274, + "learning_rate": 2.058805173731921e-05, + "loss": 0.0424, "step": 101540 }, { "epoch": 4.74, - "learning_rate": 1.057287515822043e-05, - "loss": 0.0371, + "learning_rate": 2.058758366748319e-05, + "loss": 0.0949, "step": 101545 }, { "epoch": 4.74, - "learning_rate": 1.0572406356945293e-05, - "loss": 0.0786, + "learning_rate": 2.0587115597647168e-05, + "loss": 0.0446, "step": 101550 }, { "epoch": 4.74, - "learning_rate": 1.0571937555670153e-05, - "loss": 0.0879, + "learning_rate": 2.0586647527811148e-05, + "loss": 0.0546, "step": 101555 }, { "epoch": 4.74, - "learning_rate": 1.0571468754395013e-05, - "loss": 0.0993, + "learning_rate": 2.058617945797513e-05, + "loss": 0.0683, "step": 101560 }, { "epoch": 4.74, - "learning_rate": 1.0570999953119873e-05, - "loss": 0.1341, + "learning_rate": 2.058571138813911e-05, + "loss": 0.0424, "step": 101565 }, { "epoch": 4.74, - "learning_rate": 1.0570531151844735e-05, - "loss": 0.238, + "learning_rate": 2.058524331830309e-05, + "loss": 0.0894, "step": 101570 }, { "epoch": 4.74, - "learning_rate": 1.0570062350569595e-05, - "loss": 0.2976, + "learning_rate": 2.058477524846707e-05, + "loss": 0.2125, "step": 101575 }, { "epoch": 4.74, - "learning_rate": 1.0569593549294455e-05, - "loss": 0.0692, + "learning_rate": 2.0584307178631054e-05, + "loss": 0.0662, "step": 101580 }, { "epoch": 4.74, - "learning_rate": 1.0569124748019315e-05, - "loss": 0.03, + "learning_rate": 2.0583839108795034e-05, + "loss": 0.0079, "step": 101585 }, { "epoch": 4.74, - "learning_rate": 1.0568655946744175e-05, - "loss": 0.0146, + "learning_rate": 2.0583371038959014e-05, + "loss": 0.0444, "step": 101590 }, { "epoch": 4.74, - "learning_rate": 1.0568187145469036e-05, - "loss": 0.0406, + "learning_rate": 2.0582902969122997e-05, + "loss": 0.0348, "step": 101595 }, { "epoch": 4.74, - "learning_rate": 1.0567718344193898e-05, - "loss": 0.0327, + "learning_rate": 2.0582434899286973e-05, + "loss": 0.0445, "step": 101600 }, { "epoch": 4.74, - "learning_rate": 1.0567249542918758e-05, - "loss": 0.0249, + "learning_rate": 2.0581966829450953e-05, + "loss": 0.0168, "step": 101605 }, { "epoch": 4.74, - "learning_rate": 1.056678074164362e-05, - "loss": 0.0787, + "learning_rate": 2.0581498759614933e-05, + "loss": 0.0469, "step": 101610 }, { "epoch": 4.74, - "learning_rate": 1.056631194036848e-05, - "loss": 0.0633, + "learning_rate": 2.0581030689778916e-05, + "loss": 0.1256, "step": 101615 }, { "epoch": 4.74, - "learning_rate": 1.056584313909334e-05, - "loss": 0.0889, + "learning_rate": 2.0580562619942896e-05, + "loss": 0.0965, "step": 101620 }, { "epoch": 4.74, - "learning_rate": 1.05653743378182e-05, - "loss": 0.1909, + "learning_rate": 2.0580094550106876e-05, + "loss": 0.187, "step": 101625 }, { "epoch": 4.74, - "learning_rate": 1.0564905536543059e-05, - "loss": 0.0471, + "learning_rate": 2.0579626480270856e-05, + "loss": 0.0824, "step": 101630 }, { "epoch": 4.74, - "learning_rate": 1.056443673526792e-05, - "loss": 0.029, + "learning_rate": 2.057915841043484e-05, + "loss": 0.0439, "step": 101635 }, { "epoch": 4.74, - "learning_rate": 1.056396793399278e-05, - "loss": 0.0324, + "learning_rate": 2.057869034059882e-05, + "loss": 0.029, "step": 101640 }, { "epoch": 4.74, - "learning_rate": 1.0563499132717642e-05, - "loss": 0.1051, + "learning_rate": 2.05782222707628e-05, + "loss": 0.0688, "step": 101645 }, { "epoch": 4.74, - "learning_rate": 1.0563030331442504e-05, - "loss": 0.0491, + "learning_rate": 2.0577754200926778e-05, + "loss": 0.0528, "step": 101650 }, { "epoch": 4.74, - "learning_rate": 1.0562561530167364e-05, - "loss": 0.053, + "learning_rate": 2.057728613109076e-05, + "loss": 0.1139, "step": 101655 }, { "epoch": 4.74, - "learning_rate": 1.0562092728892224e-05, - "loss": 0.0921, + "learning_rate": 2.057681806125474e-05, + "loss": 0.1192, "step": 101660 }, { "epoch": 4.74, - "learning_rate": 1.0561623927617084e-05, - "loss": 0.1537, + "learning_rate": 2.0576349991418718e-05, + "loss": 0.0985, "step": 101665 }, { "epoch": 4.74, - "learning_rate": 1.0561155126341944e-05, - "loss": 0.1917, + "learning_rate": 2.05758819215827e-05, + "loss": 0.1532, "step": 101670 }, { "epoch": 4.74, - "learning_rate": 1.0560686325066805e-05, - "loss": 0.2712, + "learning_rate": 2.057541385174668e-05, + "loss": 0.3554, "step": 101675 }, { "epoch": 4.74, - "learning_rate": 1.0560217523791665e-05, - "loss": 0.0696, + "learning_rate": 2.057494578191066e-05, + "loss": 0.0422, "step": 101680 }, { "epoch": 4.74, - "learning_rate": 1.0559748722516525e-05, - "loss": 0.0133, + "learning_rate": 2.057447771207464e-05, + "loss": 0.0137, "step": 101685 }, { "epoch": 4.74, - "learning_rate": 1.0559279921241388e-05, - "loss": 0.0415, + "learning_rate": 2.0574009642238623e-05, + "loss": 0.053, "step": 101690 }, { "epoch": 4.75, - "learning_rate": 1.0558811119966248e-05, - "loss": 0.0612, + "learning_rate": 2.0573541572402603e-05, + "loss": 0.0103, "step": 101695 }, { "epoch": 4.75, - "learning_rate": 1.0558342318691108e-05, - "loss": 0.0546, + "learning_rate": 2.0573073502566583e-05, + "loss": 0.1281, "step": 101700 }, { "epoch": 4.75, - "learning_rate": 1.0557873517415968e-05, - "loss": 0.0456, + "learning_rate": 2.0572605432730563e-05, + "loss": 0.0288, "step": 101705 }, { "epoch": 4.75, - "learning_rate": 1.0557404716140828e-05, - "loss": 0.1161, + "learning_rate": 2.0572137362894546e-05, + "loss": 0.1263, "step": 101710 }, { "epoch": 4.75, - "learning_rate": 1.055693591486569e-05, - "loss": 0.0825, + "learning_rate": 2.0571669293058526e-05, + "loss": 0.1169, "step": 101715 }, { "epoch": 4.75, - "learning_rate": 1.055646711359055e-05, - "loss": 0.1404, + "learning_rate": 2.0571201223222506e-05, + "loss": 0.1622, "step": 101720 }, { "epoch": 4.75, - "learning_rate": 1.055599831231541e-05, - "loss": 0.3184, + "learning_rate": 2.057073315338649e-05, + "loss": 0.1466, "step": 101725 }, { "epoch": 4.75, - "learning_rate": 1.055552951104027e-05, - "loss": 0.0752, + "learning_rate": 2.0570265083550465e-05, + "loss": 0.1006, "step": 101730 }, { "epoch": 4.75, - "learning_rate": 1.0555060709765133e-05, - "loss": 0.0162, + "learning_rate": 2.0569797013714445e-05, + "loss": 0.002, "step": 101735 }, { "epoch": 4.75, - "learning_rate": 1.0554591908489993e-05, - "loss": 0.0307, + "learning_rate": 2.0569328943878425e-05, + "loss": 0.0495, "step": 101740 }, { "epoch": 4.75, - "learning_rate": 1.0554123107214853e-05, - "loss": 0.0248, + "learning_rate": 2.0568860874042408e-05, + "loss": 0.0417, "step": 101745 }, { "epoch": 4.75, - "learning_rate": 1.0553654305939713e-05, - "loss": 0.0339, + "learning_rate": 2.0568392804206388e-05, + "loss": 0.032, "step": 101750 }, { "epoch": 4.75, - "learning_rate": 1.0553185504664574e-05, - "loss": 0.0374, + "learning_rate": 2.0567924734370368e-05, + "loss": 0.0481, "step": 101755 }, { "epoch": 4.75, - "learning_rate": 1.0552716703389434e-05, - "loss": 0.0871, + "learning_rate": 2.0567456664534348e-05, + "loss": 0.0698, "step": 101760 }, { "epoch": 4.75, - "learning_rate": 1.0552247902114294e-05, - "loss": 0.0959, + "learning_rate": 2.056698859469833e-05, + "loss": 0.1335, "step": 101765 }, { "epoch": 4.75, - "learning_rate": 1.0551779100839154e-05, - "loss": 0.1684, + "learning_rate": 2.056652052486231e-05, + "loss": 0.0675, "step": 101770 }, { "epoch": 4.75, - "learning_rate": 1.0551310299564014e-05, - "loss": 0.2322, + "learning_rate": 2.056605245502629e-05, + "loss": 0.3164, "step": 101775 }, { "epoch": 4.75, - "learning_rate": 1.0550841498288876e-05, - "loss": 0.08, + "learning_rate": 2.0565584385190274e-05, + "loss": 0.077, "step": 101780 }, { "epoch": 4.75, - "learning_rate": 1.0550372697013737e-05, - "loss": 0.0207, + "learning_rate": 2.0565116315354254e-05, + "loss": 0.0053, "step": 101785 }, { "epoch": 4.75, - "learning_rate": 1.0549903895738597e-05, - "loss": 0.0214, + "learning_rate": 2.056464824551823e-05, + "loss": 0.0268, "step": 101790 }, { "epoch": 4.75, - "learning_rate": 1.0549435094463459e-05, - "loss": 0.0576, + "learning_rate": 2.056418017568221e-05, + "loss": 0.0203, "step": 101795 }, { "epoch": 4.75, - "learning_rate": 1.0548966293188319e-05, - "loss": 0.0363, + "learning_rate": 2.0563712105846193e-05, + "loss": 0.0391, "step": 101800 }, { "epoch": 4.75, - "learning_rate": 1.0548497491913179e-05, - "loss": 0.0223, + "learning_rate": 2.0563244036010173e-05, + "loss": 0.059, "step": 101805 }, { "epoch": 4.75, - "learning_rate": 1.0548028690638039e-05, - "loss": 0.073, + "learning_rate": 2.0562775966174153e-05, + "loss": 0.0609, "step": 101810 }, { "epoch": 4.75, - "learning_rate": 1.0547559889362899e-05, - "loss": 0.0442, + "learning_rate": 2.0562307896338132e-05, + "loss": 0.1309, "step": 101815 }, { "epoch": 4.75, - "learning_rate": 1.054709108808776e-05, - "loss": 0.1845, + "learning_rate": 2.0561839826502116e-05, + "loss": 0.1129, "step": 101820 }, { "epoch": 4.75, - "learning_rate": 1.054662228681262e-05, - "loss": 0.2365, + "learning_rate": 2.0561371756666096e-05, + "loss": 0.2197, "step": 101825 }, { "epoch": 4.75, - "learning_rate": 1.0546153485537482e-05, - "loss": 0.1072, + "learning_rate": 2.0560903686830075e-05, + "loss": 0.045, "step": 101830 }, { "epoch": 4.75, - "learning_rate": 1.0545684684262343e-05, - "loss": 0.0228, + "learning_rate": 2.0560435616994055e-05, + "loss": 0.0527, "step": 101835 }, { "epoch": 4.75, - "learning_rate": 1.0545215882987203e-05, - "loss": 0.0466, + "learning_rate": 2.055996754715804e-05, + "loss": 0.0196, "step": 101840 }, { "epoch": 4.75, - "learning_rate": 1.0544747081712063e-05, - "loss": 0.0442, + "learning_rate": 2.0559499477322018e-05, + "loss": 0.0335, "step": 101845 }, { "epoch": 4.75, - "learning_rate": 1.0544278280436923e-05, - "loss": 0.0952, + "learning_rate": 2.0559031407485998e-05, + "loss": 0.0305, "step": 101850 }, { "epoch": 4.75, - "learning_rate": 1.0543809479161783e-05, - "loss": 0.0889, + "learning_rate": 2.0558563337649978e-05, + "loss": 0.1126, "step": 101855 }, { "epoch": 4.75, - "learning_rate": 1.0543340677886645e-05, - "loss": 0.0531, + "learning_rate": 2.0558095267813958e-05, + "loss": 0.1174, "step": 101860 }, { "epoch": 4.75, - "learning_rate": 1.0542871876611505e-05, - "loss": 0.055, + "learning_rate": 2.0557627197977937e-05, + "loss": 0.0822, "step": 101865 }, { "epoch": 4.75, - "learning_rate": 1.0542403075336365e-05, - "loss": 0.1037, + "learning_rate": 2.0557159128141917e-05, + "loss": 0.1422, "step": 101870 }, { "epoch": 4.75, - "learning_rate": 1.0541934274061228e-05, - "loss": 0.2653, + "learning_rate": 2.05566910583059e-05, + "loss": 0.1781, "step": 101875 }, { "epoch": 4.75, - "learning_rate": 1.0541465472786088e-05, - "loss": 0.0719, + "learning_rate": 2.055622298846988e-05, + "loss": 0.0784, "step": 101880 }, { "epoch": 4.75, - "learning_rate": 1.0540996671510948e-05, - "loss": 0.0188, + "learning_rate": 2.055575491863386e-05, + "loss": 0.0135, "step": 101885 }, { "epoch": 4.75, - "learning_rate": 1.0540527870235808e-05, - "loss": 0.0263, + "learning_rate": 2.055528684879784e-05, + "loss": 0.0242, "step": 101890 }, { "epoch": 4.75, - "learning_rate": 1.0540059068960668e-05, - "loss": 0.0209, + "learning_rate": 2.0554818778961823e-05, + "loss": 0.0522, "step": 101895 }, { "epoch": 4.75, - "learning_rate": 1.053959026768553e-05, - "loss": 0.0561, + "learning_rate": 2.0554350709125803e-05, + "loss": 0.0414, "step": 101900 }, { "epoch": 4.76, - "learning_rate": 1.053912146641039e-05, - "loss": 0.0401, + "learning_rate": 2.0553882639289783e-05, + "loss": 0.0678, "step": 101905 }, { "epoch": 4.76, - "learning_rate": 1.053865266513525e-05, - "loss": 0.0429, + "learning_rate": 2.0553414569453766e-05, + "loss": 0.0294, "step": 101910 }, { "epoch": 4.76, - "learning_rate": 1.053818386386011e-05, - "loss": 0.187, + "learning_rate": 2.0552946499617746e-05, + "loss": 0.1228, "step": 101915 }, { "epoch": 4.76, - "learning_rate": 1.053771506258497e-05, - "loss": 0.1474, + "learning_rate": 2.0552478429781722e-05, + "loss": 0.1941, "step": 101920 }, { "epoch": 4.76, - "learning_rate": 1.0537246261309832e-05, - "loss": 0.2741, + "learning_rate": 2.0552010359945702e-05, + "loss": 0.375, "step": 101925 }, { "epoch": 4.76, - "learning_rate": 1.0536777460034692e-05, - "loss": 0.0729, + "learning_rate": 2.0551542290109685e-05, + "loss": 0.0801, "step": 101930 }, { "epoch": 4.76, - "learning_rate": 1.0536308658759552e-05, - "loss": 0.0145, + "learning_rate": 2.0551074220273665e-05, + "loss": 0.0148, "step": 101935 }, { "epoch": 4.76, - "learning_rate": 1.0535839857484414e-05, - "loss": 0.0065, + "learning_rate": 2.0550606150437645e-05, + "loss": 0.0237, "step": 101940 }, { "epoch": 4.76, - "learning_rate": 1.0535371056209274e-05, - "loss": 0.0341, + "learning_rate": 2.0550138080601625e-05, + "loss": 0.063, "step": 101945 }, { "epoch": 4.76, - "learning_rate": 1.0534902254934134e-05, - "loss": 0.0818, + "learning_rate": 2.0549670010765608e-05, + "loss": 0.0274, "step": 101950 }, { "epoch": 4.76, - "learning_rate": 1.0534433453658994e-05, - "loss": 0.0284, + "learning_rate": 2.0549201940929588e-05, + "loss": 0.0507, "step": 101955 }, { "epoch": 4.76, - "learning_rate": 1.0533964652383855e-05, - "loss": 0.1548, + "learning_rate": 2.0548733871093568e-05, + "loss": 0.0321, "step": 101960 }, { "epoch": 4.76, - "learning_rate": 1.0533495851108715e-05, - "loss": 0.1037, + "learning_rate": 2.054826580125755e-05, + "loss": 0.0561, "step": 101965 }, { "epoch": 4.76, - "learning_rate": 1.0533027049833577e-05, - "loss": 0.1463, + "learning_rate": 2.054779773142153e-05, + "loss": 0.0823, "step": 101970 }, { "epoch": 4.76, - "learning_rate": 1.0532558248558437e-05, - "loss": 0.2978, + "learning_rate": 2.054732966158551e-05, + "loss": 0.21, "step": 101975 }, { "epoch": 4.76, - "learning_rate": 1.0532089447283299e-05, - "loss": 0.0927, + "learning_rate": 2.0546861591749487e-05, + "loss": 0.0584, "step": 101980 }, { "epoch": 4.76, - "learning_rate": 1.0531620646008158e-05, - "loss": 0.0104, + "learning_rate": 2.054639352191347e-05, + "loss": 0.0226, "step": 101985 }, { "epoch": 4.76, - "learning_rate": 1.0531151844733018e-05, - "loss": 0.0138, + "learning_rate": 2.054592545207745e-05, + "loss": 0.0185, "step": 101990 }, { "epoch": 4.76, - "learning_rate": 1.0530683043457878e-05, - "loss": 0.0604, + "learning_rate": 2.054545738224143e-05, + "loss": 0.0204, "step": 101995 }, { "epoch": 4.76, - "learning_rate": 1.053021424218274e-05, - "loss": 0.0422, + "learning_rate": 2.054498931240541e-05, + "loss": 0.0488, "step": 102000 }, { "epoch": 4.76, - "learning_rate": 1.05297454409076e-05, - "loss": 0.0416, + "learning_rate": 2.0544521242569393e-05, + "loss": 0.0462, "step": 102005 }, { "epoch": 4.76, - "learning_rate": 1.052927663963246e-05, - "loss": 0.0786, + "learning_rate": 2.0544053172733372e-05, + "loss": 0.0917, "step": 102010 }, { "epoch": 4.76, - "learning_rate": 1.0528807838357321e-05, - "loss": 0.0921, + "learning_rate": 2.0543585102897352e-05, + "loss": 0.1532, "step": 102015 }, { "epoch": 4.76, - "learning_rate": 1.0528339037082183e-05, - "loss": 0.118, + "learning_rate": 2.0543117033061335e-05, + "loss": 0.2124, "step": 102020 }, { "epoch": 4.76, - "learning_rate": 1.0527870235807043e-05, - "loss": 0.2083, + "learning_rate": 2.0542648963225315e-05, + "loss": 0.0644, "step": 102025 }, { "epoch": 4.76, - "learning_rate": 1.0527401434531903e-05, - "loss": 0.0936, + "learning_rate": 2.0542180893389295e-05, + "loss": 0.0573, "step": 102030 }, { "epoch": 4.76, - "learning_rate": 1.0526932633256763e-05, - "loss": 0.0237, + "learning_rate": 2.0541712823553275e-05, + "loss": 0.0066, "step": 102035 }, { "epoch": 4.76, - "learning_rate": 1.0526463831981624e-05, - "loss": 0.0362, + "learning_rate": 2.0541244753717258e-05, + "loss": 0.0171, "step": 102040 }, { "epoch": 4.76, - "learning_rate": 1.0525995030706484e-05, - "loss": 0.027, + "learning_rate": 2.0540776683881235e-05, + "loss": 0.1262, "step": 102045 }, { "epoch": 4.76, - "learning_rate": 1.0525526229431344e-05, - "loss": 0.1199, + "learning_rate": 2.0540308614045214e-05, + "loss": 0.0839, "step": 102050 }, { "epoch": 4.76, - "learning_rate": 1.0525057428156204e-05, - "loss": 0.0709, + "learning_rate": 2.0539840544209194e-05, + "loss": 0.0868, "step": 102055 }, { "epoch": 4.76, - "learning_rate": 1.0524588626881068e-05, - "loss": 0.1292, + "learning_rate": 2.0539372474373177e-05, + "loss": 0.0812, "step": 102060 }, { "epoch": 4.76, - "learning_rate": 1.0524119825605928e-05, - "loss": 0.1528, + "learning_rate": 2.0538904404537157e-05, + "loss": 0.1317, "step": 102065 }, { "epoch": 4.76, - "learning_rate": 1.0523651024330787e-05, - "loss": 0.183, + "learning_rate": 2.0538436334701137e-05, + "loss": 0.0822, "step": 102070 }, { "epoch": 4.76, - "learning_rate": 1.0523182223055647e-05, - "loss": 0.3114, + "learning_rate": 2.0537968264865117e-05, + "loss": 0.2261, "step": 102075 }, { "epoch": 4.76, - "learning_rate": 1.0522713421780509e-05, - "loss": 0.0515, + "learning_rate": 2.05375001950291e-05, + "loss": 0.056, "step": 102080 }, { "epoch": 4.76, - "learning_rate": 1.0522244620505369e-05, - "loss": 0.0123, + "learning_rate": 2.053703212519308e-05, + "loss": 0.0054, "step": 102085 }, { "epoch": 4.76, - "learning_rate": 1.0521775819230229e-05, - "loss": 0.0325, + "learning_rate": 2.053656405535706e-05, + "loss": 0.0368, "step": 102090 }, { "epoch": 4.76, - "learning_rate": 1.0521307017955089e-05, - "loss": 0.0539, + "learning_rate": 2.0536095985521043e-05, + "loss": 0.0375, "step": 102095 }, { "epoch": 4.76, - "learning_rate": 1.0520838216679949e-05, - "loss": 0.0782, + "learning_rate": 2.0535627915685023e-05, + "loss": 0.0872, "step": 102100 }, { "epoch": 4.76, - "learning_rate": 1.052036941540481e-05, - "loss": 0.0703, + "learning_rate": 2.0535159845849e-05, + "loss": 0.0879, "step": 102105 }, { "epoch": 4.76, - "learning_rate": 1.0519900614129672e-05, - "loss": 0.1676, + "learning_rate": 2.053469177601298e-05, + "loss": 0.0939, "step": 102110 }, { "epoch": 4.76, - "learning_rate": 1.0519431812854532e-05, - "loss": 0.0893, + "learning_rate": 2.0534223706176962e-05, + "loss": 0.0875, "step": 102115 }, { "epoch": 4.77, - "learning_rate": 1.0518963011579394e-05, - "loss": 0.1772, + "learning_rate": 2.0533755636340942e-05, + "loss": 0.2175, "step": 102120 }, { "epoch": 4.77, - "learning_rate": 1.0518494210304254e-05, - "loss": 0.3778, + "learning_rate": 2.0533287566504922e-05, + "loss": 0.1574, "step": 102125 }, { "epoch": 4.77, - "learning_rate": 1.0518025409029113e-05, - "loss": 0.1143, + "learning_rate": 2.05328194966689e-05, + "loss": 0.0538, "step": 102130 }, { "epoch": 4.77, - "learning_rate": 1.0517556607753973e-05, - "loss": 0.0178, + "learning_rate": 2.0532351426832885e-05, + "loss": 0.0271, "step": 102135 }, { "epoch": 4.77, - "learning_rate": 1.0517087806478833e-05, - "loss": 0.0604, + "learning_rate": 2.0531883356996865e-05, + "loss": 0.0155, "step": 102140 }, { "epoch": 4.77, - "learning_rate": 1.0516619005203695e-05, - "loss": 0.0426, + "learning_rate": 2.0531415287160844e-05, + "loss": 0.0393, "step": 102145 }, { "epoch": 4.77, - "learning_rate": 1.0516150203928555e-05, - "loss": 0.056, + "learning_rate": 2.0530947217324828e-05, + "loss": 0.0658, "step": 102150 }, { "epoch": 4.77, - "learning_rate": 1.0515681402653417e-05, - "loss": 0.0306, + "learning_rate": 2.0530479147488808e-05, + "loss": 0.0564, "step": 102155 }, { "epoch": 4.77, - "learning_rate": 1.0515212601378278e-05, - "loss": 0.1248, + "learning_rate": 2.0530011077652787e-05, + "loss": 0.0316, "step": 102160 }, { "epoch": 4.77, - "learning_rate": 1.0514743800103138e-05, - "loss": 0.1266, + "learning_rate": 2.0529543007816767e-05, + "loss": 0.1015, "step": 102165 }, { "epoch": 4.77, - "learning_rate": 1.0514274998827998e-05, - "loss": 0.0695, + "learning_rate": 2.0529074937980747e-05, + "loss": 0.1757, "step": 102170 }, { "epoch": 4.77, - "learning_rate": 1.0513806197552858e-05, - "loss": 0.2844, + "learning_rate": 2.0528606868144727e-05, + "loss": 0.2948, "step": 102175 }, { "epoch": 4.77, - "learning_rate": 1.0513337396277718e-05, - "loss": 0.1131, + "learning_rate": 2.0528138798308707e-05, + "loss": 0.078, "step": 102180 }, { "epoch": 4.77, - "learning_rate": 1.051286859500258e-05, - "loss": 0.0065, + "learning_rate": 2.0527670728472686e-05, + "loss": 0.0341, "step": 102185 }, { "epoch": 4.77, - "learning_rate": 1.051239979372744e-05, - "loss": 0.073, + "learning_rate": 2.052720265863667e-05, + "loss": 0.0254, "step": 102190 }, { "epoch": 4.77, - "learning_rate": 1.05119309924523e-05, - "loss": 0.0746, + "learning_rate": 2.052673458880065e-05, + "loss": 0.0389, "step": 102195 }, { "epoch": 4.77, - "learning_rate": 1.0511462191177163e-05, - "loss": 0.0159, + "learning_rate": 2.052626651896463e-05, + "loss": 0.0778, "step": 102200 }, { "epoch": 4.77, - "learning_rate": 1.0510993389902023e-05, - "loss": 0.1154, + "learning_rate": 2.0525798449128612e-05, + "loss": 0.04, "step": 102205 }, { "epoch": 4.77, - "learning_rate": 1.0510524588626883e-05, - "loss": 0.0396, + "learning_rate": 2.0525330379292592e-05, + "loss": 0.0621, "step": 102210 }, { "epoch": 4.77, - "learning_rate": 1.0510055787351742e-05, - "loss": 0.1486, + "learning_rate": 2.0524862309456572e-05, + "loss": 0.0488, "step": 102215 }, { "epoch": 4.77, - "learning_rate": 1.0509586986076602e-05, - "loss": 0.1446, + "learning_rate": 2.0524394239620552e-05, + "loss": 0.0848, "step": 102220 }, { "epoch": 4.77, - "learning_rate": 1.0509118184801464e-05, - "loss": 0.2505, + "learning_rate": 2.0523926169784535e-05, + "loss": 0.3061, "step": 102225 }, { "epoch": 4.77, - "learning_rate": 1.0508649383526324e-05, - "loss": 0.0507, + "learning_rate": 2.0523458099948515e-05, + "loss": 0.0875, "step": 102230 }, { "epoch": 4.77, - "learning_rate": 1.0508180582251184e-05, - "loss": 0.0063, + "learning_rate": 2.052299003011249e-05, + "loss": 0.0274, "step": 102235 }, { "epoch": 4.77, - "learning_rate": 1.0507711780976044e-05, - "loss": 0.0407, + "learning_rate": 2.052252196027647e-05, + "loss": 0.0161, "step": 102240 }, { "epoch": 4.77, - "learning_rate": 1.0507242979700904e-05, - "loss": 0.0539, + "learning_rate": 2.0522053890440454e-05, + "loss": 0.0116, "step": 102245 }, { "epoch": 4.77, - "learning_rate": 1.0506774178425767e-05, - "loss": 0.0282, + "learning_rate": 2.0521585820604434e-05, + "loss": 0.0142, "step": 102250 }, { "epoch": 4.77, - "learning_rate": 1.0506305377150627e-05, - "loss": 0.1024, + "learning_rate": 2.0521117750768414e-05, + "loss": 0.0214, "step": 102255 }, { "epoch": 4.77, - "learning_rate": 1.0505836575875487e-05, - "loss": 0.0387, + "learning_rate": 2.0520649680932394e-05, + "loss": 0.0685, "step": 102260 }, { "epoch": 4.77, - "learning_rate": 1.0505367774600349e-05, - "loss": 0.0949, + "learning_rate": 2.0520181611096377e-05, + "loss": 0.0854, "step": 102265 }, { "epoch": 4.77, - "learning_rate": 1.0504898973325209e-05, - "loss": 0.1512, + "learning_rate": 2.0519713541260357e-05, + "loss": 0.0866, "step": 102270 }, { "epoch": 4.77, - "learning_rate": 1.0504430172050068e-05, - "loss": 0.218, + "learning_rate": 2.0519245471424337e-05, + "loss": 0.2355, "step": 102275 }, { "epoch": 4.77, - "learning_rate": 1.0503961370774928e-05, - "loss": 0.061, + "learning_rate": 2.051877740158832e-05, + "loss": 0.0897, "step": 102280 }, { "epoch": 4.77, - "learning_rate": 1.0503492569499788e-05, - "loss": 0.0243, + "learning_rate": 2.05183093317523e-05, + "loss": 0.0301, "step": 102285 }, { "epoch": 4.77, - "learning_rate": 1.050302376822465e-05, - "loss": 0.0249, + "learning_rate": 2.051784126191628e-05, + "loss": 0.0616, "step": 102290 }, { "epoch": 4.77, - "learning_rate": 1.0502554966949512e-05, - "loss": 0.1478, + "learning_rate": 2.0517373192080256e-05, + "loss": 0.0121, "step": 102295 }, { "epoch": 4.77, - "learning_rate": 1.0502086165674372e-05, - "loss": 0.0684, + "learning_rate": 2.051690512224424e-05, + "loss": 0.0349, "step": 102300 }, { "epoch": 4.77, - "learning_rate": 1.0501617364399233e-05, - "loss": 0.0813, + "learning_rate": 2.051643705240822e-05, + "loss": 0.0529, "step": 102305 }, { "epoch": 4.77, - "learning_rate": 1.0501148563124093e-05, - "loss": 0.2099, + "learning_rate": 2.05159689825722e-05, + "loss": 0.0462, "step": 102310 }, { "epoch": 4.77, - "learning_rate": 1.0500679761848953e-05, - "loss": 0.08, + "learning_rate": 2.051550091273618e-05, + "loss": 0.059, "step": 102315 }, { "epoch": 4.77, - "learning_rate": 1.0500210960573813e-05, - "loss": 0.1541, + "learning_rate": 2.0515032842900162e-05, + "loss": 0.1302, "step": 102320 }, { "epoch": 4.77, - "learning_rate": 1.0499742159298673e-05, - "loss": 0.177, + "learning_rate": 2.051456477306414e-05, + "loss": 0.3231, "step": 102325 }, { "epoch": 4.77, - "learning_rate": 1.0499273358023535e-05, - "loss": 0.0886, + "learning_rate": 2.051409670322812e-05, + "loss": 0.0751, "step": 102330 }, { "epoch": 4.78, - "learning_rate": 1.0498804556748394e-05, - "loss": 0.0221, + "learning_rate": 2.0513628633392105e-05, + "loss": 0.0312, "step": 102335 }, { "epoch": 4.78, - "learning_rate": 1.0498335755473256e-05, - "loss": 0.029, + "learning_rate": 2.0513160563556084e-05, + "loss": 0.0305, "step": 102340 }, { "epoch": 4.78, - "learning_rate": 1.0497866954198118e-05, - "loss": 0.0281, + "learning_rate": 2.0512692493720064e-05, + "loss": 0.1366, "step": 102345 }, { "epoch": 4.78, - "learning_rate": 1.0497398152922978e-05, - "loss": 0.2023, + "learning_rate": 2.0512224423884044e-05, + "loss": 0.0327, "step": 102350 }, { "epoch": 4.78, - "learning_rate": 1.0496929351647838e-05, - "loss": 0.1092, + "learning_rate": 2.0511756354048027e-05, + "loss": 0.0638, "step": 102355 }, { "epoch": 4.78, - "learning_rate": 1.0496460550372698e-05, - "loss": 0.0955, + "learning_rate": 2.0511288284212004e-05, + "loss": 0.1034, "step": 102360 }, { "epoch": 4.78, - "learning_rate": 1.0495991749097557e-05, - "loss": 0.0616, + "learning_rate": 2.0510820214375984e-05, + "loss": 0.0579, "step": 102365 }, { "epoch": 4.78, - "learning_rate": 1.0495522947822419e-05, - "loss": 0.1498, + "learning_rate": 2.0510352144539963e-05, + "loss": 0.161, "step": 102370 }, { "epoch": 4.78, - "learning_rate": 1.0495054146547279e-05, - "loss": 0.3811, + "learning_rate": 2.0509884074703947e-05, + "loss": 0.2302, "step": 102375 }, { "epoch": 4.78, - "learning_rate": 1.0494585345272139e-05, - "loss": 0.0465, + "learning_rate": 2.0509416004867926e-05, + "loss": 0.1005, "step": 102380 }, { "epoch": 4.78, - "learning_rate": 1.0494116543997002e-05, - "loss": 0.0102, + "learning_rate": 2.0508947935031906e-05, + "loss": 0.0234, "step": 102385 }, { "epoch": 4.78, - "learning_rate": 1.0493647742721862e-05, - "loss": 0.0027, + "learning_rate": 2.050847986519589e-05, + "loss": 0.0474, "step": 102390 }, { "epoch": 4.78, - "learning_rate": 1.0493178941446722e-05, - "loss": 0.0889, + "learning_rate": 2.050801179535987e-05, + "loss": 0.0504, "step": 102395 }, { "epoch": 4.78, - "learning_rate": 1.0492710140171582e-05, - "loss": 0.039, + "learning_rate": 2.050754372552385e-05, + "loss": 0.1049, "step": 102400 }, { "epoch": 4.78, - "learning_rate": 1.0492241338896442e-05, - "loss": 0.0563, + "learning_rate": 2.050707565568783e-05, + "loss": 0.0943, "step": 102405 }, { "epoch": 4.78, - "learning_rate": 1.0491772537621304e-05, - "loss": 0.103, + "learning_rate": 2.0506607585851812e-05, + "loss": 0.0941, "step": 102410 }, { "epoch": 4.78, - "learning_rate": 1.0491303736346164e-05, - "loss": 0.0921, + "learning_rate": 2.0506139516015792e-05, + "loss": 0.0995, "step": 102415 }, { "epoch": 4.78, - "learning_rate": 1.0490834935071023e-05, - "loss": 0.2047, + "learning_rate": 2.0505671446179772e-05, + "loss": 0.1714, "step": 102420 }, { "epoch": 4.78, - "learning_rate": 1.0490366133795883e-05, - "loss": 0.2644, + "learning_rate": 2.0505203376343748e-05, + "loss": 0.1521, "step": 102425 }, { "epoch": 4.78, - "learning_rate": 1.0489897332520745e-05, - "loss": 0.0547, + "learning_rate": 2.050473530650773e-05, + "loss": 0.0656, "step": 102430 }, { "epoch": 4.78, - "learning_rate": 1.0489428531245607e-05, - "loss": 0.0294, + "learning_rate": 2.050426723667171e-05, + "loss": 0.0763, "step": 102435 }, { "epoch": 4.78, - "learning_rate": 1.0488959729970467e-05, - "loss": 0.0247, + "learning_rate": 2.050379916683569e-05, + "loss": 0.0163, "step": 102440 }, { "epoch": 4.78, - "learning_rate": 1.0488490928695327e-05, - "loss": 0.0283, + "learning_rate": 2.050333109699967e-05, + "loss": 0.0801, "step": 102445 }, { "epoch": 4.78, - "learning_rate": 1.0488022127420188e-05, - "loss": 0.102, + "learning_rate": 2.0502863027163654e-05, + "loss": 0.0304, "step": 102450 }, { "epoch": 4.78, - "learning_rate": 1.0487553326145048e-05, - "loss": 0.06, + "learning_rate": 2.0502394957327634e-05, + "loss": 0.0779, "step": 102455 }, { "epoch": 4.78, - "learning_rate": 1.0487084524869908e-05, - "loss": 0.0462, + "learning_rate": 2.0501926887491614e-05, + "loss": 0.0959, "step": 102460 }, { "epoch": 4.78, - "learning_rate": 1.0486615723594768e-05, - "loss": 0.0689, + "learning_rate": 2.0501458817655597e-05, + "loss": 0.1015, "step": 102465 }, { "epoch": 4.78, - "learning_rate": 1.048614692231963e-05, - "loss": 0.1073, + "learning_rate": 2.0500990747819577e-05, + "loss": 0.2187, "step": 102470 }, { "epoch": 4.78, - "learning_rate": 1.048567812104449e-05, - "loss": 0.13, + "learning_rate": 2.0500522677983557e-05, + "loss": 0.3233, "step": 102475 }, { "epoch": 4.78, - "learning_rate": 1.0485209319769351e-05, - "loss": 0.063, + "learning_rate": 2.0500054608147536e-05, + "loss": 0.0862, "step": 102480 }, { "epoch": 4.78, - "learning_rate": 1.0484740518494211e-05, - "loss": 0.0185, + "learning_rate": 2.0499586538311516e-05, + "loss": 0.0177, "step": 102485 }, { "epoch": 4.78, - "learning_rate": 1.0484271717219073e-05, - "loss": 0.0306, + "learning_rate": 2.0499118468475496e-05, + "loss": 0.0116, "step": 102490 }, { "epoch": 4.78, - "learning_rate": 1.0483802915943933e-05, - "loss": 0.0213, + "learning_rate": 2.0498650398639476e-05, + "loss": 0.0549, "step": 102495 }, { "epoch": 4.78, - "learning_rate": 1.0483334114668793e-05, - "loss": 0.0409, + "learning_rate": 2.0498182328803456e-05, + "loss": 0.1206, "step": 102500 }, { "epoch": 4.78, - "learning_rate": 1.0482865313393653e-05, - "loss": 0.0853, + "learning_rate": 2.049771425896744e-05, + "loss": 0.0763, "step": 102505 }, { "epoch": 4.78, - "learning_rate": 1.0482396512118514e-05, - "loss": 0.1192, + "learning_rate": 2.049724618913142e-05, + "loss": 0.0637, "step": 102510 }, { "epoch": 4.78, - "learning_rate": 1.0481927710843374e-05, - "loss": 0.1563, + "learning_rate": 2.04967781192954e-05, + "loss": 0.1711, "step": 102515 }, { "epoch": 4.78, - "learning_rate": 1.0481458909568234e-05, - "loss": 0.1211, + "learning_rate": 2.049631004945938e-05, + "loss": 0.2412, "step": 102520 }, { "epoch": 4.78, - "learning_rate": 1.0480990108293096e-05, - "loss": 0.2028, + "learning_rate": 2.049584197962336e-05, + "loss": 0.3057, "step": 102525 }, { "epoch": 4.78, - "learning_rate": 1.0480521307017957e-05, - "loss": 0.056, + "learning_rate": 2.049537390978734e-05, + "loss": 0.1071, "step": 102530 }, { "epoch": 4.78, - "learning_rate": 1.0480052505742817e-05, - "loss": 0.0188, + "learning_rate": 2.049490583995132e-05, + "loss": 0.0248, "step": 102535 }, { "epoch": 4.78, - "learning_rate": 1.0479583704467677e-05, - "loss": 0.0183, + "learning_rate": 2.0494437770115304e-05, + "loss": 0.0312, "step": 102540 }, { "epoch": 4.78, - "learning_rate": 1.0479114903192537e-05, - "loss": 0.0444, + "learning_rate": 2.0493969700279284e-05, + "loss": 0.0237, "step": 102545 }, { "epoch": 4.79, - "learning_rate": 1.0478646101917399e-05, - "loss": 0.0253, + "learning_rate": 2.049350163044326e-05, + "loss": 0.0316, "step": 102550 }, { "epoch": 4.79, - "learning_rate": 1.0478177300642259e-05, - "loss": 0.0455, + "learning_rate": 2.049303356060724e-05, + "loss": 0.0707, "step": 102555 }, { "epoch": 4.79, - "learning_rate": 1.0477708499367119e-05, - "loss": 0.0746, + "learning_rate": 2.0492565490771224e-05, + "loss": 0.0872, "step": 102560 }, { "epoch": 4.79, - "learning_rate": 1.0477239698091979e-05, - "loss": 0.0873, + "learning_rate": 2.0492097420935203e-05, + "loss": 0.1154, "step": 102565 }, { "epoch": 4.79, - "learning_rate": 1.0476770896816838e-05, - "loss": 0.1166, + "learning_rate": 2.0491629351099183e-05, + "loss": 0.1406, "step": 102570 }, { "epoch": 4.79, - "learning_rate": 1.0476302095541702e-05, - "loss": 0.3312, + "learning_rate": 2.0491161281263166e-05, + "loss": 0.3299, "step": 102575 }, { "epoch": 4.79, - "learning_rate": 1.0475833294266562e-05, - "loss": 0.0987, + "learning_rate": 2.0490693211427146e-05, + "loss": 0.0837, "step": 102580 }, { "epoch": 4.79, - "learning_rate": 1.0475364492991422e-05, - "loss": 0.0258, + "learning_rate": 2.0490225141591126e-05, + "loss": 0.0124, "step": 102585 }, { "epoch": 4.79, - "learning_rate": 1.0474895691716283e-05, - "loss": 0.055, + "learning_rate": 2.0489757071755106e-05, + "loss": 0.0382, "step": 102590 }, { "epoch": 4.79, - "learning_rate": 1.0474426890441143e-05, - "loss": 0.0206, + "learning_rate": 2.048928900191909e-05, + "loss": 0.0673, "step": 102595 }, { "epoch": 4.79, - "learning_rate": 1.0473958089166003e-05, - "loss": 0.0802, + "learning_rate": 2.048882093208307e-05, + "loss": 0.1054, "step": 102600 }, { "epoch": 4.79, - "learning_rate": 1.0473489287890863e-05, - "loss": 0.0479, + "learning_rate": 2.048835286224705e-05, + "loss": 0.0408, "step": 102605 }, { "epoch": 4.79, - "learning_rate": 1.0473020486615723e-05, - "loss": 0.204, + "learning_rate": 2.048788479241103e-05, + "loss": 0.0727, "step": 102610 }, { "epoch": 4.79, - "learning_rate": 1.0472551685340585e-05, - "loss": 0.2111, + "learning_rate": 2.048741672257501e-05, + "loss": 0.135, "step": 102615 }, { "epoch": 4.79, - "learning_rate": 1.0472082884065446e-05, - "loss": 0.1316, + "learning_rate": 2.0486948652738988e-05, + "loss": 0.0976, "step": 102620 }, { "epoch": 4.79, - "learning_rate": 1.0471614082790306e-05, - "loss": 0.2781, + "learning_rate": 2.0486480582902968e-05, + "loss": 0.1841, "step": 102625 }, { "epoch": 4.79, - "learning_rate": 1.0471145281515168e-05, - "loss": 0.0586, + "learning_rate": 2.0486012513066948e-05, + "loss": 0.0749, "step": 102630 }, { "epoch": 4.79, - "learning_rate": 1.0470676480240028e-05, - "loss": 0.0007, + "learning_rate": 2.048554444323093e-05, + "loss": 0.0298, "step": 102635 }, { "epoch": 4.79, - "learning_rate": 1.0470207678964888e-05, - "loss": 0.0742, + "learning_rate": 2.048507637339491e-05, + "loss": 0.0226, "step": 102640 }, { "epoch": 4.79, - "learning_rate": 1.0469738877689748e-05, - "loss": 0.0156, + "learning_rate": 2.048460830355889e-05, + "loss": 0.039, "step": 102645 }, { "epoch": 4.79, - "learning_rate": 1.0469270076414608e-05, - "loss": 0.0449, + "learning_rate": 2.0484140233722874e-05, + "loss": 0.0415, "step": 102650 }, { "epoch": 4.79, - "learning_rate": 1.046880127513947e-05, - "loss": 0.0215, + "learning_rate": 2.0483672163886854e-05, + "loss": 0.0213, "step": 102655 }, { "epoch": 4.79, - "learning_rate": 1.0468332473864329e-05, - "loss": 0.0911, + "learning_rate": 2.0483204094050833e-05, + "loss": 0.1199, "step": 102660 }, { "epoch": 4.79, - "learning_rate": 1.046786367258919e-05, - "loss": 0.0647, + "learning_rate": 2.0482736024214813e-05, + "loss": 0.1805, "step": 102665 }, { "epoch": 4.79, - "learning_rate": 1.0467394871314052e-05, - "loss": 0.1279, + "learning_rate": 2.0482267954378796e-05, + "loss": 0.1366, "step": 102670 }, { "epoch": 4.79, - "learning_rate": 1.0466926070038912e-05, - "loss": 0.4815, + "learning_rate": 2.0481799884542773e-05, + "loss": 0.2772, "step": 102675 }, { "epoch": 4.79, - "learning_rate": 1.0466457268763772e-05, - "loss": 0.0562, + "learning_rate": 2.0481331814706753e-05, + "loss": 0.0469, "step": 102680 }, { "epoch": 4.79, - "learning_rate": 1.0465988467488632e-05, - "loss": 0.0061, + "learning_rate": 2.0480863744870733e-05, + "loss": 0.0345, "step": 102685 }, { "epoch": 4.79, - "learning_rate": 1.0465519666213492e-05, - "loss": 0.0062, + "learning_rate": 2.0480395675034716e-05, + "loss": 0.0131, "step": 102690 }, { "epoch": 4.79, - "learning_rate": 1.0465050864938354e-05, - "loss": 0.0523, + "learning_rate": 2.0479927605198696e-05, + "loss": 0.0269, "step": 102695 }, { "epoch": 4.79, - "learning_rate": 1.0464582063663214e-05, - "loss": 0.0474, + "learning_rate": 2.0479459535362675e-05, + "loss": 0.0847, "step": 102700 }, { "epoch": 4.79, - "learning_rate": 1.0464113262388074e-05, - "loss": 0.0315, + "learning_rate": 2.047899146552666e-05, + "loss": 0.0385, "step": 102705 }, { "epoch": 4.79, - "learning_rate": 1.0463644461112937e-05, - "loss": 0.0587, + "learning_rate": 2.047852339569064e-05, + "loss": 0.1319, "step": 102710 }, { "epoch": 4.79, - "learning_rate": 1.0463175659837797e-05, - "loss": 0.0522, + "learning_rate": 2.0478055325854618e-05, + "loss": 0.0851, "step": 102715 }, { "epoch": 4.79, - "learning_rate": 1.0462706858562657e-05, - "loss": 0.185, + "learning_rate": 2.0477587256018598e-05, + "loss": 0.1045, "step": 102720 }, { "epoch": 4.79, - "learning_rate": 1.0462238057287517e-05, - "loss": 0.3021, + "learning_rate": 2.047711918618258e-05, + "loss": 0.3473, "step": 102725 }, { "epoch": 4.79, - "learning_rate": 1.0461769256012377e-05, - "loss": 0.0592, + "learning_rate": 2.047665111634656e-05, + "loss": 0.1159, "step": 102730 }, { "epoch": 4.79, - "learning_rate": 1.0461300454737238e-05, - "loss": 0.0363, + "learning_rate": 2.047618304651054e-05, + "loss": 0.0066, "step": 102735 }, { "epoch": 4.79, - "learning_rate": 1.0460831653462098e-05, - "loss": 0.0202, + "learning_rate": 2.0475714976674517e-05, + "loss": 0.0354, "step": 102740 }, { "epoch": 4.79, - "learning_rate": 1.0460362852186958e-05, - "loss": 0.0779, + "learning_rate": 2.04752469068385e-05, + "loss": 0.0127, "step": 102745 }, { "epoch": 4.79, - "learning_rate": 1.0459894050911818e-05, - "loss": 0.0314, + "learning_rate": 2.047477883700248e-05, + "loss": 0.0654, "step": 102750 }, { "epoch": 4.79, - "learning_rate": 1.0459425249636678e-05, - "loss": 0.0416, + "learning_rate": 2.047431076716646e-05, + "loss": 0.044, "step": 102755 }, { "epoch": 4.79, - "learning_rate": 1.0458956448361541e-05, - "loss": 0.0725, + "learning_rate": 2.0473842697330443e-05, + "loss": 0.0469, "step": 102760 }, { "epoch": 4.8, - "learning_rate": 1.0458487647086401e-05, - "loss": 0.1599, + "learning_rate": 2.0473374627494423e-05, + "loss": 0.173, "step": 102765 }, { "epoch": 4.8, - "learning_rate": 1.0458018845811261e-05, - "loss": 0.1278, + "learning_rate": 2.0472906557658403e-05, + "loss": 0.1214, "step": 102770 }, { "epoch": 4.8, - "learning_rate": 1.0457550044536123e-05, - "loss": 0.2902, + "learning_rate": 2.0472438487822383e-05, + "loss": 0.2724, "step": 102775 }, { "epoch": 4.8, - "learning_rate": 1.0457081243260983e-05, - "loss": 0.0624, + "learning_rate": 2.0471970417986366e-05, + "loss": 0.0887, "step": 102780 }, { "epoch": 4.8, - "learning_rate": 1.0456612441985843e-05, - "loss": 0.0076, + "learning_rate": 2.0471502348150346e-05, + "loss": 0.011, "step": 102785 }, { "epoch": 4.8, - "learning_rate": 1.0456143640710703e-05, - "loss": 0.0258, + "learning_rate": 2.0471034278314326e-05, + "loss": 0.0279, "step": 102790 }, { "epoch": 4.8, - "learning_rate": 1.0455674839435563e-05, - "loss": 0.0954, + "learning_rate": 2.0470566208478305e-05, + "loss": 0.0478, "step": 102795 }, { "epoch": 4.8, - "learning_rate": 1.0455206038160424e-05, - "loss": 0.0258, + "learning_rate": 2.0470098138642285e-05, + "loss": 0.0662, "step": 102800 }, { "epoch": 4.8, - "learning_rate": 1.0454737236885286e-05, - "loss": 0.0543, + "learning_rate": 2.0469630068806265e-05, + "loss": 0.0737, "step": 102805 }, { "epoch": 4.8, - "learning_rate": 1.0454268435610146e-05, - "loss": 0.0732, + "learning_rate": 2.0469161998970245e-05, + "loss": 0.1093, "step": 102810 }, { "epoch": 4.8, - "learning_rate": 1.0453799634335007e-05, - "loss": 0.1822, + "learning_rate": 2.0468693929134228e-05, + "loss": 0.1391, "step": 102815 }, { "epoch": 4.8, - "learning_rate": 1.0453330833059867e-05, - "loss": 0.2422, + "learning_rate": 2.0468225859298208e-05, + "loss": 0.2068, "step": 102820 }, { "epoch": 4.8, - "learning_rate": 1.0452862031784727e-05, - "loss": 0.48, + "learning_rate": 2.0467757789462188e-05, + "loss": 0.326, "step": 102825 }, { "epoch": 4.8, - "learning_rate": 1.0452393230509587e-05, - "loss": 0.0662, + "learning_rate": 2.0467289719626168e-05, + "loss": 0.0968, "step": 102830 }, { "epoch": 4.8, - "learning_rate": 1.0451924429234447e-05, - "loss": 0.0167, + "learning_rate": 2.046682164979015e-05, + "loss": 0.0326, "step": 102835 }, { "epoch": 4.8, - "learning_rate": 1.0451455627959309e-05, - "loss": 0.0499, + "learning_rate": 2.046635357995413e-05, + "loss": 0.0192, "step": 102840 }, { "epoch": 4.8, - "learning_rate": 1.0450986826684169e-05, - "loss": 0.0641, + "learning_rate": 2.046588551011811e-05, + "loss": 0.0372, "step": 102845 }, { "epoch": 4.8, - "learning_rate": 1.045051802540903e-05, - "loss": 0.0767, + "learning_rate": 2.046541744028209e-05, + "loss": 0.0246, "step": 102850 }, { "epoch": 4.8, - "learning_rate": 1.0450049224133892e-05, - "loss": 0.0573, + "learning_rate": 2.0464949370446073e-05, + "loss": 0.0947, "step": 102855 }, { "epoch": 4.8, - "learning_rate": 1.0449580422858752e-05, - "loss": 0.0918, + "learning_rate": 2.0464481300610053e-05, + "loss": 0.0812, "step": 102860 }, { "epoch": 4.8, - "learning_rate": 1.0449111621583612e-05, - "loss": 0.1074, + "learning_rate": 2.046401323077403e-05, + "loss": 0.1565, "step": 102865 }, { "epoch": 4.8, - "learning_rate": 1.0448642820308472e-05, - "loss": 0.1516, + "learning_rate": 2.046354516093801e-05, + "loss": 0.0956, "step": 102870 }, { "epoch": 4.8, - "learning_rate": 1.0448174019033332e-05, - "loss": 0.1747, + "learning_rate": 2.0463077091101993e-05, + "loss": 0.2886, "step": 102875 }, { "epoch": 4.8, - "learning_rate": 1.0447705217758193e-05, - "loss": 0.0691, + "learning_rate": 2.0462609021265973e-05, + "loss": 0.1066, "step": 102880 }, { "epoch": 4.8, - "learning_rate": 1.0447236416483053e-05, - "loss": 0.0151, + "learning_rate": 2.0462140951429952e-05, + "loss": 0.0195, "step": 102885 }, { "epoch": 4.8, - "learning_rate": 1.0446767615207913e-05, - "loss": 0.0072, + "learning_rate": 2.0461672881593936e-05, + "loss": 0.0101, "step": 102890 }, { "epoch": 4.8, - "learning_rate": 1.0446298813932773e-05, - "loss": 0.0563, + "learning_rate": 2.0461204811757915e-05, + "loss": 0.0364, "step": 102895 }, { "epoch": 4.8, - "learning_rate": 1.0445830012657636e-05, - "loss": 0.0934, + "learning_rate": 2.0460736741921895e-05, + "loss": 0.0349, "step": 102900 }, { "epoch": 4.8, - "learning_rate": 1.0445361211382496e-05, - "loss": 0.0263, + "learning_rate": 2.0460268672085875e-05, + "loss": 0.0573, "step": 102905 }, { "epoch": 4.8, - "learning_rate": 1.0444892410107356e-05, - "loss": 0.1091, + "learning_rate": 2.0459800602249858e-05, + "loss": 0.084, "step": 102910 }, { "epoch": 4.8, - "learning_rate": 1.0444423608832218e-05, - "loss": 0.0825, + "learning_rate": 2.0459332532413838e-05, + "loss": 0.094, "step": 102915 }, { "epoch": 4.8, - "learning_rate": 1.0443954807557078e-05, - "loss": 0.1376, + "learning_rate": 2.0458864462577818e-05, + "loss": 0.1825, "step": 102920 }, { "epoch": 4.8, - "learning_rate": 1.0443486006281938e-05, - "loss": 0.2562, + "learning_rate": 2.0458396392741798e-05, + "loss": 0.3217, "step": 102925 }, { "epoch": 4.8, - "learning_rate": 1.0443017205006798e-05, - "loss": 0.0418, + "learning_rate": 2.0457928322905778e-05, + "loss": 0.0687, "step": 102930 }, { "epoch": 4.8, - "learning_rate": 1.0442548403731658e-05, - "loss": 0.0191, + "learning_rate": 2.0457460253069757e-05, + "loss": 0.013, "step": 102935 }, { "epoch": 4.8, - "learning_rate": 1.044207960245652e-05, - "loss": 0.0611, + "learning_rate": 2.0456992183233737e-05, + "loss": 0.0249, "step": 102940 }, { "epoch": 4.8, - "learning_rate": 1.0441610801181381e-05, - "loss": 0.0371, + "learning_rate": 2.045652411339772e-05, + "loss": 0.0324, "step": 102945 }, { "epoch": 4.8, - "learning_rate": 1.044114199990624e-05, - "loss": 0.0461, + "learning_rate": 2.04560560435617e-05, + "loss": 0.0667, "step": 102950 }, { "epoch": 4.8, - "learning_rate": 1.0440673198631102e-05, - "loss": 0.0477, + "learning_rate": 2.045558797372568e-05, + "loss": 0.0641, "step": 102955 }, { "epoch": 4.8, - "learning_rate": 1.0440204397355962e-05, - "loss": 0.067, + "learning_rate": 2.045511990388966e-05, + "loss": 0.0759, "step": 102960 }, { "epoch": 4.8, - "learning_rate": 1.0439735596080822e-05, - "loss": 0.1263, + "learning_rate": 2.0454651834053643e-05, + "loss": 0.153, "step": 102965 }, { "epoch": 4.8, - "learning_rate": 1.0439266794805682e-05, - "loss": 0.1754, + "learning_rate": 2.0454183764217623e-05, + "loss": 0.1045, "step": 102970 }, { "epoch": 4.8, - "learning_rate": 1.0438797993530542e-05, - "loss": 0.2918, + "learning_rate": 2.0453715694381603e-05, + "loss": 0.2201, "step": 102975 }, { "epoch": 4.81, - "learning_rate": 1.0438329192255404e-05, - "loss": 0.0594, + "learning_rate": 2.0453247624545582e-05, + "loss": 0.1069, "step": 102980 }, { "epoch": 4.81, - "learning_rate": 1.0437860390980264e-05, - "loss": 0.0457, + "learning_rate": 2.0452779554709566e-05, + "loss": 0.0132, "step": 102985 }, { "epoch": 4.81, - "learning_rate": 1.0437391589705125e-05, - "loss": 0.0798, + "learning_rate": 2.0452311484873542e-05, + "loss": 0.0217, "step": 102990 }, { "epoch": 4.81, - "learning_rate": 1.0436922788429987e-05, - "loss": 0.059, + "learning_rate": 2.0451843415037522e-05, + "loss": 0.044, "step": 102995 }, { "epoch": 4.81, - "learning_rate": 1.0436453987154847e-05, - "loss": 0.0832, + "learning_rate": 2.0451375345201505e-05, + "loss": 0.0388, "step": 103000 }, { "epoch": 4.81, - "learning_rate": 1.0435985185879707e-05, - "loss": 0.0854, + "learning_rate": 2.0450907275365485e-05, + "loss": 0.0534, "step": 103005 }, { "epoch": 4.81, - "learning_rate": 1.0435516384604567e-05, - "loss": 0.0542, + "learning_rate": 2.0450439205529465e-05, + "loss": 0.0375, "step": 103010 }, { "epoch": 4.81, - "learning_rate": 1.0435047583329427e-05, - "loss": 0.1214, + "learning_rate": 2.0449971135693445e-05, + "loss": 0.0761, "step": 103015 }, { "epoch": 4.81, - "learning_rate": 1.0434578782054288e-05, - "loss": 0.1712, + "learning_rate": 2.0449503065857428e-05, + "loss": 0.1373, "step": 103020 }, { "epoch": 4.81, - "learning_rate": 1.0434109980779148e-05, - "loss": 0.2041, + "learning_rate": 2.0449034996021408e-05, + "loss": 0.1539, "step": 103025 }, { "epoch": 4.81, - "learning_rate": 1.0433641179504008e-05, - "loss": 0.0414, + "learning_rate": 2.0448566926185387e-05, + "loss": 0.082, "step": 103030 }, { "epoch": 4.81, - "learning_rate": 1.0433172378228872e-05, - "loss": 0.0617, + "learning_rate": 2.0448098856349367e-05, + "loss": 0.0025, "step": 103035 }, { "epoch": 4.81, - "learning_rate": 1.0432703576953731e-05, - "loss": 0.0553, + "learning_rate": 2.044763078651335e-05, + "loss": 0.0487, "step": 103040 }, { "epoch": 4.81, - "learning_rate": 1.0432234775678591e-05, - "loss": 0.0496, + "learning_rate": 2.044716271667733e-05, + "loss": 0.0313, "step": 103045 }, { "epoch": 4.81, - "learning_rate": 1.0431765974403451e-05, - "loss": 0.052, + "learning_rate": 2.044669464684131e-05, + "loss": 0.0356, "step": 103050 }, { "epoch": 4.81, - "learning_rate": 1.0431297173128311e-05, - "loss": 0.0692, + "learning_rate": 2.0446226577005287e-05, + "loss": 0.0332, "step": 103055 }, { "epoch": 4.81, - "learning_rate": 1.0430828371853173e-05, - "loss": 0.0978, + "learning_rate": 2.044575850716927e-05, + "loss": 0.0727, "step": 103060 }, { "epoch": 4.81, - "learning_rate": 1.0430359570578033e-05, - "loss": 0.1229, + "learning_rate": 2.044529043733325e-05, + "loss": 0.1331, "step": 103065 }, { "epoch": 4.81, - "learning_rate": 1.0429890769302893e-05, - "loss": 0.0788, + "learning_rate": 2.044482236749723e-05, + "loss": 0.1972, "step": 103070 }, { "epoch": 4.81, - "learning_rate": 1.0429421968027753e-05, - "loss": 0.249, + "learning_rate": 2.0444354297661213e-05, + "loss": 0.1802, "step": 103075 }, { "epoch": 4.81, - "learning_rate": 1.0428953166752613e-05, - "loss": 0.0913, + "learning_rate": 2.0443886227825192e-05, + "loss": 0.0666, "step": 103080 }, { "epoch": 4.81, - "learning_rate": 1.0428484365477476e-05, - "loss": 0.0702, + "learning_rate": 2.0443418157989172e-05, + "loss": 0.0323, "step": 103085 }, { "epoch": 4.81, - "learning_rate": 1.0428015564202336e-05, - "loss": 0.0502, + "learning_rate": 2.0442950088153152e-05, + "loss": 0.079, "step": 103090 }, { "epoch": 4.81, - "learning_rate": 1.0427546762927196e-05, - "loss": 0.083, + "learning_rate": 2.0442482018317135e-05, + "loss": 0.0222, "step": 103095 }, { "epoch": 4.81, - "learning_rate": 1.0427077961652057e-05, - "loss": 0.053, + "learning_rate": 2.0442013948481115e-05, + "loss": 0.036, "step": 103100 }, { "epoch": 4.81, - "learning_rate": 1.0426609160376917e-05, - "loss": 0.1018, + "learning_rate": 2.0441545878645095e-05, + "loss": 0.0585, "step": 103105 }, { "epoch": 4.81, - "learning_rate": 1.0426140359101777e-05, - "loss": 0.126, + "learning_rate": 2.0441077808809075e-05, + "loss": 0.0587, "step": 103110 }, { "epoch": 4.81, - "learning_rate": 1.0425671557826637e-05, - "loss": 0.0771, + "learning_rate": 2.0440609738973058e-05, + "loss": 0.0866, "step": 103115 }, { "epoch": 4.81, - "learning_rate": 1.0425202756551497e-05, - "loss": 0.1753, + "learning_rate": 2.0440141669137034e-05, + "loss": 0.0911, "step": 103120 }, { "epoch": 4.81, - "learning_rate": 1.0424733955276359e-05, - "loss": 0.1639, + "learning_rate": 2.0439673599301014e-05, + "loss": 0.3029, "step": 103125 }, { "epoch": 4.81, - "learning_rate": 1.042426515400122e-05, - "loss": 0.0716, + "learning_rate": 2.0439205529464997e-05, + "loss": 0.0865, "step": 103130 }, { "epoch": 4.81, - "learning_rate": 1.042379635272608e-05, - "loss": 0.0139, + "learning_rate": 2.0438737459628977e-05, + "loss": 0.018, "step": 103135 }, { "epoch": 4.81, - "learning_rate": 1.0423327551450942e-05, - "loss": 0.0337, + "learning_rate": 2.0438269389792957e-05, + "loss": 0.0178, "step": 103140 }, { "epoch": 4.81, - "learning_rate": 1.0422858750175802e-05, - "loss": 0.0685, + "learning_rate": 2.0437801319956937e-05, + "loss": 0.0466, "step": 103145 }, { "epoch": 4.81, - "learning_rate": 1.0422389948900662e-05, - "loss": 0.0434, + "learning_rate": 2.043733325012092e-05, + "loss": 0.0302, "step": 103150 }, { "epoch": 4.81, - "learning_rate": 1.0421921147625522e-05, - "loss": 0.0754, + "learning_rate": 2.04368651802849e-05, + "loss": 0.0897, "step": 103155 }, { "epoch": 4.81, - "learning_rate": 1.0421452346350382e-05, - "loss": 0.0806, + "learning_rate": 2.043639711044888e-05, + "loss": 0.0631, "step": 103160 }, { "epoch": 4.81, - "learning_rate": 1.0420983545075243e-05, - "loss": 0.1579, + "learning_rate": 2.043592904061286e-05, + "loss": 0.0575, "step": 103165 }, { "epoch": 4.81, - "learning_rate": 1.0420514743800103e-05, - "loss": 0.2571, + "learning_rate": 2.0435460970776843e-05, + "loss": 0.1061, "step": 103170 }, { "epoch": 4.81, - "learning_rate": 1.0420045942524965e-05, - "loss": 0.2651, + "learning_rate": 2.0434992900940822e-05, + "loss": 0.2964, "step": 103175 }, { "epoch": 4.81, - "learning_rate": 1.0419577141249827e-05, - "loss": 0.1012, + "learning_rate": 2.04345248311048e-05, + "loss": 0.0707, "step": 103180 }, { "epoch": 4.81, - "learning_rate": 1.0419108339974687e-05, - "loss": 0.0798, + "learning_rate": 2.0434056761268782e-05, + "loss": 0.0118, "step": 103185 }, { "epoch": 4.81, - "learning_rate": 1.0418639538699546e-05, - "loss": 0.0407, + "learning_rate": 2.0433588691432762e-05, + "loss": 0.0336, "step": 103190 }, { "epoch": 4.82, - "learning_rate": 1.0418170737424406e-05, - "loss": 0.0716, + "learning_rate": 2.0433120621596742e-05, + "loss": 0.0614, "step": 103195 }, { "epoch": 4.82, - "learning_rate": 1.0417701936149266e-05, - "loss": 0.0265, + "learning_rate": 2.043265255176072e-05, + "loss": 0.0595, "step": 103200 }, { "epoch": 4.82, - "learning_rate": 1.0417233134874128e-05, - "loss": 0.0514, + "learning_rate": 2.0432184481924705e-05, + "loss": 0.0942, "step": 103205 }, { "epoch": 4.82, - "learning_rate": 1.0416764333598988e-05, - "loss": 0.1115, + "learning_rate": 2.0431716412088685e-05, + "loss": 0.1042, "step": 103210 }, { "epoch": 4.82, - "learning_rate": 1.0416295532323848e-05, - "loss": 0.0714, + "learning_rate": 2.0431248342252664e-05, + "loss": 0.0695, "step": 103215 }, { "epoch": 4.82, - "learning_rate": 1.0415826731048708e-05, - "loss": 0.1274, + "learning_rate": 2.0430780272416644e-05, + "loss": 0.1099, "step": 103220 }, { "epoch": 4.82, - "learning_rate": 1.0415357929773571e-05, - "loss": 0.3215, + "learning_rate": 2.0430312202580627e-05, + "loss": 0.2398, "step": 103225 }, { "epoch": 4.82, - "learning_rate": 1.0414889128498431e-05, - "loss": 0.1016, + "learning_rate": 2.0429844132744607e-05, + "loss": 0.0752, "step": 103230 }, { "epoch": 4.82, - "learning_rate": 1.0414420327223291e-05, - "loss": 0.025, + "learning_rate": 2.0429376062908587e-05, + "loss": 0.0066, "step": 103235 }, { "epoch": 4.82, - "learning_rate": 1.0413951525948151e-05, - "loss": 0.0103, + "learning_rate": 2.0428907993072567e-05, + "loss": 0.0219, "step": 103240 }, { "epoch": 4.82, - "learning_rate": 1.0413482724673012e-05, - "loss": 0.0244, + "learning_rate": 2.0428439923236547e-05, + "loss": 0.0515, "step": 103245 }, { "epoch": 4.82, - "learning_rate": 1.0413013923397872e-05, - "loss": 0.0437, + "learning_rate": 2.0427971853400526e-05, + "loss": 0.0229, "step": 103250 }, { "epoch": 4.82, - "learning_rate": 1.0412545122122732e-05, - "loss": 0.0632, + "learning_rate": 2.0427503783564506e-05, + "loss": 0.0401, "step": 103255 }, { "epoch": 4.82, - "learning_rate": 1.0412076320847592e-05, - "loss": 0.076, + "learning_rate": 2.042703571372849e-05, + "loss": 0.0665, "step": 103260 }, { "epoch": 4.82, - "learning_rate": 1.0411607519572454e-05, - "loss": 0.0733, + "learning_rate": 2.042656764389247e-05, + "loss": 0.1092, "step": 103265 }, { "epoch": 4.82, - "learning_rate": 1.0411138718297316e-05, - "loss": 0.1895, + "learning_rate": 2.042609957405645e-05, + "loss": 0.1776, "step": 103270 }, { "epoch": 4.82, - "learning_rate": 1.0410669917022175e-05, - "loss": 0.2007, + "learning_rate": 2.042563150422043e-05, + "loss": 0.2558, "step": 103275 }, { "epoch": 4.82, - "learning_rate": 1.0410201115747035e-05, - "loss": 0.0924, + "learning_rate": 2.0425163434384412e-05, + "loss": 0.0951, "step": 103280 }, { "epoch": 4.82, - "learning_rate": 1.0409732314471897e-05, - "loss": 0.0607, + "learning_rate": 2.0424695364548392e-05, + "loss": 0.007, "step": 103285 }, { "epoch": 4.82, - "learning_rate": 1.0409263513196757e-05, - "loss": 0.0297, + "learning_rate": 2.0424227294712372e-05, + "loss": 0.043, "step": 103290 }, { "epoch": 4.82, - "learning_rate": 1.0408794711921617e-05, - "loss": 0.0709, + "learning_rate": 2.042375922487635e-05, + "loss": 0.0714, "step": 103295 }, { "epoch": 4.82, - "learning_rate": 1.0408325910646477e-05, - "loss": 0.0464, + "learning_rate": 2.0423291155040335e-05, + "loss": 0.0254, "step": 103300 }, { "epoch": 4.82, - "learning_rate": 1.0407857109371338e-05, - "loss": 0.0611, + "learning_rate": 2.042282308520431e-05, + "loss": 0.0488, "step": 103305 }, { "epoch": 4.82, - "learning_rate": 1.0407388308096198e-05, - "loss": 0.0293, + "learning_rate": 2.042235501536829e-05, + "loss": 0.077, "step": 103310 }, { "epoch": 4.82, - "learning_rate": 1.040691950682106e-05, - "loss": 0.0788, + "learning_rate": 2.0421886945532274e-05, + "loss": 0.1393, "step": 103315 }, { "epoch": 4.82, - "learning_rate": 1.040645070554592e-05, - "loss": 0.2029, + "learning_rate": 2.0421418875696254e-05, + "loss": 0.194, "step": 103320 }, { "epoch": 4.82, - "learning_rate": 1.0405981904270782e-05, - "loss": 0.3586, + "learning_rate": 2.0420950805860234e-05, + "loss": 0.2162, "step": 103325 }, { "epoch": 4.82, - "learning_rate": 1.0405513102995642e-05, - "loss": 0.0467, + "learning_rate": 2.0420482736024214e-05, + "loss": 0.0593, "step": 103330 }, { "epoch": 4.82, - "learning_rate": 1.0405044301720501e-05, - "loss": 0.0179, + "learning_rate": 2.0420014666188197e-05, + "loss": 0.0129, "step": 103335 }, { "epoch": 4.82, - "learning_rate": 1.0404575500445361e-05, - "loss": 0.0412, + "learning_rate": 2.0419546596352177e-05, + "loss": 0.036, "step": 103340 }, { "epoch": 4.82, - "learning_rate": 1.0404106699170223e-05, - "loss": 0.0508, + "learning_rate": 2.0419078526516157e-05, + "loss": 0.0629, "step": 103345 }, { "epoch": 4.82, - "learning_rate": 1.0403637897895083e-05, - "loss": 0.0399, + "learning_rate": 2.0418610456680136e-05, + "loss": 0.0522, "step": 103350 }, { "epoch": 4.82, - "learning_rate": 1.0403169096619943e-05, - "loss": 0.0471, + "learning_rate": 2.041814238684412e-05, + "loss": 0.0466, "step": 103355 }, { "epoch": 4.82, - "learning_rate": 1.0402700295344803e-05, - "loss": 0.1354, + "learning_rate": 2.04176743170081e-05, + "loss": 0.096, "step": 103360 }, { "epoch": 4.82, - "learning_rate": 1.0402231494069666e-05, - "loss": 0.1801, + "learning_rate": 2.041720624717208e-05, + "loss": 0.0658, "step": 103365 }, { "epoch": 4.82, - "learning_rate": 1.0401762692794526e-05, - "loss": 0.1218, + "learning_rate": 2.041673817733606e-05, + "loss": 0.1528, "step": 103370 }, { "epoch": 4.82, - "learning_rate": 1.0401293891519386e-05, - "loss": 0.2205, + "learning_rate": 2.041627010750004e-05, + "loss": 0.1442, "step": 103375 }, { "epoch": 4.82, - "learning_rate": 1.0400825090244246e-05, - "loss": 0.0513, + "learning_rate": 2.041580203766402e-05, + "loss": 0.0648, "step": 103380 }, { "epoch": 4.82, - "learning_rate": 1.0400356288969108e-05, - "loss": 0.0253, + "learning_rate": 2.0415333967828e-05, + "loss": 0.0083, "step": 103385 }, { "epoch": 4.82, - "learning_rate": 1.0399887487693968e-05, - "loss": 0.0256, + "learning_rate": 2.0414865897991982e-05, + "loss": 0.0305, "step": 103390 }, { "epoch": 4.82, - "learning_rate": 1.0399418686418827e-05, - "loss": 0.0203, + "learning_rate": 2.041439782815596e-05, + "loss": 0.1127, "step": 103395 }, { "epoch": 4.82, - "learning_rate": 1.0398949885143687e-05, - "loss": 0.1352, + "learning_rate": 2.041392975831994e-05, + "loss": 0.0685, "step": 103400 }, { "epoch": 4.83, - "learning_rate": 1.0398481083868547e-05, - "loss": 0.0724, + "learning_rate": 2.041346168848392e-05, + "loss": 0.0788, "step": 103405 }, { "epoch": 4.83, - "learning_rate": 1.039801228259341e-05, - "loss": 0.0821, + "learning_rate": 2.0412993618647904e-05, + "loss": 0.1029, "step": 103410 }, { "epoch": 4.83, - "learning_rate": 1.039754348131827e-05, - "loss": 0.0651, + "learning_rate": 2.0412525548811884e-05, + "loss": 0.1852, "step": 103415 }, { "epoch": 4.83, - "learning_rate": 1.039707468004313e-05, - "loss": 0.2306, + "learning_rate": 2.0412057478975864e-05, + "loss": 0.1354, "step": 103420 }, { "epoch": 4.83, - "learning_rate": 1.0396605878767992e-05, - "loss": 0.1944, + "learning_rate": 2.0411589409139847e-05, + "loss": 0.3282, "step": 103425 }, { "epoch": 4.83, - "learning_rate": 1.0396137077492852e-05, - "loss": 0.0688, + "learning_rate": 2.0411121339303827e-05, + "loss": 0.0636, "step": 103430 }, { "epoch": 4.83, - "learning_rate": 1.0395668276217712e-05, - "loss": 0.0145, + "learning_rate": 2.0410653269467803e-05, + "loss": 0.0286, "step": 103435 }, { "epoch": 4.83, - "learning_rate": 1.0395199474942572e-05, - "loss": 0.0411, + "learning_rate": 2.0410185199631783e-05, + "loss": 0.0422, "step": 103440 }, { "epoch": 4.83, - "learning_rate": 1.0394730673667432e-05, - "loss": 0.0541, + "learning_rate": 2.0409717129795766e-05, + "loss": 0.0267, "step": 103445 }, { "epoch": 4.83, - "learning_rate": 1.0394261872392293e-05, - "loss": 0.0867, + "learning_rate": 2.0409249059959746e-05, + "loss": 0.0316, "step": 103450 }, { "epoch": 4.83, - "learning_rate": 1.0393793071117155e-05, - "loss": 0.0432, + "learning_rate": 2.0408780990123726e-05, + "loss": 0.1669, "step": 103455 }, { "epoch": 4.83, - "learning_rate": 1.0393324269842015e-05, - "loss": 0.068, + "learning_rate": 2.0408312920287706e-05, + "loss": 0.1521, "step": 103460 }, { "epoch": 4.83, - "learning_rate": 1.0392855468566877e-05, - "loss": 0.2069, + "learning_rate": 2.040784485045169e-05, + "loss": 0.1022, "step": 103465 }, { "epoch": 4.83, - "learning_rate": 1.0392386667291737e-05, - "loss": 0.1491, + "learning_rate": 2.040737678061567e-05, + "loss": 0.1395, "step": 103470 }, { "epoch": 4.83, - "learning_rate": 1.0391917866016597e-05, - "loss": 0.182, + "learning_rate": 2.040690871077965e-05, + "loss": 0.155, "step": 103475 }, { "epoch": 4.83, - "learning_rate": 1.0391449064741456e-05, - "loss": 0.0641, + "learning_rate": 2.040644064094363e-05, + "loss": 0.1034, "step": 103480 }, { "epoch": 4.83, - "learning_rate": 1.0390980263466316e-05, - "loss": 0.0513, + "learning_rate": 2.0405972571107612e-05, + "loss": 0.0089, "step": 103485 }, { "epoch": 4.83, - "learning_rate": 1.0390511462191178e-05, - "loss": 0.0285, + "learning_rate": 2.040550450127159e-05, + "loss": 0.0283, "step": 103490 }, { "epoch": 4.83, - "learning_rate": 1.0390042660916038e-05, - "loss": 0.0453, + "learning_rate": 2.0405036431435568e-05, + "loss": 0.0171, "step": 103495 }, { "epoch": 4.83, - "learning_rate": 1.03895738596409e-05, - "loss": 0.029, + "learning_rate": 2.040456836159955e-05, + "loss": 0.021, "step": 103500 }, { "epoch": 4.83, - "learning_rate": 1.0389105058365761e-05, - "loss": 0.0194, + "learning_rate": 2.040410029176353e-05, + "loss": 0.0439, "step": 103505 }, { "epoch": 4.83, - "learning_rate": 1.0388636257090621e-05, - "loss": 0.0486, + "learning_rate": 2.040363222192751e-05, + "loss": 0.0361, "step": 103510 }, { "epoch": 4.83, - "learning_rate": 1.0388167455815481e-05, - "loss": 0.0905, + "learning_rate": 2.040316415209149e-05, + "loss": 0.075, "step": 103515 }, { "epoch": 4.83, - "learning_rate": 1.0387698654540341e-05, - "loss": 0.1716, + "learning_rate": 2.0402696082255474e-05, + "loss": 0.18, "step": 103520 }, { "epoch": 4.83, - "learning_rate": 1.0387229853265201e-05, - "loss": 0.1719, + "learning_rate": 2.0402228012419454e-05, + "loss": 0.2485, "step": 103525 }, { "epoch": 4.83, - "learning_rate": 1.0386761051990063e-05, - "loss": 0.0856, + "learning_rate": 2.0401759942583434e-05, + "loss": 0.1009, "step": 103530 }, { "epoch": 4.83, - "learning_rate": 1.0386292250714923e-05, - "loss": 0.0262, + "learning_rate": 2.0401291872747413e-05, + "loss": 0.0334, "step": 103535 }, { "epoch": 4.83, - "learning_rate": 1.0385823449439782e-05, - "loss": 0.0259, + "learning_rate": 2.0400823802911397e-05, + "loss": 0.0407, "step": 103540 }, { "epoch": 4.83, - "learning_rate": 1.0385354648164642e-05, - "loss": 0.0864, + "learning_rate": 2.0400355733075376e-05, + "loss": 0.0176, "step": 103545 }, { "epoch": 4.83, - "learning_rate": 1.0384885846889506e-05, - "loss": 0.0321, + "learning_rate": 2.0399887663239356e-05, + "loss": 0.0289, "step": 103550 }, { "epoch": 4.83, - "learning_rate": 1.0384417045614366e-05, - "loss": 0.0462, + "learning_rate": 2.039941959340334e-05, + "loss": 0.0347, "step": 103555 }, { "epoch": 4.83, - "learning_rate": 1.0383948244339226e-05, - "loss": 0.0652, + "learning_rate": 2.0398951523567316e-05, + "loss": 0.1002, "step": 103560 }, { "epoch": 4.83, - "learning_rate": 1.0383479443064086e-05, - "loss": 0.1621, + "learning_rate": 2.0398483453731296e-05, + "loss": 0.0645, "step": 103565 }, { "epoch": 4.83, - "learning_rate": 1.0383010641788947e-05, - "loss": 0.1763, + "learning_rate": 2.0398015383895275e-05, + "loss": 0.1363, "step": 103570 }, { "epoch": 4.83, - "learning_rate": 1.0382541840513807e-05, - "loss": 0.3932, + "learning_rate": 2.039754731405926e-05, + "loss": 0.4248, "step": 103575 }, { "epoch": 4.83, - "learning_rate": 1.0382073039238667e-05, - "loss": 0.0898, + "learning_rate": 2.039707924422324e-05, + "loss": 0.0544, "step": 103580 }, { "epoch": 4.83, - "learning_rate": 1.0381604237963527e-05, - "loss": 0.0166, + "learning_rate": 2.039661117438722e-05, + "loss": 0.0228, "step": 103585 }, { "epoch": 4.83, - "learning_rate": 1.0381135436688387e-05, - "loss": 0.0185, + "learning_rate": 2.0396143104551198e-05, + "loss": 0.0528, "step": 103590 }, { "epoch": 4.83, - "learning_rate": 1.038066663541325e-05, - "loss": 0.0515, + "learning_rate": 2.039567503471518e-05, + "loss": 0.0381, "step": 103595 }, { "epoch": 4.83, - "learning_rate": 1.038019783413811e-05, - "loss": 0.0784, + "learning_rate": 2.039520696487916e-05, + "loss": 0.0577, "step": 103600 }, { "epoch": 4.83, - "learning_rate": 1.037972903286297e-05, - "loss": 0.0554, + "learning_rate": 2.039473889504314e-05, + "loss": 0.0295, "step": 103605 }, { "epoch": 4.83, - "learning_rate": 1.0379260231587832e-05, - "loss": 0.1382, + "learning_rate": 2.0394270825207124e-05, + "loss": 0.0734, "step": 103610 }, { "epoch": 4.83, - "learning_rate": 1.0378791430312692e-05, - "loss": 0.0873, + "learning_rate": 2.0393802755371104e-05, + "loss": 0.1701, "step": 103615 }, { "epoch": 4.84, - "learning_rate": 1.0378322629037552e-05, - "loss": 0.1708, + "learning_rate": 2.0393334685535084e-05, + "loss": 0.1201, "step": 103620 }, { "epoch": 4.84, - "learning_rate": 1.0377853827762411e-05, - "loss": 0.1861, + "learning_rate": 2.039286661569906e-05, + "loss": 0.2993, "step": 103625 }, { "epoch": 4.84, - "learning_rate": 1.0377385026487271e-05, - "loss": 0.0717, + "learning_rate": 2.0392398545863043e-05, + "loss": 0.07, "step": 103630 }, { "epoch": 4.84, - "learning_rate": 1.0376916225212133e-05, - "loss": 0.0392, + "learning_rate": 2.0391930476027023e-05, + "loss": 0.0109, "step": 103635 }, { "epoch": 4.84, - "learning_rate": 1.0376447423936995e-05, - "loss": 0.02, + "learning_rate": 2.0391462406191003e-05, + "loss": 0.0419, "step": 103640 }, { "epoch": 4.84, - "learning_rate": 1.0375978622661855e-05, - "loss": 0.0132, + "learning_rate": 2.0390994336354983e-05, + "loss": 0.0487, "step": 103645 }, { "epoch": 4.84, - "learning_rate": 1.0375509821386716e-05, - "loss": 0.048, + "learning_rate": 2.0390526266518966e-05, + "loss": 0.0646, "step": 103650 }, { "epoch": 4.84, - "learning_rate": 1.0375041020111576e-05, - "loss": 0.0678, + "learning_rate": 2.0390058196682946e-05, + "loss": 0.0889, "step": 103655 }, { "epoch": 4.84, - "learning_rate": 1.0374572218836436e-05, - "loss": 0.1245, + "learning_rate": 2.0389590126846926e-05, + "loss": 0.0605, "step": 103660 }, { "epoch": 4.84, - "learning_rate": 1.0374103417561296e-05, - "loss": 0.0481, + "learning_rate": 2.0389122057010906e-05, + "loss": 0.1063, "step": 103665 }, { "epoch": 4.84, - "learning_rate": 1.0373634616286156e-05, - "loss": 0.1888, + "learning_rate": 2.038865398717489e-05, + "loss": 0.2162, "step": 103670 }, { "epoch": 4.84, - "learning_rate": 1.0373165815011018e-05, - "loss": 0.235, + "learning_rate": 2.038818591733887e-05, + "loss": 0.3677, "step": 103675 }, { "epoch": 4.84, - "learning_rate": 1.0372697013735878e-05, - "loss": 0.0594, + "learning_rate": 2.038771784750285e-05, + "loss": 0.0872, "step": 103680 }, { "epoch": 4.84, - "learning_rate": 1.0372228212460737e-05, - "loss": 0.0087, + "learning_rate": 2.0387249777666828e-05, + "loss": 0.0284, "step": 103685 }, { "epoch": 4.84, - "learning_rate": 1.03717594111856e-05, - "loss": 0.0239, + "learning_rate": 2.0386781707830808e-05, + "loss": 0.029, "step": 103690 }, { "epoch": 4.84, - "learning_rate": 1.037129060991046e-05, - "loss": 0.0177, + "learning_rate": 2.0386313637994788e-05, + "loss": 0.065, "step": 103695 }, { "epoch": 4.84, - "learning_rate": 1.037082180863532e-05, - "loss": 0.0595, + "learning_rate": 2.0385845568158768e-05, + "loss": 0.0607, "step": 103700 }, { "epoch": 4.84, - "learning_rate": 1.037035300736018e-05, - "loss": 0.0848, + "learning_rate": 2.038537749832275e-05, + "loss": 0.0646, "step": 103705 }, { "epoch": 4.84, - "learning_rate": 1.036988420608504e-05, - "loss": 0.0601, + "learning_rate": 2.038490942848673e-05, + "loss": 0.0878, "step": 103710 }, { "epoch": 4.84, - "learning_rate": 1.0369415404809902e-05, - "loss": 0.131, + "learning_rate": 2.038444135865071e-05, + "loss": 0.0973, "step": 103715 }, { "epoch": 4.84, - "learning_rate": 1.0368946603534762e-05, - "loss": 0.1303, + "learning_rate": 2.038397328881469e-05, + "loss": 0.1558, "step": 103720 }, { "epoch": 4.84, - "learning_rate": 1.0368477802259622e-05, - "loss": 0.2756, + "learning_rate": 2.0383505218978674e-05, + "loss": 0.1426, "step": 103725 }, { "epoch": 4.84, - "learning_rate": 1.0368009000984482e-05, - "loss": 0.1217, + "learning_rate": 2.0383037149142653e-05, + "loss": 0.1028, "step": 103730 }, { "epoch": 4.84, - "learning_rate": 1.0367540199709345e-05, - "loss": 0.0262, + "learning_rate": 2.0382569079306633e-05, + "loss": 0.0184, "step": 103735 }, { "epoch": 4.84, - "learning_rate": 1.0367071398434205e-05, - "loss": 0.0098, + "learning_rate": 2.0382101009470616e-05, + "loss": 0.005, "step": 103740 }, { "epoch": 4.84, - "learning_rate": 1.0366602597159065e-05, - "loss": 0.0318, + "learning_rate": 2.0381632939634596e-05, + "loss": 0.0581, "step": 103745 }, { "epoch": 4.84, - "learning_rate": 1.0366133795883925e-05, - "loss": 0.0758, + "learning_rate": 2.0381164869798573e-05, + "loss": 0.077, "step": 103750 }, { "epoch": 4.84, - "learning_rate": 1.0365664994608787e-05, - "loss": 0.0736, + "learning_rate": 2.0380696799962552e-05, + "loss": 0.1039, "step": 103755 }, { "epoch": 4.84, - "learning_rate": 1.0365196193333647e-05, - "loss": 0.0925, + "learning_rate": 2.0380228730126536e-05, + "loss": 0.113, "step": 103760 }, { "epoch": 4.84, - "learning_rate": 1.0364727392058507e-05, - "loss": 0.1003, + "learning_rate": 2.0379760660290515e-05, + "loss": 0.0954, "step": 103765 }, { "epoch": 4.84, - "learning_rate": 1.0364258590783367e-05, - "loss": 0.1505, + "learning_rate": 2.0379292590454495e-05, + "loss": 0.1318, "step": 103770 }, { "epoch": 4.84, - "learning_rate": 1.0363789789508228e-05, - "loss": 0.1504, + "learning_rate": 2.0378824520618475e-05, + "loss": 0.2296, "step": 103775 }, { "epoch": 4.84, - "learning_rate": 1.036332098823309e-05, - "loss": 0.0782, + "learning_rate": 2.037835645078246e-05, + "loss": 0.0706, "step": 103780 }, { "epoch": 4.84, - "learning_rate": 1.036285218695795e-05, - "loss": 0.0325, + "learning_rate": 2.0377888380946438e-05, + "loss": 0.0263, "step": 103785 }, { "epoch": 4.84, - "learning_rate": 1.036238338568281e-05, - "loss": 0.0306, + "learning_rate": 2.0377420311110418e-05, + "loss": 0.0317, "step": 103790 }, { "epoch": 4.84, - "learning_rate": 1.0361914584407671e-05, - "loss": 0.0332, + "learning_rate": 2.03769522412744e-05, + "loss": 0.0368, "step": 103795 }, { "epoch": 4.84, - "learning_rate": 1.0361445783132531e-05, - "loss": 0.0566, + "learning_rate": 2.037648417143838e-05, + "loss": 0.0738, "step": 103800 }, { "epoch": 4.84, - "learning_rate": 1.0360976981857391e-05, - "loss": 0.052, + "learning_rate": 2.037601610160236e-05, + "loss": 0.0546, "step": 103805 }, { "epoch": 4.84, - "learning_rate": 1.0360508180582251e-05, - "loss": 0.0845, + "learning_rate": 2.037554803176634e-05, + "loss": 0.1018, "step": 103810 }, { "epoch": 4.84, - "learning_rate": 1.0360039379307113e-05, - "loss": 0.0776, + "learning_rate": 2.037507996193032e-05, + "loss": 0.1162, "step": 103815 }, { "epoch": 4.84, - "learning_rate": 1.0359570578031973e-05, - "loss": 0.1386, + "learning_rate": 2.03746118920943e-05, + "loss": 0.272, "step": 103820 }, { "epoch": 4.84, - "learning_rate": 1.0359101776756834e-05, - "loss": 0.3277, + "learning_rate": 2.037414382225828e-05, + "loss": 0.2956, "step": 103825 }, { "epoch": 4.84, - "learning_rate": 1.0358632975481694e-05, - "loss": 0.0518, + "learning_rate": 2.037367575242226e-05, + "loss": 0.0786, "step": 103830 }, { "epoch": 4.85, - "learning_rate": 1.0358164174206556e-05, - "loss": 0.0133, + "learning_rate": 2.0373207682586243e-05, + "loss": 0.0382, "step": 103835 }, { "epoch": 4.85, - "learning_rate": 1.0357695372931416e-05, - "loss": 0.0351, + "learning_rate": 2.0372739612750223e-05, + "loss": 0.013, "step": 103840 }, { "epoch": 4.85, - "learning_rate": 1.0357226571656276e-05, - "loss": 0.0212, + "learning_rate": 2.0372271542914203e-05, + "loss": 0.0151, "step": 103845 }, { "epoch": 4.85, - "learning_rate": 1.0356757770381136e-05, - "loss": 0.084, + "learning_rate": 2.0371803473078183e-05, + "loss": 0.0548, "step": 103850 }, { "epoch": 4.85, - "learning_rate": 1.0356288969105997e-05, - "loss": 0.0242, + "learning_rate": 2.0371335403242166e-05, + "loss": 0.0196, "step": 103855 }, { "epoch": 4.85, - "learning_rate": 1.0355820167830857e-05, - "loss": 0.0712, + "learning_rate": 2.0370867333406146e-05, + "loss": 0.0732, "step": 103860 }, { "epoch": 4.85, - "learning_rate": 1.0355351366555717e-05, - "loss": 0.0509, + "learning_rate": 2.0370399263570125e-05, + "loss": 0.0709, "step": 103865 }, { "epoch": 4.85, - "learning_rate": 1.0354882565280577e-05, - "loss": 0.1514, + "learning_rate": 2.036993119373411e-05, + "loss": 0.2222, "step": 103870 }, { "epoch": 4.85, - "learning_rate": 1.035441376400544e-05, - "loss": 0.2023, + "learning_rate": 2.0369463123898085e-05, + "loss": 0.2063, "step": 103875 }, { "epoch": 4.85, - "learning_rate": 1.03539449627303e-05, - "loss": 0.0733, + "learning_rate": 2.0368995054062065e-05, + "loss": 0.0882, "step": 103880 }, { "epoch": 4.85, - "learning_rate": 1.035347616145516e-05, - "loss": 0.0018, + "learning_rate": 2.0368526984226045e-05, + "loss": 0.0443, "step": 103885 }, { "epoch": 4.85, - "learning_rate": 1.035300736018002e-05, - "loss": 0.003, + "learning_rate": 2.0368058914390028e-05, + "loss": 0.0632, "step": 103890 }, { "epoch": 4.85, - "learning_rate": 1.0352538558904882e-05, - "loss": 0.0272, + "learning_rate": 2.0367590844554008e-05, + "loss": 0.0429, "step": 103895 }, { "epoch": 4.85, - "learning_rate": 1.0352069757629742e-05, - "loss": 0.0233, + "learning_rate": 2.0367122774717987e-05, + "loss": 0.0379, "step": 103900 }, { "epoch": 4.85, - "learning_rate": 1.0351600956354602e-05, - "loss": 0.1071, + "learning_rate": 2.0366654704881967e-05, + "loss": 0.0583, "step": 103905 }, { "epoch": 4.85, - "learning_rate": 1.0351132155079462e-05, - "loss": 0.0926, + "learning_rate": 2.036618663504595e-05, + "loss": 0.0705, "step": 103910 }, { "epoch": 4.85, - "learning_rate": 1.0350663353804322e-05, - "loss": 0.1107, + "learning_rate": 2.036571856520993e-05, + "loss": 0.0544, "step": 103915 }, { "epoch": 4.85, - "learning_rate": 1.0350194552529185e-05, - "loss": 0.1934, + "learning_rate": 2.036525049537391e-05, + "loss": 0.1195, "step": 103920 }, { "epoch": 4.85, - "learning_rate": 1.0349725751254045e-05, - "loss": 0.2821, + "learning_rate": 2.0364782425537893e-05, + "loss": 0.2186, "step": 103925 }, { "epoch": 4.85, - "learning_rate": 1.0349256949978905e-05, - "loss": 0.0885, + "learning_rate": 2.0364314355701873e-05, + "loss": 0.0689, "step": 103930 }, { "epoch": 4.85, - "learning_rate": 1.0348788148703766e-05, - "loss": 0.0216, + "learning_rate": 2.0363846285865853e-05, + "loss": 0.0326, "step": 103935 }, { "epoch": 4.85, - "learning_rate": 1.0348319347428626e-05, - "loss": 0.0211, + "learning_rate": 2.036337821602983e-05, + "loss": 0.0114, "step": 103940 }, { "epoch": 4.85, - "learning_rate": 1.0347850546153486e-05, - "loss": 0.0385, + "learning_rate": 2.0362910146193813e-05, + "loss": 0.0752, "step": 103945 }, { "epoch": 4.85, - "learning_rate": 1.0347381744878346e-05, - "loss": 0.1131, + "learning_rate": 2.0362442076357792e-05, + "loss": 0.0855, "step": 103950 }, { "epoch": 4.85, - "learning_rate": 1.0346912943603206e-05, - "loss": 0.1028, + "learning_rate": 2.0361974006521772e-05, + "loss": 0.0532, "step": 103955 }, { "epoch": 4.85, - "learning_rate": 1.0346444142328068e-05, - "loss": 0.0696, + "learning_rate": 2.0361505936685752e-05, + "loss": 0.077, "step": 103960 }, { "epoch": 4.85, - "learning_rate": 1.034597534105293e-05, - "loss": 0.0906, + "learning_rate": 2.0361037866849735e-05, + "loss": 0.0828, "step": 103965 }, { "epoch": 4.85, - "learning_rate": 1.034550653977779e-05, - "loss": 0.1768, + "learning_rate": 2.0360569797013715e-05, + "loss": 0.1136, "step": 103970 }, { "epoch": 4.85, - "learning_rate": 1.0345037738502651e-05, - "loss": 0.2065, + "learning_rate": 2.0360101727177695e-05, + "loss": 0.2557, "step": 103975 }, { "epoch": 4.85, - "learning_rate": 1.034456893722751e-05, - "loss": 0.1293, + "learning_rate": 2.0359633657341678e-05, + "loss": 0.0238, "step": 103980 }, { "epoch": 4.85, - "learning_rate": 1.034410013595237e-05, - "loss": 0.0061, + "learning_rate": 2.0359165587505658e-05, + "loss": 0.0383, "step": 103985 }, { "epoch": 4.85, - "learning_rate": 1.034363133467723e-05, - "loss": 0.0526, + "learning_rate": 2.0358697517669638e-05, + "loss": 0.0138, "step": 103990 }, { "epoch": 4.85, - "learning_rate": 1.034316253340209e-05, - "loss": 0.0459, + "learning_rate": 2.0358229447833618e-05, + "loss": 0.0694, "step": 103995 }, { "epoch": 4.85, - "learning_rate": 1.0342693732126952e-05, - "loss": 0.0759, + "learning_rate": 2.0357761377997597e-05, + "loss": 0.0612, "step": 104000 }, { "epoch": 4.85, - "learning_rate": 1.0342224930851812e-05, - "loss": 0.2133, + "learning_rate": 2.0357293308161577e-05, + "loss": 0.1041, "step": 104005 }, { "epoch": 4.85, - "learning_rate": 1.0341756129576672e-05, - "loss": 0.0962, + "learning_rate": 2.0356825238325557e-05, + "loss": 0.0655, "step": 104010 }, { "epoch": 4.85, - "learning_rate": 1.0341287328301535e-05, - "loss": 0.1311, + "learning_rate": 2.0356357168489537e-05, + "loss": 0.0947, "step": 104015 }, { "epoch": 4.85, - "learning_rate": 1.0340818527026395e-05, - "loss": 0.1578, + "learning_rate": 2.035588909865352e-05, + "loss": 0.0964, "step": 104020 }, { "epoch": 4.85, - "learning_rate": 1.0340349725751255e-05, - "loss": 0.2502, + "learning_rate": 2.03554210288175e-05, + "loss": 0.1924, "step": 104025 }, { "epoch": 4.85, - "learning_rate": 1.0339880924476115e-05, - "loss": 0.0771, + "learning_rate": 2.035495295898148e-05, + "loss": 0.0353, "step": 104030 }, { "epoch": 4.85, - "learning_rate": 1.0339412123200975e-05, - "loss": 0.0078, + "learning_rate": 2.0354484889145463e-05, + "loss": 0.0204, "step": 104035 }, { "epoch": 4.85, - "learning_rate": 1.0338943321925837e-05, - "loss": 0.036, + "learning_rate": 2.0354016819309443e-05, + "loss": 0.0646, "step": 104040 }, { "epoch": 4.85, - "learning_rate": 1.0338474520650697e-05, - "loss": 0.0299, + "learning_rate": 2.0353548749473423e-05, + "loss": 0.0328, "step": 104045 }, { "epoch": 4.86, - "learning_rate": 1.0338005719375557e-05, - "loss": 0.0417, + "learning_rate": 2.0353080679637402e-05, + "loss": 0.0467, "step": 104050 }, { "epoch": 4.86, - "learning_rate": 1.0337536918100417e-05, - "loss": 0.0297, + "learning_rate": 2.0352612609801386e-05, + "loss": 0.074, "step": 104055 }, { "epoch": 4.86, - "learning_rate": 1.033706811682528e-05, - "loss": 0.1254, + "learning_rate": 2.0352144539965365e-05, + "loss": 0.0339, "step": 104060 }, { "epoch": 4.86, - "learning_rate": 1.033659931555014e-05, - "loss": 0.1065, + "learning_rate": 2.0351676470129342e-05, + "loss": 0.0532, "step": 104065 }, { "epoch": 4.86, - "learning_rate": 1.0336130514275e-05, - "loss": 0.1066, + "learning_rate": 2.035120840029332e-05, + "loss": 0.2231, "step": 104070 }, { "epoch": 4.86, - "learning_rate": 1.033566171299986e-05, - "loss": 0.3243, + "learning_rate": 2.0350740330457305e-05, + "loss": 0.1354, "step": 104075 }, { "epoch": 4.86, - "learning_rate": 1.0335192911724721e-05, - "loss": 0.0862, + "learning_rate": 2.0350272260621285e-05, + "loss": 0.0876, "step": 104080 }, { "epoch": 4.86, - "learning_rate": 1.0334724110449581e-05, - "loss": 0.0327, + "learning_rate": 2.0349804190785264e-05, + "loss": 0.0249, "step": 104085 }, { "epoch": 4.86, - "learning_rate": 1.0334255309174441e-05, - "loss": 0.0051, + "learning_rate": 2.0349336120949244e-05, + "loss": 0.0487, "step": 104090 }, { "epoch": 4.86, - "learning_rate": 1.0333786507899301e-05, - "loss": 0.0404, + "learning_rate": 2.0348868051113227e-05, + "loss": 0.0439, "step": 104095 }, { "epoch": 4.86, - "learning_rate": 1.0333317706624161e-05, - "loss": 0.0774, + "learning_rate": 2.0348399981277207e-05, + "loss": 0.0249, "step": 104100 }, { "epoch": 4.86, - "learning_rate": 1.0332848905349024e-05, - "loss": 0.0577, + "learning_rate": 2.0347931911441187e-05, + "loss": 0.0729, "step": 104105 }, { "epoch": 4.86, - "learning_rate": 1.0332380104073884e-05, - "loss": 0.153, + "learning_rate": 2.034746384160517e-05, + "loss": 0.0683, "step": 104110 }, { "epoch": 4.86, - "learning_rate": 1.0331911302798744e-05, - "loss": 0.1411, + "learning_rate": 2.034699577176915e-05, + "loss": 0.065, "step": 104115 }, { "epoch": 4.86, - "learning_rate": 1.0331442501523606e-05, - "loss": 0.2148, + "learning_rate": 2.034652770193313e-05, + "loss": 0.2135, "step": 104120 }, { "epoch": 4.86, - "learning_rate": 1.0330973700248466e-05, - "loss": 0.2341, + "learning_rate": 2.034605963209711e-05, + "loss": 0.1778, "step": 104125 }, { "epoch": 4.86, - "learning_rate": 1.0330504898973326e-05, - "loss": 0.0696, + "learning_rate": 2.034559156226109e-05, + "loss": 0.0772, "step": 104130 }, { "epoch": 4.86, - "learning_rate": 1.0330036097698186e-05, - "loss": 0.0217, + "learning_rate": 2.034512349242507e-05, + "loss": 0.0333, "step": 104135 }, { "epoch": 4.86, - "learning_rate": 1.0329567296423046e-05, - "loss": 0.0227, + "learning_rate": 2.034465542258905e-05, + "loss": 0.0256, "step": 104140 }, { "epoch": 4.86, - "learning_rate": 1.0329098495147907e-05, - "loss": 0.0843, + "learning_rate": 2.034418735275303e-05, + "loss": 0.0265, "step": 104145 }, { "epoch": 4.86, - "learning_rate": 1.0328629693872769e-05, - "loss": 0.0313, + "learning_rate": 2.0343719282917012e-05, + "loss": 0.0424, "step": 104150 }, { "epoch": 4.86, - "learning_rate": 1.0328160892597629e-05, - "loss": 0.0686, + "learning_rate": 2.0343251213080992e-05, + "loss": 0.0332, "step": 104155 }, { "epoch": 4.86, - "learning_rate": 1.032769209132249e-05, - "loss": 0.1213, + "learning_rate": 2.0342783143244972e-05, + "loss": 0.1058, "step": 104160 }, { "epoch": 4.86, - "learning_rate": 1.032722329004735e-05, - "loss": 0.0856, + "learning_rate": 2.0342315073408955e-05, + "loss": 0.1111, "step": 104165 }, { "epoch": 4.86, - "learning_rate": 1.032675448877221e-05, - "loss": 0.1158, + "learning_rate": 2.0341847003572935e-05, + "loss": 0.0687, "step": 104170 }, { "epoch": 4.86, - "learning_rate": 1.032628568749707e-05, - "loss": 0.2419, + "learning_rate": 2.0341378933736915e-05, + "loss": 0.2821, "step": 104175 }, { "epoch": 4.86, - "learning_rate": 1.032581688622193e-05, - "loss": 0.0999, + "learning_rate": 2.0340910863900895e-05, + "loss": 0.0737, "step": 104180 }, { "epoch": 4.86, - "learning_rate": 1.0325348084946792e-05, - "loss": 0.0753, + "learning_rate": 2.0340442794064878e-05, + "loss": 0.0604, "step": 104185 }, { "epoch": 4.86, - "learning_rate": 1.0324879283671652e-05, - "loss": 0.0617, + "learning_rate": 2.0339974724228854e-05, + "loss": 0.015, "step": 104190 }, { "epoch": 4.86, - "learning_rate": 1.0324410482396512e-05, - "loss": 0.0473, + "learning_rate": 2.0339506654392834e-05, + "loss": 0.034, "step": 104195 }, { "epoch": 4.86, - "learning_rate": 1.0323941681121375e-05, - "loss": 0.0751, + "learning_rate": 2.0339038584556814e-05, + "loss": 0.0552, "step": 104200 }, { "epoch": 4.86, - "learning_rate": 1.0323472879846235e-05, - "loss": 0.0753, + "learning_rate": 2.0338570514720797e-05, + "loss": 0.0306, "step": 104205 }, { "epoch": 4.86, - "learning_rate": 1.0323004078571095e-05, - "loss": 0.0951, + "learning_rate": 2.0338102444884777e-05, + "loss": 0.057, "step": 104210 }, { "epoch": 4.86, - "learning_rate": 1.0322535277295955e-05, - "loss": 0.0756, + "learning_rate": 2.0337634375048757e-05, + "loss": 0.145, "step": 104215 }, { "epoch": 4.86, - "learning_rate": 1.0322066476020815e-05, - "loss": 0.0675, + "learning_rate": 2.033716630521274e-05, + "loss": 0.173, "step": 104220 }, { "epoch": 4.86, - "learning_rate": 1.0321597674745676e-05, - "loss": 0.2918, + "learning_rate": 2.033669823537672e-05, + "loss": 0.3054, "step": 104225 }, { "epoch": 4.86, - "learning_rate": 1.0321128873470536e-05, - "loss": 0.0426, + "learning_rate": 2.03362301655407e-05, + "loss": 0.0948, "step": 104230 }, { "epoch": 4.86, - "learning_rate": 1.0320660072195396e-05, - "loss": 0.0393, + "learning_rate": 2.033576209570468e-05, + "loss": 0.0214, "step": 104235 }, { "epoch": 4.86, - "learning_rate": 1.0320191270920256e-05, - "loss": 0.0563, + "learning_rate": 2.0335294025868663e-05, + "loss": 0.0272, "step": 104240 }, { "epoch": 4.86, - "learning_rate": 1.031972246964512e-05, - "loss": 0.0616, + "learning_rate": 2.0334825956032642e-05, + "loss": 0.0171, "step": 104245 }, { "epoch": 4.86, - "learning_rate": 1.031925366836998e-05, - "loss": 0.0281, + "learning_rate": 2.0334357886196622e-05, + "loss": 0.1131, "step": 104250 }, { "epoch": 4.86, - "learning_rate": 1.031878486709484e-05, - "loss": 0.0422, + "learning_rate": 2.03338898163606e-05, + "loss": 0.0713, "step": 104255 }, { "epoch": 4.86, - "learning_rate": 1.03183160658197e-05, - "loss": 0.1069, + "learning_rate": 2.0333421746524582e-05, + "loss": 0.099, "step": 104260 }, { "epoch": 4.87, - "learning_rate": 1.0317847264544561e-05, - "loss": 0.0781, + "learning_rate": 2.033295367668856e-05, + "loss": 0.0776, "step": 104265 }, { "epoch": 4.87, - "learning_rate": 1.0317378463269421e-05, - "loss": 0.1148, + "learning_rate": 2.033248560685254e-05, + "loss": 0.1112, "step": 104270 }, { "epoch": 4.87, - "learning_rate": 1.031690966199428e-05, - "loss": 0.2158, + "learning_rate": 2.033201753701652e-05, + "loss": 0.1951, "step": 104275 }, { "epoch": 4.87, - "learning_rate": 1.031644086071914e-05, - "loss": 0.0568, + "learning_rate": 2.0331549467180504e-05, + "loss": 0.0743, "step": 104280 }, { "epoch": 4.87, - "learning_rate": 1.0315972059444002e-05, - "loss": 0.0198, + "learning_rate": 2.0331081397344484e-05, + "loss": 0.0139, "step": 104285 }, { "epoch": 4.87, - "learning_rate": 1.0315503258168864e-05, - "loss": 0.0703, + "learning_rate": 2.0330613327508464e-05, + "loss": 0.0526, "step": 104290 }, { "epoch": 4.87, - "learning_rate": 1.0315034456893724e-05, - "loss": 0.0625, + "learning_rate": 2.0330145257672447e-05, + "loss": 0.0456, "step": 104295 }, { "epoch": 4.87, - "learning_rate": 1.0314565655618584e-05, - "loss": 0.0542, + "learning_rate": 2.0329677187836427e-05, + "loss": 0.0489, "step": 104300 }, { "epoch": 4.87, - "learning_rate": 1.0314096854343445e-05, - "loss": 0.0107, + "learning_rate": 2.0329209118000407e-05, + "loss": 0.0854, "step": 104305 }, { "epoch": 4.87, - "learning_rate": 1.0313628053068305e-05, - "loss": 0.195, + "learning_rate": 2.0328741048164387e-05, + "loss": 0.0793, "step": 104310 }, { "epoch": 4.87, - "learning_rate": 1.0313159251793165e-05, - "loss": 0.0716, + "learning_rate": 2.032827297832837e-05, + "loss": 0.0509, "step": 104315 }, { "epoch": 4.87, - "learning_rate": 1.0312690450518025e-05, - "loss": 0.1751, + "learning_rate": 2.0327804908492346e-05, + "loss": 0.1874, "step": 104320 }, { "epoch": 4.87, - "learning_rate": 1.0312221649242887e-05, - "loss": 0.294, + "learning_rate": 2.0327336838656326e-05, + "loss": 0.3091, "step": 104325 }, { "epoch": 4.87, - "learning_rate": 1.0311752847967747e-05, - "loss": 0.0556, + "learning_rate": 2.0326868768820306e-05, + "loss": 0.0571, "step": 104330 }, { "epoch": 4.87, - "learning_rate": 1.0311284046692607e-05, - "loss": 0.0109, + "learning_rate": 2.032640069898429e-05, + "loss": 0.0136, "step": 104335 }, { "epoch": 4.87, - "learning_rate": 1.031081524541747e-05, - "loss": 0.022, + "learning_rate": 2.032593262914827e-05, + "loss": 0.0133, "step": 104340 }, { "epoch": 4.87, - "learning_rate": 1.031034644414233e-05, - "loss": 0.0189, + "learning_rate": 2.032546455931225e-05, + "loss": 0.0101, "step": 104345 }, { "epoch": 4.87, - "learning_rate": 1.030987764286719e-05, - "loss": 0.0442, + "learning_rate": 2.0324996489476232e-05, + "loss": 0.0732, "step": 104350 }, { "epoch": 4.87, - "learning_rate": 1.030940884159205e-05, - "loss": 0.0479, + "learning_rate": 2.0324528419640212e-05, + "loss": 0.0818, "step": 104355 }, { "epoch": 4.87, - "learning_rate": 1.030894004031691e-05, - "loss": 0.0461, + "learning_rate": 2.0324060349804192e-05, + "loss": 0.1059, "step": 104360 }, { "epoch": 4.87, - "learning_rate": 1.0308471239041771e-05, - "loss": 0.0752, + "learning_rate": 2.032359227996817e-05, + "loss": 0.0913, "step": 104365 }, { "epoch": 4.87, - "learning_rate": 1.0308002437766631e-05, - "loss": 0.1493, + "learning_rate": 2.0323124210132155e-05, + "loss": 0.1832, "step": 104370 }, { "epoch": 4.87, - "learning_rate": 1.0307533636491491e-05, - "loss": 0.2768, + "learning_rate": 2.0322656140296135e-05, + "loss": 0.3439, "step": 104375 }, { "epoch": 4.87, - "learning_rate": 1.0307064835216351e-05, - "loss": 0.0381, + "learning_rate": 2.032218807046011e-05, + "loss": 0.0562, "step": 104380 }, { "epoch": 4.87, - "learning_rate": 1.0306596033941215e-05, - "loss": 0.0201, + "learning_rate": 2.032172000062409e-05, + "loss": 0.01, "step": 104385 }, { "epoch": 4.87, - "learning_rate": 1.0306127232666075e-05, - "loss": 0.0356, + "learning_rate": 2.0321251930788074e-05, + "loss": 0.056, "step": 104390 }, { "epoch": 4.87, - "learning_rate": 1.0305658431390934e-05, - "loss": 0.0462, + "learning_rate": 2.0320783860952054e-05, + "loss": 0.0272, "step": 104395 }, { "epoch": 4.87, - "learning_rate": 1.0305189630115794e-05, - "loss": 0.0339, + "learning_rate": 2.0320315791116034e-05, + "loss": 0.0291, "step": 104400 }, { "epoch": 4.87, - "learning_rate": 1.0304720828840656e-05, - "loss": 0.0334, + "learning_rate": 2.0319847721280017e-05, + "loss": 0.0698, "step": 104405 }, { "epoch": 4.87, - "learning_rate": 1.0304252027565516e-05, - "loss": 0.1274, + "learning_rate": 2.0319379651443997e-05, + "loss": 0.0667, "step": 104410 }, { "epoch": 4.87, - "learning_rate": 1.0303783226290376e-05, - "loss": 0.1074, + "learning_rate": 2.0318911581607976e-05, + "loss": 0.0519, "step": 104415 }, { "epoch": 4.87, - "learning_rate": 1.0303314425015236e-05, - "loss": 0.1418, + "learning_rate": 2.0318443511771956e-05, + "loss": 0.1331, "step": 104420 }, { "epoch": 4.87, - "learning_rate": 1.0302845623740096e-05, - "loss": 0.268, + "learning_rate": 2.031797544193594e-05, + "loss": 0.1985, "step": 104425 }, { "epoch": 4.87, - "learning_rate": 1.0302376822464959e-05, - "loss": 0.0409, + "learning_rate": 2.031750737209992e-05, + "loss": 0.0387, "step": 104430 }, { "epoch": 4.87, - "learning_rate": 1.0301908021189819e-05, - "loss": 0.0284, + "learning_rate": 2.03170393022639e-05, + "loss": 0.0271, "step": 104435 }, { "epoch": 4.87, - "learning_rate": 1.0301439219914679e-05, - "loss": 0.0847, + "learning_rate": 2.031657123242788e-05, + "loss": 0.0065, "step": 104440 }, { "epoch": 4.87, - "learning_rate": 1.030097041863954e-05, - "loss": 0.0451, + "learning_rate": 2.031610316259186e-05, + "loss": 0.0253, "step": 104445 }, { "epoch": 4.87, - "learning_rate": 1.03005016173644e-05, - "loss": 0.0835, + "learning_rate": 2.031563509275584e-05, + "loss": 0.0685, "step": 104450 }, { "epoch": 4.87, - "learning_rate": 1.030003281608926e-05, - "loss": 0.046, + "learning_rate": 2.031516702291982e-05, + "loss": 0.0359, "step": 104455 }, { "epoch": 4.87, - "learning_rate": 1.029956401481412e-05, - "loss": 0.042, + "learning_rate": 2.0314698953083798e-05, + "loss": 0.0717, "step": 104460 }, { "epoch": 4.87, - "learning_rate": 1.029909521353898e-05, - "loss": 0.084, + "learning_rate": 2.031423088324778e-05, + "loss": 0.0586, "step": 104465 }, { "epoch": 4.87, - "learning_rate": 1.0298626412263842e-05, - "loss": 0.176, + "learning_rate": 2.031376281341176e-05, + "loss": 0.1006, "step": 104470 }, { "epoch": 4.87, - "learning_rate": 1.0298157610988704e-05, - "loss": 0.3081, + "learning_rate": 2.031329474357574e-05, + "loss": 0.3085, "step": 104475 }, { "epoch": 4.88, - "learning_rate": 1.0297688809713563e-05, - "loss": 0.0617, + "learning_rate": 2.0312826673739724e-05, + "loss": 0.0849, "step": 104480 }, { "epoch": 4.88, - "learning_rate": 1.0297220008438425e-05, - "loss": 0.008, + "learning_rate": 2.0312358603903704e-05, + "loss": 0.0217, "step": 104485 }, { "epoch": 4.88, - "learning_rate": 1.0296751207163285e-05, - "loss": 0.0287, + "learning_rate": 2.0311890534067684e-05, + "loss": 0.0248, "step": 104490 }, { "epoch": 4.88, - "learning_rate": 1.0296282405888145e-05, - "loss": 0.0181, + "learning_rate": 2.0311422464231664e-05, + "loss": 0.0371, "step": 104495 }, { "epoch": 4.88, - "learning_rate": 1.0295813604613005e-05, - "loss": 0.0593, + "learning_rate": 2.0310954394395647e-05, + "loss": 0.0451, "step": 104500 }, { "epoch": 4.88, - "learning_rate": 1.0295344803337865e-05, - "loss": 0.0621, + "learning_rate": 2.0310486324559627e-05, + "loss": 0.0333, "step": 104505 }, { "epoch": 4.88, - "learning_rate": 1.0294876002062726e-05, - "loss": 0.694, + "learning_rate": 2.0310018254723603e-05, + "loss": 0.0723, "step": 104510 }, { "epoch": 4.88, - "learning_rate": 1.0294407200787586e-05, - "loss": 0.1163, + "learning_rate": 2.0309550184887583e-05, + "loss": 0.0912, "step": 104515 }, { "epoch": 4.88, - "learning_rate": 1.0293938399512446e-05, - "loss": 0.1058, + "learning_rate": 2.0309082115051566e-05, + "loss": 0.1315, "step": 104520 }, { "epoch": 4.88, - "learning_rate": 1.029346959823731e-05, - "loss": 0.322, + "learning_rate": 2.0308614045215546e-05, + "loss": 0.1392, "step": 104525 }, { "epoch": 4.88, - "learning_rate": 1.029300079696217e-05, - "loss": 0.0987, + "learning_rate": 2.0308145975379526e-05, + "loss": 0.0531, "step": 104530 }, { "epoch": 4.88, - "learning_rate": 1.029253199568703e-05, - "loss": 0.0057, + "learning_rate": 2.030767790554351e-05, + "loss": 0.0321, "step": 104535 }, { "epoch": 4.88, - "learning_rate": 1.029206319441189e-05, - "loss": 0.0148, + "learning_rate": 2.030720983570749e-05, + "loss": 0.0236, "step": 104540 }, { "epoch": 4.88, - "learning_rate": 1.029159439313675e-05, - "loss": 0.0796, + "learning_rate": 2.030674176587147e-05, + "loss": 0.0245, "step": 104545 }, { "epoch": 4.88, - "learning_rate": 1.0291125591861611e-05, - "loss": 0.0388, + "learning_rate": 2.030627369603545e-05, + "loss": 0.0448, "step": 104550 }, { "epoch": 4.88, - "learning_rate": 1.0290656790586471e-05, - "loss": 0.1049, + "learning_rate": 2.0305805626199432e-05, + "loss": 0.0494, "step": 104555 }, { "epoch": 4.88, - "learning_rate": 1.0290187989311331e-05, - "loss": 0.0605, + "learning_rate": 2.030533755636341e-05, + "loss": 0.0747, "step": 104560 }, { "epoch": 4.88, - "learning_rate": 1.028971918803619e-05, - "loss": 0.2132, + "learning_rate": 2.030486948652739e-05, + "loss": 0.1312, "step": 104565 }, { "epoch": 4.88, - "learning_rate": 1.0289250386761054e-05, - "loss": 0.0882, + "learning_rate": 2.0304401416691368e-05, + "loss": 0.2301, "step": 104570 }, { "epoch": 4.88, - "learning_rate": 1.0288781585485914e-05, - "loss": 0.2828, + "learning_rate": 2.030393334685535e-05, + "loss": 0.4589, "step": 104575 }, { "epoch": 4.88, - "learning_rate": 1.0288312784210774e-05, - "loss": 0.0809, + "learning_rate": 2.030346527701933e-05, + "loss": 0.1187, "step": 104580 }, { "epoch": 4.88, - "learning_rate": 1.0287843982935634e-05, - "loss": 0.0074, + "learning_rate": 2.030299720718331e-05, + "loss": 0.0192, "step": 104585 }, { "epoch": 4.88, - "learning_rate": 1.0287375181660496e-05, - "loss": 0.0453, + "learning_rate": 2.0302529137347294e-05, + "loss": 0.017, "step": 104590 }, { "epoch": 4.88, - "learning_rate": 1.0286906380385356e-05, - "loss": 0.0169, + "learning_rate": 2.0302061067511274e-05, + "loss": 0.0317, "step": 104595 }, { "epoch": 4.88, - "learning_rate": 1.0286437579110215e-05, - "loss": 0.064, + "learning_rate": 2.0301592997675253e-05, + "loss": 0.0473, "step": 104600 }, { "epoch": 4.88, - "learning_rate": 1.0285968777835075e-05, - "loss": 0.0555, + "learning_rate": 2.0301124927839233e-05, + "loss": 0.0878, "step": 104605 }, { "epoch": 4.88, - "learning_rate": 1.0285499976559935e-05, - "loss": 0.0382, + "learning_rate": 2.0300656858003216e-05, + "loss": 0.0681, "step": 104610 }, { "epoch": 4.88, - "learning_rate": 1.0285031175284799e-05, - "loss": 0.0682, + "learning_rate": 2.0300188788167196e-05, + "loss": 0.0966, "step": 104615 }, { "epoch": 4.88, - "learning_rate": 1.0284562374009659e-05, - "loss": 0.1367, + "learning_rate": 2.0299720718331176e-05, + "loss": 0.1574, "step": 104620 }, { "epoch": 4.88, - "learning_rate": 1.0284093572734518e-05, - "loss": 0.2854, + "learning_rate": 2.0299252648495156e-05, + "loss": 0.1579, "step": 104625 }, { "epoch": 4.88, - "learning_rate": 1.028362477145938e-05, - "loss": 0.1447, + "learning_rate": 2.029878457865914e-05, + "loss": 0.0573, "step": 104630 }, { "epoch": 4.88, - "learning_rate": 1.028315597018424e-05, - "loss": 0.0061, + "learning_rate": 2.0298316508823116e-05, + "loss": 0.0195, "step": 104635 }, { "epoch": 4.88, - "learning_rate": 1.02826871689091e-05, - "loss": 0.0346, + "learning_rate": 2.0297848438987095e-05, + "loss": 0.0106, "step": 104640 }, { "epoch": 4.88, - "learning_rate": 1.028221836763396e-05, - "loss": 0.0155, + "learning_rate": 2.0297380369151075e-05, + "loss": 0.0751, "step": 104645 }, { "epoch": 4.88, - "learning_rate": 1.0281749566358822e-05, - "loss": 0.0365, + "learning_rate": 2.029691229931506e-05, + "loss": 0.0563, "step": 104650 }, { "epoch": 4.88, - "learning_rate": 1.0281280765083681e-05, - "loss": 0.0685, + "learning_rate": 2.0296444229479038e-05, + "loss": 0.0331, "step": 104655 }, { "epoch": 4.88, - "learning_rate": 1.0280811963808541e-05, - "loss": 0.1154, + "learning_rate": 2.0295976159643018e-05, + "loss": 0.0597, "step": 104660 }, { "epoch": 4.88, - "learning_rate": 1.0280343162533403e-05, - "loss": 0.1566, + "learning_rate": 2.0295508089807e-05, + "loss": 0.2261, "step": 104665 }, { "epoch": 4.88, - "learning_rate": 1.0279874361258265e-05, - "loss": 0.0794, + "learning_rate": 2.029504001997098e-05, + "loss": 0.0822, "step": 104670 }, { "epoch": 4.88, - "learning_rate": 1.0279405559983125e-05, - "loss": 0.1238, + "learning_rate": 2.029457195013496e-05, + "loss": 0.2061, "step": 104675 }, { "epoch": 4.88, - "learning_rate": 1.0278936758707985e-05, - "loss": 0.0538, + "learning_rate": 2.029410388029894e-05, + "loss": 0.0366, "step": 104680 }, { "epoch": 4.88, - "learning_rate": 1.0278467957432844e-05, - "loss": 0.0478, + "learning_rate": 2.0293635810462924e-05, + "loss": 0.0547, "step": 104685 }, { "epoch": 4.88, - "learning_rate": 1.0277999156157706e-05, - "loss": 0.0412, + "learning_rate": 2.0293167740626904e-05, + "loss": 0.0147, "step": 104690 }, { "epoch": 4.89, - "learning_rate": 1.0277530354882566e-05, - "loss": 0.0181, + "learning_rate": 2.029269967079088e-05, + "loss": 0.0824, "step": 104695 }, { "epoch": 4.89, - "learning_rate": 1.0277061553607426e-05, - "loss": 0.0515, + "learning_rate": 2.029223160095486e-05, + "loss": 0.0723, "step": 104700 }, { "epoch": 4.89, - "learning_rate": 1.0276592752332286e-05, - "loss": 0.1135, + "learning_rate": 2.0291763531118843e-05, + "loss": 0.0523, "step": 104705 }, { "epoch": 4.89, - "learning_rate": 1.027612395105715e-05, - "loss": 0.0426, + "learning_rate": 2.0291295461282823e-05, + "loss": 0.062, "step": 104710 }, { "epoch": 4.89, - "learning_rate": 1.027565514978201e-05, - "loss": 0.0606, + "learning_rate": 2.0290827391446803e-05, + "loss": 0.0836, "step": 104715 }, { "epoch": 4.89, - "learning_rate": 1.0275186348506869e-05, - "loss": 0.1492, + "learning_rate": 2.0290359321610786e-05, + "loss": 0.1556, "step": 104720 }, { "epoch": 4.89, - "learning_rate": 1.0274717547231729e-05, - "loss": 0.271, + "learning_rate": 2.0289891251774766e-05, + "loss": 0.2514, "step": 104725 }, { "epoch": 4.89, - "learning_rate": 1.027424874595659e-05, - "loss": 0.0349, + "learning_rate": 2.0289423181938746e-05, + "loss": 0.0635, "step": 104730 }, { "epoch": 4.89, - "learning_rate": 1.027377994468145e-05, - "loss": 0.0383, + "learning_rate": 2.0288955112102725e-05, + "loss": 0.0117, "step": 104735 }, { "epoch": 4.89, - "learning_rate": 1.027331114340631e-05, - "loss": 0.085, + "learning_rate": 2.028848704226671e-05, + "loss": 0.0178, "step": 104740 }, { "epoch": 4.89, - "learning_rate": 1.027284234213117e-05, - "loss": 0.0233, + "learning_rate": 2.028801897243069e-05, + "loss": 0.0179, "step": 104745 }, { "epoch": 4.89, - "learning_rate": 1.027237354085603e-05, - "loss": 0.0605, + "learning_rate": 2.028755090259467e-05, + "loss": 0.0515, "step": 104750 }, { "epoch": 4.89, - "learning_rate": 1.0271904739580894e-05, - "loss": 0.0682, + "learning_rate": 2.0287082832758648e-05, + "loss": 0.0449, "step": 104755 }, { "epoch": 4.89, - "learning_rate": 1.0271435938305754e-05, - "loss": 0.0947, + "learning_rate": 2.0286614762922628e-05, + "loss": 0.0468, "step": 104760 }, { "epoch": 4.89, - "learning_rate": 1.0270967137030614e-05, - "loss": 0.1154, + "learning_rate": 2.0286146693086608e-05, + "loss": 0.1597, "step": 104765 }, { "epoch": 4.89, - "learning_rate": 1.0270498335755475e-05, - "loss": 0.0902, + "learning_rate": 2.0285678623250588e-05, + "loss": 0.0674, "step": 104770 }, { "epoch": 4.89, - "learning_rate": 1.0270029534480335e-05, - "loss": 0.1524, + "learning_rate": 2.028521055341457e-05, + "loss": 0.2299, "step": 104775 }, { "epoch": 4.89, - "learning_rate": 1.0269560733205195e-05, - "loss": 0.0757, + "learning_rate": 2.028474248357855e-05, + "loss": 0.0567, "step": 104780 }, { "epoch": 4.89, - "learning_rate": 1.0269091931930055e-05, - "loss": 0.0226, + "learning_rate": 2.028427441374253e-05, + "loss": 0.0322, "step": 104785 }, { "epoch": 4.89, - "learning_rate": 1.0268623130654915e-05, - "loss": 0.0261, + "learning_rate": 2.028380634390651e-05, + "loss": 0.0872, "step": 104790 }, { "epoch": 4.89, - "learning_rate": 1.0268154329379777e-05, - "loss": 0.055, + "learning_rate": 2.0283338274070493e-05, + "loss": 0.0211, "step": 104795 }, { "epoch": 4.89, - "learning_rate": 1.0267685528104638e-05, - "loss": 0.092, + "learning_rate": 2.0282870204234473e-05, + "loss": 0.0277, "step": 104800 }, { "epoch": 4.89, - "learning_rate": 1.0267216726829498e-05, - "loss": 0.0473, + "learning_rate": 2.0282402134398453e-05, + "loss": 0.1218, "step": 104805 }, { "epoch": 4.89, - "learning_rate": 1.026674792555436e-05, - "loss": 0.1125, + "learning_rate": 2.0281934064562433e-05, + "loss": 0.1367, "step": 104810 }, { "epoch": 4.89, - "learning_rate": 1.026627912427922e-05, - "loss": 0.0941, + "learning_rate": 2.0281465994726416e-05, + "loss": 0.1512, "step": 104815 }, { "epoch": 4.89, - "learning_rate": 1.026581032300408e-05, - "loss": 0.0902, + "learning_rate": 2.0280997924890396e-05, + "loss": 0.1638, "step": 104820 }, { "epoch": 4.89, - "learning_rate": 1.026534152172894e-05, - "loss": 0.328, + "learning_rate": 2.0280529855054372e-05, + "loss": 0.3937, "step": 104825 }, { "epoch": 4.89, - "learning_rate": 1.02648727204538e-05, - "loss": 0.047, + "learning_rate": 2.0280061785218356e-05, + "loss": 0.1143, "step": 104830 }, { "epoch": 4.89, - "learning_rate": 1.0264403919178661e-05, - "loss": 0.044, + "learning_rate": 2.0279593715382335e-05, + "loss": 0.0232, "step": 104835 }, { "epoch": 4.89, - "learning_rate": 1.0263935117903521e-05, - "loss": 0.054, + "learning_rate": 2.0279125645546315e-05, + "loss": 0.0451, "step": 104840 }, { "epoch": 4.89, - "learning_rate": 1.0263466316628381e-05, - "loss": 0.0373, + "learning_rate": 2.0278657575710295e-05, + "loss": 0.0206, "step": 104845 }, { "epoch": 4.89, - "learning_rate": 1.0262997515353244e-05, - "loss": 0.0988, + "learning_rate": 2.0278189505874278e-05, + "loss": 0.071, "step": 104850 }, { "epoch": 4.89, - "learning_rate": 1.0262528714078104e-05, - "loss": 0.0431, + "learning_rate": 2.0277721436038258e-05, + "loss": 0.0482, "step": 104855 }, { "epoch": 4.89, - "learning_rate": 1.0262059912802964e-05, - "loss": 0.0451, + "learning_rate": 2.0277253366202238e-05, + "loss": 0.0897, "step": 104860 }, { "epoch": 4.89, - "learning_rate": 1.0261591111527824e-05, - "loss": 0.0645, + "learning_rate": 2.0276785296366218e-05, + "loss": 0.119, "step": 104865 }, { "epoch": 4.89, - "learning_rate": 1.0261122310252684e-05, - "loss": 0.1824, + "learning_rate": 2.02763172265302e-05, + "loss": 0.1195, "step": 104870 }, { "epoch": 4.89, - "learning_rate": 1.0260653508977546e-05, - "loss": 0.3749, + "learning_rate": 2.027584915669418e-05, + "loss": 0.349, "step": 104875 }, { "epoch": 4.89, - "learning_rate": 1.0260184707702406e-05, - "loss": 0.1074, + "learning_rate": 2.027538108685816e-05, + "loss": 0.1141, "step": 104880 }, { "epoch": 4.89, - "learning_rate": 1.0259715906427266e-05, - "loss": 0.0058, + "learning_rate": 2.0274913017022137e-05, + "loss": 0.0293, "step": 104885 }, { "epoch": 4.89, - "learning_rate": 1.0259247105152125e-05, - "loss": 0.0188, + "learning_rate": 2.027444494718612e-05, + "loss": 0.0529, "step": 104890 }, { "epoch": 4.89, - "learning_rate": 1.0258778303876989e-05, - "loss": 0.0376, + "learning_rate": 2.02739768773501e-05, + "loss": 0.0387, "step": 104895 }, { "epoch": 4.89, - "learning_rate": 1.0258309502601849e-05, - "loss": 0.053, + "learning_rate": 2.027350880751408e-05, + "loss": 0.0326, "step": 104900 }, { "epoch": 4.9, - "learning_rate": 1.0257840701326709e-05, - "loss": 0.052, + "learning_rate": 2.0273040737678063e-05, + "loss": 0.0528, "step": 104905 }, { "epoch": 4.9, - "learning_rate": 1.0257371900051569e-05, - "loss": 0.0662, + "learning_rate": 2.0272572667842043e-05, + "loss": 0.1035, "step": 104910 }, { "epoch": 4.9, - "learning_rate": 1.025690309877643e-05, - "loss": 0.0689, + "learning_rate": 2.0272104598006023e-05, + "loss": 0.1141, "step": 104915 }, { "epoch": 4.9, - "learning_rate": 1.025643429750129e-05, - "loss": 0.1109, + "learning_rate": 2.0271636528170002e-05, + "loss": 0.1058, "step": 104920 }, { "epoch": 4.9, - "learning_rate": 1.025596549622615e-05, - "loss": 0.3309, + "learning_rate": 2.0271168458333986e-05, + "loss": 0.1389, "step": 104925 }, { "epoch": 4.9, - "learning_rate": 1.025549669495101e-05, - "loss": 0.0356, + "learning_rate": 2.0270700388497965e-05, + "loss": 0.0505, "step": 104930 }, { "epoch": 4.9, - "learning_rate": 1.025502789367587e-05, - "loss": 0.0493, + "learning_rate": 2.0270232318661945e-05, + "loss": 0.0205, "step": 104935 }, { "epoch": 4.9, - "learning_rate": 1.0254559092400733e-05, - "loss": 0.0227, + "learning_rate": 2.0269764248825925e-05, + "loss": 0.0159, "step": 104940 }, { "epoch": 4.9, - "learning_rate": 1.0254090291125593e-05, - "loss": 0.0385, + "learning_rate": 2.0269296178989908e-05, + "loss": 0.0234, "step": 104945 }, { "epoch": 4.9, - "learning_rate": 1.0253621489850453e-05, - "loss": 0.0224, + "learning_rate": 2.0268828109153885e-05, + "loss": 0.0806, "step": 104950 }, { "epoch": 4.9, - "learning_rate": 1.0253152688575315e-05, - "loss": 0.1233, + "learning_rate": 2.0268360039317865e-05, + "loss": 0.0315, "step": 104955 }, { "epoch": 4.9, - "learning_rate": 1.0252683887300175e-05, - "loss": 0.0783, + "learning_rate": 2.0267891969481848e-05, + "loss": 0.0854, "step": 104960 }, { "epoch": 4.9, - "learning_rate": 1.0252215086025035e-05, - "loss": 0.1145, + "learning_rate": 2.0267423899645828e-05, + "loss": 0.0845, "step": 104965 }, { "epoch": 4.9, - "learning_rate": 1.0251746284749895e-05, - "loss": 0.1291, + "learning_rate": 2.0266955829809807e-05, + "loss": 0.1718, "step": 104970 }, { "epoch": 4.9, - "learning_rate": 1.0251277483474755e-05, - "loss": 0.2316, + "learning_rate": 2.0266487759973787e-05, + "loss": 0.2382, "step": 104975 }, { "epoch": 4.9, - "learning_rate": 1.0250808682199616e-05, - "loss": 0.0912, + "learning_rate": 2.026601969013777e-05, + "loss": 0.0879, "step": 104980 }, { "epoch": 4.9, - "learning_rate": 1.0250339880924476e-05, - "loss": 0.0083, + "learning_rate": 2.026555162030175e-05, + "loss": 0.0202, "step": 104985 }, { "epoch": 4.9, - "learning_rate": 1.0249871079649338e-05, - "loss": 0.0241, + "learning_rate": 2.026508355046573e-05, + "loss": 0.0432, "step": 104990 }, { "epoch": 4.9, - "learning_rate": 1.02494022783742e-05, - "loss": 0.0891, + "learning_rate": 2.026461548062971e-05, + "loss": 0.057, "step": 104995 }, { "epoch": 4.9, - "learning_rate": 1.024893347709906e-05, - "loss": 0.0305, + "learning_rate": 2.0264147410793693e-05, + "loss": 0.054, "step": 105000 }, { "epoch": 4.9, - "learning_rate": 1.024846467582392e-05, - "loss": 0.0235, + "learning_rate": 2.0263679340957673e-05, + "loss": 0.0982, "step": 105005 }, { "epoch": 4.9, - "learning_rate": 1.0247995874548779e-05, - "loss": 0.0277, + "learning_rate": 2.0263211271121653e-05, + "loss": 0.0662, "step": 105010 }, { "epoch": 4.9, - "learning_rate": 1.0247527073273639e-05, - "loss": 0.1312, + "learning_rate": 2.0262743201285633e-05, + "loss": 0.2041, "step": 105015 }, { "epoch": 4.9, - "learning_rate": 1.02470582719985e-05, - "loss": 0.2417, + "learning_rate": 2.0262275131449612e-05, + "loss": 0.2474, "step": 105020 }, { "epoch": 4.9, - "learning_rate": 1.024658947072336e-05, - "loss": 0.2572, + "learning_rate": 2.0261807061613592e-05, + "loss": 0.1813, "step": 105025 }, { "epoch": 4.9, - "learning_rate": 1.024612066944822e-05, - "loss": 0.0608, + "learning_rate": 2.0261338991777572e-05, + "loss": 0.0601, "step": 105030 }, { "epoch": 4.9, - "learning_rate": 1.0245651868173084e-05, - "loss": 0.011, + "learning_rate": 2.0260870921941555e-05, + "loss": 0.0173, "step": 105035 }, { "epoch": 4.9, - "learning_rate": 1.0245183066897944e-05, - "loss": 0.0121, + "learning_rate": 2.0260402852105535e-05, + "loss": 0.0401, "step": 105040 }, { "epoch": 4.9, - "learning_rate": 1.0244714265622804e-05, - "loss": 0.0739, + "learning_rate": 2.0259934782269515e-05, + "loss": 0.0832, "step": 105045 }, { "epoch": 4.9, - "learning_rate": 1.0244245464347664e-05, - "loss": 0.0278, + "learning_rate": 2.0259466712433495e-05, + "loss": 0.0873, "step": 105050 }, { "epoch": 4.9, - "learning_rate": 1.0243776663072524e-05, - "loss": 0.0859, + "learning_rate": 2.0258998642597478e-05, + "loss": 0.075, "step": 105055 }, { "epoch": 4.9, - "learning_rate": 1.0243307861797385e-05, - "loss": 0.1252, + "learning_rate": 2.0258530572761458e-05, + "loss": 0.054, "step": 105060 }, { "epoch": 4.9, - "learning_rate": 1.0242839060522245e-05, - "loss": 0.0627, + "learning_rate": 2.0258062502925437e-05, + "loss": 0.1815, "step": 105065 }, { "epoch": 4.9, - "learning_rate": 1.0242370259247105e-05, - "loss": 0.2326, + "learning_rate": 2.0257594433089417e-05, + "loss": 0.1361, "step": 105070 }, { "epoch": 4.9, - "learning_rate": 1.0241901457971965e-05, - "loss": 0.2487, + "learning_rate": 2.0257126363253397e-05, + "loss": 0.2142, "step": 105075 }, { "epoch": 4.9, - "learning_rate": 1.0241432656696828e-05, - "loss": 0.0483, + "learning_rate": 2.0256658293417377e-05, + "loss": 0.045, "step": 105080 }, { "epoch": 4.9, - "learning_rate": 1.0240963855421688e-05, - "loss": 0.0036, + "learning_rate": 2.0256190223581357e-05, + "loss": 0.0105, "step": 105085 }, { "epoch": 4.9, - "learning_rate": 1.0240495054146548e-05, - "loss": 0.0322, + "learning_rate": 2.025572215374534e-05, + "loss": 0.0159, "step": 105090 }, { "epoch": 4.9, - "learning_rate": 1.0240026252871408e-05, - "loss": 0.0499, + "learning_rate": 2.025525408390932e-05, + "loss": 0.028, "step": 105095 }, { "epoch": 4.9, - "learning_rate": 1.023955745159627e-05, - "loss": 0.0546, + "learning_rate": 2.02547860140733e-05, + "loss": 0.0271, "step": 105100 }, { "epoch": 4.9, - "learning_rate": 1.023908865032113e-05, - "loss": 0.1015, + "learning_rate": 2.025431794423728e-05, + "loss": 0.0603, "step": 105105 }, { "epoch": 4.9, - "learning_rate": 1.023861984904599e-05, - "loss": 0.0561, + "learning_rate": 2.0253849874401263e-05, + "loss": 0.0385, "step": 105110 }, { "epoch": 4.9, - "learning_rate": 1.023815104777085e-05, - "loss": 0.0726, + "learning_rate": 2.0253381804565242e-05, + "loss": 0.0367, "step": 105115 }, { "epoch": 4.91, - "learning_rate": 1.0237682246495711e-05, - "loss": 0.1082, + "learning_rate": 2.0252913734729222e-05, + "loss": 0.0534, "step": 105120 }, { "epoch": 4.91, - "learning_rate": 1.0237213445220573e-05, - "loss": 0.2577, + "learning_rate": 2.0252445664893202e-05, + "loss": 0.2653, "step": 105125 }, { "epoch": 4.91, - "learning_rate": 1.0236744643945433e-05, - "loss": 0.1055, + "learning_rate": 2.0251977595057185e-05, + "loss": 0.0874, "step": 105130 }, { "epoch": 4.91, - "learning_rate": 1.0236275842670293e-05, - "loss": 0.0311, + "learning_rate": 2.0251509525221165e-05, + "loss": 0.0168, "step": 105135 }, { "epoch": 4.91, - "learning_rate": 1.0235807041395154e-05, - "loss": 0.0067, + "learning_rate": 2.025104145538514e-05, + "loss": 0.0151, "step": 105140 }, { "epoch": 4.91, - "learning_rate": 1.0235338240120014e-05, - "loss": 0.0624, + "learning_rate": 2.0250573385549125e-05, + "loss": 0.0197, "step": 105145 }, { "epoch": 4.91, - "learning_rate": 1.0234869438844874e-05, - "loss": 0.0789, + "learning_rate": 2.0250105315713105e-05, + "loss": 0.0791, "step": 105150 }, { "epoch": 4.91, - "learning_rate": 1.0234400637569734e-05, - "loss": 0.0729, + "learning_rate": 2.0249637245877084e-05, + "loss": 0.0958, "step": 105155 }, { "epoch": 4.91, - "learning_rate": 1.0233931836294596e-05, - "loss": 0.0802, + "learning_rate": 2.0249169176041064e-05, + "loss": 0.093, "step": 105160 }, { "epoch": 4.91, - "learning_rate": 1.0233463035019456e-05, - "loss": 0.0696, + "learning_rate": 2.0248701106205047e-05, + "loss": 0.0584, "step": 105165 }, { "epoch": 4.91, - "learning_rate": 1.0232994233744316e-05, - "loss": 0.1909, + "learning_rate": 2.0248233036369027e-05, + "loss": 0.0944, "step": 105170 }, { "epoch": 4.91, - "learning_rate": 1.0232525432469177e-05, - "loss": 0.1665, + "learning_rate": 2.0247764966533007e-05, + "loss": 0.3327, "step": 105175 }, { "epoch": 4.91, - "learning_rate": 1.0232056631194039e-05, - "loss": 0.0849, + "learning_rate": 2.0247296896696987e-05, + "loss": 0.0736, "step": 105180 }, { "epoch": 4.91, - "learning_rate": 1.0231587829918899e-05, - "loss": 0.0045, + "learning_rate": 2.024682882686097e-05, + "loss": 0.0463, "step": 105185 }, { "epoch": 4.91, - "learning_rate": 1.0231119028643759e-05, - "loss": 0.0452, + "learning_rate": 2.024636075702495e-05, + "loss": 0.0218, "step": 105190 }, { "epoch": 4.91, - "learning_rate": 1.0230650227368619e-05, - "loss": 0.0604, + "learning_rate": 2.024589268718893e-05, + "loss": 0.0189, "step": 105195 }, { "epoch": 4.91, - "learning_rate": 1.023018142609348e-05, - "loss": 0.0435, + "learning_rate": 2.024542461735291e-05, + "loss": 0.0521, "step": 105200 }, { "epoch": 4.91, - "learning_rate": 1.022971262481834e-05, - "loss": 0.0534, + "learning_rate": 2.024495654751689e-05, + "loss": 0.0818, "step": 105205 }, { "epoch": 4.91, - "learning_rate": 1.02292438235432e-05, - "loss": 0.0723, + "learning_rate": 2.024448847768087e-05, + "loss": 0.0788, "step": 105210 }, { "epoch": 4.91, - "learning_rate": 1.022877502226806e-05, - "loss": 0.2139, + "learning_rate": 2.024402040784485e-05, + "loss": 0.0769, "step": 105215 }, { "epoch": 4.91, - "learning_rate": 1.0228306220992923e-05, - "loss": 0.1252, + "learning_rate": 2.0243552338008832e-05, + "loss": 0.1074, "step": 105220 }, { "epoch": 4.91, - "learning_rate": 1.0227837419717783e-05, - "loss": 0.2006, + "learning_rate": 2.0243084268172812e-05, + "loss": 0.3718, "step": 105225 }, { "epoch": 4.91, - "learning_rate": 1.0227368618442643e-05, - "loss": 0.0723, + "learning_rate": 2.0242616198336792e-05, + "loss": 0.0516, "step": 105230 }, { "epoch": 4.91, - "learning_rate": 1.0226899817167503e-05, - "loss": 0.0229, + "learning_rate": 2.024214812850077e-05, + "loss": 0.0613, "step": 105235 }, { "epoch": 4.91, - "learning_rate": 1.0226431015892365e-05, - "loss": 0.0201, + "learning_rate": 2.0241680058664755e-05, + "loss": 0.0182, "step": 105240 }, { "epoch": 4.91, - "learning_rate": 1.0225962214617225e-05, - "loss": 0.0269, + "learning_rate": 2.0241211988828735e-05, + "loss": 0.0341, "step": 105245 }, { "epoch": 4.91, - "learning_rate": 1.0225493413342085e-05, - "loss": 0.0245, + "learning_rate": 2.0240743918992714e-05, + "loss": 0.0617, "step": 105250 }, { "epoch": 4.91, - "learning_rate": 1.0225024612066945e-05, - "loss": 0.1189, + "learning_rate": 2.0240275849156694e-05, + "loss": 0.0602, "step": 105255 }, { "epoch": 4.91, - "learning_rate": 1.0224555810791805e-05, - "loss": 0.0462, + "learning_rate": 2.0239807779320677e-05, + "loss": 0.0546, "step": 105260 }, { "epoch": 4.91, - "learning_rate": 1.0224087009516668e-05, - "loss": 0.0845, + "learning_rate": 2.0239339709484654e-05, + "loss": 0.0848, "step": 105265 }, { "epoch": 4.91, - "learning_rate": 1.0223618208241528e-05, - "loss": 0.1731, + "learning_rate": 2.0238871639648634e-05, + "loss": 0.0487, "step": 105270 }, { "epoch": 4.91, - "learning_rate": 1.0223149406966388e-05, - "loss": 0.2558, + "learning_rate": 2.0238403569812617e-05, + "loss": 0.1911, "step": 105275 }, { "epoch": 4.91, - "learning_rate": 1.022268060569125e-05, - "loss": 0.0636, + "learning_rate": 2.0237935499976597e-05, + "loss": 0.0394, "step": 105280 }, { "epoch": 4.91, - "learning_rate": 1.022221180441611e-05, - "loss": 0.0151, + "learning_rate": 2.0237467430140577e-05, + "loss": 0.0441, "step": 105285 }, { "epoch": 4.91, - "learning_rate": 1.022174300314097e-05, - "loss": 0.046, + "learning_rate": 2.0236999360304556e-05, + "loss": 0.0975, "step": 105290 }, { "epoch": 4.91, - "learning_rate": 1.022127420186583e-05, - "loss": 0.0598, + "learning_rate": 2.023653129046854e-05, + "loss": 0.039, "step": 105295 }, { "epoch": 4.91, - "learning_rate": 1.022080540059069e-05, - "loss": 0.0364, + "learning_rate": 2.023606322063252e-05, + "loss": 0.0503, "step": 105300 }, { "epoch": 4.91, - "learning_rate": 1.022033659931555e-05, - "loss": 0.0642, + "learning_rate": 2.02355951507965e-05, + "loss": 0.0823, "step": 105305 }, { "epoch": 4.91, - "learning_rate": 1.021986779804041e-05, - "loss": 0.0466, + "learning_rate": 2.023512708096048e-05, + "loss": 0.0958, "step": 105310 }, { "epoch": 4.91, - "learning_rate": 1.0219398996765272e-05, - "loss": 0.1549, + "learning_rate": 2.0234659011124462e-05, + "loss": 0.0884, "step": 105315 }, { "epoch": 4.91, - "learning_rate": 1.0218930195490134e-05, - "loss": 0.1041, + "learning_rate": 2.0234190941288442e-05, + "loss": 0.0599, "step": 105320 }, { "epoch": 4.91, - "learning_rate": 1.0218461394214994e-05, - "loss": 0.1938, + "learning_rate": 2.0233722871452422e-05, + "loss": 0.2357, "step": 105325 }, { "epoch": 4.91, - "learning_rate": 1.0217992592939854e-05, - "loss": 0.0585, + "learning_rate": 2.0233254801616402e-05, + "loss": 0.063, "step": 105330 }, { "epoch": 4.92, - "learning_rate": 1.0217523791664714e-05, - "loss": 0.0217, + "learning_rate": 2.023278673178038e-05, + "loss": 0.0145, "step": 105335 }, { "epoch": 4.92, - "learning_rate": 1.0217054990389574e-05, - "loss": 0.0314, + "learning_rate": 2.023231866194436e-05, + "loss": 0.0215, "step": 105340 }, { "epoch": 4.92, - "learning_rate": 1.0216586189114435e-05, - "loss": 0.0546, + "learning_rate": 2.023185059210834e-05, + "loss": 0.0558, "step": 105345 }, { "epoch": 4.92, - "learning_rate": 1.0216117387839295e-05, - "loss": 0.0785, + "learning_rate": 2.0231382522272324e-05, + "loss": 0.0673, "step": 105350 }, { "epoch": 4.92, - "learning_rate": 1.0215648586564155e-05, - "loss": 0.0318, + "learning_rate": 2.0230914452436304e-05, + "loss": 0.0946, "step": 105355 }, { "epoch": 4.92, - "learning_rate": 1.0215179785289019e-05, - "loss": 0.1693, + "learning_rate": 2.0230446382600284e-05, + "loss": 0.0833, "step": 105360 }, { "epoch": 4.92, - "learning_rate": 1.0214710984013878e-05, - "loss": 0.0898, + "learning_rate": 2.0229978312764264e-05, + "loss": 0.0887, "step": 105365 }, { "epoch": 4.92, - "learning_rate": 1.0214242182738738e-05, - "loss": 0.1602, + "learning_rate": 2.0229510242928247e-05, + "loss": 0.152, "step": 105370 }, { "epoch": 4.92, - "learning_rate": 1.0213773381463598e-05, - "loss": 0.2031, + "learning_rate": 2.0229042173092227e-05, + "loss": 0.2972, "step": 105375 }, { "epoch": 4.92, - "learning_rate": 1.0213304580188458e-05, - "loss": 0.0841, + "learning_rate": 2.0228574103256207e-05, + "loss": 0.0763, "step": 105380 }, { "epoch": 4.92, - "learning_rate": 1.021283577891332e-05, - "loss": 0.0105, + "learning_rate": 2.022810603342019e-05, + "loss": 0.0155, "step": 105385 }, { "epoch": 4.92, - "learning_rate": 1.021236697763818e-05, - "loss": 0.0378, + "learning_rate": 2.0227637963584166e-05, + "loss": 0.0149, "step": 105390 }, { "epoch": 4.92, - "learning_rate": 1.021189817636304e-05, - "loss": 0.0579, + "learning_rate": 2.0227169893748146e-05, + "loss": 0.0341, "step": 105395 }, { "epoch": 4.92, - "learning_rate": 1.02114293750879e-05, - "loss": 0.0519, + "learning_rate": 2.0226701823912126e-05, + "loss": 0.0123, "step": 105400 }, { "epoch": 4.92, - "learning_rate": 1.0210960573812763e-05, - "loss": 0.0504, + "learning_rate": 2.022623375407611e-05, + "loss": 0.0532, "step": 105405 }, { "epoch": 4.92, - "learning_rate": 1.0210491772537623e-05, - "loss": 0.0254, + "learning_rate": 2.022576568424009e-05, + "loss": 0.133, "step": 105410 }, { "epoch": 4.92, - "learning_rate": 1.0210022971262483e-05, - "loss": 0.1071, + "learning_rate": 2.022529761440407e-05, + "loss": 0.1142, "step": 105415 }, { "epoch": 4.92, - "learning_rate": 1.0209554169987343e-05, - "loss": 0.1872, + "learning_rate": 2.022482954456805e-05, + "loss": 0.1304, "step": 105420 }, { "epoch": 4.92, - "learning_rate": 1.0209085368712204e-05, - "loss": 0.2193, + "learning_rate": 2.0224361474732032e-05, + "loss": 0.2232, "step": 105425 }, { "epoch": 4.92, - "learning_rate": 1.0208616567437064e-05, - "loss": 0.0787, + "learning_rate": 2.022389340489601e-05, + "loss": 0.0572, "step": 105430 }, { "epoch": 4.92, - "learning_rate": 1.0208147766161924e-05, - "loss": 0.0144, + "learning_rate": 2.022342533505999e-05, + "loss": 0.0568, "step": 105435 }, { "epoch": 4.92, - "learning_rate": 1.0207678964886784e-05, - "loss": 0.0364, + "learning_rate": 2.0222957265223975e-05, + "loss": 0.0251, "step": 105440 }, { "epoch": 4.92, - "learning_rate": 1.0207210163611644e-05, - "loss": 0.0291, + "learning_rate": 2.0222489195387954e-05, + "loss": 0.0999, "step": 105445 }, { "epoch": 4.92, - "learning_rate": 1.0206741362336508e-05, - "loss": 0.0328, + "learning_rate": 2.0222021125551934e-05, + "loss": 0.0438, "step": 105450 }, { "epoch": 4.92, - "learning_rate": 1.0206272561061367e-05, - "loss": 0.0309, + "learning_rate": 2.022155305571591e-05, + "loss": 0.0441, "step": 105455 }, { "epoch": 4.92, - "learning_rate": 1.0205803759786227e-05, - "loss": 0.0704, + "learning_rate": 2.0221084985879894e-05, + "loss": 0.0583, "step": 105460 }, { "epoch": 4.92, - "learning_rate": 1.0205334958511089e-05, - "loss": 0.0925, + "learning_rate": 2.0220616916043874e-05, + "loss": 0.0921, "step": 105465 }, { "epoch": 4.92, - "learning_rate": 1.0204866157235949e-05, - "loss": 0.1492, + "learning_rate": 2.0220148846207854e-05, + "loss": 0.1453, "step": 105470 }, { "epoch": 4.92, - "learning_rate": 1.0204397355960809e-05, - "loss": 0.2721, + "learning_rate": 2.0219680776371833e-05, + "loss": 0.1994, "step": 105475 }, { "epoch": 4.92, - "learning_rate": 1.0203928554685669e-05, - "loss": 0.0629, + "learning_rate": 2.0219212706535817e-05, + "loss": 0.0269, "step": 105480 }, { "epoch": 4.92, - "learning_rate": 1.0203459753410529e-05, - "loss": 0.0471, + "learning_rate": 2.0218744636699796e-05, + "loss": 0.0202, "step": 105485 }, { "epoch": 4.92, - "learning_rate": 1.020299095213539e-05, - "loss": 0.0209, + "learning_rate": 2.0218276566863776e-05, + "loss": 0.0501, "step": 105490 }, { "epoch": 4.92, - "learning_rate": 1.020252215086025e-05, - "loss": 0.28, + "learning_rate": 2.0217808497027756e-05, + "loss": 0.0734, "step": 105495 }, { "epoch": 4.92, - "learning_rate": 1.0202053349585112e-05, - "loss": 0.0635, + "learning_rate": 2.021734042719174e-05, + "loss": 0.0196, "step": 105500 }, { "epoch": 4.92, - "learning_rate": 1.0201584548309974e-05, - "loss": 0.0438, + "learning_rate": 2.021687235735572e-05, + "loss": 0.0399, "step": 105505 }, { "epoch": 4.92, - "learning_rate": 1.0201115747034833e-05, - "loss": 0.0661, + "learning_rate": 2.02164042875197e-05, + "loss": 0.0559, "step": 105510 }, { "epoch": 4.92, - "learning_rate": 1.0200646945759693e-05, - "loss": 0.0527, + "learning_rate": 2.0215936217683682e-05, + "loss": 0.0896, "step": 105515 }, { "epoch": 4.92, - "learning_rate": 1.0200178144484553e-05, - "loss": 0.1037, + "learning_rate": 2.021546814784766e-05, + "loss": 0.2282, "step": 105520 }, { "epoch": 4.92, - "learning_rate": 1.0199709343209413e-05, - "loss": 0.1355, + "learning_rate": 2.0215000078011638e-05, + "loss": 0.221, "step": 105525 }, { "epoch": 4.92, - "learning_rate": 1.0199240541934275e-05, - "loss": 0.1083, + "learning_rate": 2.0214532008175618e-05, + "loss": 0.0591, "step": 105530 }, { "epoch": 4.92, - "learning_rate": 1.0198771740659135e-05, - "loss": 0.0068, + "learning_rate": 2.02140639383396e-05, + "loss": 0.0613, "step": 105535 }, { "epoch": 4.92, - "learning_rate": 1.0198302939383995e-05, - "loss": 0.0216, + "learning_rate": 2.021359586850358e-05, + "loss": 0.0099, "step": 105540 }, { "epoch": 4.92, - "learning_rate": 1.0197834138108858e-05, - "loss": 0.0254, + "learning_rate": 2.021312779866756e-05, + "loss": 0.0486, "step": 105545 }, { "epoch": 4.93, - "learning_rate": 1.0197365336833718e-05, - "loss": 0.031, + "learning_rate": 2.021265972883154e-05, + "loss": 0.0427, "step": 105550 }, { "epoch": 4.93, - "learning_rate": 1.0196896535558578e-05, - "loss": 0.1026, + "learning_rate": 2.0212191658995524e-05, + "loss": 0.0378, "step": 105555 }, { "epoch": 4.93, - "learning_rate": 1.0196427734283438e-05, - "loss": 0.0625, + "learning_rate": 2.0211723589159504e-05, + "loss": 0.0706, "step": 105560 }, { "epoch": 4.93, - "learning_rate": 1.0195958933008298e-05, - "loss": 0.1294, + "learning_rate": 2.0211255519323484e-05, + "loss": 0.1346, "step": 105565 }, { "epoch": 4.93, - "learning_rate": 1.019549013173316e-05, - "loss": 0.0863, + "learning_rate": 2.0210787449487467e-05, + "loss": 0.1657, "step": 105570 }, { "epoch": 4.93, - "learning_rate": 1.019502133045802e-05, - "loss": 0.3077, + "learning_rate": 2.0210319379651447e-05, + "loss": 0.3114, "step": 105575 }, { "epoch": 4.93, - "learning_rate": 1.019455252918288e-05, - "loss": 0.0828, + "learning_rate": 2.0209851309815423e-05, + "loss": 0.0734, "step": 105580 }, { "epoch": 4.93, - "learning_rate": 1.019408372790774e-05, - "loss": 0.0357, + "learning_rate": 2.0209383239979403e-05, + "loss": 0.0207, "step": 105585 }, { "epoch": 4.93, - "learning_rate": 1.0193614926632603e-05, - "loss": 0.0611, + "learning_rate": 2.0208915170143386e-05, + "loss": 0.02, "step": 105590 }, { "epoch": 4.93, - "learning_rate": 1.0193146125357463e-05, - "loss": 0.0175, + "learning_rate": 2.0208447100307366e-05, + "loss": 0.0228, "step": 105595 }, { "epoch": 4.93, - "learning_rate": 1.0192677324082322e-05, - "loss": 0.0586, + "learning_rate": 2.0207979030471346e-05, + "loss": 0.0201, "step": 105600 }, { "epoch": 4.93, - "learning_rate": 1.0192208522807182e-05, - "loss": 0.0491, + "learning_rate": 2.0207510960635326e-05, + "loss": 0.0427, "step": 105605 }, { "epoch": 4.93, - "learning_rate": 1.0191739721532044e-05, - "loss": 0.1044, + "learning_rate": 2.020704289079931e-05, + "loss": 0.1609, "step": 105610 }, { "epoch": 4.93, - "learning_rate": 1.0191270920256904e-05, - "loss": 0.0846, + "learning_rate": 2.020657482096329e-05, + "loss": 0.0782, "step": 105615 }, { "epoch": 4.93, - "learning_rate": 1.0190802118981764e-05, - "loss": 0.0488, + "learning_rate": 2.020610675112727e-05, + "loss": 0.1446, "step": 105620 }, { "epoch": 4.93, - "learning_rate": 1.0190333317706624e-05, - "loss": 0.1462, + "learning_rate": 2.020563868129125e-05, + "loss": 0.2206, "step": 105625 }, { "epoch": 4.93, - "learning_rate": 1.0189864516431485e-05, - "loss": 0.1076, + "learning_rate": 2.020517061145523e-05, + "loss": 0.0879, "step": 105630 }, { "epoch": 4.93, - "learning_rate": 1.0189395715156345e-05, - "loss": 0.0123, + "learning_rate": 2.020470254161921e-05, + "loss": 0.0111, "step": 105635 }, { "epoch": 4.93, - "learning_rate": 1.0188926913881207e-05, - "loss": 0.027, + "learning_rate": 2.020423447178319e-05, + "loss": 0.0185, "step": 105640 }, { "epoch": 4.93, - "learning_rate": 1.0188458112606067e-05, - "loss": 0.0457, + "learning_rate": 2.020376640194717e-05, + "loss": 0.0496, "step": 105645 }, { "epoch": 4.93, - "learning_rate": 1.0187989311330929e-05, - "loss": 0.0454, + "learning_rate": 2.020329833211115e-05, + "loss": 0.0446, "step": 105650 }, { "epoch": 4.93, - "learning_rate": 1.0187520510055788e-05, - "loss": 0.0397, + "learning_rate": 2.020283026227513e-05, + "loss": 0.047, "step": 105655 }, { "epoch": 4.93, - "learning_rate": 1.0187051708780648e-05, - "loss": 0.0971, + "learning_rate": 2.020236219243911e-05, + "loss": 0.0941, "step": 105660 }, { "epoch": 4.93, - "learning_rate": 1.0186582907505508e-05, - "loss": 0.0496, + "learning_rate": 2.0201894122603094e-05, + "loss": 0.067, "step": 105665 }, { "epoch": 4.93, - "learning_rate": 1.018611410623037e-05, - "loss": 0.2406, + "learning_rate": 2.0201426052767073e-05, + "loss": 0.1914, "step": 105670 }, { "epoch": 4.93, - "learning_rate": 1.018564530495523e-05, - "loss": 0.3111, + "learning_rate": 2.0200957982931053e-05, + "loss": 0.2685, "step": 105675 }, { "epoch": 4.93, - "learning_rate": 1.018517650368009e-05, - "loss": 0.1134, + "learning_rate": 2.0200489913095033e-05, + "loss": 0.0759, "step": 105680 }, { "epoch": 4.93, - "learning_rate": 1.0184707702404951e-05, - "loss": 0.0221, + "learning_rate": 2.0200021843259016e-05, + "loss": 0.0083, "step": 105685 }, { "epoch": 4.93, - "learning_rate": 1.0184238901129813e-05, - "loss": 0.0478, + "learning_rate": 2.0199553773422996e-05, + "loss": 0.0208, "step": 105690 }, { "epoch": 4.93, - "learning_rate": 1.0183770099854673e-05, - "loss": 0.0464, + "learning_rate": 2.0199085703586976e-05, + "loss": 0.0131, "step": 105695 }, { "epoch": 4.93, - "learning_rate": 1.0183301298579533e-05, - "loss": 0.0237, + "learning_rate": 2.019861763375096e-05, + "loss": 0.0694, "step": 105700 }, { "epoch": 4.93, - "learning_rate": 1.0182832497304393e-05, - "loss": 0.0356, + "learning_rate": 2.019814956391494e-05, + "loss": 0.0464, "step": 105705 }, { "epoch": 4.93, - "learning_rate": 1.0182363696029255e-05, - "loss": 0.0724, + "learning_rate": 2.0197681494078915e-05, + "loss": 0.0931, "step": 105710 }, { "epoch": 4.93, - "learning_rate": 1.0181894894754114e-05, - "loss": 0.0804, + "learning_rate": 2.0197213424242895e-05, + "loss": 0.1289, "step": 105715 }, { "epoch": 4.93, - "learning_rate": 1.0181426093478974e-05, - "loss": 0.1141, + "learning_rate": 2.0196745354406878e-05, + "loss": 0.1511, "step": 105720 }, { "epoch": 4.93, - "learning_rate": 1.0180957292203834e-05, - "loss": 0.1757, + "learning_rate": 2.0196277284570858e-05, + "loss": 0.2676, "step": 105725 }, { "epoch": 4.93, - "learning_rate": 1.0180488490928698e-05, - "loss": 0.0447, + "learning_rate": 2.0195809214734838e-05, + "loss": 0.0592, "step": 105730 }, { "epoch": 4.93, - "learning_rate": 1.0180019689653558e-05, - "loss": 0.0109, + "learning_rate": 2.0195341144898818e-05, + "loss": 0.0151, "step": 105735 }, { "epoch": 4.93, - "learning_rate": 1.0179550888378418e-05, - "loss": 0.0557, + "learning_rate": 2.01948730750628e-05, + "loss": 0.0093, "step": 105740 }, { "epoch": 4.93, - "learning_rate": 1.0179082087103277e-05, - "loss": 0.0157, + "learning_rate": 2.019440500522678e-05, + "loss": 0.0403, "step": 105745 }, { "epoch": 4.93, - "learning_rate": 1.0178613285828139e-05, - "loss": 0.0685, + "learning_rate": 2.019393693539076e-05, + "loss": 0.0397, "step": 105750 }, { "epoch": 4.93, - "learning_rate": 1.0178144484552999e-05, - "loss": 0.0675, + "learning_rate": 2.0193468865554744e-05, + "loss": 0.0361, "step": 105755 }, { "epoch": 4.93, - "learning_rate": 1.0177675683277859e-05, - "loss": 0.0339, + "learning_rate": 2.0193000795718724e-05, + "loss": 0.0766, "step": 105760 }, { "epoch": 4.94, - "learning_rate": 1.0177206882002719e-05, - "loss": 0.0616, + "learning_rate": 2.0192532725882703e-05, + "loss": 0.057, "step": 105765 }, { "epoch": 4.94, - "learning_rate": 1.0176738080727579e-05, - "loss": 0.1308, + "learning_rate": 2.019206465604668e-05, + "loss": 0.0599, "step": 105770 }, { "epoch": 4.94, - "learning_rate": 1.0176269279452442e-05, - "loss": 0.2863, + "learning_rate": 2.0191596586210663e-05, + "loss": 0.1622, "step": 105775 }, { "epoch": 4.94, - "learning_rate": 1.0175800478177302e-05, - "loss": 0.0831, + "learning_rate": 2.0191128516374643e-05, + "loss": 0.1162, "step": 105780 }, { "epoch": 4.94, - "learning_rate": 1.0175331676902162e-05, - "loss": 0.0167, + "learning_rate": 2.0190660446538623e-05, + "loss": 0.0279, "step": 105785 }, { "epoch": 4.94, - "learning_rate": 1.0174862875627024e-05, - "loss": 0.0625, + "learning_rate": 2.0190192376702603e-05, + "loss": 0.0621, "step": 105790 }, { "epoch": 4.94, - "learning_rate": 1.0174394074351884e-05, - "loss": 0.0709, + "learning_rate": 2.0189724306866586e-05, + "loss": 0.0392, "step": 105795 }, { "epoch": 4.94, - "learning_rate": 1.0173925273076744e-05, - "loss": 0.0417, + "learning_rate": 2.0189256237030566e-05, + "loss": 0.033, "step": 105800 }, { "epoch": 4.94, - "learning_rate": 1.0173456471801603e-05, - "loss": 0.1134, + "learning_rate": 2.0188788167194545e-05, + "loss": 0.1047, "step": 105805 }, { "epoch": 4.94, - "learning_rate": 1.0172987670526463e-05, - "loss": 0.067, + "learning_rate": 2.018832009735853e-05, + "loss": 0.0455, "step": 105810 }, { "epoch": 4.94, - "learning_rate": 1.0172518869251325e-05, - "loss": 0.1224, + "learning_rate": 2.018785202752251e-05, + "loss": 0.1053, "step": 105815 }, { "epoch": 4.94, - "learning_rate": 1.0172050067976185e-05, - "loss": 0.0973, + "learning_rate": 2.0187383957686488e-05, + "loss": 0.1814, "step": 105820 }, { "epoch": 4.94, - "learning_rate": 1.0171581266701047e-05, - "loss": 0.2426, + "learning_rate": 2.0186915887850468e-05, + "loss": 0.2513, "step": 105825 }, { "epoch": 4.94, - "learning_rate": 1.0171112465425908e-05, - "loss": 0.0653, + "learning_rate": 2.018644781801445e-05, + "loss": 0.06, "step": 105830 }, { "epoch": 4.94, - "learning_rate": 1.0170643664150768e-05, - "loss": 0.005, + "learning_rate": 2.0185979748178428e-05, + "loss": 0.0201, "step": 105835 }, { "epoch": 4.94, - "learning_rate": 1.0170174862875628e-05, - "loss": 0.0369, + "learning_rate": 2.0185511678342407e-05, + "loss": 0.0367, "step": 105840 }, { "epoch": 4.94, - "learning_rate": 1.0169706061600488e-05, - "loss": 0.0396, + "learning_rate": 2.0185043608506387e-05, + "loss": 0.0309, "step": 105845 }, { "epoch": 4.94, - "learning_rate": 1.0169237260325348e-05, - "loss": 0.0607, + "learning_rate": 2.018457553867037e-05, + "loss": 0.0528, "step": 105850 }, { "epoch": 4.94, - "learning_rate": 1.016876845905021e-05, - "loss": 0.0629, + "learning_rate": 2.018410746883435e-05, + "loss": 0.0736, "step": 105855 }, { "epoch": 4.94, - "learning_rate": 1.016829965777507e-05, - "loss": 0.1218, + "learning_rate": 2.018363939899833e-05, + "loss": 0.0785, "step": 105860 }, { "epoch": 4.94, - "learning_rate": 1.016783085649993e-05, - "loss": 0.1445, + "learning_rate": 2.018317132916231e-05, + "loss": 0.1107, "step": 105865 }, { "epoch": 4.94, - "learning_rate": 1.0167362055224793e-05, - "loss": 0.1589, + "learning_rate": 2.0182703259326293e-05, + "loss": 0.1061, "step": 105870 }, { "epoch": 4.94, - "learning_rate": 1.0166893253949653e-05, - "loss": 0.2263, + "learning_rate": 2.0182235189490273e-05, + "loss": 0.2865, "step": 105875 }, { "epoch": 4.94, - "learning_rate": 1.0166424452674513e-05, - "loss": 0.0922, + "learning_rate": 2.0181767119654253e-05, + "loss": 0.1216, "step": 105880 }, { "epoch": 4.94, - "learning_rate": 1.0165955651399373e-05, - "loss": 0.0472, + "learning_rate": 2.0181299049818236e-05, + "loss": 0.0062, "step": 105885 }, { "epoch": 4.94, - "learning_rate": 1.0165486850124232e-05, - "loss": 0.0172, + "learning_rate": 2.0180830979982216e-05, + "loss": 0.0182, "step": 105890 }, { "epoch": 4.94, - "learning_rate": 1.0165018048849094e-05, - "loss": 0.0513, + "learning_rate": 2.0180362910146196e-05, + "loss": 0.0544, "step": 105895 }, { "epoch": 4.94, - "learning_rate": 1.0164549247573954e-05, - "loss": 0.0624, + "learning_rate": 2.0179894840310172e-05, + "loss": 0.0217, "step": 105900 }, { "epoch": 4.94, - "learning_rate": 1.0164080446298814e-05, - "loss": 0.0115, + "learning_rate": 2.0179426770474155e-05, + "loss": 0.0381, "step": 105905 }, { "epoch": 4.94, - "learning_rate": 1.0163611645023674e-05, - "loss": 0.0551, + "learning_rate": 2.0178958700638135e-05, + "loss": 0.0932, "step": 105910 }, { "epoch": 4.94, - "learning_rate": 1.0163142843748537e-05, - "loss": 0.0976, + "learning_rate": 2.0178490630802115e-05, + "loss": 0.0676, "step": 105915 }, { "epoch": 4.94, - "learning_rate": 1.0162674042473397e-05, - "loss": 0.1354, + "learning_rate": 2.0178022560966095e-05, + "loss": 0.1238, "step": 105920 }, { "epoch": 4.94, - "learning_rate": 1.0162205241198257e-05, - "loss": 0.2303, + "learning_rate": 2.0177554491130078e-05, + "loss": 0.2822, "step": 105925 }, { "epoch": 4.94, - "learning_rate": 1.0161736439923117e-05, - "loss": 0.0869, + "learning_rate": 2.0177086421294058e-05, + "loss": 0.0507, "step": 105930 }, { "epoch": 4.94, - "learning_rate": 1.0161267638647979e-05, - "loss": 0.0097, + "learning_rate": 2.0176618351458038e-05, + "loss": 0.0176, "step": 105935 }, { "epoch": 4.94, - "learning_rate": 1.0160798837372839e-05, - "loss": 0.0077, + "learning_rate": 2.017615028162202e-05, + "loss": 0.0496, "step": 105940 }, { "epoch": 4.94, - "learning_rate": 1.0160330036097699e-05, - "loss": 0.0217, + "learning_rate": 2.0175682211786e-05, + "loss": 0.0493, "step": 105945 }, { "epoch": 4.94, - "learning_rate": 1.0159861234822558e-05, - "loss": 0.0444, + "learning_rate": 2.017521414194998e-05, + "loss": 0.0466, "step": 105950 }, { "epoch": 4.94, - "learning_rate": 1.0159392433547418e-05, - "loss": 0.0707, + "learning_rate": 2.017474607211396e-05, + "loss": 0.0549, "step": 105955 }, { "epoch": 4.94, - "learning_rate": 1.015892363227228e-05, - "loss": 0.0818, + "learning_rate": 2.017427800227794e-05, + "loss": 0.1039, "step": 105960 }, { "epoch": 4.94, - "learning_rate": 1.0158454830997142e-05, - "loss": 0.0797, + "learning_rate": 2.017380993244192e-05, + "loss": 0.1185, "step": 105965 }, { "epoch": 4.94, - "learning_rate": 1.0157986029722002e-05, - "loss": 0.119, + "learning_rate": 2.01733418626059e-05, + "loss": 0.1073, "step": 105970 }, { "epoch": 4.94, - "learning_rate": 1.0157517228446863e-05, - "loss": 0.4182, + "learning_rate": 2.017287379276988e-05, + "loss": 0.2062, "step": 105975 }, { "epoch": 4.95, - "learning_rate": 1.0157048427171723e-05, - "loss": 0.116, + "learning_rate": 2.0172405722933863e-05, + "loss": 0.0534, "step": 105980 }, { "epoch": 4.95, - "learning_rate": 1.0156579625896583e-05, - "loss": 0.0175, + "learning_rate": 2.0171937653097843e-05, + "loss": 0.0297, "step": 105985 }, { "epoch": 4.95, - "learning_rate": 1.0156110824621443e-05, - "loss": 0.0231, + "learning_rate": 2.0171469583261822e-05, + "loss": 0.0377, "step": 105990 }, { "epoch": 4.95, - "learning_rate": 1.0155642023346303e-05, - "loss": 0.063, + "learning_rate": 2.0171001513425806e-05, + "loss": 0.0621, "step": 105995 }, { "epoch": 4.95, - "learning_rate": 1.0155173222071165e-05, - "loss": 0.0556, + "learning_rate": 2.0170533443589785e-05, + "loss": 0.1073, "step": 106000 }, { "epoch": 4.95, - "learning_rate": 1.0154704420796025e-05, - "loss": 0.0445, + "learning_rate": 2.0170065373753765e-05, + "loss": 0.0323, "step": 106005 }, { "epoch": 4.95, - "learning_rate": 1.0154235619520886e-05, - "loss": 0.0489, + "learning_rate": 2.0169597303917745e-05, + "loss": 0.1246, "step": 106010 }, { "epoch": 4.95, - "learning_rate": 1.0153766818245748e-05, - "loss": 0.1033, + "learning_rate": 2.0169129234081728e-05, + "loss": 0.1371, "step": 106015 }, { "epoch": 4.95, - "learning_rate": 1.0153298016970608e-05, - "loss": 0.1242, + "learning_rate": 2.0168661164245708e-05, + "loss": 0.199, "step": 106020 }, { "epoch": 4.95, - "learning_rate": 1.0152829215695468e-05, - "loss": 0.2584, + "learning_rate": 2.0168193094409684e-05, + "loss": 0.2858, "step": 106025 }, { "epoch": 4.95, - "learning_rate": 1.0152360414420328e-05, - "loss": 0.0685, + "learning_rate": 2.0167725024573664e-05, + "loss": 0.0675, "step": 106030 }, { "epoch": 4.95, - "learning_rate": 1.0151891613145187e-05, - "loss": 0.0105, + "learning_rate": 2.0167256954737647e-05, + "loss": 0.0403, "step": 106035 }, { "epoch": 4.95, - "learning_rate": 1.0151422811870049e-05, - "loss": 0.0653, + "learning_rate": 2.0166788884901627e-05, + "loss": 0.0767, "step": 106040 }, { "epoch": 4.95, - "learning_rate": 1.0150954010594909e-05, - "loss": 0.0686, + "learning_rate": 2.0166320815065607e-05, + "loss": 0.0294, "step": 106045 }, { "epoch": 4.95, - "learning_rate": 1.0150485209319769e-05, - "loss": 0.0609, + "learning_rate": 2.0165852745229587e-05, + "loss": 0.0456, "step": 106050 }, { "epoch": 4.95, - "learning_rate": 1.0150016408044632e-05, - "loss": 0.0506, + "learning_rate": 2.016538467539357e-05, + "loss": 0.058, "step": 106055 }, { "epoch": 4.95, - "learning_rate": 1.0149547606769492e-05, - "loss": 0.1312, + "learning_rate": 2.016491660555755e-05, + "loss": 0.1194, "step": 106060 }, { "epoch": 4.95, - "learning_rate": 1.0149078805494352e-05, - "loss": 0.0669, + "learning_rate": 2.016444853572153e-05, + "loss": 0.0769, "step": 106065 }, { "epoch": 4.95, - "learning_rate": 1.0148610004219212e-05, - "loss": 0.1358, + "learning_rate": 2.0163980465885513e-05, + "loss": 0.2259, "step": 106070 }, { "epoch": 4.95, - "learning_rate": 1.0148141202944074e-05, - "loss": 0.2325, + "learning_rate": 2.0163512396049493e-05, + "loss": 0.2521, "step": 106075 }, { "epoch": 4.95, - "learning_rate": 1.0147672401668934e-05, - "loss": 0.0826, + "learning_rate": 2.0163044326213473e-05, + "loss": 0.111, "step": 106080 }, { "epoch": 4.95, - "learning_rate": 1.0147203600393794e-05, - "loss": 0.0157, + "learning_rate": 2.016257625637745e-05, + "loss": 0.0096, "step": 106085 }, { "epoch": 4.95, - "learning_rate": 1.0146734799118654e-05, - "loss": 0.036, + "learning_rate": 2.0162108186541432e-05, + "loss": 0.0268, "step": 106090 }, { "epoch": 4.95, - "learning_rate": 1.0146265997843513e-05, - "loss": 0.0389, + "learning_rate": 2.0161640116705412e-05, + "loss": 0.0669, "step": 106095 }, { "epoch": 4.95, - "learning_rate": 1.0145797196568377e-05, - "loss": 0.0506, + "learning_rate": 2.0161172046869392e-05, + "loss": 0.0389, "step": 106100 }, { "epoch": 4.95, - "learning_rate": 1.0145328395293237e-05, - "loss": 0.0532, + "learning_rate": 2.016070397703337e-05, + "loss": 0.0336, "step": 106105 }, { "epoch": 4.95, - "learning_rate": 1.0144859594018097e-05, - "loss": 0.0792, + "learning_rate": 2.0160235907197355e-05, + "loss": 0.0882, "step": 106110 }, { "epoch": 4.95, - "learning_rate": 1.0144390792742958e-05, - "loss": 0.1329, + "learning_rate": 2.0159767837361335e-05, + "loss": 0.0704, "step": 106115 }, { "epoch": 4.95, - "learning_rate": 1.0143921991467818e-05, - "loss": 0.1238, + "learning_rate": 2.0159299767525315e-05, + "loss": 0.191, "step": 106120 }, { "epoch": 4.95, - "learning_rate": 1.0143453190192678e-05, - "loss": 0.1671, + "learning_rate": 2.0158831697689298e-05, + "loss": 0.2403, "step": 106125 }, { "epoch": 4.95, - "learning_rate": 1.0142984388917538e-05, - "loss": 0.0674, + "learning_rate": 2.0158363627853278e-05, + "loss": 0.0827, "step": 106130 }, { "epoch": 4.95, - "learning_rate": 1.0142515587642398e-05, - "loss": 0.0211, + "learning_rate": 2.0157895558017257e-05, + "loss": 0.0183, "step": 106135 }, { "epoch": 4.95, - "learning_rate": 1.014204678636726e-05, - "loss": 0.0481, + "learning_rate": 2.0157427488181237e-05, + "loss": 0.0256, "step": 106140 }, { "epoch": 4.95, - "learning_rate": 1.014157798509212e-05, - "loss": 0.0719, + "learning_rate": 2.015695941834522e-05, + "loss": 0.013, "step": 106145 }, { "epoch": 4.95, - "learning_rate": 1.0141109183816981e-05, - "loss": 0.0752, + "learning_rate": 2.0156491348509197e-05, + "loss": 0.0292, "step": 106150 }, { "epoch": 4.95, - "learning_rate": 1.0140640382541843e-05, - "loss": 0.0941, + "learning_rate": 2.0156023278673177e-05, + "loss": 0.065, "step": 106155 }, { "epoch": 4.95, - "learning_rate": 1.0140171581266703e-05, - "loss": 0.0856, + "learning_rate": 2.0155555208837156e-05, + "loss": 0.0462, "step": 106160 }, { "epoch": 4.95, - "learning_rate": 1.0139702779991563e-05, - "loss": 0.0926, + "learning_rate": 2.015508713900114e-05, + "loss": 0.0878, "step": 106165 }, { "epoch": 4.95, - "learning_rate": 1.0139233978716423e-05, - "loss": 0.1223, + "learning_rate": 2.015461906916512e-05, + "loss": 0.0948, "step": 106170 }, { "epoch": 4.95, - "learning_rate": 1.0138765177441283e-05, - "loss": 0.2743, + "learning_rate": 2.01541509993291e-05, + "loss": 0.2333, "step": 106175 }, { "epoch": 4.95, - "learning_rate": 1.0138296376166144e-05, - "loss": 0.075, + "learning_rate": 2.0153682929493083e-05, + "loss": 0.0763, "step": 106180 }, { "epoch": 4.95, - "learning_rate": 1.0137827574891004e-05, - "loss": 0.0374, + "learning_rate": 2.0153214859657062e-05, + "loss": 0.0404, "step": 106185 }, { "epoch": 4.95, - "learning_rate": 1.0137358773615864e-05, - "loss": 0.0322, + "learning_rate": 2.0152746789821042e-05, + "loss": 0.0124, "step": 106190 }, { "epoch": 4.96, - "learning_rate": 1.0136889972340727e-05, - "loss": 0.1074, + "learning_rate": 2.0152278719985022e-05, + "loss": 0.0195, "step": 106195 }, { "epoch": 4.96, - "learning_rate": 1.0136421171065587e-05, - "loss": 0.0267, + "learning_rate": 2.0151810650149005e-05, + "loss": 0.0217, "step": 106200 }, { "epoch": 4.96, - "learning_rate": 1.0135952369790447e-05, - "loss": 0.0837, + "learning_rate": 2.0151342580312985e-05, + "loss": 0.0793, "step": 106205 }, { "epoch": 4.96, - "learning_rate": 1.0135483568515307e-05, - "loss": 0.096, + "learning_rate": 2.0150874510476965e-05, + "loss": 0.0619, "step": 106210 }, { "epoch": 4.96, - "learning_rate": 1.0135014767240167e-05, - "loss": 0.118, + "learning_rate": 2.015040644064094e-05, + "loss": 0.075, "step": 106215 }, { "epoch": 4.96, - "learning_rate": 1.0134545965965029e-05, - "loss": 0.3036, + "learning_rate": 2.0149938370804924e-05, + "loss": 0.1611, "step": 106220 }, { "epoch": 4.96, - "learning_rate": 1.0134077164689889e-05, - "loss": 0.1294, + "learning_rate": 2.0149470300968904e-05, + "loss": 0.1874, "step": 106225 }, { "epoch": 4.96, - "learning_rate": 1.0133608363414749e-05, - "loss": 0.0797, + "learning_rate": 2.0149002231132884e-05, + "loss": 0.0745, "step": 106230 }, { "epoch": 4.96, - "learning_rate": 1.0133139562139609e-05, - "loss": 0.0282, + "learning_rate": 2.0148534161296867e-05, + "loss": 0.0738, "step": 106235 }, { "epoch": 4.96, - "learning_rate": 1.0132670760864472e-05, - "loss": 0.0256, + "learning_rate": 2.0148066091460847e-05, + "loss": 0.0406, "step": 106240 }, { "epoch": 4.96, - "learning_rate": 1.0132201959589332e-05, - "loss": 0.0425, + "learning_rate": 2.0147598021624827e-05, + "loss": 0.0412, "step": 106245 }, { "epoch": 4.96, - "learning_rate": 1.0131733158314192e-05, - "loss": 0.0414, + "learning_rate": 2.0147129951788807e-05, + "loss": 0.0303, "step": 106250 }, { "epoch": 4.96, - "learning_rate": 1.0131264357039052e-05, - "loss": 0.1178, + "learning_rate": 2.014666188195279e-05, + "loss": 0.0808, "step": 106255 }, { "epoch": 4.96, - "learning_rate": 1.0130795555763913e-05, - "loss": 0.0499, + "learning_rate": 2.014619381211677e-05, + "loss": 0.0407, "step": 106260 }, { "epoch": 4.96, - "learning_rate": 1.0130326754488773e-05, - "loss": 0.0522, + "learning_rate": 2.014572574228075e-05, + "loss": 0.0812, "step": 106265 }, { "epoch": 4.96, - "learning_rate": 1.0129857953213633e-05, - "loss": 0.0924, + "learning_rate": 2.014525767244473e-05, + "loss": 0.1961, "step": 106270 }, { "epoch": 4.96, - "learning_rate": 1.0129389151938493e-05, - "loss": 0.2684, + "learning_rate": 2.014478960260871e-05, + "loss": 0.2914, "step": 106275 }, { "epoch": 4.96, - "learning_rate": 1.0128920350663353e-05, - "loss": 0.0866, + "learning_rate": 2.014432153277269e-05, + "loss": 0.0722, "step": 106280 }, { "epoch": 4.96, - "learning_rate": 1.0128451549388215e-05, - "loss": 0.0249, + "learning_rate": 2.014385346293667e-05, + "loss": 0.0075, "step": 106285 }, { "epoch": 4.96, - "learning_rate": 1.0127982748113076e-05, - "loss": 0.0251, + "learning_rate": 2.014338539310065e-05, + "loss": 0.0146, "step": 106290 }, { "epoch": 4.96, - "learning_rate": 1.0127513946837936e-05, - "loss": 0.0388, + "learning_rate": 2.0142917323264632e-05, + "loss": 0.0297, "step": 106295 }, { "epoch": 4.96, - "learning_rate": 1.0127045145562798e-05, - "loss": 0.0371, + "learning_rate": 2.014244925342861e-05, + "loss": 0.0574, "step": 106300 }, { "epoch": 4.96, - "learning_rate": 1.0126576344287658e-05, - "loss": 0.0321, + "learning_rate": 2.014198118359259e-05, + "loss": 0.0631, "step": 106305 }, { "epoch": 4.96, - "learning_rate": 1.0126107543012518e-05, - "loss": 0.0819, + "learning_rate": 2.0141513113756575e-05, + "loss": 0.0672, "step": 106310 }, { "epoch": 4.96, - "learning_rate": 1.0125638741737378e-05, - "loss": 0.0911, + "learning_rate": 2.0141045043920555e-05, + "loss": 0.1208, "step": 106315 }, { "epoch": 4.96, - "learning_rate": 1.0125169940462238e-05, - "loss": 0.0894, + "learning_rate": 2.0140576974084534e-05, + "loss": 0.2201, "step": 106320 }, { "epoch": 4.96, - "learning_rate": 1.01247011391871e-05, - "loss": 0.1787, + "learning_rate": 2.0140108904248514e-05, + "loss": 0.1805, "step": 106325 }, { "epoch": 4.96, - "learning_rate": 1.012423233791196e-05, - "loss": 0.0633, + "learning_rate": 2.0139640834412497e-05, + "loss": 0.0868, "step": 106330 }, { "epoch": 4.96, - "learning_rate": 1.012376353663682e-05, - "loss": 0.0315, + "learning_rate": 2.0139172764576477e-05, + "loss": 0.0145, "step": 106335 }, { "epoch": 4.96, - "learning_rate": 1.0123294735361682e-05, - "loss": 0.0292, + "learning_rate": 2.0138704694740454e-05, + "loss": 0.0061, "step": 106340 }, { "epoch": 4.96, - "learning_rate": 1.0122825934086542e-05, - "loss": 0.0777, + "learning_rate": 2.0138236624904433e-05, + "loss": 0.0617, "step": 106345 }, { "epoch": 4.96, - "learning_rate": 1.0122357132811402e-05, - "loss": 0.0588, + "learning_rate": 2.0137768555068417e-05, + "loss": 0.0168, "step": 106350 }, { "epoch": 4.96, - "learning_rate": 1.0121888331536262e-05, - "loss": 0.0551, + "learning_rate": 2.0137300485232396e-05, + "loss": 0.0649, "step": 106355 }, { "epoch": 4.96, - "learning_rate": 1.0121419530261122e-05, - "loss": 0.0748, + "learning_rate": 2.0136832415396376e-05, + "loss": 0.0433, "step": 106360 }, { "epoch": 4.96, - "learning_rate": 1.0120950728985984e-05, - "loss": 0.0935, + "learning_rate": 2.013636434556036e-05, + "loss": 0.0878, "step": 106365 }, { "epoch": 4.96, - "learning_rate": 1.0120481927710844e-05, - "loss": 0.135, + "learning_rate": 2.013589627572434e-05, + "loss": 0.1442, "step": 106370 }, { "epoch": 4.96, - "learning_rate": 1.0120013126435704e-05, - "loss": 0.3771, + "learning_rate": 2.013542820588832e-05, + "loss": 0.1384, "step": 106375 }, { "epoch": 4.96, - "learning_rate": 1.0119544325160567e-05, - "loss": 0.0698, + "learning_rate": 2.01349601360523e-05, + "loss": 0.0531, "step": 106380 }, { "epoch": 4.96, - "learning_rate": 1.0119075523885427e-05, - "loss": 0.0176, + "learning_rate": 2.0134492066216282e-05, + "loss": 0.0244, "step": 106385 }, { "epoch": 4.96, - "learning_rate": 1.0118606722610287e-05, - "loss": 0.0316, + "learning_rate": 2.0134023996380262e-05, + "loss": 0.0163, "step": 106390 }, { "epoch": 4.96, - "learning_rate": 1.0118137921335147e-05, - "loss": 0.062, + "learning_rate": 2.0133555926544242e-05, + "loss": 0.0431, "step": 106395 }, { "epoch": 4.96, - "learning_rate": 1.0117669120060007e-05, - "loss": 0.0531, + "learning_rate": 2.013308785670822e-05, + "loss": 0.0815, "step": 106400 }, { "epoch": 4.97, - "learning_rate": 1.0117200318784868e-05, - "loss": 0.0542, + "learning_rate": 2.01326197868722e-05, + "loss": 0.0263, "step": 106405 }, { "epoch": 4.97, - "learning_rate": 1.0116731517509728e-05, - "loss": 0.0868, + "learning_rate": 2.013215171703618e-05, + "loss": 0.1152, "step": 106410 }, { "epoch": 4.97, - "learning_rate": 1.0116262716234588e-05, - "loss": 0.1046, + "learning_rate": 2.013168364720016e-05, + "loss": 0.1205, "step": 106415 }, { "epoch": 4.97, - "learning_rate": 1.0115793914959448e-05, - "loss": 0.1983, + "learning_rate": 2.0131215577364144e-05, + "loss": 0.1934, "step": 106420 }, { "epoch": 4.97, - "learning_rate": 1.0115325113684311e-05, - "loss": 0.3316, + "learning_rate": 2.0130747507528124e-05, + "loss": 0.3416, "step": 106425 }, { "epoch": 4.97, - "learning_rate": 1.0114856312409171e-05, - "loss": 0.0411, + "learning_rate": 2.0130279437692104e-05, + "loss": 0.0675, "step": 106430 }, { "epoch": 4.97, - "learning_rate": 1.0114387511134031e-05, - "loss": 0.0243, + "learning_rate": 2.0129811367856084e-05, + "loss": 0.0093, "step": 106435 }, { "epoch": 4.97, - "learning_rate": 1.0113918709858891e-05, - "loss": 0.0512, + "learning_rate": 2.0129343298020067e-05, + "loss": 0.0252, "step": 106440 }, { "epoch": 4.97, - "learning_rate": 1.0113449908583753e-05, - "loss": 0.0213, + "learning_rate": 2.0128875228184047e-05, + "loss": 0.0438, "step": 106445 }, { "epoch": 4.97, - "learning_rate": 1.0112981107308613e-05, - "loss": 0.0605, + "learning_rate": 2.0128407158348027e-05, + "loss": 0.0559, "step": 106450 }, { "epoch": 4.97, - "learning_rate": 1.0112512306033473e-05, - "loss": 0.0454, + "learning_rate": 2.0127939088512006e-05, + "loss": 0.0352, "step": 106455 }, { "epoch": 4.97, - "learning_rate": 1.0112043504758333e-05, - "loss": 0.0441, + "learning_rate": 2.012747101867599e-05, + "loss": 0.0577, "step": 106460 }, { "epoch": 4.97, - "learning_rate": 1.0111574703483194e-05, - "loss": 0.09, + "learning_rate": 2.0127002948839966e-05, + "loss": 0.047, "step": 106465 }, { "epoch": 4.97, - "learning_rate": 1.0111105902208054e-05, - "loss": 0.093, + "learning_rate": 2.0126534879003946e-05, + "loss": 0.1003, "step": 106470 }, { "epoch": 4.97, - "learning_rate": 1.0110637100932916e-05, - "loss": 0.1816, + "learning_rate": 2.0126066809167926e-05, + "loss": 0.2476, "step": 106475 }, { "epoch": 4.97, - "learning_rate": 1.0110168299657776e-05, - "loss": 0.0599, + "learning_rate": 2.012559873933191e-05, + "loss": 0.1029, "step": 106480 }, { "epoch": 4.97, - "learning_rate": 1.0109699498382637e-05, - "loss": 0.0688, + "learning_rate": 2.012513066949589e-05, + "loss": 0.0311, "step": 106485 }, { "epoch": 4.97, - "learning_rate": 1.0109230697107497e-05, - "loss": 0.02, + "learning_rate": 2.012466259965987e-05, + "loss": 0.0146, "step": 106490 }, { "epoch": 4.97, - "learning_rate": 1.0108761895832357e-05, - "loss": 0.0035, + "learning_rate": 2.012419452982385e-05, + "loss": 0.0436, "step": 106495 }, { "epoch": 4.97, - "learning_rate": 1.0108293094557217e-05, - "loss": 0.0438, + "learning_rate": 2.012372645998783e-05, + "loss": 0.041, "step": 106500 }, { "epoch": 4.97, - "learning_rate": 1.0107824293282079e-05, - "loss": 0.0664, + "learning_rate": 2.012325839015181e-05, + "loss": 0.0536, "step": 106505 }, { "epoch": 4.97, - "learning_rate": 1.0107355492006939e-05, - "loss": 0.074, + "learning_rate": 2.012279032031579e-05, + "loss": 0.1101, "step": 106510 }, { "epoch": 4.97, - "learning_rate": 1.0106886690731799e-05, - "loss": 0.108, + "learning_rate": 2.0122322250479774e-05, + "loss": 0.0577, "step": 106515 }, { "epoch": 4.97, - "learning_rate": 1.010641788945666e-05, - "loss": 0.2912, + "learning_rate": 2.0121854180643754e-05, + "loss": 0.5049, "step": 106520 }, { "epoch": 4.97, - "learning_rate": 1.0105949088181522e-05, - "loss": 0.169, + "learning_rate": 2.0121386110807734e-05, + "loss": 0.2187, "step": 106525 }, { "epoch": 4.97, - "learning_rate": 1.0105480286906382e-05, - "loss": 0.0864, + "learning_rate": 2.012091804097171e-05, + "loss": 0.0701, "step": 106530 }, { "epoch": 4.97, - "learning_rate": 1.0105011485631242e-05, - "loss": 0.0345, + "learning_rate": 2.0120449971135694e-05, + "loss": 0.0192, "step": 106535 }, { "epoch": 4.97, - "learning_rate": 1.0104542684356102e-05, - "loss": 0.0443, + "learning_rate": 2.0119981901299673e-05, + "loss": 0.0193, "step": 106540 }, { "epoch": 4.97, - "learning_rate": 1.0104073883080963e-05, + "learning_rate": 2.0119513831463653e-05, "loss": 0.0409, "step": 106545 }, { "epoch": 4.97, - "learning_rate": 1.0103605081805823e-05, - "loss": 0.0509, + "learning_rate": 2.0119045761627636e-05, + "loss": 0.016, "step": 106550 }, { "epoch": 4.97, - "learning_rate": 1.0103136280530683e-05, - "loss": 0.0303, + "learning_rate": 2.0118577691791616e-05, + "loss": 0.0477, "step": 106555 }, { "epoch": 4.97, - "learning_rate": 1.0102667479255543e-05, - "loss": 0.0752, + "learning_rate": 2.0118109621955596e-05, + "loss": 0.0738, "step": 106560 }, { "epoch": 4.97, - "learning_rate": 1.0102198677980407e-05, - "loss": 0.0816, + "learning_rate": 2.0117641552119576e-05, + "loss": 0.082, "step": 106565 }, { "epoch": 4.97, - "learning_rate": 1.0101729876705266e-05, - "loss": 0.2227, + "learning_rate": 2.011717348228356e-05, + "loss": 0.0944, "step": 106570 }, { "epoch": 4.97, - "learning_rate": 1.0101261075430126e-05, - "loss": 0.2331, + "learning_rate": 2.011670541244754e-05, + "loss": 0.1519, "step": 106575 }, { "epoch": 4.97, - "learning_rate": 1.0100792274154986e-05, - "loss": 0.0779, + "learning_rate": 2.011623734261152e-05, + "loss": 0.0562, "step": 106580 }, { "epoch": 4.97, - "learning_rate": 1.0100323472879848e-05, - "loss": 0.0239, + "learning_rate": 2.01157692727755e-05, + "loss": 0.0012, "step": 106585 }, { "epoch": 4.97, - "learning_rate": 1.0099854671604708e-05, - "loss": 0.0714, + "learning_rate": 2.011530120293948e-05, + "loss": 0.0458, "step": 106590 }, { "epoch": 4.97, - "learning_rate": 1.0099385870329568e-05, - "loss": 0.0674, + "learning_rate": 2.0114833133103458e-05, + "loss": 0.0138, "step": 106595 }, { "epoch": 4.97, - "learning_rate": 1.0098917069054428e-05, - "loss": 0.0846, + "learning_rate": 2.0114365063267438e-05, + "loss": 0.0744, "step": 106600 }, { "epoch": 4.97, - "learning_rate": 1.0098448267779288e-05, - "loss": 0.0189, + "learning_rate": 2.011389699343142e-05, + "loss": 0.0427, "step": 106605 }, { "epoch": 4.97, - "learning_rate": 1.009797946650415e-05, - "loss": 0.1387, + "learning_rate": 2.01134289235954e-05, + "loss": 0.0133, "step": 106610 }, { "epoch": 4.97, - "learning_rate": 1.0097510665229011e-05, - "loss": 0.0821, + "learning_rate": 2.011296085375938e-05, + "loss": 0.0812, "step": 106615 }, { "epoch": 4.98, - "learning_rate": 1.0097041863953871e-05, - "loss": 0.1273, + "learning_rate": 2.011249278392336e-05, + "loss": 0.0688, "step": 106620 }, { "epoch": 4.98, - "learning_rate": 1.0096573062678733e-05, - "loss": 0.2383, + "learning_rate": 2.0112024714087344e-05, + "loss": 0.2814, "step": 106625 }, { "epoch": 4.98, - "learning_rate": 1.0096104261403592e-05, - "loss": 0.076, + "learning_rate": 2.0111556644251324e-05, + "loss": 0.0596, "step": 106630 }, { "epoch": 4.98, - "learning_rate": 1.0095635460128452e-05, - "loss": 0.0271, + "learning_rate": 2.0111088574415304e-05, + "loss": 0.0152, "step": 106635 }, { "epoch": 4.98, - "learning_rate": 1.0095166658853312e-05, - "loss": 0.0509, + "learning_rate": 2.0110620504579283e-05, + "loss": 0.0255, "step": 106640 }, { "epoch": 4.98, - "learning_rate": 1.0094697857578172e-05, - "loss": 0.0879, + "learning_rate": 2.0110152434743267e-05, + "loss": 0.0267, "step": 106645 }, { "epoch": 4.98, - "learning_rate": 1.0094229056303034e-05, - "loss": 0.0831, + "learning_rate": 2.0109684364907246e-05, + "loss": 0.0678, "step": 106650 }, { "epoch": 4.98, - "learning_rate": 1.0093760255027894e-05, + "learning_rate": 2.0109216295071223e-05, "loss": 0.0384, "step": 106655 }, { "epoch": 4.98, - "learning_rate": 1.0093291453752755e-05, - "loss": 0.0639, + "learning_rate": 2.0108748225235203e-05, + "loss": 0.1398, "step": 106660 }, { "epoch": 4.98, - "learning_rate": 1.0092822652477617e-05, - "loss": 0.0972, + "learning_rate": 2.0108280155399186e-05, + "loss": 0.1638, "step": 106665 }, { "epoch": 4.98, - "learning_rate": 1.0092353851202477e-05, - "loss": 0.1232, + "learning_rate": 2.0107812085563166e-05, + "loss": 0.1499, "step": 106670 }, { "epoch": 4.98, - "learning_rate": 1.0091885049927337e-05, - "loss": 0.2165, + "learning_rate": 2.0107344015727145e-05, + "loss": 0.2992, "step": 106675 }, { "epoch": 4.98, - "learning_rate": 1.0091416248652197e-05, - "loss": 0.0817, + "learning_rate": 2.010687594589113e-05, + "loss": 0.073, "step": 106680 }, { "epoch": 4.98, - "learning_rate": 1.0090947447377057e-05, - "loss": 0.0023, + "learning_rate": 2.010640787605511e-05, + "loss": 0.0207, "step": 106685 }, { "epoch": 4.98, - "learning_rate": 1.0090478646101918e-05, - "loss": 0.0271, + "learning_rate": 2.0105939806219088e-05, + "loss": 0.0574, "step": 106690 }, { "epoch": 4.98, - "learning_rate": 1.0090009844826778e-05, - "loss": 0.0288, + "learning_rate": 2.0105471736383068e-05, + "loss": 0.017, "step": 106695 }, { "epoch": 4.98, - "learning_rate": 1.0089541043551638e-05, - "loss": 0.0329, + "learning_rate": 2.010500366654705e-05, + "loss": 0.0377, "step": 106700 }, { "epoch": 4.98, - "learning_rate": 1.0089072242276502e-05, - "loss": 0.0796, + "learning_rate": 2.010453559671103e-05, + "loss": 0.0431, "step": 106705 }, { "epoch": 4.98, - "learning_rate": 1.0088603441001362e-05, - "loss": 0.0733, + "learning_rate": 2.010406752687501e-05, + "loss": 0.0659, "step": 106710 }, { "epoch": 4.98, - "learning_rate": 1.0088134639726221e-05, - "loss": 0.0385, + "learning_rate": 2.010359945703899e-05, + "loss": 0.0594, "step": 106715 }, { "epoch": 4.98, - "learning_rate": 1.0087665838451081e-05, - "loss": 0.1423, + "learning_rate": 2.010313138720297e-05, + "loss": 0.0843, "step": 106720 }, { "epoch": 4.98, - "learning_rate": 1.0087197037175941e-05, - "loss": 0.3247, + "learning_rate": 2.010266331736695e-05, + "loss": 0.2111, "step": 106725 }, { "epoch": 4.98, - "learning_rate": 1.0086728235900803e-05, - "loss": 0.1012, + "learning_rate": 2.010219524753093e-05, + "loss": 0.0898, "step": 106730 }, { "epoch": 4.98, - "learning_rate": 1.0086259434625663e-05, - "loss": 0.0257, + "learning_rate": 2.0101727177694913e-05, + "loss": 0.0166, "step": 106735 }, { "epoch": 4.98, - "learning_rate": 1.0085790633350523e-05, - "loss": 0.023, + "learning_rate": 2.0101259107858893e-05, + "loss": 0.0505, "step": 106740 }, { "epoch": 4.98, - "learning_rate": 1.0085321832075383e-05, - "loss": 0.0512, + "learning_rate": 2.0100791038022873e-05, + "loss": 0.0412, "step": 106745 }, { "epoch": 4.98, - "learning_rate": 1.0084853030800246e-05, - "loss": 0.0609, + "learning_rate": 2.0100322968186853e-05, + "loss": 0.0662, "step": 106750 }, { "epoch": 4.98, - "learning_rate": 1.0084384229525106e-05, - "loss": 0.0121, + "learning_rate": 2.0099854898350836e-05, + "loss": 0.1071, "step": 106755 }, { "epoch": 4.98, - "learning_rate": 1.0083915428249966e-05, - "loss": 0.0427, + "learning_rate": 2.0099386828514816e-05, + "loss": 0.0952, "step": 106760 }, { "epoch": 4.98, - "learning_rate": 1.0083446626974826e-05, - "loss": 0.142, + "learning_rate": 2.0098918758678796e-05, + "loss": 0.0402, "step": 106765 }, { "epoch": 4.98, - "learning_rate": 1.0082977825699688e-05, - "loss": 0.0849, + "learning_rate": 2.0098450688842776e-05, + "loss": 0.0902, "step": 106770 }, { "epoch": 4.98, - "learning_rate": 1.0082509024424547e-05, - "loss": 0.3115, + "learning_rate": 2.009798261900676e-05, + "loss": 0.2457, "step": 106775 }, { "epoch": 4.98, - "learning_rate": 1.0082040223149407e-05, - "loss": 0.0612, + "learning_rate": 2.0097514549170735e-05, + "loss": 0.0579, "step": 106780 }, { "epoch": 4.98, - "learning_rate": 1.0081571421874267e-05, - "loss": 0.0387, + "learning_rate": 2.0097046479334715e-05, + "loss": 0.0956, "step": 106785 }, { "epoch": 4.98, - "learning_rate": 1.0081102620599127e-05, - "loss": 0.0611, + "learning_rate": 2.0096578409498698e-05, + "loss": 0.0177, "step": 106790 }, { "epoch": 4.98, - "learning_rate": 1.0080633819323989e-05, - "loss": 0.051, + "learning_rate": 2.0096110339662678e-05, + "loss": 0.012, "step": 106795 }, { "epoch": 4.98, - "learning_rate": 1.008016501804885e-05, - "loss": 0.0848, + "learning_rate": 2.0095642269826658e-05, + "loss": 0.013, "step": 106800 }, { "epoch": 4.98, - "learning_rate": 1.007969621677371e-05, - "loss": 0.0298, + "learning_rate": 2.0095174199990638e-05, + "loss": 0.0629, "step": 106805 }, { "epoch": 4.98, - "learning_rate": 1.0079227415498572e-05, - "loss": 0.1119, + "learning_rate": 2.009470613015462e-05, + "loss": 0.0589, "step": 106810 }, { "epoch": 4.98, - "learning_rate": 1.0078758614223432e-05, - "loss": 0.0883, + "learning_rate": 2.00942380603186e-05, + "loss": 0.0978, "step": 106815 }, { "epoch": 4.98, - "learning_rate": 1.0078289812948292e-05, - "loss": 0.0992, + "learning_rate": 2.009376999048258e-05, + "loss": 0.2097, "step": 106820 }, { "epoch": 4.98, - "learning_rate": 1.0077821011673152e-05, - "loss": 0.1845, + "learning_rate": 2.009330192064656e-05, + "loss": 0.2931, "step": 106825 }, { "epoch": 4.98, - "learning_rate": 1.0077352210398012e-05, - "loss": 0.0822, + "learning_rate": 2.0092833850810544e-05, + "loss": 0.086, "step": 106830 }, { "epoch": 4.99, - "learning_rate": 1.0076883409122873e-05, - "loss": 0.0048, + "learning_rate": 2.0092365780974523e-05, + "loss": 0.0377, "step": 106835 }, { "epoch": 4.99, - "learning_rate": 1.0076414607847733e-05, - "loss": 0.0381, + "learning_rate": 2.0091897711138503e-05, + "loss": 0.0515, "step": 106840 }, { "epoch": 4.99, - "learning_rate": 1.0075945806572595e-05, - "loss": 0.0755, + "learning_rate": 2.0091429641302483e-05, + "loss": 0.0271, "step": 106845 }, { "epoch": 4.99, - "learning_rate": 1.0075477005297457e-05, - "loss": 0.0941, + "learning_rate": 2.0090961571466463e-05, + "loss": 0.0894, "step": 106850 }, { "epoch": 4.99, - "learning_rate": 1.0075008204022317e-05, - "loss": 0.0333, + "learning_rate": 2.0090493501630443e-05, + "loss": 0.0605, "step": 106855 }, { "epoch": 4.99, - "learning_rate": 1.0074539402747177e-05, - "loss": 0.0741, + "learning_rate": 2.0090025431794422e-05, + "loss": 0.0501, "step": 106860 }, { "epoch": 4.99, - "learning_rate": 1.0074070601472036e-05, - "loss": 0.1271, + "learning_rate": 2.0089557361958406e-05, + "loss": 0.076, "step": 106865 }, { "epoch": 4.99, - "learning_rate": 1.0073601800196896e-05, - "loss": 0.1023, + "learning_rate": 2.0089089292122385e-05, + "loss": 0.0764, "step": 106870 }, { "epoch": 4.99, - "learning_rate": 1.0073132998921758e-05, - "loss": 0.378, + "learning_rate": 2.0088621222286365e-05, + "loss": 0.3147, "step": 106875 }, { "epoch": 4.99, - "learning_rate": 1.0072664197646618e-05, - "loss": 0.0827, + "learning_rate": 2.0088153152450345e-05, + "loss": 0.1005, "step": 106880 }, { "epoch": 4.99, - "learning_rate": 1.0072195396371478e-05, - "loss": 0.0111, + "learning_rate": 2.0087685082614328e-05, + "loss": 0.0212, "step": 106885 }, { "epoch": 4.99, - "learning_rate": 1.0071726595096341e-05, - "loss": 0.0459, + "learning_rate": 2.0087217012778308e-05, + "loss": 0.0086, "step": 106890 }, { "epoch": 4.99, - "learning_rate": 1.0071257793821201e-05, - "loss": 0.0547, + "learning_rate": 2.0086748942942288e-05, + "loss": 0.0932, "step": 106895 }, { "epoch": 4.99, - "learning_rate": 1.0070788992546061e-05, - "loss": 0.0094, + "learning_rate": 2.0086280873106268e-05, + "loss": 0.0622, "step": 106900 }, { "epoch": 4.99, - "learning_rate": 1.0070320191270921e-05, - "loss": 0.0941, + "learning_rate": 2.008581280327025e-05, + "loss": 0.0714, "step": 106905 }, { "epoch": 4.99, - "learning_rate": 1.0069851389995781e-05, - "loss": 0.1168, + "learning_rate": 2.0085344733434227e-05, + "loss": 0.1308, "step": 106910 }, { "epoch": 4.99, - "learning_rate": 1.0069382588720643e-05, - "loss": 0.081, + "learning_rate": 2.0084876663598207e-05, + "loss": 0.0868, "step": 106915 }, { "epoch": 4.99, - "learning_rate": 1.0068913787445502e-05, - "loss": 0.1258, + "learning_rate": 2.008440859376219e-05, + "loss": 0.2101, "step": 106920 }, { "epoch": 4.99, - "learning_rate": 1.0068444986170362e-05, - "loss": 0.3271, + "learning_rate": 2.008394052392617e-05, + "loss": 0.2443, "step": 106925 }, { "epoch": 4.99, - "learning_rate": 1.0067976184895222e-05, - "loss": 0.1, + "learning_rate": 2.008347245409015e-05, + "loss": 0.0691, "step": 106930 }, { "epoch": 4.99, - "learning_rate": 1.0067507383620084e-05, - "loss": 0.0084, + "learning_rate": 2.008300438425413e-05, + "loss": 0.0105, "step": 106935 }, { "epoch": 4.99, - "learning_rate": 1.0067038582344946e-05, - "loss": 0.0214, + "learning_rate": 2.0082536314418113e-05, + "loss": 0.0665, "step": 106940 }, { "epoch": 4.99, - "learning_rate": 1.0066569781069806e-05, - "loss": 0.0321, + "learning_rate": 2.0082068244582093e-05, + "loss": 0.0285, "step": 106945 }, { "epoch": 4.99, - "learning_rate": 1.0066100979794665e-05, - "loss": 0.0446, + "learning_rate": 2.0081600174746073e-05, + "loss": 0.0145, "step": 106950 }, { "epoch": 4.99, - "learning_rate": 1.0065632178519527e-05, - "loss": 0.0823, + "learning_rate": 2.0081132104910053e-05, + "loss": 0.029, "step": 106955 }, { "epoch": 4.99, - "learning_rate": 1.0065163377244387e-05, - "loss": 0.0646, + "learning_rate": 2.0080664035074036e-05, + "loss": 0.0785, "step": 106960 }, { "epoch": 4.99, - "learning_rate": 1.0064694575969247e-05, - "loss": 0.0987, + "learning_rate": 2.0080195965238016e-05, + "loss": 0.094, "step": 106965 }, { "epoch": 4.99, - "learning_rate": 1.0064225774694107e-05, - "loss": 0.1067, + "learning_rate": 2.0079727895401992e-05, + "loss": 0.1372, "step": 106970 }, { "epoch": 4.99, - "learning_rate": 1.0063756973418969e-05, - "loss": 0.3464, + "learning_rate": 2.0079259825565975e-05, + "loss": 0.3084, "step": 106975 }, { "epoch": 4.99, - "learning_rate": 1.0063288172143828e-05, - "loss": 0.0552, + "learning_rate": 2.0078791755729955e-05, + "loss": 0.0579, "step": 106980 }, { "epoch": 4.99, - "learning_rate": 1.006281937086869e-05, - "loss": 0.032, + "learning_rate": 2.0078323685893935e-05, + "loss": 0.029, "step": 106985 }, { "epoch": 4.99, - "learning_rate": 1.006235056959355e-05, - "loss": 0.0183, + "learning_rate": 2.0077855616057915e-05, + "loss": 0.0169, "step": 106990 }, { "epoch": 4.99, - "learning_rate": 1.0061881768318412e-05, - "loss": 0.0422, + "learning_rate": 2.0077387546221898e-05, + "loss": 0.0575, "step": 106995 }, { "epoch": 4.99, - "learning_rate": 1.0061412967043272e-05, - "loss": 0.0519, + "learning_rate": 2.0076919476385878e-05, + "loss": 0.0628, "step": 107000 }, { "epoch": 4.99, - "learning_rate": 1.0060944165768132e-05, - "loss": 0.0515, + "learning_rate": 2.0076451406549857e-05, + "loss": 0.042, "step": 107005 }, { "epoch": 4.99, - "learning_rate": 1.0060475364492991e-05, - "loss": 0.0477, + "learning_rate": 2.0075983336713837e-05, + "loss": 0.0571, "step": 107010 }, { "epoch": 4.99, - "learning_rate": 1.0060006563217853e-05, - "loss": 0.0601, + "learning_rate": 2.007551526687782e-05, + "loss": 0.1239, "step": 107015 }, { "epoch": 4.99, - "learning_rate": 1.0059537761942713e-05, - "loss": 0.1691, + "learning_rate": 2.00750471970418e-05, + "loss": 0.0678, "step": 107020 }, { "epoch": 4.99, - "learning_rate": 1.0059068960667573e-05, - "loss": 0.1457, + "learning_rate": 2.007457912720578e-05, + "loss": 0.2689, "step": 107025 }, { "epoch": 4.99, - "learning_rate": 1.0058600159392435e-05, - "loss": 0.0356, + "learning_rate": 2.0074111057369763e-05, + "loss": 0.0869, "step": 107030 }, { "epoch": 4.99, - "learning_rate": 1.0058131358117296e-05, - "loss": 0.0334, + "learning_rate": 2.007364298753374e-05, + "loss": 0.0098, "step": 107035 }, { "epoch": 4.99, - "learning_rate": 1.0057662556842156e-05, - "loss": 0.01, + "learning_rate": 2.007317491769772e-05, + "loss": 0.0197, "step": 107040 }, { "epoch": 4.99, - "learning_rate": 1.0057193755567016e-05, - "loss": 0.0349, + "learning_rate": 2.00727068478617e-05, + "loss": 0.0201, "step": 107045 }, { "epoch": 5.0, - "learning_rate": 1.0056724954291876e-05, - "loss": 0.035, + "learning_rate": 2.0072238778025683e-05, + "loss": 0.0626, "step": 107050 }, { "epoch": 5.0, - "learning_rate": 1.0056256153016738e-05, - "loss": 0.018, + "learning_rate": 2.0071770708189662e-05, + "loss": 0.0748, "step": 107055 }, { "epoch": 5.0, - "learning_rate": 1.0055787351741598e-05, - "loss": 0.0728, + "learning_rate": 2.0071302638353642e-05, + "loss": 0.0678, "step": 107060 }, { "epoch": 5.0, - "learning_rate": 1.0055318550466457e-05, - "loss": 0.0912, + "learning_rate": 2.0070834568517622e-05, + "loss": 0.0411, "step": 107065 }, { "epoch": 5.0, - "learning_rate": 1.0054849749191317e-05, - "loss": 0.1536, + "learning_rate": 2.0070366498681605e-05, + "loss": 0.0822, "step": 107070 }, { "epoch": 5.0, - "learning_rate": 1.005438094791618e-05, - "loss": 0.1777, + "learning_rate": 2.0069898428845585e-05, + "loss": 0.3818, "step": 107075 }, { "epoch": 5.0, - "learning_rate": 1.005391214664104e-05, - "loss": 0.0382, + "learning_rate": 2.0069430359009565e-05, + "loss": 0.0757, "step": 107080 }, { "epoch": 5.0, - "learning_rate": 1.00534433453659e-05, - "loss": 0.019, + "learning_rate": 2.0068962289173545e-05, + "loss": 0.0385, "step": 107085 }, { "epoch": 5.0, - "learning_rate": 1.005297454409076e-05, - "loss": 0.0254, + "learning_rate": 2.0068494219337528e-05, + "loss": 0.0241, "step": 107090 }, { "epoch": 5.0, - "learning_rate": 1.0052505742815622e-05, - "loss": 0.023, + "learning_rate": 2.0068026149501508e-05, + "loss": 0.0376, "step": 107095 }, { "epoch": 5.0, - "learning_rate": 1.0052036941540482e-05, - "loss": 0.0637, + "learning_rate": 2.0067558079665484e-05, + "loss": 0.065, "step": 107100 }, { "epoch": 5.0, - "learning_rate": 1.0051568140265342e-05, - "loss": 0.1354, + "learning_rate": 2.0067090009829467e-05, + "loss": 0.0412, "step": 107105 }, { "epoch": 5.0, - "learning_rate": 1.0051099338990202e-05, - "loss": 0.0935, + "learning_rate": 2.0066621939993447e-05, + "loss": 0.0555, "step": 107110 }, { "epoch": 5.0, - "learning_rate": 1.0050630537715062e-05, - "loss": 0.0908, + "learning_rate": 2.0066153870157427e-05, + "loss": 0.0562, "step": 107115 }, { "epoch": 5.0, - "learning_rate": 1.0050161736439924e-05, - "loss": 0.1455, + "learning_rate": 2.0065685800321407e-05, + "loss": 0.1101, "step": 107120 }, { "epoch": 5.0, - "learning_rate": 1.0049692935164785e-05, - "loss": 0.2236, + "learning_rate": 2.006521773048539e-05, + "loss": 0.2763, "step": 107125 }, { "epoch": 5.0, - "learning_rate": 1.0049224133889645e-05, - "loss": 0.0443, + "learning_rate": 2.006474966064937e-05, + "loss": 0.0956, "step": 107130 }, { "epoch": 5.0, - "learning_rate": 1.0048755332614507e-05, - "loss": 0.0502, + "learning_rate": 2.006428159081335e-05, + "loss": 0.0233, "step": 107135 }, { "epoch": 5.0, - "learning_rate": 1.0048286531339367e-05, - "loss": 0.0426, + "learning_rate": 2.006381352097733e-05, + "loss": 0.0579, "step": 107140 }, { "epoch": 5.0, - "learning_rate": 1.0047817730064227e-05, - "loss": 0.0321, + "learning_rate": 2.0063345451141313e-05, + "loss": 0.0647, "step": 107145 }, { "epoch": 5.0, - "learning_rate": 1.0047348928789087e-05, - "loss": 0.1014, + "learning_rate": 2.0062877381305293e-05, + "loss": 0.1375, "step": 107150 }, { "epoch": 5.0, - "learning_rate": 1.0046880127513946e-05, - "loss": 0.2579, + "learning_rate": 2.0062409311469272e-05, + "loss": 0.154, "step": 107155 }, { "epoch": 5.0, - "eval_cer": 0.009420852950587375, - "eval_loss": 0.05503755807876587, - "eval_runtime": 386.7254, - "eval_samples_per_second": 49.26, - "eval_steps_per_second": 12.316, - "eval_wer": 0.07860459848227433, + "eval_cer": 0.01047122841437006, + "eval_loss": 0.3056192398071289, + "eval_runtime": 400.6494, + "eval_samples_per_second": 47.548, + "eval_steps_per_second": 11.888, + "eval_wer": 0.08437930506658503, "step": 107155 }, { "epoch": 5.0, - "learning_rate": 1.0046411326238808e-05, - "loss": 0.0851, + "learning_rate": 2.0061941241633252e-05, + "loss": 0.2281, "step": 107160 }, { "epoch": 5.0, - "learning_rate": 1.0045942524963668e-05, - "loss": 0.0317, + "learning_rate": 2.0061473171797232e-05, + "loss": 0.0066, "step": 107165 }, { "epoch": 5.0, - "learning_rate": 1.004547372368853e-05, - "loss": 0.0301, + "learning_rate": 2.0061005101961212e-05, + "loss": 0.0466, "step": 107170 }, { "epoch": 5.0, - "learning_rate": 1.0045004922413391e-05, - "loss": 0.0684, + "learning_rate": 2.006053703212519e-05, + "loss": 0.0275, "step": 107175 }, { "epoch": 5.0, - "learning_rate": 1.0044536121138251e-05, - "loss": 0.2923, + "learning_rate": 2.0060068962289175e-05, + "loss": 0.0441, "step": 107180 }, { "epoch": 5.0, - "learning_rate": 1.0044067319863111e-05, - "loss": 0.0366, + "learning_rate": 2.0059600892453155e-05, + "loss": 0.0903, "step": 107185 }, { "epoch": 5.0, - "learning_rate": 1.0043598518587971e-05, - "loss": 0.0468, + "learning_rate": 2.0059132822617134e-05, + "loss": 0.04, "step": 107190 }, { "epoch": 5.0, - "learning_rate": 1.0043129717312831e-05, - "loss": 0.0388, + "learning_rate": 2.0058664752781114e-05, + "loss": 0.0742, "step": 107195 }, { "epoch": 5.0, - "learning_rate": 1.0042660916037693e-05, - "loss": 0.2321, + "learning_rate": 2.0058196682945097e-05, + "loss": 0.2088, "step": 107200 }, { "epoch": 5.0, - "learning_rate": 1.0042192114762553e-05, - "loss": 0.2216, + "learning_rate": 2.0057728613109077e-05, + "loss": 0.2766, "step": 107205 }, { "epoch": 5.0, - "learning_rate": 1.0041723313487413e-05, - "loss": 0.0782, + "learning_rate": 2.0057260543273057e-05, + "loss": 0.0981, "step": 107210 }, { "epoch": 5.0, - "learning_rate": 1.0041254512212276e-05, - "loss": 0.0567, + "learning_rate": 2.005679247343704e-05, + "loss": 0.0143, "step": 107215 }, { "epoch": 5.0, - "learning_rate": 1.0040785710937136e-05, - "loss": 0.0363, + "learning_rate": 2.005632440360102e-05, + "loss": 0.0425, "step": 107220 }, { "epoch": 5.0, - "learning_rate": 1.0040316909661996e-05, - "loss": 0.0799, + "learning_rate": 2.0055856333764997e-05, + "loss": 0.0182, "step": 107225 }, { "epoch": 5.0, - "learning_rate": 1.0039848108386856e-05, - "loss": 0.0457, + "learning_rate": 2.0055388263928976e-05, + "loss": 0.0502, "step": 107230 }, { "epoch": 5.0, - "learning_rate": 1.0039379307111716e-05, - "loss": 0.0405, + "learning_rate": 2.005492019409296e-05, + "loss": 0.1032, "step": 107235 }, { "epoch": 5.0, - "learning_rate": 1.0038910505836577e-05, - "loss": 0.0579, + "learning_rate": 2.005445212425694e-05, + "loss": 0.048, "step": 107240 }, { "epoch": 5.0, - "learning_rate": 1.0038441704561437e-05, - "loss": 0.1194, + "learning_rate": 2.005398405442092e-05, + "loss": 0.1986, "step": 107245 }, { "epoch": 5.0, - "learning_rate": 1.0037972903286297e-05, - "loss": 0.1571, + "learning_rate": 2.00535159845849e-05, + "loss": 0.0733, "step": 107250 }, { "epoch": 5.0, - "learning_rate": 1.0037504102011157e-05, - "loss": 0.1615, + "learning_rate": 2.0053047914748882e-05, + "loss": 0.1917, "step": 107255 }, { "epoch": 5.0, - "learning_rate": 1.0037035300736017e-05, - "loss": 0.0786, + "learning_rate": 2.0052579844912862e-05, + "loss": 0.0699, "step": 107260 }, { "epoch": 5.01, - "learning_rate": 1.003656649946088e-05, - "loss": 0.0064, + "learning_rate": 2.0052111775076842e-05, + "loss": 0.0066, "step": 107265 }, { "epoch": 5.01, - "learning_rate": 1.003609769818574e-05, - "loss": 0.0207, + "learning_rate": 2.005164370524082e-05, + "loss": 0.0271, "step": 107270 }, { "epoch": 5.01, - "learning_rate": 1.00356288969106e-05, - "loss": 0.0736, + "learning_rate": 2.0051175635404805e-05, + "loss": 0.0568, "step": 107275 }, { "epoch": 5.01, - "learning_rate": 1.0035160095635462e-05, - "loss": 0.0528, + "learning_rate": 2.0050707565568785e-05, + "loss": 0.0512, "step": 107280 }, { "epoch": 5.01, - "learning_rate": 1.0034691294360322e-05, - "loss": 0.0678, + "learning_rate": 2.0050239495732765e-05, + "loss": 0.048, "step": 107285 }, { "epoch": 5.01, - "learning_rate": 1.0034222493085182e-05, - "loss": 0.1855, + "learning_rate": 2.0049771425896744e-05, + "loss": 0.0643, "step": 107290 }, { "epoch": 5.01, - "learning_rate": 1.0033753691810042e-05, - "loss": 0.0432, + "learning_rate": 2.0049303356060724e-05, + "loss": 0.0423, "step": 107295 }, { "epoch": 5.01, - "learning_rate": 1.0033284890534901e-05, - "loss": 0.1558, + "learning_rate": 2.0048835286224704e-05, + "loss": 0.1011, "step": 107300 }, { "epoch": 5.01, - "learning_rate": 1.0032816089259763e-05, - "loss": 0.2139, + "learning_rate": 2.0048367216388684e-05, + "loss": 0.1959, "step": 107305 }, { "epoch": 5.01, - "learning_rate": 1.0032347287984625e-05, - "loss": 0.123, + "learning_rate": 2.0047899146552667e-05, + "loss": 0.1008, "step": 107310 }, { "epoch": 5.01, - "learning_rate": 1.0031878486709485e-05, - "loss": 0.0071, + "learning_rate": 2.0047431076716647e-05, + "loss": 0.0105, "step": 107315 }, { "epoch": 5.01, - "learning_rate": 1.0031409685434346e-05, - "loss": 0.0337, + "learning_rate": 2.0046963006880627e-05, + "loss": 0.0261, "step": 107320 }, { "epoch": 5.01, - "learning_rate": 1.0030940884159206e-05, - "loss": 0.0941, + "learning_rate": 2.0046494937044606e-05, + "loss": 0.057, "step": 107325 }, { "epoch": 5.01, - "learning_rate": 1.0030472082884066e-05, - "loss": 0.053, + "learning_rate": 2.004602686720859e-05, + "loss": 0.0574, "step": 107330 }, { "epoch": 5.01, - "learning_rate": 1.0030003281608926e-05, - "loss": 0.0497, + "learning_rate": 2.004555879737257e-05, + "loss": 0.0444, "step": 107335 }, { "epoch": 5.01, - "learning_rate": 1.0029534480333786e-05, - "loss": 0.1278, + "learning_rate": 2.004509072753655e-05, + "loss": 0.0726, "step": 107340 }, { "epoch": 5.01, - "learning_rate": 1.0029065679058648e-05, - "loss": 0.0806, + "learning_rate": 2.0044622657700532e-05, + "loss": 0.1483, "step": 107345 }, { "epoch": 5.01, - "learning_rate": 1.0028596877783508e-05, - "loss": 0.1094, + "learning_rate": 2.004415458786451e-05, + "loss": 0.1812, "step": 107350 }, { "epoch": 5.01, - "learning_rate": 1.002812807650837e-05, - "loss": 0.2907, + "learning_rate": 2.004368651802849e-05, + "loss": 0.2602, "step": 107355 }, { "epoch": 5.01, - "learning_rate": 1.0027659275233231e-05, - "loss": 0.0953, + "learning_rate": 2.004321844819247e-05, + "loss": 0.0962, "step": 107360 }, { "epoch": 5.01, - "learning_rate": 1.002719047395809e-05, - "loss": 0.0025, + "learning_rate": 2.0042750378356452e-05, + "loss": 0.0291, "step": 107365 }, { "epoch": 5.01, - "learning_rate": 1.002672167268295e-05, - "loss": 0.0136, + "learning_rate": 2.004228230852043e-05, + "loss": 0.0285, "step": 107370 }, { "epoch": 5.01, - "learning_rate": 1.002625287140781e-05, - "loss": 0.0422, + "learning_rate": 2.004181423868441e-05, + "loss": 0.0111, "step": 107375 }, { "epoch": 5.01, - "learning_rate": 1.002578407013267e-05, - "loss": 0.0434, + "learning_rate": 2.004134616884839e-05, + "loss": 0.0751, "step": 107380 }, { "epoch": 5.01, - "learning_rate": 1.0025315268857532e-05, - "loss": 0.0872, + "learning_rate": 2.0040878099012374e-05, + "loss": 0.0388, "step": 107385 }, { "epoch": 5.01, - "learning_rate": 1.0024846467582392e-05, - "loss": 0.1152, + "learning_rate": 2.0040410029176354e-05, + "loss": 0.0616, "step": 107390 }, { "epoch": 5.01, - "learning_rate": 1.0024377666307252e-05, - "loss": 0.072, + "learning_rate": 2.0039941959340334e-05, + "loss": 0.0996, "step": 107395 }, { "epoch": 5.01, - "learning_rate": 1.0023908865032115e-05, - "loss": 0.1422, + "learning_rate": 2.0039473889504317e-05, + "loss": 0.2294, "step": 107400 }, { "epoch": 5.01, - "learning_rate": 1.0023440063756975e-05, - "loss": 0.3538, + "learning_rate": 2.0039005819668297e-05, + "loss": 0.2863, "step": 107405 }, { "epoch": 5.01, - "learning_rate": 1.0022971262481835e-05, - "loss": 0.0914, + "learning_rate": 2.0038537749832277e-05, + "loss": 0.0951, "step": 107410 }, { "epoch": 5.01, - "learning_rate": 1.0022502461206695e-05, - "loss": 0.0062, + "learning_rate": 2.0038069679996253e-05, + "loss": 0.0173, "step": 107415 }, { "epoch": 5.01, - "learning_rate": 1.0022033659931555e-05, - "loss": 0.0165, + "learning_rate": 2.0037601610160237e-05, + "loss": 0.0208, "step": 107420 }, { "epoch": 5.01, - "learning_rate": 1.0021564858656417e-05, - "loss": 0.0065, + "learning_rate": 2.0037133540324216e-05, + "loss": 0.0452, "step": 107425 }, { "epoch": 5.01, - "learning_rate": 1.0021096057381277e-05, - "loss": 0.0167, + "learning_rate": 2.0036665470488196e-05, + "loss": 0.0564, "step": 107430 }, { "epoch": 5.01, - "learning_rate": 1.0020627256106137e-05, - "loss": 0.0677, + "learning_rate": 2.0036197400652176e-05, + "loss": 0.0418, "step": 107435 }, { "epoch": 5.01, - "learning_rate": 1.0020158454830997e-05, - "loss": 0.0261, + "learning_rate": 2.003572933081616e-05, + "loss": 0.0892, "step": 107440 }, { "epoch": 5.01, - "learning_rate": 1.0019689653555858e-05, - "loss": 0.1933, + "learning_rate": 2.003526126098014e-05, + "loss": 0.1128, "step": 107445 }, { "epoch": 5.01, - "learning_rate": 1.001922085228072e-05, - "loss": 0.1438, + "learning_rate": 2.003479319114412e-05, + "loss": 0.0974, "step": 107450 }, { "epoch": 5.01, - "learning_rate": 1.001875205100558e-05, - "loss": 0.3481, + "learning_rate": 2.00343251213081e-05, + "loss": 0.3027, "step": 107455 }, { "epoch": 5.01, - "learning_rate": 1.001828324973044e-05, - "loss": 0.099, + "learning_rate": 2.0033857051472082e-05, + "loss": 0.1216, "step": 107460 }, { "epoch": 5.01, - "learning_rate": 1.0017814448455301e-05, - "loss": 0.0102, + "learning_rate": 2.003338898163606e-05, + "loss": 0.0134, "step": 107465 }, { "epoch": 5.01, - "learning_rate": 1.0017345647180161e-05, - "loss": 0.0269, + "learning_rate": 2.003292091180004e-05, + "loss": 0.0154, "step": 107470 }, { "epoch": 5.01, - "learning_rate": 1.0016876845905021e-05, - "loss": 0.0258, + "learning_rate": 2.003245284196402e-05, + "loss": 0.0212, "step": 107475 }, { "epoch": 5.02, - "learning_rate": 1.0016408044629881e-05, - "loss": 0.0617, + "learning_rate": 2.0031984772128e-05, + "loss": 0.0355, "step": 107480 }, { "epoch": 5.02, - "learning_rate": 1.0015939243354743e-05, - "loss": 0.0454, + "learning_rate": 2.003151670229198e-05, + "loss": 0.0548, "step": 107485 }, { "epoch": 5.02, - "learning_rate": 1.0015470442079603e-05, - "loss": 0.0692, + "learning_rate": 2.003104863245596e-05, + "loss": 0.0728, "step": 107490 }, { "epoch": 5.02, - "learning_rate": 1.0015001640804464e-05, - "loss": 0.0918, + "learning_rate": 2.0030580562619944e-05, + "loss": 0.1133, "step": 107495 }, { "epoch": 5.02, - "learning_rate": 1.0014532839529326e-05, - "loss": 0.1296, + "learning_rate": 2.0030112492783924e-05, + "loss": 0.2453, "step": 107500 }, { "epoch": 5.02, - "learning_rate": 1.0014064038254186e-05, - "loss": 0.1777, + "learning_rate": 2.0029644422947904e-05, + "loss": 0.2323, "step": 107505 }, { "epoch": 5.02, - "learning_rate": 1.0013595236979046e-05, - "loss": 0.1225, + "learning_rate": 2.0029176353111883e-05, + "loss": 0.0877, "step": 107510 }, { "epoch": 5.02, - "learning_rate": 1.0013126435703906e-05, - "loss": 0.023, + "learning_rate": 2.0028708283275867e-05, + "loss": 0.0271, "step": 107515 }, { "epoch": 5.02, - "learning_rate": 1.0012657634428766e-05, - "loss": 0.0163, + "learning_rate": 2.0028240213439846e-05, + "loss": 0.0266, "step": 107520 }, { "epoch": 5.02, - "learning_rate": 1.0012188833153627e-05, - "loss": 0.0298, + "learning_rate": 2.0027772143603826e-05, + "loss": 0.0286, "step": 107525 }, { "epoch": 5.02, - "learning_rate": 1.0011720031878487e-05, - "loss": 0.0466, + "learning_rate": 2.002730407376781e-05, + "loss": 0.0588, "step": 107530 }, { "epoch": 5.02, - "learning_rate": 1.0011251230603347e-05, - "loss": 0.0597, + "learning_rate": 2.002683600393179e-05, + "loss": 0.0766, "step": 107535 }, { "epoch": 5.02, - "learning_rate": 1.001078242932821e-05, - "loss": 0.0681, + "learning_rate": 2.0026367934095766e-05, + "loss": 0.0341, "step": 107540 }, { "epoch": 5.02, - "learning_rate": 1.001031362805307e-05, - "loss": 0.0633, + "learning_rate": 2.0025899864259746e-05, + "loss": 0.0538, "step": 107545 }, { "epoch": 5.02, - "learning_rate": 1.000984482677793e-05, - "loss": 0.2266, + "learning_rate": 2.002543179442373e-05, + "loss": 0.1054, "step": 107550 }, { "epoch": 5.02, - "learning_rate": 1.000937602550279e-05, - "loss": 0.1517, + "learning_rate": 2.002496372458771e-05, + "loss": 0.2869, "step": 107555 }, { "epoch": 5.02, - "learning_rate": 1.000890722422765e-05, - "loss": 0.0963, + "learning_rate": 2.002449565475169e-05, + "loss": 0.0956, "step": 107560 }, { "epoch": 5.02, - "learning_rate": 1.0008438422952512e-05, - "loss": 0.0585, + "learning_rate": 2.0024027584915668e-05, + "loss": 0.0077, "step": 107565 }, { "epoch": 5.02, - "learning_rate": 1.0007969621677372e-05, - "loss": 0.0269, + "learning_rate": 2.002355951507965e-05, + "loss": 0.0113, "step": 107570 }, { "epoch": 5.02, - "learning_rate": 1.0007500820402232e-05, - "loss": 0.0769, + "learning_rate": 2.002309144524363e-05, + "loss": 0.0333, "step": 107575 }, { "epoch": 5.02, - "learning_rate": 1.0007032019127092e-05, - "loss": 0.1234, + "learning_rate": 2.002262337540761e-05, + "loss": 0.0715, "step": 107580 }, { "epoch": 5.02, - "learning_rate": 1.0006563217851952e-05, - "loss": 0.0889, + "learning_rate": 2.0022155305571594e-05, + "loss": 0.0504, "step": 107585 }, { "epoch": 5.02, - "learning_rate": 1.0006094416576815e-05, - "loss": 0.0737, + "learning_rate": 2.0021687235735574e-05, + "loss": 0.121, "step": 107590 }, { "epoch": 5.02, - "learning_rate": 1.0005625615301675e-05, - "loss": 0.1059, + "learning_rate": 2.0021219165899554e-05, + "loss": 0.1177, "step": 107595 }, { "epoch": 5.02, - "learning_rate": 1.0005156814026535e-05, - "loss": 0.1443, + "learning_rate": 2.0020751096063534e-05, + "loss": 0.1506, "step": 107600 }, { "epoch": 5.02, - "learning_rate": 1.0004688012751396e-05, - "loss": 0.2487, + "learning_rate": 2.0020283026227514e-05, + "loss": 0.2863, "step": 107605 }, { "epoch": 5.02, - "learning_rate": 1.0004219211476256e-05, - "loss": 0.091, + "learning_rate": 2.0019814956391493e-05, + "loss": 0.0883, "step": 107610 }, { "epoch": 5.02, - "learning_rate": 1.0003750410201116e-05, - "loss": 0.0299, + "learning_rate": 2.0019346886555473e-05, + "loss": 0.0209, "step": 107615 }, { "epoch": 5.02, - "learning_rate": 1.0003281608925976e-05, - "loss": 0.0715, + "learning_rate": 2.0018878816719453e-05, + "loss": 0.0254, "step": 107620 }, { "epoch": 5.02, - "learning_rate": 1.0002812807650836e-05, - "loss": 0.0117, + "learning_rate": 2.0018410746883436e-05, + "loss": 0.0278, "step": 107625 }, { "epoch": 5.02, - "learning_rate": 1.0002344006375698e-05, - "loss": 0.0426, + "learning_rate": 2.0017942677047416e-05, + "loss": 0.0648, "step": 107630 }, { "epoch": 5.02, - "learning_rate": 1.000187520510056e-05, - "loss": 0.0876, + "learning_rate": 2.0017474607211396e-05, + "loss": 0.0521, "step": 107635 }, { "epoch": 5.02, - "learning_rate": 1.000140640382542e-05, - "loss": 0.0392, + "learning_rate": 2.001700653737538e-05, + "loss": 0.0961, "step": 107640 }, { "epoch": 5.02, - "learning_rate": 1.0000937602550281e-05, - "loss": 0.0633, + "learning_rate": 2.001653846753936e-05, + "loss": 0.0782, "step": 107645 }, { "epoch": 5.02, - "learning_rate": 1.0000468801275141e-05, - "loss": 0.0946, + "learning_rate": 2.001607039770334e-05, + "loss": 0.1107, "step": 107650 }, { "epoch": 5.02, - "learning_rate": 1e-05, - "loss": 0.2691, + "learning_rate": 2.001560232786732e-05, + "loss": 0.3322, "step": 107655 }, { "epoch": 5.02, - "learning_rate": 9.99953119872486e-06, - "loss": 0.0602, + "learning_rate": 2.00151342580313e-05, + "loss": 0.1183, "step": 107660 }, { "epoch": 5.02, - "learning_rate": 9.99906239744972e-06, - "loss": 0.0247, + "learning_rate": 2.0014666188195278e-05, + "loss": 0.0211, "step": 107665 }, { "epoch": 5.02, - "learning_rate": 9.998593596174582e-06, - "loss": 0.0206, + "learning_rate": 2.0014198118359258e-05, + "loss": 0.0234, "step": 107670 }, { "epoch": 5.02, - "learning_rate": 9.998124794899444e-06, - "loss": 0.0339, + "learning_rate": 2.0013730048523238e-05, + "loss": 0.0696, "step": 107675 }, { "epoch": 5.02, - "learning_rate": 9.997655993624304e-06, - "loss": 0.0258, + "learning_rate": 2.001326197868722e-05, + "loss": 0.062, "step": 107680 }, { "epoch": 5.02, - "learning_rate": 9.997187192349164e-06, - "loss": 0.0441, + "learning_rate": 2.00127939088512e-05, + "loss": 0.0222, "step": 107685 }, { "epoch": 5.02, - "learning_rate": 9.996718391074025e-06, - "loss": 0.0765, + "learning_rate": 2.001232583901518e-05, + "loss": 0.1283, "step": 107690 }, { "epoch": 5.03, - "learning_rate": 9.996249589798885e-06, - "loss": 0.0778, + "learning_rate": 2.001185776917916e-05, + "loss": 0.1168, "step": 107695 }, { "epoch": 5.03, - "learning_rate": 9.995780788523745e-06, - "loss": 0.1124, + "learning_rate": 2.0011389699343144e-05, + "loss": 0.1143, "step": 107700 }, { "epoch": 5.03, - "learning_rate": 9.995311987248605e-06, - "loss": 0.2551, + "learning_rate": 2.0010921629507123e-05, + "loss": 0.2185, "step": 107705 }, { "epoch": 5.03, - "learning_rate": 9.994843185973467e-06, - "loss": 0.0799, + "learning_rate": 2.0010453559671103e-05, + "loss": 0.1142, "step": 107710 }, { "epoch": 5.03, - "learning_rate": 9.994374384698328e-06, - "loss": 0.0109, + "learning_rate": 2.0009985489835086e-05, + "loss": 0.0544, "step": 107715 }, { "epoch": 5.03, - "learning_rate": 9.993905583423188e-06, - "loss": 0.0043, + "learning_rate": 2.0009517419999066e-05, + "loss": 0.0104, "step": 107720 }, { "epoch": 5.03, - "learning_rate": 9.993436782148048e-06, - "loss": 0.0223, + "learning_rate": 2.0009049350163046e-05, + "loss": 0.065, "step": 107725 }, { "epoch": 5.03, - "learning_rate": 9.992967980872908e-06, - "loss": 0.0584, + "learning_rate": 2.0008581280327023e-05, + "loss": 0.0304, "step": 107730 }, { "epoch": 5.03, - "learning_rate": 9.992499179597768e-06, - "loss": 0.1062, + "learning_rate": 2.0008113210491006e-05, + "loss": 0.0209, "step": 107735 }, { "epoch": 5.03, - "learning_rate": 9.99203037832263e-06, - "loss": 0.1359, + "learning_rate": 2.0007645140654986e-05, + "loss": 0.0691, "step": 107740 }, { "epoch": 5.03, - "learning_rate": 9.99156157704749e-06, - "loss": 0.0437, + "learning_rate": 2.0007177070818965e-05, + "loss": 0.0848, "step": 107745 }, { "epoch": 5.03, - "learning_rate": 9.991092775772351e-06, - "loss": 0.1378, + "learning_rate": 2.0006709000982945e-05, + "loss": 0.1277, "step": 107750 }, { "epoch": 5.03, - "learning_rate": 9.990623974497211e-06, - "loss": 0.2283, + "learning_rate": 2.000624093114693e-05, + "loss": 0.2697, "step": 107755 }, { "epoch": 5.03, - "learning_rate": 9.990155173222073e-06, - "loss": 0.1127, + "learning_rate": 2.0005772861310908e-05, + "loss": 0.0765, "step": 107760 }, { "epoch": 5.03, - "learning_rate": 9.989686371946933e-06, - "loss": 0.0283, + "learning_rate": 2.0005304791474888e-05, + "loss": 0.0239, "step": 107765 }, { "epoch": 5.03, - "learning_rate": 9.989217570671793e-06, - "loss": 0.0402, + "learning_rate": 2.000483672163887e-05, + "loss": 0.0392, "step": 107770 }, { "epoch": 5.03, - "learning_rate": 9.988748769396653e-06, - "loss": 0.0186, + "learning_rate": 2.000436865180285e-05, + "loss": 0.0229, "step": 107775 }, { "epoch": 5.03, - "learning_rate": 9.988279968121514e-06, - "loss": 0.014, + "learning_rate": 2.000390058196683e-05, + "loss": 0.0264, "step": 107780 }, { "epoch": 5.03, - "learning_rate": 9.987811166846374e-06, - "loss": 0.0694, + "learning_rate": 2.000343251213081e-05, + "loss": 0.0326, "step": 107785 }, { "epoch": 5.03, - "learning_rate": 9.987342365571236e-06, - "loss": 0.0693, + "learning_rate": 2.000296444229479e-05, + "loss": 0.0699, "step": 107790 }, { "epoch": 5.03, - "learning_rate": 9.986873564296096e-06, - "loss": 0.1298, + "learning_rate": 2.000249637245877e-05, + "loss": 0.0837, "step": 107795 }, { "epoch": 5.03, - "learning_rate": 9.986404763020956e-06, - "loss": 0.1687, + "learning_rate": 2.000202830262275e-05, + "loss": 0.1972, "step": 107800 }, { "epoch": 5.03, - "learning_rate": 9.985935961745816e-06, - "loss": 0.4536, + "learning_rate": 2.000156023278673e-05, + "loss": 0.2828, "step": 107805 }, { "epoch": 5.03, - "learning_rate": 9.985467160470677e-06, - "loss": 0.0928, + "learning_rate": 2.0001092162950713e-05, + "loss": 0.111, "step": 107810 }, { "epoch": 5.03, - "learning_rate": 9.984998359195537e-06, - "loss": 0.0202, + "learning_rate": 2.0000624093114693e-05, + "loss": 0.0229, "step": 107815 }, { "epoch": 5.03, - "learning_rate": 9.984529557920399e-06, - "loss": 0.0235, + "learning_rate": 2.0000156023278673e-05, + "loss": 0.0302, "step": 107820 }, { "epoch": 5.03, - "learning_rate": 9.984060756645259e-06, - "loss": 0.0206, + "learning_rate": 1.9999687953442656e-05, + "loss": 0.0128, "step": 107825 }, { "epoch": 5.03, - "learning_rate": 9.98359195537012e-06, - "loss": 0.0537, + "learning_rate": 1.9999219883606636e-05, + "loss": 0.052, "step": 107830 }, { "epoch": 5.03, - "learning_rate": 9.98312315409498e-06, - "loss": 0.0599, + "learning_rate": 1.9998751813770616e-05, + "loss": 0.0558, "step": 107835 }, { "epoch": 5.03, - "learning_rate": 9.98265435281984e-06, - "loss": 0.07, + "learning_rate": 1.9998283743934595e-05, + "loss": 0.0267, "step": 107840 }, { "epoch": 5.03, - "learning_rate": 9.9821855515447e-06, - "loss": 0.0455, + "learning_rate": 1.999781567409858e-05, + "loss": 0.1043, "step": 107845 }, { "epoch": 5.03, - "learning_rate": 9.981716750269562e-06, - "loss": 0.1554, + "learning_rate": 1.999734760426256e-05, + "loss": 0.0958, "step": 107850 }, { "epoch": 5.03, - "learning_rate": 9.981247948994422e-06, - "loss": 0.2871, + "learning_rate": 1.9996879534426535e-05, + "loss": 0.3116, "step": 107855 }, { "epoch": 5.03, - "learning_rate": 9.980779147719284e-06, - "loss": 0.0652, + "learning_rate": 1.9996411464590515e-05, + "loss": 0.1039, "step": 107860 }, { "epoch": 5.03, - "learning_rate": 9.980310346444143e-06, - "loss": 0.0221, + "learning_rate": 1.9995943394754498e-05, + "loss": 0.0166, "step": 107865 }, { "epoch": 5.03, - "learning_rate": 9.979841545169003e-06, - "loss": 0.1025, + "learning_rate": 1.9995475324918478e-05, + "loss": 0.0146, "step": 107870 }, { "epoch": 5.03, - "learning_rate": 9.979372743893863e-06, - "loss": 0.0503, + "learning_rate": 1.9995007255082458e-05, + "loss": 0.023, "step": 107875 }, { "epoch": 5.03, - "learning_rate": 9.978903942618725e-06, - "loss": 0.0458, + "learning_rate": 1.9994539185246437e-05, + "loss": 0.0598, "step": 107880 }, { "epoch": 5.03, - "learning_rate": 9.978435141343585e-06, - "loss": 0.0423, + "learning_rate": 1.999407111541042e-05, + "loss": 0.0678, "step": 107885 }, { "epoch": 5.03, - "learning_rate": 9.977966340068446e-06, - "loss": 0.0973, + "learning_rate": 1.99936030455744e-05, + "loss": 0.0628, "step": 107890 }, { "epoch": 5.03, - "learning_rate": 9.977497538793306e-06, - "loss": 0.1922, + "learning_rate": 1.999313497573838e-05, + "loss": 0.1074, "step": 107895 }, { "epoch": 5.03, - "learning_rate": 9.977028737518168e-06, - "loss": 0.1962, + "learning_rate": 1.9992666905902363e-05, + "loss": 0.1304, "step": 107900 }, { "epoch": 5.03, - "learning_rate": 9.976559936243028e-06, - "loss": 0.4812, + "learning_rate": 1.9992198836066343e-05, + "loss": 0.1148, "step": 107905 }, { "epoch": 5.04, - "learning_rate": 9.976091134967888e-06, - "loss": 0.0984, + "learning_rate": 1.9991730766230323e-05, + "loss": 0.1128, "step": 107910 }, { "epoch": 5.04, - "learning_rate": 9.975622333692748e-06, - "loss": 0.0128, + "learning_rate": 1.9991262696394303e-05, + "loss": 0.032, "step": 107915 }, { "epoch": 5.04, - "learning_rate": 9.975153532417608e-06, - "loss": 0.0496, + "learning_rate": 1.9990794626558283e-05, + "loss": 0.0157, "step": 107920 }, { "epoch": 5.04, - "learning_rate": 9.97468473114247e-06, - "loss": 0.0373, + "learning_rate": 1.9990326556722262e-05, + "loss": 0.0243, "step": 107925 }, { "epoch": 5.04, - "learning_rate": 9.974215929867331e-06, - "loss": 0.0919, + "learning_rate": 1.9989858486886242e-05, + "loss": 0.0213, "step": 107930 }, { "epoch": 5.04, - "learning_rate": 9.973747128592191e-06, - "loss": 0.0504, + "learning_rate": 1.9989390417050222e-05, + "loss": 0.0519, "step": 107935 }, { "epoch": 5.04, - "learning_rate": 9.973278327317051e-06, - "loss": 0.0204, + "learning_rate": 1.9988922347214205e-05, + "loss": 0.0214, "step": 107940 }, { "epoch": 5.04, - "learning_rate": 9.972809526041911e-06, - "loss": 0.1317, + "learning_rate": 1.9988454277378185e-05, + "loss": 0.0673, "step": 107945 }, { "epoch": 5.04, - "learning_rate": 9.972340724766772e-06, - "loss": 0.1127, + "learning_rate": 1.9987986207542165e-05, + "loss": 0.1193, "step": 107950 }, { "epoch": 5.04, - "learning_rate": 9.971871923491632e-06, - "loss": 0.1917, + "learning_rate": 1.9987518137706148e-05, + "loss": 0.2094, "step": 107955 }, { "epoch": 5.04, - "learning_rate": 9.971403122216492e-06, - "loss": 0.0728, + "learning_rate": 1.9987050067870128e-05, + "loss": 0.0492, "step": 107960 }, { "epoch": 5.04, - "learning_rate": 9.970934320941354e-06, - "loss": 0.0223, + "learning_rate": 1.9986581998034108e-05, + "loss": 0.0148, "step": 107965 }, { "epoch": 5.04, - "learning_rate": 9.970465519666216e-06, - "loss": 0.0309, + "learning_rate": 1.9986113928198088e-05, + "loss": 0.0093, "step": 107970 }, { "epoch": 5.04, - "learning_rate": 9.969996718391076e-06, - "loss": 0.028, + "learning_rate": 1.998564585836207e-05, + "loss": 0.0382, "step": 107975 }, { "epoch": 5.04, - "learning_rate": 9.969527917115935e-06, - "loss": 0.0405, + "learning_rate": 1.9985177788526047e-05, + "loss": 0.034, "step": 107980 }, { "epoch": 5.04, - "learning_rate": 9.969059115840795e-06, - "loss": 0.0097, + "learning_rate": 1.9984709718690027e-05, + "loss": 0.0545, "step": 107985 }, { "epoch": 5.04, - "learning_rate": 9.968590314565655e-06, - "loss": 0.0913, + "learning_rate": 1.9984241648854007e-05, + "loss": 0.0425, "step": 107990 }, { "epoch": 5.04, - "learning_rate": 9.968121513290517e-06, - "loss": 0.0693, + "learning_rate": 1.998377357901799e-05, + "loss": 0.117, "step": 107995 }, { "epoch": 5.04, - "learning_rate": 9.967652712015377e-06, - "loss": 0.2201, + "learning_rate": 1.998330550918197e-05, + "loss": 0.1429, "step": 108000 }, { "epoch": 5.04, - "learning_rate": 9.967183910740239e-06, - "loss": 0.1502, + "learning_rate": 1.998283743934595e-05, + "loss": 0.1566, "step": 108005 }, { "epoch": 5.04, - "learning_rate": 9.966715109465098e-06, - "loss": 0.0937, + "learning_rate": 1.9982369369509933e-05, + "loss": 0.1024, "step": 108010 }, { "epoch": 5.04, - "learning_rate": 9.96624630818996e-06, - "loss": 0.0061, + "learning_rate": 1.9981901299673913e-05, + "loss": 0.0342, "step": 108015 }, { "epoch": 5.04, - "learning_rate": 9.96577750691482e-06, - "loss": 0.0177, + "learning_rate": 1.9981433229837893e-05, + "loss": 0.0275, "step": 108020 }, { "epoch": 5.04, - "learning_rate": 9.96530870563968e-06, - "loss": 0.0185, + "learning_rate": 1.9980965160001872e-05, + "loss": 0.0547, "step": 108025 }, { "epoch": 5.04, - "learning_rate": 9.96483990436454e-06, - "loss": 0.0367, + "learning_rate": 1.9980497090165856e-05, + "loss": 0.0506, "step": 108030 }, { "epoch": 5.04, - "learning_rate": 9.964371103089402e-06, - "loss": 0.0741, + "learning_rate": 1.9980029020329835e-05, + "loss": 0.0555, "step": 108035 }, { "epoch": 5.04, - "learning_rate": 9.963902301814261e-06, - "loss": 0.0528, + "learning_rate": 1.9979560950493815e-05, + "loss": 0.1102, "step": 108040 }, { "epoch": 5.04, - "learning_rate": 9.963433500539123e-06, - "loss": 0.097, + "learning_rate": 1.997909288065779e-05, + "loss": 0.0892, "step": 108045 }, { "epoch": 5.04, - "learning_rate": 9.962964699263983e-06, - "loss": 0.0841, + "learning_rate": 1.9978624810821775e-05, + "loss": 0.1418, "step": 108050 }, { "epoch": 5.04, - "learning_rate": 9.962495897988843e-06, - "loss": 0.2782, + "learning_rate": 1.9978156740985755e-05, + "loss": 0.2388, "step": 108055 }, { "epoch": 5.04, - "learning_rate": 9.962027096713703e-06, - "loss": 0.1019, + "learning_rate": 1.9977688671149735e-05, + "loss": 0.0692, "step": 108060 }, { "epoch": 5.04, - "learning_rate": 9.961558295438565e-06, - "loss": 0.0174, + "learning_rate": 1.9977220601313714e-05, + "loss": 0.0093, "step": 108065 }, { "epoch": 5.04, - "learning_rate": 9.961089494163424e-06, - "loss": 0.0117, + "learning_rate": 1.9976752531477698e-05, + "loss": 0.0247, "step": 108070 }, { "epoch": 5.04, - "learning_rate": 9.960620692888286e-06, - "loss": 0.0491, + "learning_rate": 1.9976284461641677e-05, + "loss": 0.1178, "step": 108075 }, { "epoch": 5.04, - "learning_rate": 9.960151891613146e-06, - "loss": 0.033, + "learning_rate": 1.9975816391805657e-05, + "loss": 0.0076, "step": 108080 }, { "epoch": 5.04, - "learning_rate": 9.959683090338008e-06, - "loss": 0.0309, + "learning_rate": 1.997534832196964e-05, + "loss": 0.101, "step": 108085 }, { "epoch": 5.04, - "learning_rate": 9.959214289062868e-06, - "loss": 0.1008, + "learning_rate": 1.997488025213362e-05, + "loss": 0.0435, "step": 108090 }, { "epoch": 5.04, - "learning_rate": 9.958745487787727e-06, - "loss": 0.0941, + "learning_rate": 1.99744121822976e-05, + "loss": 0.0495, "step": 108095 }, { "epoch": 5.04, - "learning_rate": 9.958276686512587e-06, - "loss": 0.1057, + "learning_rate": 1.997394411246158e-05, + "loss": 0.0827, "step": 108100 }, { "epoch": 5.04, - "learning_rate": 9.957807885237449e-06, - "loss": 0.2477, + "learning_rate": 1.9973476042625563e-05, + "loss": 0.3196, "step": 108105 }, { "epoch": 5.04, - "learning_rate": 9.957339083962309e-06, - "loss": 0.1212, + "learning_rate": 1.997300797278954e-05, + "loss": 0.1141, "step": 108110 }, { "epoch": 5.04, - "learning_rate": 9.95687028268717e-06, - "loss": 0.0426, + "learning_rate": 1.997253990295352e-05, + "loss": 0.0326, "step": 108115 }, { "epoch": 5.05, - "learning_rate": 9.95640148141203e-06, - "loss": 0.0479, + "learning_rate": 1.99720718331175e-05, + "loss": 0.007, "step": 108120 }, { "epoch": 5.05, - "learning_rate": 9.95593268013689e-06, - "loss": 0.0258, + "learning_rate": 1.9971603763281482e-05, + "loss": 0.0376, "step": 108125 }, { "epoch": 5.05, - "learning_rate": 9.95546387886175e-06, - "loss": 0.0331, + "learning_rate": 1.9971135693445462e-05, + "loss": 0.0227, "step": 108130 }, { "epoch": 5.05, - "learning_rate": 9.954995077586612e-06, - "loss": 0.0456, + "learning_rate": 1.9970667623609442e-05, + "loss": 0.0599, "step": 108135 }, { "epoch": 5.05, - "learning_rate": 9.954526276311472e-06, - "loss": 0.0962, + "learning_rate": 1.9970199553773425e-05, + "loss": 0.1095, "step": 108140 }, { "epoch": 5.05, - "learning_rate": 9.954057475036334e-06, - "loss": 0.0962, + "learning_rate": 1.9969731483937405e-05, + "loss": 0.1097, "step": 108145 }, { "epoch": 5.05, - "learning_rate": 9.953588673761194e-06, - "loss": 0.0943, + "learning_rate": 1.9969263414101385e-05, + "loss": 0.1297, "step": 108150 }, { "epoch": 5.05, - "learning_rate": 9.953119872486055e-06, - "loss": 0.3301, + "learning_rate": 1.9968795344265365e-05, + "loss": 0.3539, "step": 108155 }, { "epoch": 5.05, - "learning_rate": 9.952651071210915e-06, - "loss": 0.0468, + "learning_rate": 1.9968327274429348e-05, + "loss": 0.0678, "step": 108160 }, { "epoch": 5.05, - "learning_rate": 9.952182269935775e-06, - "loss": 0.0289, + "learning_rate": 1.9967859204593328e-05, + "loss": 0.0034, "step": 108165 }, { "epoch": 5.05, - "learning_rate": 9.951713468660635e-06, - "loss": 0.0435, + "learning_rate": 1.9967391134757304e-05, + "loss": 0.0262, "step": 108170 }, { "epoch": 5.05, - "learning_rate": 9.951244667385495e-06, - "loss": 0.0206, + "learning_rate": 1.9966923064921284e-05, + "loss": 0.0212, "step": 108175 }, { "epoch": 5.05, - "learning_rate": 9.950775866110357e-06, - "loss": 0.0396, + "learning_rate": 1.9966454995085267e-05, + "loss": 0.0074, "step": 108180 }, { "epoch": 5.05, - "learning_rate": 9.950307064835218e-06, - "loss": 0.1861, + "learning_rate": 1.9965986925249247e-05, + "loss": 0.0373, "step": 108185 }, { "epoch": 5.05, - "learning_rate": 9.949838263560078e-06, - "loss": 0.0588, + "learning_rate": 1.9965518855413227e-05, + "loss": 0.1219, "step": 108190 }, { "epoch": 5.05, - "learning_rate": 9.949369462284938e-06, - "loss": 0.1091, + "learning_rate": 1.996505078557721e-05, + "loss": 0.0762, "step": 108195 }, { "epoch": 5.05, - "learning_rate": 9.948900661009798e-06, - "loss": 0.1173, + "learning_rate": 1.996458271574119e-05, + "loss": 0.1576, "step": 108200 }, { "epoch": 5.05, - "learning_rate": 9.94843185973466e-06, - "loss": 0.1653, + "learning_rate": 1.996411464590517e-05, + "loss": 0.4029, "step": 108205 }, { "epoch": 5.05, - "learning_rate": 9.94796305845952e-06, - "loss": 0.0869, + "learning_rate": 1.996364657606915e-05, + "loss": 0.0652, "step": 108210 }, { "epoch": 5.05, - "learning_rate": 9.94749425718438e-06, - "loss": 0.0382, + "learning_rate": 1.9963178506233133e-05, + "loss": 0.0256, "step": 108215 }, { "epoch": 5.05, - "learning_rate": 9.947025455909241e-06, - "loss": 0.0396, + "learning_rate": 1.9962710436397112e-05, + "loss": 0.0294, "step": 108220 }, { "epoch": 5.05, - "learning_rate": 9.946556654634103e-06, - "loss": 0.0266, + "learning_rate": 1.9962242366561092e-05, + "loss": 0.0296, "step": 108225 }, { "epoch": 5.05, - "learning_rate": 9.946087853358963e-06, - "loss": 0.0353, + "learning_rate": 1.9961774296725072e-05, + "loss": 0.0812, "step": 108230 }, { "epoch": 5.05, - "learning_rate": 9.945619052083823e-06, - "loss": 0.021, + "learning_rate": 1.9961306226889052e-05, + "loss": 0.0937, "step": 108235 }, { "epoch": 5.05, - "learning_rate": 9.945150250808683e-06, - "loss": 0.1304, + "learning_rate": 1.996083815705303e-05, + "loss": 0.0606, "step": 108240 }, { "epoch": 5.05, - "learning_rate": 9.944681449533542e-06, - "loss": 0.0706, + "learning_rate": 1.996037008721701e-05, + "loss": 0.061, "step": 108245 }, { "epoch": 5.05, - "learning_rate": 9.944212648258404e-06, - "loss": 0.1884, + "learning_rate": 1.9959902017380995e-05, + "loss": 0.203, "step": 108250 }, { "epoch": 5.05, - "learning_rate": 9.943743846983264e-06, - "loss": 0.286, + "learning_rate": 1.9959433947544975e-05, + "loss": 0.2459, "step": 108255 }, { "epoch": 5.05, - "learning_rate": 9.943275045708126e-06, - "loss": 0.09, + "learning_rate": 1.9958965877708954e-05, + "loss": 0.0883, "step": 108260 }, { "epoch": 5.05, - "learning_rate": 9.942806244432986e-06, - "loss": 0.0328, + "learning_rate": 1.9958497807872934e-05, + "loss": 0.0191, "step": 108265 }, { "epoch": 5.05, - "learning_rate": 9.942337443157846e-06, - "loss": 0.0323, + "learning_rate": 1.9958029738036917e-05, + "loss": 0.0246, "step": 108270 }, { "epoch": 5.05, - "learning_rate": 9.941868641882707e-06, - "loss": 0.0482, + "learning_rate": 1.9957561668200897e-05, + "loss": 0.0349, "step": 108275 }, { "epoch": 5.05, - "learning_rate": 9.941399840607567e-06, - "loss": 0.0552, + "learning_rate": 1.9957093598364877e-05, + "loss": 0.0193, "step": 108280 }, { "epoch": 5.05, - "learning_rate": 9.940931039332427e-06, - "loss": 0.032, + "learning_rate": 1.9956625528528857e-05, + "loss": 0.0166, "step": 108285 }, { "epoch": 5.05, - "learning_rate": 9.940462238057289e-06, - "loss": 0.1099, + "learning_rate": 1.995615745869284e-05, + "loss": 0.0424, "step": 108290 }, { "epoch": 5.05, - "learning_rate": 9.939993436782149e-06, - "loss": 0.0978, + "learning_rate": 1.995568938885682e-05, + "loss": 0.0996, "step": 108295 }, { "epoch": 5.05, - "learning_rate": 9.93952463550701e-06, - "loss": 0.1157, + "learning_rate": 1.9955221319020796e-05, + "loss": 0.0837, "step": 108300 }, { "epoch": 5.05, - "learning_rate": 9.93905583423187e-06, - "loss": 0.3533, + "learning_rate": 1.9954753249184776e-05, + "loss": 0.2317, "step": 108305 }, { "epoch": 5.05, - "learning_rate": 9.93858703295673e-06, - "loss": 0.1178, + "learning_rate": 1.995428517934876e-05, + "loss": 0.0631, "step": 108310 }, { "epoch": 5.05, - "learning_rate": 9.93811823168159e-06, - "loss": 0.0112, + "learning_rate": 1.995381710951274e-05, + "loss": 0.0092, "step": 108315 }, { "epoch": 5.05, - "learning_rate": 9.937649430406452e-06, - "loss": 0.025, + "learning_rate": 1.995334903967672e-05, + "loss": 0.0463, "step": 108320 }, { "epoch": 5.05, - "learning_rate": 9.937180629131312e-06, - "loss": 0.0197, + "learning_rate": 1.9952880969840702e-05, + "loss": 0.0692, "step": 108325 }, { "epoch": 5.05, - "learning_rate": 9.936711827856173e-06, - "loss": 0.0526, + "learning_rate": 1.9952412900004682e-05, + "loss": 0.1098, "step": 108330 }, { "epoch": 5.06, - "learning_rate": 9.936243026581033e-06, - "loss": 0.0522, + "learning_rate": 1.9951944830168662e-05, + "loss": 0.0232, "step": 108335 }, { "epoch": 5.06, - "learning_rate": 9.935774225305895e-06, - "loss": 0.0656, + "learning_rate": 1.995147676033264e-05, + "loss": 0.0757, "step": 108340 }, { "epoch": 5.06, - "learning_rate": 9.935305424030755e-06, - "loss": 0.0849, + "learning_rate": 1.9951008690496625e-05, + "loss": 0.0634, "step": 108345 }, { "epoch": 5.06, - "learning_rate": 9.934836622755615e-06, - "loss": 0.1757, + "learning_rate": 1.9950540620660605e-05, + "loss": 0.1501, "step": 108350 }, { "epoch": 5.06, - "learning_rate": 9.934367821480475e-06, - "loss": 0.3589, + "learning_rate": 1.9950072550824584e-05, + "loss": 0.2485, "step": 108355 }, { "epoch": 5.06, - "learning_rate": 9.933899020205336e-06, - "loss": 0.0822, + "learning_rate": 1.994960448098856e-05, + "loss": 0.074, "step": 108360 }, { "epoch": 5.06, - "learning_rate": 9.933430218930196e-06, - "loss": 0.005, + "learning_rate": 1.9949136411152544e-05, + "loss": 0.002, "step": 108365 }, { "epoch": 5.06, - "learning_rate": 9.932961417655058e-06, - "loss": 0.0121, + "learning_rate": 1.9948668341316524e-05, + "loss": 0.0157, "step": 108370 }, { "epoch": 5.06, - "learning_rate": 9.932492616379918e-06, - "loss": 0.0704, + "learning_rate": 1.9948200271480504e-05, + "loss": 0.0315, "step": 108375 }, { "epoch": 5.06, - "learning_rate": 9.932023815104778e-06, - "loss": 0.0304, + "learning_rate": 1.9947732201644487e-05, + "loss": 0.0429, "step": 108380 }, { "epoch": 5.06, - "learning_rate": 9.931555013829638e-06, - "loss": 0.0896, + "learning_rate": 1.9947264131808467e-05, + "loss": 0.0697, "step": 108385 }, { "epoch": 5.06, - "learning_rate": 9.9310862125545e-06, - "loss": 0.122, + "learning_rate": 1.9946796061972447e-05, + "loss": 0.1044, "step": 108390 }, { "epoch": 5.06, - "learning_rate": 9.930617411279359e-06, - "loss": 0.1162, + "learning_rate": 1.9946327992136426e-05, + "loss": 0.0647, "step": 108395 }, { "epoch": 5.06, - "learning_rate": 9.93014861000422e-06, - "loss": 0.166, + "learning_rate": 1.994585992230041e-05, + "loss": 0.1474, "step": 108400 }, { "epoch": 5.06, - "learning_rate": 9.92967980872908e-06, - "loss": 0.2683, + "learning_rate": 1.994539185246439e-05, + "loss": 0.2588, "step": 108405 }, { "epoch": 5.06, - "learning_rate": 9.929211007453942e-06, - "loss": 0.1068, + "learning_rate": 1.994492378262837e-05, + "loss": 0.0882, "step": 108410 }, { "epoch": 5.06, - "learning_rate": 9.928742206178802e-06, - "loss": 0.0398, + "learning_rate": 1.994445571279235e-05, + "loss": 0.0126, "step": 108415 }, { "epoch": 5.06, - "learning_rate": 9.928273404903662e-06, - "loss": 0.0374, + "learning_rate": 1.9943987642956332e-05, + "loss": 0.0829, "step": 108420 }, { "epoch": 5.06, - "learning_rate": 9.927804603628522e-06, - "loss": 0.0562, + "learning_rate": 1.994351957312031e-05, + "loss": 0.0278, "step": 108425 }, { "epoch": 5.06, - "learning_rate": 9.927335802353382e-06, - "loss": 0.0613, + "learning_rate": 1.994305150328429e-05, + "loss": 0.0716, "step": 108430 }, { "epoch": 5.06, - "learning_rate": 9.926867001078244e-06, - "loss": 0.0699, + "learning_rate": 1.994258343344827e-05, + "loss": 0.05, "step": 108435 }, { "epoch": 5.06, - "learning_rate": 9.926398199803105e-06, - "loss": 0.0459, + "learning_rate": 1.994211536361225e-05, + "loss": 0.0553, "step": 108440 }, { "epoch": 5.06, - "learning_rate": 9.925929398527965e-06, - "loss": 0.095, + "learning_rate": 1.994164729377623e-05, + "loss": 0.0836, "step": 108445 }, { "epoch": 5.06, - "learning_rate": 9.925460597252825e-06, - "loss": 0.2374, + "learning_rate": 1.994117922394021e-05, + "loss": 0.1713, "step": 108450 }, { "epoch": 5.06, - "learning_rate": 9.924991795977685e-06, - "loss": 0.2296, + "learning_rate": 1.9940711154104194e-05, + "loss": 0.333, "step": 108455 }, { "epoch": 5.06, - "learning_rate": 9.924522994702547e-06, - "loss": 0.0743, + "learning_rate": 1.9940243084268174e-05, + "loss": 0.0791, "step": 108460 }, { "epoch": 5.06, - "learning_rate": 9.924054193427407e-06, - "loss": 0.0263, + "learning_rate": 1.9939775014432154e-05, + "loss": 0.0026, "step": 108465 }, { "epoch": 5.06, - "learning_rate": 9.923585392152267e-06, - "loss": 0.0442, + "learning_rate": 1.9939306944596134e-05, + "loss": 0.0195, "step": 108470 }, { "epoch": 5.06, - "learning_rate": 9.923116590877128e-06, - "loss": 0.0705, + "learning_rate": 1.9938838874760117e-05, + "loss": 0.0614, "step": 108475 }, { "epoch": 5.06, - "learning_rate": 9.92264778960199e-06, - "loss": 0.0398, + "learning_rate": 1.9938370804924097e-05, + "loss": 0.0245, "step": 108480 }, { "epoch": 5.06, - "learning_rate": 9.92217898832685e-06, - "loss": 0.093, + "learning_rate": 1.9937902735088077e-05, + "loss": 0.1075, "step": 108485 }, { "epoch": 5.06, - "learning_rate": 9.92171018705171e-06, - "loss": 0.0881, + "learning_rate": 1.9937434665252053e-05, + "loss": 0.0982, "step": 108490 }, { "epoch": 5.06, - "learning_rate": 9.92124138577657e-06, - "loss": 0.0818, + "learning_rate": 1.9936966595416036e-05, + "loss": 0.0975, "step": 108495 }, { "epoch": 5.06, - "learning_rate": 9.92077258450143e-06, - "loss": 0.1662, + "learning_rate": 1.9936498525580016e-05, + "loss": 0.1059, "step": 108500 }, { "epoch": 5.06, - "learning_rate": 9.920303783226291e-06, - "loss": 0.2044, + "learning_rate": 1.9936030455743996e-05, + "loss": 0.2697, "step": 108505 }, { "epoch": 5.06, - "learning_rate": 9.919834981951151e-06, - "loss": 0.1161, + "learning_rate": 1.993556238590798e-05, + "loss": 0.0815, "step": 108510 }, { "epoch": 5.06, - "learning_rate": 9.919366180676013e-06, - "loss": 0.0101, + "learning_rate": 1.993509431607196e-05, + "loss": 0.0152, "step": 108515 }, { "epoch": 5.06, - "learning_rate": 9.918897379400873e-06, - "loss": 0.028, + "learning_rate": 1.993462624623594e-05, + "loss": 0.0037, "step": 108520 }, { "epoch": 5.06, - "learning_rate": 9.918428578125733e-06, - "loss": 0.0676, + "learning_rate": 1.993415817639992e-05, + "loss": 0.024, "step": 108525 }, { "epoch": 5.06, - "learning_rate": 9.917959776850594e-06, - "loss": 0.0428, + "learning_rate": 1.9933690106563902e-05, + "loss": 0.0337, "step": 108530 }, { "epoch": 5.06, - "learning_rate": 9.917490975575454e-06, - "loss": 0.0329, + "learning_rate": 1.993322203672788e-05, + "loss": 0.0328, "step": 108535 }, { "epoch": 5.06, - "learning_rate": 9.917022174300314e-06, - "loss": 0.1119, + "learning_rate": 1.993275396689186e-05, + "loss": 0.0977, "step": 108540 }, { "epoch": 5.06, - "learning_rate": 9.916553373025176e-06, - "loss": 0.1168, + "learning_rate": 1.993228589705584e-05, + "loss": 0.0749, "step": 108545 }, { "epoch": 5.07, - "learning_rate": 9.916084571750036e-06, - "loss": 0.1714, + "learning_rate": 1.993181782721982e-05, + "loss": 0.0727, "step": 108550 }, { "epoch": 5.07, - "learning_rate": 9.915615770474897e-06, - "loss": 0.2954, + "learning_rate": 1.99313497573838e-05, + "loss": 0.1907, "step": 108555 }, { "epoch": 5.07, - "learning_rate": 9.915146969199757e-06, - "loss": 0.0688, + "learning_rate": 1.993088168754778e-05, + "loss": 0.1216, "step": 108560 }, { "epoch": 5.07, - "learning_rate": 9.914678167924617e-06, - "loss": 0.0247, + "learning_rate": 1.9930413617711764e-05, + "loss": 0.015, "step": 108565 }, { "epoch": 5.07, - "learning_rate": 9.914209366649477e-06, - "loss": 0.0196, + "learning_rate": 1.9929945547875744e-05, + "loss": 0.0347, "step": 108570 }, { "epoch": 5.07, - "learning_rate": 9.913740565374339e-06, - "loss": 0.0296, + "learning_rate": 1.9929477478039723e-05, + "loss": 0.0521, "step": 108575 }, { "epoch": 5.07, - "learning_rate": 9.913271764099199e-06, - "loss": 0.031, + "learning_rate": 1.9929009408203703e-05, + "loss": 0.0345, "step": 108580 }, { "epoch": 5.07, - "learning_rate": 9.91280296282406e-06, - "loss": 0.0539, + "learning_rate": 1.9928541338367687e-05, + "loss": 0.1507, "step": 108585 }, { "epoch": 5.07, - "learning_rate": 9.91233416154892e-06, - "loss": 0.0774, + "learning_rate": 1.9928073268531666e-05, + "loss": 0.0541, "step": 108590 }, { "epoch": 5.07, - "learning_rate": 9.91186536027378e-06, - "loss": 0.0738, + "learning_rate": 1.9927605198695646e-05, + "loss": 0.1411, "step": 108595 }, { "epoch": 5.07, - "learning_rate": 9.911396558998642e-06, - "loss": 0.1213, + "learning_rate": 1.9927137128859626e-05, + "loss": 0.0966, "step": 108600 }, { "epoch": 5.07, - "learning_rate": 9.910927757723502e-06, - "loss": 0.3294, + "learning_rate": 1.992666905902361e-05, + "loss": 0.3565, "step": 108605 }, { "epoch": 5.07, - "learning_rate": 9.910458956448362e-06, - "loss": 0.1068, + "learning_rate": 1.992620098918759e-05, + "loss": 0.0761, "step": 108610 }, { "epoch": 5.07, - "learning_rate": 9.909990155173223e-06, - "loss": 0.0209, + "learning_rate": 1.9925732919351565e-05, + "loss": 0.0107, "step": 108615 }, { "epoch": 5.07, - "learning_rate": 9.909521353898083e-06, - "loss": 0.0218, + "learning_rate": 1.992526484951555e-05, + "loss": 0.0415, "step": 108620 }, { "epoch": 5.07, - "learning_rate": 9.909052552622945e-06, - "loss": 0.0547, + "learning_rate": 1.992479677967953e-05, + "loss": 0.0473, "step": 108625 }, { "epoch": 5.07, - "learning_rate": 9.908583751347805e-06, - "loss": 0.0334, + "learning_rate": 1.9924328709843508e-05, + "loss": 0.0466, "step": 108630 }, { "epoch": 5.07, - "learning_rate": 9.908114950072665e-06, - "loss": 0.0353, + "learning_rate": 1.9923860640007488e-05, + "loss": 0.0748, "step": 108635 }, { "epoch": 5.07, - "learning_rate": 9.907646148797525e-06, - "loss": 0.0555, + "learning_rate": 1.992339257017147e-05, + "loss": 0.0615, "step": 108640 }, { "epoch": 5.07, - "learning_rate": 9.907177347522386e-06, - "loss": 0.1224, + "learning_rate": 1.992292450033545e-05, + "loss": 0.0818, "step": 108645 }, { "epoch": 5.07, - "learning_rate": 9.906708546247246e-06, - "loss": 0.2319, + "learning_rate": 1.992245643049943e-05, + "loss": 0.1387, "step": 108650 }, { "epoch": 5.07, - "learning_rate": 9.906239744972108e-06, - "loss": 0.326, + "learning_rate": 1.992198836066341e-05, + "loss": 0.1599, "step": 108655 }, { "epoch": 5.07, - "learning_rate": 9.905770943696968e-06, - "loss": 0.1049, + "learning_rate": 1.9921520290827394e-05, + "loss": 0.092, "step": 108660 }, { "epoch": 5.07, - "learning_rate": 9.90530214242183e-06, - "loss": 0.0455, + "learning_rate": 1.9921052220991374e-05, + "loss": 0.0035, "step": 108665 }, { "epoch": 5.07, - "learning_rate": 9.90483334114669e-06, - "loss": 0.0231, + "learning_rate": 1.9920584151155354e-05, + "loss": 0.0742, "step": 108670 }, { "epoch": 5.07, - "learning_rate": 9.90436453987155e-06, - "loss": 0.068, + "learning_rate": 1.992011608131933e-05, + "loss": 0.0384, "step": 108675 }, { "epoch": 5.07, - "learning_rate": 9.90389573859641e-06, - "loss": 0.0369, + "learning_rate": 1.9919648011483313e-05, + "loss": 0.062, "step": 108680 }, { "epoch": 5.07, - "learning_rate": 9.903426937321269e-06, - "loss": 0.0645, + "learning_rate": 1.9919179941647293e-05, + "loss": 0.0603, "step": 108685 }, { "epoch": 5.07, - "learning_rate": 9.90295813604613e-06, - "loss": 0.172, + "learning_rate": 1.9918711871811273e-05, + "loss": 0.0544, "step": 108690 }, { "epoch": 5.07, - "learning_rate": 9.902489334770992e-06, - "loss": 0.1062, + "learning_rate": 1.9918243801975256e-05, + "loss": 0.0725, "step": 108695 }, { "epoch": 5.07, - "learning_rate": 9.902020533495852e-06, - "loss": 0.1049, + "learning_rate": 1.9917775732139236e-05, + "loss": 0.0622, "step": 108700 }, { "epoch": 5.07, - "learning_rate": 9.901551732220712e-06, - "loss": 0.1897, + "learning_rate": 1.9917307662303216e-05, + "loss": 0.1814, "step": 108705 }, { "epoch": 5.07, - "learning_rate": 9.901082930945572e-06, - "loss": 0.083, + "learning_rate": 1.9916839592467196e-05, + "loss": 0.1081, "step": 108710 }, { "epoch": 5.07, - "learning_rate": 9.900614129670434e-06, - "loss": 0.0249, + "learning_rate": 1.991637152263118e-05, + "loss": 0.0102, "step": 108715 }, { "epoch": 5.07, - "learning_rate": 9.900145328395294e-06, - "loss": 0.0172, + "learning_rate": 1.991590345279516e-05, + "loss": 0.0491, "step": 108720 }, { "epoch": 5.07, - "learning_rate": 9.899676527120154e-06, - "loss": 0.0507, + "learning_rate": 1.991543538295914e-05, + "loss": 0.0383, "step": 108725 }, { "epoch": 5.07, - "learning_rate": 9.899207725845015e-06, - "loss": 0.1042, + "learning_rate": 1.9914967313123118e-05, + "loss": 0.0566, "step": 108730 }, { "epoch": 5.07, - "learning_rate": 9.898738924569877e-06, - "loss": 0.0282, + "learning_rate": 1.99144992432871e-05, + "loss": 0.0968, "step": 108735 }, { "epoch": 5.07, - "learning_rate": 9.898270123294737e-06, - "loss": 0.0746, + "learning_rate": 1.9914031173451078e-05, + "loss": 0.0376, "step": 108740 }, { "epoch": 5.07, - "learning_rate": 9.897801322019597e-06, - "loss": 0.2293, + "learning_rate": 1.9913563103615058e-05, + "loss": 0.0629, "step": 108745 }, { "epoch": 5.07, - "learning_rate": 9.897332520744457e-06, - "loss": 0.1048, + "learning_rate": 1.991309503377904e-05, + "loss": 0.1504, "step": 108750 }, { "epoch": 5.07, - "learning_rate": 9.896863719469317e-06, - "loss": 0.2079, + "learning_rate": 1.991262696394302e-05, + "loss": 0.1772, "step": 108755 }, { "epoch": 5.07, - "learning_rate": 9.896394918194178e-06, - "loss": 0.1101, + "learning_rate": 1.9912158894107e-05, + "loss": 0.0588, "step": 108760 }, { "epoch": 5.08, - "learning_rate": 9.895926116919038e-06, - "loss": 0.0265, + "learning_rate": 1.991169082427098e-05, + "loss": 0.0283, "step": 108765 }, { "epoch": 5.08, - "learning_rate": 9.8954573156439e-06, - "loss": 0.0457, + "learning_rate": 1.9911222754434963e-05, + "loss": 0.0323, "step": 108770 }, { "epoch": 5.08, - "learning_rate": 9.89498851436876e-06, - "loss": 0.0172, + "learning_rate": 1.9910754684598943e-05, + "loss": 0.0519, "step": 108775 }, { "epoch": 5.08, - "learning_rate": 9.89451971309362e-06, - "loss": 0.063, + "learning_rate": 1.9910286614762923e-05, + "loss": 0.0487, "step": 108780 }, { "epoch": 5.08, - "learning_rate": 9.894050911818481e-06, - "loss": 0.0205, + "learning_rate": 1.9909818544926903e-05, + "loss": 0.0511, "step": 108785 }, { "epoch": 5.08, - "learning_rate": 9.893582110543341e-06, - "loss": 0.0684, + "learning_rate": 1.9909350475090886e-05, + "loss": 0.1119, "step": 108790 }, { "epoch": 5.08, - "learning_rate": 9.893113309268201e-06, - "loss": 0.122, + "learning_rate": 1.9908882405254866e-05, + "loss": 0.1324, "step": 108795 }, { "epoch": 5.08, - "learning_rate": 9.892644507993063e-06, - "loss": 0.1329, + "learning_rate": 1.9908414335418846e-05, + "loss": 0.0891, "step": 108800 }, { "epoch": 5.08, - "learning_rate": 9.892175706717923e-06, - "loss": 0.4706, + "learning_rate": 1.9907946265582826e-05, + "loss": 0.3014, "step": 108805 }, { "epoch": 5.08, - "learning_rate": 9.891706905442784e-06, - "loss": 0.0969, + "learning_rate": 1.9907478195746805e-05, + "loss": 0.0648, "step": 108810 }, { "epoch": 5.08, - "learning_rate": 9.891238104167644e-06, - "loss": 0.0258, + "learning_rate": 1.9907010125910785e-05, + "loss": 0.0412, "step": 108815 }, { "epoch": 5.08, - "learning_rate": 9.890769302892504e-06, - "loss": 0.0226, + "learning_rate": 1.9906542056074765e-05, + "loss": 0.0233, "step": 108820 }, { "epoch": 5.08, - "learning_rate": 9.890300501617364e-06, - "loss": 0.0337, + "learning_rate": 1.9906073986238748e-05, + "loss": 0.0372, "step": 108825 }, { "epoch": 5.08, - "learning_rate": 9.889831700342226e-06, - "loss": 0.0366, + "learning_rate": 1.9905605916402728e-05, + "loss": 0.021, "step": 108830 }, { "epoch": 5.08, - "learning_rate": 9.889362899067086e-06, - "loss": 0.0877, + "learning_rate": 1.9905137846566708e-05, + "loss": 0.0671, "step": 108835 }, { "epoch": 5.08, - "learning_rate": 9.888894097791947e-06, - "loss": 0.099, + "learning_rate": 1.9904669776730688e-05, + "loss": 0.0496, "step": 108840 }, { "epoch": 5.08, - "learning_rate": 9.888425296516807e-06, - "loss": 0.0709, + "learning_rate": 1.990420170689467e-05, + "loss": 0.096, "step": 108845 }, { "epoch": 5.08, - "learning_rate": 9.887956495241667e-06, - "loss": 0.0783, + "learning_rate": 1.990373363705865e-05, + "loss": 0.2162, "step": 108850 }, { "epoch": 5.08, - "learning_rate": 9.887487693966529e-06, - "loss": 0.3613, + "learning_rate": 1.990326556722263e-05, + "loss": 0.2876, "step": 108855 }, { "epoch": 5.08, - "learning_rate": 9.887018892691389e-06, - "loss": 0.1013, + "learning_rate": 1.9902797497386614e-05, + "loss": 0.1049, "step": 108860 }, { "epoch": 5.08, - "learning_rate": 9.886550091416249e-06, - "loss": 0.0218, + "learning_rate": 1.990232942755059e-05, + "loss": 0.0321, "step": 108865 }, { "epoch": 5.08, - "learning_rate": 9.88608129014111e-06, - "loss": 0.0138, + "learning_rate": 1.990186135771457e-05, + "loss": 0.0234, "step": 108870 }, { "epoch": 5.08, - "learning_rate": 9.88561248886597e-06, - "loss": 0.011, + "learning_rate": 1.990139328787855e-05, + "loss": 0.0315, "step": 108875 }, { "epoch": 5.08, - "learning_rate": 9.885143687590832e-06, - "loss": 0.0332, + "learning_rate": 1.9900925218042533e-05, + "loss": 0.0524, "step": 108880 }, { "epoch": 5.08, - "learning_rate": 9.884674886315692e-06, - "loss": 0.0582, + "learning_rate": 1.9900457148206513e-05, + "loss": 0.0736, "step": 108885 }, { "epoch": 5.08, - "learning_rate": 9.884206085040552e-06, - "loss": 0.0643, + "learning_rate": 1.9899989078370493e-05, + "loss": 0.0548, "step": 108890 }, { "epoch": 5.08, - "learning_rate": 9.883737283765412e-06, - "loss": 0.0878, + "learning_rate": 1.9899521008534472e-05, + "loss": 0.0471, "step": 108895 }, { "epoch": 5.08, - "learning_rate": 9.883268482490273e-06, - "loss": 0.1158, + "learning_rate": 1.9899052938698456e-05, + "loss": 0.0777, "step": 108900 }, { "epoch": 5.08, - "learning_rate": 9.882799681215133e-06, - "loss": 0.1516, + "learning_rate": 1.9898584868862436e-05, + "loss": 0.2967, "step": 108905 }, { "epoch": 5.08, - "learning_rate": 9.882330879939995e-06, - "loss": 0.1366, + "learning_rate": 1.9898116799026415e-05, + "loss": 0.0762, "step": 108910 }, { "epoch": 5.08, - "learning_rate": 9.881862078664855e-06, - "loss": 0.0155, + "learning_rate": 1.9897648729190395e-05, + "loss": 0.0249, "step": 108915 }, { "epoch": 5.08, - "learning_rate": 9.881393277389715e-06, - "loss": 0.017, + "learning_rate": 1.989718065935438e-05, + "loss": 0.0066, "step": 108920 }, { "epoch": 5.08, - "learning_rate": 9.880924476114576e-06, - "loss": 0.0443, + "learning_rate": 1.9896712589518358e-05, + "loss": 0.0401, "step": 108925 }, { "epoch": 5.08, - "learning_rate": 9.880455674839436e-06, - "loss": 0.0512, + "learning_rate": 1.9896244519682335e-05, + "loss": 0.0177, "step": 108930 }, { "epoch": 5.08, - "learning_rate": 9.879986873564296e-06, - "loss": 0.0061, + "learning_rate": 1.9895776449846318e-05, + "loss": 0.053, "step": 108935 }, { "epoch": 5.08, - "learning_rate": 9.879518072289156e-06, - "loss": 0.2454, + "learning_rate": 1.9895308380010298e-05, + "loss": 0.0755, "step": 108940 }, { "epoch": 5.08, - "learning_rate": 9.879049271014018e-06, - "loss": 0.1088, + "learning_rate": 1.9894840310174277e-05, + "loss": 0.0829, "step": 108945 }, { "epoch": 5.08, - "learning_rate": 9.87858046973888e-06, - "loss": 0.1084, + "learning_rate": 1.9894372240338257e-05, + "loss": 0.054, "step": 108950 }, { "epoch": 5.08, - "learning_rate": 9.87811166846374e-06, - "loss": 0.3174, + "learning_rate": 1.989390417050224e-05, + "loss": 0.2801, "step": 108955 }, { "epoch": 5.08, - "learning_rate": 9.8776428671886e-06, - "loss": 0.1272, + "learning_rate": 1.989343610066622e-05, + "loss": 0.1002, "step": 108960 }, { "epoch": 5.08, - "learning_rate": 9.87717406591346e-06, - "loss": 0.0291, + "learning_rate": 1.98929680308302e-05, + "loss": 0.0489, "step": 108965 }, { "epoch": 5.08, - "learning_rate": 9.876705264638321e-06, - "loss": 0.0765, + "learning_rate": 1.989249996099418e-05, + "loss": 0.0142, "step": 108970 }, { "epoch": 5.08, - "learning_rate": 9.876236463363181e-06, - "loss": 0.0419, + "learning_rate": 1.9892031891158163e-05, + "loss": 0.0266, "step": 108975 }, { "epoch": 5.09, - "learning_rate": 9.87576766208804e-06, - "loss": 0.0655, + "learning_rate": 1.9891563821322143e-05, + "loss": 0.009, "step": 108980 }, { "epoch": 5.09, - "learning_rate": 9.875298860812902e-06, - "loss": 0.0704, + "learning_rate": 1.9891095751486123e-05, + "loss": 0.0245, "step": 108985 }, { "epoch": 5.09, - "learning_rate": 9.874830059537764e-06, - "loss": 0.0645, + "learning_rate": 1.9890627681650103e-05, + "loss": 0.0302, "step": 108990 }, { "epoch": 5.09, - "learning_rate": 9.874361258262624e-06, - "loss": 0.1264, + "learning_rate": 1.9890159611814082e-05, + "loss": 0.1848, "step": 108995 }, { "epoch": 5.09, - "learning_rate": 9.873892456987484e-06, - "loss": 0.0886, + "learning_rate": 1.9889691541978062e-05, + "loss": 0.1523, "step": 109000 }, { "epoch": 5.09, - "learning_rate": 9.873423655712344e-06, - "loss": 0.2432, + "learning_rate": 1.9889223472142042e-05, + "loss": 0.2539, "step": 109005 }, { "epoch": 5.09, - "learning_rate": 9.872954854437204e-06, - "loss": 0.1445, + "learning_rate": 1.9888755402306025e-05, + "loss": 0.0602, "step": 109010 }, { "epoch": 5.09, - "learning_rate": 9.872486053162065e-06, - "loss": 0.0159, + "learning_rate": 1.9888287332470005e-05, + "loss": 0.0185, "step": 109015 }, { "epoch": 5.09, - "learning_rate": 9.872017251886925e-06, - "loss": 0.0234, + "learning_rate": 1.9887819262633985e-05, + "loss": 0.0311, "step": 109020 }, { "epoch": 5.09, - "learning_rate": 9.871548450611787e-06, - "loss": 0.0951, + "learning_rate": 1.9887351192797965e-05, + "loss": 0.0301, "step": 109025 }, { "epoch": 5.09, - "learning_rate": 9.871079649336647e-06, - "loss": 0.0328, + "learning_rate": 1.9886883122961948e-05, + "loss": 0.0487, "step": 109030 }, { "epoch": 5.09, - "learning_rate": 9.870610848061507e-06, - "loss": 0.0443, + "learning_rate": 1.9886415053125928e-05, + "loss": 0.0442, "step": 109035 }, { "epoch": 5.09, - "learning_rate": 9.870142046786368e-06, - "loss": 0.0716, + "learning_rate": 1.9885946983289908e-05, + "loss": 0.0629, "step": 109040 }, { "epoch": 5.09, - "learning_rate": 9.869673245511228e-06, - "loss": 0.1568, + "learning_rate": 1.988547891345389e-05, + "loss": 0.0322, "step": 109045 }, { "epoch": 5.09, - "learning_rate": 9.869204444236088e-06, - "loss": 0.0911, + "learning_rate": 1.988501084361787e-05, + "loss": 0.1423, "step": 109050 }, { "epoch": 5.09, - "learning_rate": 9.86873564296095e-06, - "loss": 0.3134, + "learning_rate": 1.9884542773781847e-05, + "loss": 0.1959, "step": 109055 }, { "epoch": 5.09, - "learning_rate": 9.86826684168581e-06, - "loss": 0.0868, + "learning_rate": 1.9884074703945827e-05, + "loss": 0.0796, "step": 109060 }, { "epoch": 5.09, - "learning_rate": 9.867798040410672e-06, - "loss": 0.0205, + "learning_rate": 1.988360663410981e-05, + "loss": 0.0153, "step": 109065 }, { "epoch": 5.09, - "learning_rate": 9.867329239135531e-06, - "loss": 0.0272, + "learning_rate": 1.988313856427379e-05, + "loss": 0.0432, "step": 109070 }, { "epoch": 5.09, - "learning_rate": 9.866860437860391e-06, - "loss": 0.0647, + "learning_rate": 1.988267049443777e-05, + "loss": 0.0233, "step": 109075 }, { "epoch": 5.09, - "learning_rate": 9.866391636585251e-06, - "loss": 0.0704, + "learning_rate": 1.988220242460175e-05, + "loss": 0.072, "step": 109080 }, { "epoch": 5.09, - "learning_rate": 9.865922835310113e-06, - "loss": 0.1365, + "learning_rate": 1.9881734354765733e-05, + "loss": 0.0219, "step": 109085 }, { "epoch": 5.09, - "learning_rate": 9.865454034034973e-06, - "loss": 0.0343, + "learning_rate": 1.9881266284929712e-05, + "loss": 0.0939, "step": 109090 }, { "epoch": 5.09, - "learning_rate": 9.864985232759835e-06, - "loss": 0.0741, + "learning_rate": 1.9880798215093692e-05, + "loss": 0.1399, "step": 109095 }, { "epoch": 5.09, - "learning_rate": 9.864516431484694e-06, - "loss": 0.0869, + "learning_rate": 1.9880330145257672e-05, + "loss": 0.0927, "step": 109100 }, { "epoch": 5.09, - "learning_rate": 9.864047630209554e-06, - "loss": 0.1493, + "learning_rate": 1.9879862075421655e-05, + "loss": 0.3199, "step": 109105 }, { "epoch": 5.09, - "learning_rate": 9.863578828934416e-06, - "loss": 0.0724, + "learning_rate": 1.9879394005585635e-05, + "loss": 0.0955, "step": 109110 }, { "epoch": 5.09, - "learning_rate": 9.863110027659276e-06, - "loss": 0.0082, + "learning_rate": 1.9878925935749615e-05, + "loss": 0.0204, "step": 109115 }, { "epoch": 5.09, - "learning_rate": 9.862641226384136e-06, - "loss": 0.0439, + "learning_rate": 1.9878457865913595e-05, + "loss": 0.0235, "step": 109120 }, { "epoch": 5.09, - "learning_rate": 9.862172425108997e-06, - "loss": 0.0226, + "learning_rate": 1.9877989796077575e-05, + "loss": 0.0211, "step": 109125 }, { "epoch": 5.09, - "learning_rate": 9.861703623833857e-06, - "loss": 0.0362, + "learning_rate": 1.9877521726241554e-05, + "loss": 0.0828, "step": 109130 }, { "epoch": 5.09, - "learning_rate": 9.861234822558719e-06, - "loss": 0.0551, + "learning_rate": 1.9877053656405534e-05, + "loss": 0.0352, "step": 109135 }, { "epoch": 5.09, - "learning_rate": 9.860766021283579e-06, - "loss": 0.0948, + "learning_rate": 1.9876585586569517e-05, + "loss": 0.0946, "step": 109140 }, { "epoch": 5.09, - "learning_rate": 9.860297220008439e-06, - "loss": 0.1267, + "learning_rate": 1.9876117516733497e-05, + "loss": 0.117, "step": 109145 }, { "epoch": 5.09, - "learning_rate": 9.859828418733299e-06, - "loss": 0.1699, + "learning_rate": 1.9875649446897477e-05, + "loss": 0.0887, "step": 109150 }, { "epoch": 5.09, - "learning_rate": 9.85935961745816e-06, - "loss": 0.3653, + "learning_rate": 1.9875181377061457e-05, + "loss": 0.3419, "step": 109155 }, { "epoch": 5.09, - "learning_rate": 9.85889081618302e-06, - "loss": 0.0848, + "learning_rate": 1.987471330722544e-05, + "loss": 0.0958, "step": 109160 }, { "epoch": 5.09, - "learning_rate": 9.858422014907882e-06, - "loss": 0.0162, + "learning_rate": 1.987424523738942e-05, + "loss": 0.0051, "step": 109165 }, { "epoch": 5.09, - "learning_rate": 9.857953213632742e-06, - "loss": 0.0587, + "learning_rate": 1.98737771675534e-05, + "loss": 0.0062, "step": 109170 }, { "epoch": 5.09, - "learning_rate": 9.857484412357602e-06, - "loss": 0.0293, + "learning_rate": 1.9873309097717383e-05, + "loss": 0.0651, "step": 109175 }, { "epoch": 5.09, - "learning_rate": 9.857015611082464e-06, - "loss": 0.0354, + "learning_rate": 1.987284102788136e-05, + "loss": 0.0492, "step": 109180 }, { "epoch": 5.09, - "learning_rate": 9.856546809807323e-06, - "loss": 0.0765, + "learning_rate": 1.987237295804534e-05, + "loss": 0.0565, "step": 109185 }, { "epoch": 5.09, - "learning_rate": 9.856078008532183e-06, - "loss": 0.0687, + "learning_rate": 1.987190488820932e-05, + "loss": 0.0249, "step": 109190 }, { "epoch": 5.1, - "learning_rate": 9.855609207257043e-06, - "loss": 0.1037, + "learning_rate": 1.9871436818373302e-05, + "loss": 0.0683, "step": 109195 }, { "epoch": 5.1, - "learning_rate": 9.855140405981905e-06, - "loss": 0.0823, + "learning_rate": 1.9870968748537282e-05, + "loss": 0.0964, "step": 109200 }, { "epoch": 5.1, - "learning_rate": 9.854671604706767e-06, - "loss": 0.1663, + "learning_rate": 1.9870500678701262e-05, + "loss": 0.2278, "step": 109205 }, { "epoch": 5.1, - "learning_rate": 9.854202803431627e-06, - "loss": 0.0917, + "learning_rate": 1.987003260886524e-05, + "loss": 0.1113, "step": 109210 }, { "epoch": 5.1, - "learning_rate": 9.853734002156486e-06, - "loss": 0.0222, + "learning_rate": 1.9869564539029225e-05, + "loss": 0.0046, "step": 109215 }, { "epoch": 5.1, - "learning_rate": 9.853265200881346e-06, - "loss": 0.0279, + "learning_rate": 1.9869096469193205e-05, + "loss": 0.0305, "step": 109220 }, { "epoch": 5.1, - "learning_rate": 9.852796399606208e-06, - "loss": 0.0287, + "learning_rate": 1.9868628399357184e-05, + "loss": 0.0536, "step": 109225 }, { "epoch": 5.1, - "learning_rate": 9.852327598331068e-06, - "loss": 0.0478, + "learning_rate": 1.9868160329521168e-05, + "loss": 0.07, "step": 109230 }, { "epoch": 5.1, - "learning_rate": 9.851858797055928e-06, - "loss": 0.0536, + "learning_rate": 1.9867692259685148e-05, + "loss": 0.0396, "step": 109235 }, { "epoch": 5.1, - "learning_rate": 9.85138999578079e-06, - "loss": 0.1013, + "learning_rate": 1.9867224189849127e-05, + "loss": 0.0604, "step": 109240 }, { "epoch": 5.1, - "learning_rate": 9.85092119450565e-06, - "loss": 0.0836, + "learning_rate": 1.9866756120013104e-05, + "loss": 0.0755, "step": 109245 }, { "epoch": 5.1, - "learning_rate": 9.850452393230511e-06, - "loss": 0.1171, + "learning_rate": 1.9866288050177087e-05, + "loss": 0.0472, "step": 109250 }, { "epoch": 5.1, - "learning_rate": 9.849983591955371e-06, - "loss": 0.2507, + "learning_rate": 1.9865819980341067e-05, + "loss": 0.3291, "step": 109255 }, { "epoch": 5.1, - "learning_rate": 9.849514790680231e-06, - "loss": 0.1138, + "learning_rate": 1.9865351910505047e-05, + "loss": 0.0664, "step": 109260 }, { "epoch": 5.1, - "learning_rate": 9.849045989405091e-06, - "loss": 0.0085, + "learning_rate": 1.9864883840669026e-05, + "loss": 0.0553, "step": 109265 }, { "epoch": 5.1, - "learning_rate": 9.848577188129953e-06, - "loss": 0.0624, + "learning_rate": 1.986441577083301e-05, + "loss": 0.0179, "step": 109270 }, { "epoch": 5.1, - "learning_rate": 9.848108386854814e-06, - "loss": 0.0254, + "learning_rate": 1.986394770099699e-05, + "loss": 0.0309, "step": 109275 }, { "epoch": 5.1, - "learning_rate": 9.847639585579674e-06, - "loss": 0.025, + "learning_rate": 1.986347963116097e-05, + "loss": 0.1217, "step": 109280 }, { "epoch": 5.1, - "learning_rate": 9.847170784304534e-06, - "loss": 0.1019, + "learning_rate": 1.986301156132495e-05, + "loss": 0.0098, "step": 109285 }, { "epoch": 5.1, - "learning_rate": 9.846701983029394e-06, - "loss": 0.0681, + "learning_rate": 1.9862543491488932e-05, + "loss": 0.0419, "step": 109290 }, { "epoch": 5.1, - "learning_rate": 9.846233181754256e-06, - "loss": 0.1788, + "learning_rate": 1.9862075421652912e-05, + "loss": 0.1041, "step": 109295 }, { "epoch": 5.1, - "learning_rate": 9.845764380479115e-06, - "loss": 0.121, + "learning_rate": 1.9861607351816892e-05, + "loss": 0.1852, "step": 109300 }, { "epoch": 5.1, - "learning_rate": 9.845295579203975e-06, - "loss": 0.2625, + "learning_rate": 1.9861139281980875e-05, + "loss": 0.2511, "step": 109305 }, { "epoch": 5.1, - "learning_rate": 9.844826777928837e-06, - "loss": 0.0834, + "learning_rate": 1.986067121214485e-05, + "loss": 0.1335, "step": 109310 }, { "epoch": 5.1, - "learning_rate": 9.844357976653699e-06, - "loss": 0.0291, + "learning_rate": 1.986020314230883e-05, + "loss": 0.0228, "step": 109315 }, { "epoch": 5.1, - "learning_rate": 9.843889175378559e-06, - "loss": 0.0222, + "learning_rate": 1.985973507247281e-05, + "loss": 0.0262, "step": 109320 }, { "epoch": 5.1, - "learning_rate": 9.843420374103419e-06, - "loss": 0.0092, + "learning_rate": 1.9859267002636794e-05, + "loss": 0.0537, "step": 109325 }, { "epoch": 5.1, - "learning_rate": 9.842951572828278e-06, - "loss": 0.0696, + "learning_rate": 1.9858798932800774e-05, + "loss": 0.0259, "step": 109330 }, { "epoch": 5.1, - "learning_rate": 9.842482771553138e-06, - "loss": 0.0675, + "learning_rate": 1.9858330862964754e-05, + "loss": 0.0377, "step": 109335 }, { "epoch": 5.1, - "learning_rate": 9.842013970278e-06, - "loss": 0.0944, + "learning_rate": 1.9857862793128734e-05, + "loss": 0.0861, "step": 109340 }, { "epoch": 5.1, - "learning_rate": 9.84154516900286e-06, - "loss": 0.1017, + "learning_rate": 1.9857394723292717e-05, + "loss": 0.0825, "step": 109345 }, { "epoch": 5.1, - "learning_rate": 9.841076367727722e-06, - "loss": 0.0985, + "learning_rate": 1.9856926653456697e-05, + "loss": 0.1216, "step": 109350 }, { "epoch": 5.1, - "learning_rate": 9.840607566452582e-06, - "loss": 0.1998, + "learning_rate": 1.9856458583620677e-05, + "loss": 0.3061, "step": 109355 }, { "epoch": 5.1, - "learning_rate": 9.840138765177441e-06, - "loss": 0.0572, + "learning_rate": 1.985599051378466e-05, + "loss": 0.0897, "step": 109360 }, { "epoch": 5.1, - "learning_rate": 9.839669963902303e-06, - "loss": 0.0025, + "learning_rate": 1.985552244394864e-05, + "loss": 0.0143, "step": 109365 }, { "epoch": 5.1, - "learning_rate": 9.839201162627163e-06, - "loss": 0.0281, + "learning_rate": 1.9855054374112616e-05, + "loss": 0.0774, "step": 109370 }, { "epoch": 5.1, - "learning_rate": 9.838732361352023e-06, - "loss": 0.0602, + "learning_rate": 1.9854586304276596e-05, + "loss": 0.051, "step": 109375 }, { "epoch": 5.1, - "learning_rate": 9.838263560076885e-06, - "loss": 0.0456, + "learning_rate": 1.985411823444058e-05, + "loss": 0.0607, "step": 109380 }, { "epoch": 5.1, - "learning_rate": 9.837794758801745e-06, - "loss": 0.0741, + "learning_rate": 1.985365016460456e-05, + "loss": 0.0335, "step": 109385 }, { "epoch": 5.1, - "learning_rate": 9.837325957526606e-06, - "loss": 0.0782, + "learning_rate": 1.985318209476854e-05, + "loss": 0.0198, "step": 109390 }, { "epoch": 5.1, - "learning_rate": 9.836857156251466e-06, - "loss": 0.1094, + "learning_rate": 1.985271402493252e-05, + "loss": 0.107, "step": 109395 }, { "epoch": 5.1, - "learning_rate": 9.836388354976326e-06, - "loss": 0.1464, + "learning_rate": 1.9852245955096502e-05, + "loss": 0.0944, "step": 109400 }, { "epoch": 5.1, - "learning_rate": 9.835919553701186e-06, - "loss": 0.2158, + "learning_rate": 1.985177788526048e-05, + "loss": 0.1961, "step": 109405 }, { "epoch": 5.11, - "learning_rate": 9.835450752426048e-06, - "loss": 0.0742, + "learning_rate": 1.985130981542446e-05, + "loss": 0.1435, "step": 109410 }, { "epoch": 5.11, - "learning_rate": 9.834981951150908e-06, - "loss": 0.0242, + "learning_rate": 1.9850841745588445e-05, + "loss": 0.0028, "step": 109415 }, { "epoch": 5.11, - "learning_rate": 9.834513149875769e-06, - "loss": 0.0241, + "learning_rate": 1.9850373675752424e-05, + "loss": 0.0088, "step": 109420 }, { "epoch": 5.11, - "learning_rate": 9.834044348600629e-06, - "loss": 0.044, + "learning_rate": 1.9849905605916404e-05, + "loss": 0.0605, "step": 109425 }, { "epoch": 5.11, - "learning_rate": 9.833575547325489e-06, - "loss": 0.0221, + "learning_rate": 1.9849437536080384e-05, + "loss": 0.0319, "step": 109430 }, { "epoch": 5.11, - "learning_rate": 9.83310674605035e-06, - "loss": 0.033, + "learning_rate": 1.9848969466244364e-05, + "loss": 0.0557, "step": 109435 }, { "epoch": 5.11, - "learning_rate": 9.83263794477521e-06, - "loss": 0.0301, + "learning_rate": 1.9848501396408344e-05, + "loss": 0.0314, "step": 109440 }, { "epoch": 5.11, - "learning_rate": 9.83216914350007e-06, - "loss": 0.1131, + "learning_rate": 1.9848033326572324e-05, + "loss": 0.0682, "step": 109445 }, { "epoch": 5.11, - "learning_rate": 9.831700342224932e-06, - "loss": 0.0834, + "learning_rate": 1.9847565256736303e-05, + "loss": 0.0871, "step": 109450 }, { "epoch": 5.11, - "learning_rate": 9.831231540949792e-06, - "loss": 0.4049, + "learning_rate": 1.9847097186900287e-05, + "loss": 0.1376, "step": 109455 }, { "epoch": 5.11, - "learning_rate": 9.830762739674654e-06, - "loss": 0.1173, + "learning_rate": 1.9846629117064266e-05, + "loss": 0.0955, "step": 109460 }, { "epoch": 5.11, - "learning_rate": 9.830293938399514e-06, - "loss": 0.0218, + "learning_rate": 1.9846161047228246e-05, + "loss": 0.0466, "step": 109465 }, { "epoch": 5.11, - "learning_rate": 9.829825137124374e-06, - "loss": 0.0059, + "learning_rate": 1.9845692977392226e-05, + "loss": 0.0186, "step": 109470 }, { "epoch": 5.11, - "learning_rate": 9.829356335849234e-06, - "loss": 0.0232, + "learning_rate": 1.984522490755621e-05, + "loss": 0.0502, "step": 109475 }, { "epoch": 5.11, - "learning_rate": 9.828887534574095e-06, - "loss": 0.0481, + "learning_rate": 1.984475683772019e-05, + "loss": 0.0288, "step": 109480 }, { "epoch": 5.11, - "learning_rate": 9.828418733298955e-06, - "loss": 0.0579, + "learning_rate": 1.984428876788417e-05, + "loss": 0.0691, "step": 109485 }, { "epoch": 5.11, - "learning_rate": 9.827949932023817e-06, - "loss": 0.0558, + "learning_rate": 1.9843820698048152e-05, + "loss": 0.1139, "step": 109490 }, { "epoch": 5.11, - "learning_rate": 9.827481130748677e-06, - "loss": 0.099, + "learning_rate": 1.9843352628212132e-05, + "loss": 0.0465, "step": 109495 }, { "epoch": 5.11, - "learning_rate": 9.827012329473537e-06, - "loss": 0.0847, + "learning_rate": 1.984288455837611e-05, + "loss": 0.1461, "step": 109500 }, { "epoch": 5.11, - "learning_rate": 9.826543528198398e-06, - "loss": 0.2918, + "learning_rate": 1.9842416488540088e-05, + "loss": 0.4018, "step": 109505 }, { "epoch": 5.11, - "learning_rate": 9.826074726923258e-06, - "loss": 0.0681, + "learning_rate": 1.984194841870407e-05, + "loss": 0.1156, "step": 109510 }, { "epoch": 5.11, - "learning_rate": 9.825605925648118e-06, - "loss": 0.0063, + "learning_rate": 1.984148034886805e-05, + "loss": 0.0103, "step": 109515 }, { "epoch": 5.11, - "learning_rate": 9.825137124372978e-06, - "loss": 0.0311, + "learning_rate": 1.984101227903203e-05, + "loss": 0.0116, "step": 109520 }, { "epoch": 5.11, - "learning_rate": 9.82466832309784e-06, - "loss": 0.0393, + "learning_rate": 1.984054420919601e-05, + "loss": 0.0445, "step": 109525 }, { "epoch": 5.11, - "learning_rate": 9.824199521822701e-06, - "loss": 0.0303, + "learning_rate": 1.9840076139359994e-05, + "loss": 0.0631, "step": 109530 }, { "epoch": 5.11, - "learning_rate": 9.823730720547561e-06, - "loss": 0.0746, + "learning_rate": 1.9839608069523974e-05, + "loss": 0.0392, "step": 109535 }, { "epoch": 5.11, - "learning_rate": 9.823261919272421e-06, - "loss": 0.0436, + "learning_rate": 1.9839139999687954e-05, + "loss": 0.0428, "step": 109540 }, { "epoch": 5.11, - "learning_rate": 9.822793117997281e-06, - "loss": 0.0995, + "learning_rate": 1.9838671929851937e-05, + "loss": 0.0361, "step": 109545 }, { "epoch": 5.11, - "learning_rate": 9.822324316722143e-06, - "loss": 0.1595, + "learning_rate": 1.9838203860015917e-05, + "loss": 0.1607, "step": 109550 }, { "epoch": 5.11, - "learning_rate": 9.821855515447003e-06, - "loss": 0.2667, + "learning_rate": 1.9837735790179897e-05, + "loss": 0.1441, "step": 109555 }, { "epoch": 5.11, - "learning_rate": 9.821386714171863e-06, - "loss": 0.0697, + "learning_rate": 1.9837267720343873e-05, + "loss": 0.115, "step": 109560 }, { "epoch": 5.11, - "learning_rate": 9.820917912896724e-06, - "loss": 0.0058, + "learning_rate": 1.9836799650507856e-05, + "loss": 0.0244, "step": 109565 }, { "epoch": 5.11, - "learning_rate": 9.820449111621584e-06, - "loss": 0.0118, + "learning_rate": 1.9836331580671836e-05, + "loss": 0.0314, "step": 109570 }, { "epoch": 5.11, - "learning_rate": 9.819980310346446e-06, - "loss": 0.0321, + "learning_rate": 1.9835863510835816e-05, + "loss": 0.0761, "step": 109575 }, { "epoch": 5.11, - "learning_rate": 9.819511509071306e-06, - "loss": 0.1037, + "learning_rate": 1.9835395440999796e-05, + "loss": 0.0143, "step": 109580 }, { "epoch": 5.11, - "learning_rate": 9.819042707796166e-06, - "loss": 0.0554, + "learning_rate": 1.983492737116378e-05, + "loss": 0.1119, "step": 109585 }, { "epoch": 5.11, - "learning_rate": 9.818573906521026e-06, - "loss": 0.07, + "learning_rate": 1.983445930132776e-05, + "loss": 0.0829, "step": 109590 }, { "epoch": 5.11, - "learning_rate": 9.818105105245887e-06, - "loss": 0.0791, + "learning_rate": 1.983399123149174e-05, + "loss": 0.0504, "step": 109595 }, { "epoch": 5.11, - "learning_rate": 9.817636303970747e-06, - "loss": 0.1093, + "learning_rate": 1.983352316165572e-05, + "loss": 0.1692, "step": 109600 }, { "epoch": 5.11, - "learning_rate": 9.817167502695609e-06, - "loss": 0.1866, + "learning_rate": 1.98330550918197e-05, + "loss": 0.3093, "step": 109605 }, { "epoch": 5.11, - "learning_rate": 9.816698701420469e-06, - "loss": 0.1044, + "learning_rate": 1.983258702198368e-05, + "loss": 0.1222, "step": 109610 }, { "epoch": 5.11, - "learning_rate": 9.816229900145329e-06, - "loss": 0.0066, + "learning_rate": 1.983211895214766e-05, + "loss": 0.012, "step": 109615 }, { "epoch": 5.12, - "learning_rate": 9.81576109887019e-06, - "loss": 0.0126, + "learning_rate": 1.9831650882311644e-05, + "loss": 0.0075, "step": 109620 }, { "epoch": 5.12, - "learning_rate": 9.81529229759505e-06, - "loss": 0.0748, + "learning_rate": 1.983118281247562e-05, + "loss": 0.0579, "step": 109625 }, { "epoch": 5.12, - "learning_rate": 9.81482349631991e-06, - "loss": 0.0243, + "learning_rate": 1.98307147426396e-05, + "loss": 0.0612, "step": 109630 }, { "epoch": 5.12, - "learning_rate": 9.814354695044772e-06, - "loss": 0.0525, + "learning_rate": 1.983024667280358e-05, + "loss": 0.0556, "step": 109635 }, { "epoch": 5.12, - "learning_rate": 9.813885893769632e-06, - "loss": 0.1166, + "learning_rate": 1.9829778602967564e-05, + "loss": 0.0437, "step": 109640 }, { "epoch": 5.12, - "learning_rate": 9.813417092494493e-06, - "loss": 0.0423, + "learning_rate": 1.9829310533131543e-05, + "loss": 0.0792, "step": 109645 }, { "epoch": 5.12, - "learning_rate": 9.812948291219353e-06, - "loss": 0.0814, + "learning_rate": 1.9828842463295523e-05, + "loss": 0.1223, "step": 109650 }, { "epoch": 5.12, - "learning_rate": 9.812479489944213e-06, - "loss": 0.2327, + "learning_rate": 1.9828374393459506e-05, + "loss": 0.3392, "step": 109655 }, { "epoch": 5.12, - "learning_rate": 9.812010688669073e-06, - "loss": 0.0626, + "learning_rate": 1.9827906323623486e-05, + "loss": 0.0866, "step": 109660 }, { "epoch": 5.12, - "learning_rate": 9.811541887393935e-06, - "loss": 0.0234, + "learning_rate": 1.9827438253787466e-05, + "loss": 0.0211, "step": 109665 }, { "epoch": 5.12, - "learning_rate": 9.811073086118795e-06, - "loss": 0.0034, + "learning_rate": 1.9826970183951446e-05, + "loss": 0.0431, "step": 109670 }, { "epoch": 5.12, - "learning_rate": 9.810604284843656e-06, - "loss": 0.04, + "learning_rate": 1.982650211411543e-05, + "loss": 0.0404, "step": 109675 }, { "epoch": 5.12, - "learning_rate": 9.810135483568516e-06, - "loss": 0.0362, + "learning_rate": 1.982603404427941e-05, + "loss": 0.0617, "step": 109680 }, { "epoch": 5.12, - "learning_rate": 9.809666682293376e-06, - "loss": 0.0336, + "learning_rate": 1.982556597444339e-05, + "loss": 0.0751, "step": 109685 }, { "epoch": 5.12, - "learning_rate": 9.809197881018238e-06, - "loss": 0.0665, + "learning_rate": 1.9825097904607365e-05, + "loss": 0.075, "step": 109690 }, { "epoch": 5.12, - "learning_rate": 9.808729079743098e-06, - "loss": 0.0669, + "learning_rate": 1.982462983477135e-05, + "loss": 0.0324, "step": 109695 }, { "epoch": 5.12, - "learning_rate": 9.808260278467958e-06, - "loss": 0.0928, + "learning_rate": 1.9824161764935328e-05, + "loss": 0.1645, "step": 109700 }, { "epoch": 5.12, - "learning_rate": 9.80779147719282e-06, - "loss": 0.1467, + "learning_rate": 1.9823693695099308e-05, + "loss": 0.21, "step": 109705 }, { "epoch": 5.12, - "learning_rate": 9.80732267591768e-06, - "loss": 0.0562, + "learning_rate": 1.9823225625263288e-05, + "loss": 0.1039, "step": 109710 }, { "epoch": 5.12, - "learning_rate": 9.80685387464254e-06, - "loss": 0.0215, + "learning_rate": 1.982275755542727e-05, + "loss": 0.0176, "step": 109715 }, { "epoch": 5.12, - "learning_rate": 9.8063850733674e-06, - "loss": 0.0401, + "learning_rate": 1.982228948559125e-05, + "loss": 0.0113, "step": 109720 }, { "epoch": 5.12, - "learning_rate": 9.80591627209226e-06, - "loss": 0.0385, + "learning_rate": 1.982182141575523e-05, + "loss": 0.0502, "step": 109725 }, { "epoch": 5.12, - "learning_rate": 9.80544747081712e-06, - "loss": 0.0622, + "learning_rate": 1.9821353345919214e-05, + "loss": 0.0387, "step": 109730 }, { "epoch": 5.12, - "learning_rate": 9.804978669541982e-06, - "loss": 0.043, + "learning_rate": 1.9820885276083194e-05, + "loss": 0.045, "step": 109735 }, { "epoch": 5.12, - "learning_rate": 9.804509868266842e-06, - "loss": 0.1189, + "learning_rate": 1.9820417206247173e-05, + "loss": 0.0706, "step": 109740 }, { "epoch": 5.12, - "learning_rate": 9.804041066991704e-06, - "loss": 0.163, + "learning_rate": 1.9819949136411153e-05, + "loss": 0.052, "step": 109745 }, { "epoch": 5.12, - "learning_rate": 9.803572265716564e-06, - "loss": 0.226, + "learning_rate": 1.9819481066575133e-05, + "loss": 0.1419, "step": 109750 }, { "epoch": 5.12, - "learning_rate": 9.803103464441424e-06, - "loss": 0.2025, + "learning_rate": 1.9819012996739113e-05, + "loss": 0.2479, "step": 109755 }, { "epoch": 5.12, - "learning_rate": 9.802634663166285e-06, - "loss": 0.1312, + "learning_rate": 1.9818544926903093e-05, + "loss": 0.0898, "step": 109760 }, { "epoch": 5.12, - "learning_rate": 9.802165861891145e-06, - "loss": 0.043, + "learning_rate": 1.9818076857067073e-05, + "loss": 0.019, "step": 109765 }, { "epoch": 5.12, - "learning_rate": 9.801697060616005e-06, - "loss": 0.0261, + "learning_rate": 1.9817608787231056e-05, + "loss": 0.0451, "step": 109770 }, { "epoch": 5.12, - "learning_rate": 9.801228259340865e-06, - "loss": 0.0473, + "learning_rate": 1.9817140717395036e-05, + "loss": 0.079, "step": 109775 }, { "epoch": 5.12, - "learning_rate": 9.800759458065727e-06, - "loss": 0.082, + "learning_rate": 1.9816672647559015e-05, + "loss": 0.0535, "step": 109780 }, { "epoch": 5.12, - "learning_rate": 9.800290656790588e-06, - "loss": 0.0697, + "learning_rate": 1.9816204577723e-05, + "loss": 0.0464, "step": 109785 }, { "epoch": 5.12, - "learning_rate": 9.799821855515448e-06, - "loss": 0.0762, + "learning_rate": 1.981573650788698e-05, + "loss": 0.04, "step": 109790 }, { "epoch": 5.12, - "learning_rate": 9.799353054240308e-06, - "loss": 0.14, + "learning_rate": 1.9815268438050958e-05, + "loss": 0.0715, "step": 109795 }, { "epoch": 5.12, - "learning_rate": 9.798884252965168e-06, - "loss": 0.2307, + "learning_rate": 1.9814800368214938e-05, + "loss": 0.1363, "step": 109800 }, { "epoch": 5.12, - "learning_rate": 9.79841545169003e-06, - "loss": 0.469, + "learning_rate": 1.981433229837892e-05, + "loss": 0.2877, "step": 109805 }, { "epoch": 5.12, - "learning_rate": 9.79794665041489e-06, - "loss": 0.0755, + "learning_rate": 1.98138642285429e-05, + "loss": 0.0731, "step": 109810 }, { "epoch": 5.12, - "learning_rate": 9.79747784913975e-06, - "loss": 0.0116, + "learning_rate": 1.9813396158706878e-05, + "loss": 0.0122, "step": 109815 }, { "epoch": 5.12, - "learning_rate": 9.797009047864611e-06, - "loss": 0.0471, + "learning_rate": 1.9812928088870857e-05, + "loss": 0.0292, "step": 109820 }, { "epoch": 5.12, - "learning_rate": 9.796540246589471e-06, - "loss": 0.0208, + "learning_rate": 1.981246001903484e-05, + "loss": 0.021, "step": 109825 }, { "epoch": 5.12, - "learning_rate": 9.796071445314333e-06, - "loss": 0.0371, + "learning_rate": 1.981199194919882e-05, + "loss": 0.0248, "step": 109830 }, { "epoch": 5.13, - "learning_rate": 9.795602644039193e-06, - "loss": 0.0534, + "learning_rate": 1.98115238793628e-05, + "loss": 0.072, "step": 109835 }, { "epoch": 5.13, - "learning_rate": 9.795133842764053e-06, - "loss": 0.0863, + "learning_rate": 1.9811055809526783e-05, + "loss": 0.0789, "step": 109840 }, { "epoch": 5.13, - "learning_rate": 9.794665041488913e-06, - "loss": 0.086, + "learning_rate": 1.9810587739690763e-05, + "loss": 0.1255, "step": 109845 }, { "epoch": 5.13, - "learning_rate": 9.794196240213774e-06, - "loss": 0.0667, + "learning_rate": 1.9810119669854743e-05, + "loss": 0.0984, "step": 109850 }, { "epoch": 5.13, - "learning_rate": 9.793727438938634e-06, - "loss": 0.2425, + "learning_rate": 1.9809651600018723e-05, + "loss": 0.2753, "step": 109855 }, { "epoch": 5.13, - "learning_rate": 9.793258637663496e-06, - "loss": 0.1246, + "learning_rate": 1.9809183530182706e-05, + "loss": 0.0942, "step": 109860 }, { "epoch": 5.13, - "learning_rate": 9.792789836388356e-06, - "loss": 0.0116, + "learning_rate": 1.9808715460346686e-05, + "loss": 0.0144, "step": 109865 }, { "epoch": 5.13, - "learning_rate": 9.792321035113216e-06, - "loss": 0.0217, + "learning_rate": 1.9808247390510666e-05, + "loss": 0.0344, "step": 109870 }, { "epoch": 5.13, - "learning_rate": 9.791852233838077e-06, - "loss": 0.0429, + "learning_rate": 1.9807779320674645e-05, + "loss": 0.0086, "step": 109875 }, { "epoch": 5.13, - "learning_rate": 9.791383432562937e-06, - "loss": 0.0657, + "learning_rate": 1.9807311250838625e-05, + "loss": 0.0418, "step": 109880 }, { "epoch": 5.13, - "learning_rate": 9.790914631287797e-06, - "loss": 0.0421, + "learning_rate": 1.9806843181002605e-05, + "loss": 0.0054, "step": 109885 }, { "epoch": 5.13, - "learning_rate": 9.790445830012659e-06, - "loss": 0.0833, + "learning_rate": 1.9806375111166585e-05, + "loss": 0.171, "step": 109890 }, { "epoch": 5.13, - "learning_rate": 9.789977028737519e-06, - "loss": 0.1219, + "learning_rate": 1.9805907041330565e-05, + "loss": 0.1835, "step": 109895 }, { "epoch": 5.13, - "learning_rate": 9.78950822746238e-06, - "loss": 0.1307, + "learning_rate": 1.9805438971494548e-05, + "loss": 0.0735, "step": 109900 }, { "epoch": 5.13, - "learning_rate": 9.78903942618724e-06, - "loss": 0.3266, + "learning_rate": 1.9804970901658528e-05, + "loss": 0.2796, "step": 109905 }, { "epoch": 5.13, - "learning_rate": 9.7885706249121e-06, - "loss": 0.0828, + "learning_rate": 1.9804502831822508e-05, + "loss": 0.0815, "step": 109910 }, { "epoch": 5.13, - "learning_rate": 9.78810182363696e-06, - "loss": 0.0045, + "learning_rate": 1.980403476198649e-05, + "loss": 0.0089, "step": 109915 }, { "epoch": 5.13, - "learning_rate": 9.787633022361822e-06, - "loss": 0.0327, + "learning_rate": 1.980356669215047e-05, + "loss": 0.03, "step": 109920 }, { "epoch": 5.13, - "learning_rate": 9.787164221086682e-06, - "loss": 0.0602, + "learning_rate": 1.980309862231445e-05, + "loss": 0.0196, "step": 109925 }, { "epoch": 5.13, - "learning_rate": 9.786695419811543e-06, - "loss": 0.0439, + "learning_rate": 1.980263055247843e-05, + "loss": 0.0337, "step": 109930 }, { "epoch": 5.13, - "learning_rate": 9.786226618536403e-06, - "loss": 0.0382, + "learning_rate": 1.9802162482642413e-05, + "loss": 0.0401, "step": 109935 }, { "epoch": 5.13, - "learning_rate": 9.785757817261263e-06, - "loss": 0.1368, + "learning_rate": 1.980169441280639e-05, + "loss": 0.0442, "step": 109940 }, { "epoch": 5.13, - "learning_rate": 9.785289015986125e-06, - "loss": 0.1942, + "learning_rate": 1.980122634297037e-05, + "loss": 0.1494, "step": 109945 }, { "epoch": 5.13, - "learning_rate": 9.784820214710985e-06, - "loss": 0.1453, + "learning_rate": 1.980075827313435e-05, + "loss": 0.1265, "step": 109950 }, { "epoch": 5.13, - "learning_rate": 9.784351413435845e-06, - "loss": 0.301, + "learning_rate": 1.9800290203298333e-05, + "loss": 0.2844, "step": 109955 }, { "epoch": 5.13, - "learning_rate": 9.783882612160706e-06, - "loss": 0.0891, + "learning_rate": 1.9799822133462313e-05, + "loss": 0.1019, "step": 109960 }, { "epoch": 5.13, - "learning_rate": 9.783413810885566e-06, - "loss": 0.0568, + "learning_rate": 1.9799354063626292e-05, + "loss": 0.0123, "step": 109965 }, { "epoch": 5.13, - "learning_rate": 9.782945009610428e-06, - "loss": 0.0115, + "learning_rate": 1.9798885993790276e-05, + "loss": 0.0095, "step": 109970 }, { "epoch": 5.13, - "learning_rate": 9.782476208335288e-06, - "loss": 0.0228, + "learning_rate": 1.9798417923954255e-05, + "loss": 0.0265, "step": 109975 }, { "epoch": 5.13, - "learning_rate": 9.782007407060148e-06, - "loss": 0.0214, + "learning_rate": 1.9797949854118235e-05, + "loss": 0.021, "step": 109980 }, { "epoch": 5.13, - "learning_rate": 9.781538605785008e-06, - "loss": 0.0547, + "learning_rate": 1.9797481784282215e-05, + "loss": 0.0645, "step": 109985 }, { "epoch": 5.13, - "learning_rate": 9.78106980450987e-06, - "loss": 0.0195, + "learning_rate": 1.9797013714446198e-05, + "loss": 0.0931, "step": 109990 }, { "epoch": 5.13, - "learning_rate": 9.78060100323473e-06, - "loss": 0.0542, + "learning_rate": 1.9796545644610178e-05, + "loss": 0.1094, "step": 109995 }, { "epoch": 5.13, - "learning_rate": 9.780132201959591e-06, - "loss": 0.094, + "learning_rate": 1.9796077574774158e-05, + "loss": 0.0775, "step": 110000 }, { "epoch": 5.13, - "learning_rate": 9.77966340068445e-06, - "loss": 0.2812, + "learning_rate": 1.9795609504938134e-05, + "loss": 0.2269, "step": 110005 }, { "epoch": 5.13, - "learning_rate": 9.77919459940931e-06, - "loss": 0.0647, + "learning_rate": 1.9795141435102118e-05, + "loss": 0.0967, "step": 110010 }, { "epoch": 5.13, - "learning_rate": 9.778725798134172e-06, - "loss": 0.0145, + "learning_rate": 1.9794673365266097e-05, + "loss": 0.0193, "step": 110015 }, { "epoch": 5.13, - "learning_rate": 9.778256996859032e-06, - "loss": 0.0324, + "learning_rate": 1.9794205295430077e-05, + "loss": 0.0253, "step": 110020 }, { "epoch": 5.13, - "learning_rate": 9.777788195583892e-06, - "loss": 0.0429, + "learning_rate": 1.979373722559406e-05, + "loss": 0.019, "step": 110025 }, { "epoch": 5.13, - "learning_rate": 9.777319394308752e-06, - "loss": 0.0336, + "learning_rate": 1.979326915575804e-05, + "loss": 0.0276, "step": 110030 }, { "epoch": 5.13, - "learning_rate": 9.776850593033614e-06, - "loss": 0.1098, + "learning_rate": 1.979280108592202e-05, + "loss": 0.022, "step": 110035 }, { "epoch": 5.13, - "learning_rate": 9.776381791758475e-06, - "loss": 0.0341, + "learning_rate": 1.9792333016086e-05, + "loss": 0.0489, "step": 110040 }, { "epoch": 5.13, - "learning_rate": 9.775912990483335e-06, - "loss": 0.0452, + "learning_rate": 1.9791864946249983e-05, + "loss": 0.0738, "step": 110045 }, { "epoch": 5.14, - "learning_rate": 9.775444189208195e-06, - "loss": 0.1617, + "learning_rate": 1.9791396876413963e-05, + "loss": 0.0942, "step": 110050 }, { "epoch": 5.14, - "learning_rate": 9.774975387933055e-06, - "loss": 0.1159, + "learning_rate": 1.9790928806577943e-05, + "loss": 0.193, "step": 110055 }, { "epoch": 5.14, - "learning_rate": 9.774506586657917e-06, - "loss": 0.1084, + "learning_rate": 1.9790460736741922e-05, + "loss": 0.0939, "step": 110060 }, { "epoch": 5.14, - "learning_rate": 9.774037785382777e-06, - "loss": 0.0082, + "learning_rate": 1.9789992666905902e-05, + "loss": 0.0361, "step": 110065 }, { "epoch": 5.14, - "learning_rate": 9.773568984107637e-06, - "loss": 0.0467, + "learning_rate": 1.9789524597069882e-05, + "loss": 0.0421, "step": 110070 }, { "epoch": 5.14, - "learning_rate": 9.773100182832498e-06, - "loss": 0.0535, + "learning_rate": 1.9789056527233862e-05, + "loss": 0.0508, "step": 110075 }, { "epoch": 5.14, - "learning_rate": 9.772631381557358e-06, - "loss": 0.0298, + "learning_rate": 1.9788588457397842e-05, + "loss": 0.0558, "step": 110080 }, { "epoch": 5.14, - "learning_rate": 9.77216258028222e-06, - "loss": 0.0495, + "learning_rate": 1.9788120387561825e-05, + "loss": 0.0885, "step": 110085 }, { "epoch": 5.14, - "learning_rate": 9.77169377900708e-06, - "loss": 0.096, + "learning_rate": 1.9787652317725805e-05, + "loss": 0.0277, "step": 110090 }, { "epoch": 5.14, - "learning_rate": 9.77122497773194e-06, - "loss": 0.1652, + "learning_rate": 1.9787184247889785e-05, + "loss": 0.107, "step": 110095 }, { "epoch": 5.14, - "learning_rate": 9.7707561764568e-06, - "loss": 0.1424, + "learning_rate": 1.9786716178053768e-05, + "loss": 0.1939, "step": 110100 }, { "epoch": 5.14, - "learning_rate": 9.770287375181661e-06, - "loss": 0.196, + "learning_rate": 1.9786248108217748e-05, + "loss": 0.1977, "step": 110105 }, { "epoch": 5.14, - "learning_rate": 9.769818573906521e-06, - "loss": 0.0597, + "learning_rate": 1.9785780038381727e-05, + "loss": 0.1027, "step": 110110 }, { "epoch": 5.14, - "learning_rate": 9.769349772631383e-06, - "loss": 0.0164, + "learning_rate": 1.9785311968545707e-05, + "loss": 0.0353, "step": 110115 }, { "epoch": 5.14, - "learning_rate": 9.768880971356243e-06, - "loss": 0.0235, + "learning_rate": 1.978484389870969e-05, + "loss": 0.0567, "step": 110120 }, { "epoch": 5.14, - "learning_rate": 9.768412170081103e-06, - "loss": 0.0419, + "learning_rate": 1.978437582887367e-05, + "loss": 0.0267, "step": 110125 }, { "epoch": 5.14, - "learning_rate": 9.767943368805964e-06, - "loss": 0.0616, + "learning_rate": 1.9783907759037647e-05, + "loss": 0.0342, "step": 110130 }, { "epoch": 5.14, - "learning_rate": 9.767474567530824e-06, - "loss": 0.0316, + "learning_rate": 1.9783439689201627e-05, + "loss": 0.0597, "step": 110135 }, { "epoch": 5.14, - "learning_rate": 9.767005766255684e-06, - "loss": 0.1045, + "learning_rate": 1.978297161936561e-05, + "loss": 0.0619, "step": 110140 }, { "epoch": 5.14, - "learning_rate": 9.766536964980546e-06, - "loss": 0.1055, + "learning_rate": 1.978250354952959e-05, + "loss": 0.1377, "step": 110145 }, { "epoch": 5.14, - "learning_rate": 9.766068163705406e-06, - "loss": 0.1031, + "learning_rate": 1.978203547969357e-05, + "loss": 0.1431, "step": 110150 }, { "epoch": 5.14, - "learning_rate": 9.765599362430267e-06, - "loss": 0.2691, + "learning_rate": 1.9781567409857553e-05, + "loss": 0.2907, "step": 110155 }, { "epoch": 5.14, - "learning_rate": 9.765130561155127e-06, - "loss": 0.1233, + "learning_rate": 1.9781099340021532e-05, + "loss": 0.0809, "step": 110160 }, { "epoch": 5.14, - "learning_rate": 9.764661759879987e-06, - "loss": 0.0138, + "learning_rate": 1.9780631270185512e-05, + "loss": 0.0393, "step": 110165 }, { "epoch": 5.14, - "learning_rate": 9.764192958604847e-06, - "loss": 0.0331, + "learning_rate": 1.9780163200349492e-05, + "loss": 0.022, "step": 110170 }, { "epoch": 5.14, - "learning_rate": 9.763724157329709e-06, - "loss": 0.047, + "learning_rate": 1.9779695130513475e-05, + "loss": 0.0264, "step": 110175 }, { "epoch": 5.14, - "learning_rate": 9.763255356054569e-06, - "loss": 0.141, + "learning_rate": 1.9779227060677455e-05, + "loss": 0.0209, "step": 110180 }, { "epoch": 5.14, - "learning_rate": 9.76278655477943e-06, - "loss": 0.0968, + "learning_rate": 1.9778758990841435e-05, + "loss": 0.0871, "step": 110185 }, { "epoch": 5.14, - "learning_rate": 9.76231775350429e-06, - "loss": 0.0991, + "learning_rate": 1.9778290921005415e-05, + "loss": 0.0717, "step": 110190 }, { "epoch": 5.14, - "learning_rate": 9.76184895222915e-06, - "loss": 0.0415, + "learning_rate": 1.9777822851169394e-05, + "loss": 0.0652, "step": 110195 }, { "epoch": 5.14, - "learning_rate": 9.761380150954012e-06, - "loss": 0.1172, + "learning_rate": 1.9777354781333374e-05, + "loss": 0.125, "step": 110200 }, { "epoch": 5.14, - "learning_rate": 9.760911349678872e-06, - "loss": 0.552, + "learning_rate": 1.9776886711497354e-05, + "loss": 0.2714, "step": 110205 }, { "epoch": 5.14, - "learning_rate": 9.760442548403732e-06, - "loss": 0.0777, + "learning_rate": 1.9776418641661337e-05, + "loss": 0.079, "step": 110210 }, { "epoch": 5.14, - "learning_rate": 9.759973747128593e-06, - "loss": 0.0227, + "learning_rate": 1.9775950571825317e-05, + "loss": 0.0182, "step": 110215 }, { "epoch": 5.14, - "learning_rate": 9.759504945853453e-06, - "loss": 0.0074, + "learning_rate": 1.9775482501989297e-05, + "loss": 0.0405, "step": 110220 }, { "epoch": 5.14, - "learning_rate": 9.759036144578315e-06, - "loss": 0.046, + "learning_rate": 1.9775014432153277e-05, + "loss": 0.0469, "step": 110225 }, { "epoch": 5.14, - "learning_rate": 9.758567343303175e-06, - "loss": 0.0665, + "learning_rate": 1.977454636231726e-05, + "loss": 0.07, "step": 110230 }, { "epoch": 5.14, - "learning_rate": 9.758098542028035e-06, - "loss": 0.0453, + "learning_rate": 1.977407829248124e-05, + "loss": 0.0232, "step": 110235 }, { "epoch": 5.14, - "learning_rate": 9.757629740752895e-06, - "loss": 0.1279, + "learning_rate": 1.977361022264522e-05, + "loss": 0.0961, "step": 110240 }, { "epoch": 5.14, - "learning_rate": 9.757160939477756e-06, - "loss": 0.0697, + "learning_rate": 1.97731421528092e-05, + "loss": 0.0883, "step": 110245 }, { "epoch": 5.14, - "learning_rate": 9.756692138202616e-06, - "loss": 0.1728, + "learning_rate": 1.9772674082973183e-05, + "loss": 0.1344, "step": 110250 }, { "epoch": 5.14, - "learning_rate": 9.756223336927478e-06, - "loss": 0.2205, + "learning_rate": 1.977220601313716e-05, + "loss": 0.2512, "step": 110255 }, { "epoch": 5.14, - "learning_rate": 9.755754535652338e-06, - "loss": 0.0749, + "learning_rate": 1.977173794330114e-05, + "loss": 0.0838, "step": 110260 }, { "epoch": 5.15, - "learning_rate": 9.755285734377198e-06, - "loss": 0.0081, + "learning_rate": 1.9771269873465122e-05, + "loss": 0.0134, "step": 110265 }, { "epoch": 5.15, - "learning_rate": 9.75481693310206e-06, - "loss": 0.0299, + "learning_rate": 1.9770801803629102e-05, + "loss": 0.0411, "step": 110270 }, { "epoch": 5.15, - "learning_rate": 9.75434813182692e-06, - "loss": 0.0241, + "learning_rate": 1.9770333733793082e-05, + "loss": 0.019, "step": 110275 }, { "epoch": 5.15, - "learning_rate": 9.75387933055178e-06, - "loss": 0.0374, + "learning_rate": 1.976986566395706e-05, + "loss": 0.0298, "step": 110280 }, { "epoch": 5.15, - "learning_rate": 9.75341052927664e-06, - "loss": 0.1022, + "learning_rate": 1.9769397594121045e-05, + "loss": 0.058, "step": 110285 }, { "epoch": 5.15, - "learning_rate": 9.752941728001501e-06, - "loss": 0.121, + "learning_rate": 1.9768929524285025e-05, + "loss": 0.0979, "step": 110290 }, { "epoch": 5.15, - "learning_rate": 9.752472926726363e-06, - "loss": 0.0993, + "learning_rate": 1.9768461454449004e-05, + "loss": 0.1331, "step": 110295 }, { "epoch": 5.15, - "learning_rate": 9.752004125451223e-06, - "loss": 0.0829, + "learning_rate": 1.9767993384612984e-05, + "loss": 0.1576, "step": 110300 }, { "epoch": 5.15, - "learning_rate": 9.751535324176082e-06, - "loss": 0.302, + "learning_rate": 1.9767525314776967e-05, + "loss": 0.2574, "step": 110305 }, { "epoch": 5.15, - "learning_rate": 9.751066522900942e-06, - "loss": 0.1145, + "learning_rate": 1.9767057244940947e-05, + "loss": 0.0811, "step": 110310 }, { "epoch": 5.15, - "learning_rate": 9.750597721625804e-06, - "loss": 0.0027, + "learning_rate": 1.9766589175104927e-05, + "loss": 0.0221, "step": 110315 }, { "epoch": 5.15, - "learning_rate": 9.750128920350664e-06, - "loss": 0.0676, + "learning_rate": 1.9766121105268903e-05, + "loss": 0.0507, "step": 110320 }, { "epoch": 5.15, - "learning_rate": 9.749660119075524e-06, - "loss": 0.026, + "learning_rate": 1.9765653035432887e-05, + "loss": 0.0347, "step": 110325 }, { "epoch": 5.15, - "learning_rate": 9.749191317800385e-06, - "loss": 0.0794, + "learning_rate": 1.9765184965596867e-05, + "loss": 0.1006, "step": 110330 }, { "epoch": 5.15, - "learning_rate": 9.748722516525245e-06, - "loss": 0.021, + "learning_rate": 1.9764716895760846e-05, + "loss": 0.1213, "step": 110335 }, { "epoch": 5.15, - "learning_rate": 9.748253715250107e-06, - "loss": 0.0603, + "learning_rate": 1.976424882592483e-05, + "loss": 0.0538, "step": 110340 }, { "epoch": 5.15, - "learning_rate": 9.747784913974967e-06, - "loss": 0.1819, + "learning_rate": 1.976378075608881e-05, + "loss": 0.0601, "step": 110345 }, { "epoch": 5.15, - "learning_rate": 9.747316112699827e-06, - "loss": 0.0732, + "learning_rate": 1.976331268625279e-05, + "loss": 0.1508, "step": 110350 }, { "epoch": 5.15, - "learning_rate": 9.746847311424687e-06, - "loss": 0.2178, + "learning_rate": 1.976284461641677e-05, + "loss": 0.313, "step": 110355 }, { "epoch": 5.15, - "learning_rate": 9.746378510149548e-06, - "loss": 0.0651, + "learning_rate": 1.9762376546580752e-05, + "loss": 0.0817, "step": 110360 }, { "epoch": 5.15, - "learning_rate": 9.745909708874408e-06, - "loss": 0.015, + "learning_rate": 1.9761908476744732e-05, + "loss": 0.0098, "step": 110365 }, { "epoch": 5.15, - "learning_rate": 9.74544090759927e-06, - "loss": 0.0355, + "learning_rate": 1.9761440406908712e-05, + "loss": 0.0174, "step": 110370 }, { "epoch": 5.15, - "learning_rate": 9.74497210632413e-06, - "loss": 0.0303, + "learning_rate": 1.976097233707269e-05, + "loss": 0.0282, "step": 110375 }, { "epoch": 5.15, - "learning_rate": 9.74450330504899e-06, - "loss": 0.0798, + "learning_rate": 1.976050426723667e-05, + "loss": 0.0413, "step": 110380 }, { "epoch": 5.15, - "learning_rate": 9.744034503773852e-06, - "loss": 0.1023, + "learning_rate": 1.976003619740065e-05, + "loss": 0.1125, "step": 110385 }, { "epoch": 5.15, - "learning_rate": 9.743565702498711e-06, - "loss": 0.0334, + "learning_rate": 1.975956812756463e-05, + "loss": 0.0591, "step": 110390 }, { "epoch": 5.15, - "learning_rate": 9.743096901223571e-06, - "loss": 0.0199, + "learning_rate": 1.9759100057728614e-05, + "loss": 0.1243, "step": 110395 }, { "epoch": 5.15, - "learning_rate": 9.742628099948433e-06, - "loss": 0.088, + "learning_rate": 1.9758631987892594e-05, + "loss": 0.1461, "step": 110400 }, { "epoch": 5.15, - "learning_rate": 9.742159298673293e-06, - "loss": 0.3244, + "learning_rate": 1.9758163918056574e-05, + "loss": 0.1913, "step": 110405 }, { "epoch": 5.15, - "learning_rate": 9.741690497398155e-06, - "loss": 0.0964, + "learning_rate": 1.9757695848220554e-05, + "loss": 0.1557, "step": 110410 }, { "epoch": 5.15, - "learning_rate": 9.741221696123015e-06, - "loss": 0.0172, + "learning_rate": 1.9757227778384537e-05, + "loss": 0.0067, "step": 110415 }, { "epoch": 5.15, - "learning_rate": 9.740752894847874e-06, - "loss": 0.0479, + "learning_rate": 1.9756759708548517e-05, + "loss": 0.0315, "step": 110420 }, { "epoch": 5.15, - "learning_rate": 9.740284093572734e-06, - "loss": 0.0263, + "learning_rate": 1.9756291638712497e-05, + "loss": 0.0133, "step": 110425 }, { "epoch": 5.15, - "learning_rate": 9.739815292297596e-06, - "loss": 0.0396, + "learning_rate": 1.9755823568876476e-05, + "loss": 0.0649, "step": 110430 }, { "epoch": 5.15, - "learning_rate": 9.739346491022456e-06, - "loss": 0.0519, + "learning_rate": 1.975535549904046e-05, + "loss": 0.0657, "step": 110435 }, { "epoch": 5.15, - "learning_rate": 9.738877689747318e-06, - "loss": 0.037, + "learning_rate": 1.975488742920444e-05, + "loss": 0.124, "step": 110440 }, { "epoch": 5.15, - "learning_rate": 9.738408888472178e-06, - "loss": 0.1083, + "learning_rate": 1.9754419359368416e-05, + "loss": 0.1486, "step": 110445 }, { "epoch": 5.15, - "learning_rate": 9.737940087197037e-06, - "loss": 0.1127, + "learning_rate": 1.97539512895324e-05, + "loss": 0.0536, "step": 110450 }, { "epoch": 5.15, - "learning_rate": 9.737471285921899e-06, - "loss": 0.2734, + "learning_rate": 1.975348321969638e-05, + "loss": 0.1381, "step": 110455 }, { "epoch": 5.15, - "learning_rate": 9.737002484646759e-06, - "loss": 0.0929, + "learning_rate": 1.975301514986036e-05, + "loss": 0.108, "step": 110460 }, { "epoch": 5.15, - "learning_rate": 9.736533683371619e-06, - "loss": 0.0117, + "learning_rate": 1.975254708002434e-05, + "loss": 0.0206, "step": 110465 }, { "epoch": 5.15, - "learning_rate": 9.73606488209648e-06, - "loss": 0.0104, + "learning_rate": 1.9752079010188322e-05, + "loss": 0.0361, "step": 110470 }, { "epoch": 5.15, - "learning_rate": 9.73559608082134e-06, - "loss": 0.0432, + "learning_rate": 1.97516109403523e-05, + "loss": 0.0181, "step": 110475 }, { "epoch": 5.16, - "learning_rate": 9.735127279546202e-06, - "loss": 0.0554, + "learning_rate": 1.975114287051628e-05, + "loss": 0.06, "step": 110480 }, { "epoch": 5.16, - "learning_rate": 9.734658478271062e-06, - "loss": 0.0221, + "learning_rate": 1.975067480068026e-05, + "loss": 0.0212, "step": 110485 }, { "epoch": 5.16, - "learning_rate": 9.734189676995922e-06, - "loss": 0.0528, + "learning_rate": 1.9750206730844244e-05, + "loss": 0.115, "step": 110490 }, { "epoch": 5.16, - "learning_rate": 9.733720875720782e-06, - "loss": 0.0755, + "learning_rate": 1.9749738661008224e-05, + "loss": 0.1317, "step": 110495 }, { "epoch": 5.16, - "learning_rate": 9.733252074445644e-06, - "loss": 0.1656, + "learning_rate": 1.9749270591172204e-05, + "loss": 0.0967, "step": 110500 }, { "epoch": 5.16, - "learning_rate": 9.732783273170504e-06, - "loss": 0.2056, + "learning_rate": 1.9748802521336184e-05, + "loss": 0.2695, "step": 110505 }, { "epoch": 5.16, - "learning_rate": 9.732314471895365e-06, - "loss": 0.0353, + "learning_rate": 1.9748334451500164e-05, + "loss": 0.0843, "step": 110510 }, { "epoch": 5.16, - "learning_rate": 9.731845670620225e-06, - "loss": 0.0527, + "learning_rate": 1.9747866381664143e-05, + "loss": 0.0789, "step": 110515 }, { "epoch": 5.16, - "learning_rate": 9.731376869345085e-06, - "loss": 0.0427, + "learning_rate": 1.9747398311828123e-05, + "loss": 0.0247, "step": 110520 }, { "epoch": 5.16, - "learning_rate": 9.730908068069947e-06, - "loss": 0.0344, + "learning_rate": 1.9746930241992106e-05, + "loss": 0.01, "step": 110525 }, { "epoch": 5.16, - "learning_rate": 9.730439266794807e-06, - "loss": 0.0411, + "learning_rate": 1.9746462172156086e-05, + "loss": 0.0604, "step": 110530 }, { "epoch": 5.16, - "learning_rate": 9.729970465519666e-06, - "loss": 0.0392, + "learning_rate": 1.9745994102320066e-05, + "loss": 0.04, "step": 110535 }, { "epoch": 5.16, - "learning_rate": 9.729501664244526e-06, - "loss": 0.0767, + "learning_rate": 1.9745526032484046e-05, + "loss": 0.0844, "step": 110540 }, { "epoch": 5.16, - "learning_rate": 9.729032862969388e-06, - "loss": 0.0604, + "learning_rate": 1.974505796264803e-05, + "loss": 0.0918, "step": 110545 }, { "epoch": 5.16, - "learning_rate": 9.72856406169425e-06, - "loss": 0.1222, + "learning_rate": 1.974458989281201e-05, + "loss": 0.0549, "step": 110550 }, { "epoch": 5.16, - "learning_rate": 9.72809526041911e-06, - "loss": 0.1243, + "learning_rate": 1.974412182297599e-05, + "loss": 0.2742, "step": 110555 }, { "epoch": 5.16, - "learning_rate": 9.72762645914397e-06, - "loss": 0.0953, + "learning_rate": 1.974365375313997e-05, + "loss": 0.1223, "step": 110560 }, { "epoch": 5.16, - "learning_rate": 9.72715765786883e-06, - "loss": 0.0375, + "learning_rate": 1.9743185683303952e-05, + "loss": 0.0247, "step": 110565 }, { "epoch": 5.16, - "learning_rate": 9.726688856593691e-06, - "loss": 0.0353, + "learning_rate": 1.9742717613467928e-05, + "loss": 0.0248, "step": 110570 }, { "epoch": 5.16, - "learning_rate": 9.726220055318551e-06, - "loss": 0.0194, + "learning_rate": 1.9742249543631908e-05, + "loss": 0.0497, "step": 110575 }, { "epoch": 5.16, - "learning_rate": 9.725751254043411e-06, - "loss": 0.0545, + "learning_rate": 1.974178147379589e-05, + "loss": 0.0411, "step": 110580 }, { "epoch": 5.16, - "learning_rate": 9.725282452768273e-06, - "loss": 0.05, + "learning_rate": 1.974131340395987e-05, + "loss": 0.0564, "step": 110585 }, { "epoch": 5.16, - "learning_rate": 9.724813651493133e-06, - "loss": 0.0395, + "learning_rate": 1.974084533412385e-05, + "loss": 0.0612, "step": 110590 }, { "epoch": 5.16, - "learning_rate": 9.724344850217994e-06, - "loss": 0.1318, + "learning_rate": 1.974037726428783e-05, + "loss": 0.0849, "step": 110595 }, { "epoch": 5.16, - "learning_rate": 9.723876048942854e-06, - "loss": 0.162, + "learning_rate": 1.9739909194451814e-05, + "loss": 0.1368, "step": 110600 }, { "epoch": 5.16, - "learning_rate": 9.723407247667714e-06, - "loss": 0.2088, + "learning_rate": 1.9739441124615794e-05, + "loss": 0.2273, "step": 110605 }, { "epoch": 5.16, - "learning_rate": 9.722938446392574e-06, - "loss": 0.0818, + "learning_rate": 1.9738973054779774e-05, + "loss": 0.0915, "step": 110610 }, { "epoch": 5.16, - "learning_rate": 9.722469645117436e-06, - "loss": 0.0435, + "learning_rate": 1.9738504984943753e-05, + "loss": 0.0302, "step": 110615 }, { "epoch": 5.16, - "learning_rate": 9.722000843842296e-06, - "loss": 0.0415, + "learning_rate": 1.9738036915107737e-05, + "loss": 0.0141, "step": 110620 }, { "epoch": 5.16, - "learning_rate": 9.721532042567157e-06, - "loss": 0.0257, + "learning_rate": 1.9737568845271716e-05, + "loss": 0.0395, "step": 110625 }, { "epoch": 5.16, - "learning_rate": 9.721063241292017e-06, - "loss": 0.0665, + "learning_rate": 1.9737100775435696e-05, + "loss": 0.0479, "step": 110630 }, { "epoch": 5.16, - "learning_rate": 9.720594440016877e-06, - "loss": 0.0439, + "learning_rate": 1.9736632705599676e-05, + "loss": 0.0368, "step": 110635 }, { "epoch": 5.16, - "learning_rate": 9.720125638741739e-06, - "loss": 0.0854, + "learning_rate": 1.9736164635763656e-05, + "loss": 0.0793, "step": 110640 }, { "epoch": 5.16, - "learning_rate": 9.719656837466599e-06, - "loss": 0.1346, + "learning_rate": 1.9735696565927636e-05, + "loss": 0.171, "step": 110645 }, { "epoch": 5.16, - "learning_rate": 9.719188036191459e-06, - "loss": 0.1229, + "learning_rate": 1.9735228496091615e-05, + "loss": 0.0557, "step": 110650 }, { "epoch": 5.16, - "learning_rate": 9.71871923491632e-06, - "loss": 0.2846, + "learning_rate": 1.97347604262556e-05, + "loss": 0.1604, "step": 110655 }, { "epoch": 5.16, - "learning_rate": 9.71825043364118e-06, - "loss": 0.1014, + "learning_rate": 1.973429235641958e-05, + "loss": 0.1022, "step": 110660 }, { "epoch": 5.16, - "learning_rate": 9.717781632366042e-06, - "loss": 0.0192, + "learning_rate": 1.973382428658356e-05, + "loss": 0.0333, "step": 110665 }, { "epoch": 5.16, - "learning_rate": 9.717312831090902e-06, - "loss": 0.0153, + "learning_rate": 1.9733356216747538e-05, + "loss": 0.0214, "step": 110670 }, { "epoch": 5.16, - "learning_rate": 9.716844029815762e-06, - "loss": 0.0637, + "learning_rate": 1.973288814691152e-05, + "loss": 0.0072, "step": 110675 }, { "epoch": 5.16, - "learning_rate": 9.716375228540622e-06, - "loss": 0.0521, + "learning_rate": 1.97324200770755e-05, + "loss": 0.0539, "step": 110680 }, { "epoch": 5.16, - "learning_rate": 9.715906427265483e-06, - "loss": 0.0251, + "learning_rate": 1.973195200723948e-05, + "loss": 0.111, "step": 110685 }, { "epoch": 5.16, - "learning_rate": 9.715437625990343e-06, - "loss": 0.0877, + "learning_rate": 1.973148393740346e-05, + "loss": 0.082, "step": 110690 }, { "epoch": 5.17, - "learning_rate": 9.714968824715205e-06, - "loss": 0.1777, + "learning_rate": 1.9731015867567444e-05, + "loss": 0.0596, "step": 110695 }, { "epoch": 5.17, - "learning_rate": 9.714500023440065e-06, - "loss": 0.117, + "learning_rate": 1.973054779773142e-05, + "loss": 0.1788, "step": 110700 }, { "epoch": 5.17, - "learning_rate": 9.714031222164925e-06, - "loss": 0.3445, + "learning_rate": 1.97300797278954e-05, + "loss": 0.4119, "step": 110705 }, { "epoch": 5.17, - "learning_rate": 9.713562420889786e-06, - "loss": 0.0773, + "learning_rate": 1.9729611658059383e-05, + "loss": 0.0922, "step": 110710 }, { "epoch": 5.17, - "learning_rate": 9.713093619614646e-06, - "loss": 0.0227, + "learning_rate": 1.9729143588223363e-05, + "loss": 0.0367, "step": 110715 }, { "epoch": 5.17, - "learning_rate": 9.712624818339506e-06, - "loss": 0.0136, + "learning_rate": 1.9728675518387343e-05, + "loss": 0.014, "step": 110720 }, { "epoch": 5.17, - "learning_rate": 9.712156017064368e-06, - "loss": 0.1036, + "learning_rate": 1.9728207448551323e-05, + "loss": 0.0198, "step": 110725 }, { "epoch": 5.17, - "learning_rate": 9.711687215789228e-06, - "loss": 0.0342, + "learning_rate": 1.9727739378715306e-05, + "loss": 0.0265, "step": 110730 }, { "epoch": 5.17, - "learning_rate": 9.71121841451409e-06, - "loss": 0.0781, + "learning_rate": 1.9727271308879286e-05, + "loss": 0.085, "step": 110735 }, { "epoch": 5.17, - "learning_rate": 9.71074961323895e-06, - "loss": 0.0958, + "learning_rate": 1.9726803239043266e-05, + "loss": 0.0348, "step": 110740 }, { "epoch": 5.17, - "learning_rate": 9.710280811963809e-06, - "loss": 0.0397, + "learning_rate": 1.9726335169207246e-05, + "loss": 0.051, "step": 110745 }, { "epoch": 5.17, - "learning_rate": 9.709812010688669e-06, - "loss": 0.1622, + "learning_rate": 1.972586709937123e-05, + "loss": 0.2785, "step": 110750 }, { "epoch": 5.17, - "learning_rate": 9.70934320941353e-06, - "loss": 0.1598, + "learning_rate": 1.972539902953521e-05, + "loss": 0.2824, "step": 110755 }, { "epoch": 5.17, - "learning_rate": 9.70887440813839e-06, - "loss": 0.0718, + "learning_rate": 1.9724930959699185e-05, + "loss": 0.0585, "step": 110760 }, { "epoch": 5.17, - "learning_rate": 9.708405606863252e-06, - "loss": 0.0014, + "learning_rate": 1.9724462889863168e-05, + "loss": 0.0155, "step": 110765 }, { "epoch": 5.17, - "learning_rate": 9.707936805588112e-06, - "loss": 0.0651, + "learning_rate": 1.9723994820027148e-05, + "loss": 0.0977, "step": 110770 }, { "epoch": 5.17, - "learning_rate": 9.707468004312972e-06, - "loss": 0.0471, + "learning_rate": 1.9723526750191128e-05, + "loss": 0.0207, "step": 110775 }, { "epoch": 5.17, - "learning_rate": 9.706999203037834e-06, - "loss": 0.0614, + "learning_rate": 1.9723058680355108e-05, + "loss": 0.0192, "step": 110780 }, { "epoch": 5.17, - "learning_rate": 9.706530401762694e-06, - "loss": 0.1316, + "learning_rate": 1.972259061051909e-05, + "loss": 0.0715, "step": 110785 }, { "epoch": 5.17, - "learning_rate": 9.706061600487554e-06, - "loss": 0.0523, + "learning_rate": 1.972212254068307e-05, + "loss": 0.0608, "step": 110790 }, { "epoch": 5.17, - "learning_rate": 9.705592799212414e-06, - "loss": 0.0895, + "learning_rate": 1.972165447084705e-05, + "loss": 0.061, "step": 110795 }, { "epoch": 5.17, - "learning_rate": 9.705123997937275e-06, - "loss": 0.2817, + "learning_rate": 1.972118640101103e-05, + "loss": 0.0875, "step": 110800 }, { "epoch": 5.17, - "learning_rate": 9.704655196662137e-06, - "loss": 0.2633, + "learning_rate": 1.9720718331175014e-05, + "loss": 0.3302, "step": 110805 }, { "epoch": 5.17, - "learning_rate": 9.704186395386997e-06, - "loss": 0.0954, + "learning_rate": 1.9720250261338993e-05, + "loss": 0.0613, "step": 110810 }, { "epoch": 5.17, - "learning_rate": 9.703717594111857e-06, - "loss": 0.03, + "learning_rate": 1.9719782191502973e-05, + "loss": 0.0608, "step": 110815 }, { "epoch": 5.17, - "learning_rate": 9.703248792836717e-06, - "loss": 0.0293, + "learning_rate": 1.9719314121666956e-05, + "loss": 0.0223, "step": 110820 }, { "epoch": 5.17, - "learning_rate": 9.702779991561578e-06, - "loss": 0.0626, + "learning_rate": 1.9718846051830933e-05, + "loss": 0.0359, "step": 110825 }, { "epoch": 5.17, - "learning_rate": 9.702311190286438e-06, - "loss": 0.0295, + "learning_rate": 1.9718377981994913e-05, + "loss": 0.0732, "step": 110830 }, { "epoch": 5.17, - "learning_rate": 9.7018423890113e-06, - "loss": 0.0541, + "learning_rate": 1.9717909912158892e-05, + "loss": 0.0487, "step": 110835 }, { "epoch": 5.17, - "learning_rate": 9.70137358773616e-06, - "loss": 0.0424, + "learning_rate": 1.9717441842322876e-05, + "loss": 0.1954, "step": 110840 }, { "epoch": 5.17, - "learning_rate": 9.70090478646102e-06, - "loss": 0.0833, + "learning_rate": 1.9716973772486855e-05, + "loss": 0.0865, "step": 110845 }, { "epoch": 5.17, - "learning_rate": 9.700435985185881e-06, - "loss": 0.1339, + "learning_rate": 1.9716505702650835e-05, + "loss": 0.1747, "step": 110850 }, { "epoch": 5.17, - "learning_rate": 9.699967183910741e-06, - "loss": 0.1832, + "learning_rate": 1.9716037632814815e-05, + "loss": 0.2356, "step": 110855 }, { "epoch": 5.17, - "learning_rate": 9.699498382635601e-06, - "loss": 0.0981, + "learning_rate": 1.97155695629788e-05, + "loss": 0.0584, "step": 110860 }, { "epoch": 5.17, - "learning_rate": 9.699029581360461e-06, - "loss": 0.0074, + "learning_rate": 1.9715101493142778e-05, + "loss": 0.0236, "step": 110865 }, { "epoch": 5.17, - "learning_rate": 9.698560780085323e-06, - "loss": 0.0129, + "learning_rate": 1.9714633423306758e-05, + "loss": 0.082, "step": 110870 }, { "epoch": 5.17, - "learning_rate": 9.698091978810184e-06, - "loss": 0.0642, + "learning_rate": 1.9714165353470738e-05, + "loss": 0.074, "step": 110875 }, { "epoch": 5.17, - "learning_rate": 9.697623177535044e-06, - "loss": 0.0414, + "learning_rate": 1.971369728363472e-05, + "loss": 0.0306, "step": 110880 }, { "epoch": 5.17, - "learning_rate": 9.697154376259904e-06, - "loss": 0.1197, + "learning_rate": 1.97132292137987e-05, + "loss": 0.0617, "step": 110885 }, { "epoch": 5.17, - "learning_rate": 9.696685574984764e-06, - "loss": 0.0702, + "learning_rate": 1.9712761143962677e-05, + "loss": 0.0567, "step": 110890 }, { "epoch": 5.17, - "learning_rate": 9.696216773709626e-06, - "loss": 0.0753, + "learning_rate": 1.971229307412666e-05, + "loss": 0.2122, "step": 110895 }, { "epoch": 5.17, - "learning_rate": 9.695747972434486e-06, - "loss": 0.0906, + "learning_rate": 1.971182500429064e-05, + "loss": 0.0813, "step": 110900 }, { "epoch": 5.17, - "learning_rate": 9.695279171159346e-06, - "loss": 0.2717, + "learning_rate": 1.971135693445462e-05, + "loss": 0.1831, "step": 110905 }, { "epoch": 5.18, - "learning_rate": 9.694810369884207e-06, - "loss": 0.079, + "learning_rate": 1.97108888646186e-05, + "loss": 0.0841, "step": 110910 }, { "epoch": 5.18, - "learning_rate": 9.694341568609067e-06, - "loss": 0.0099, + "learning_rate": 1.9710420794782583e-05, + "loss": 0.012, "step": 110915 }, { "epoch": 5.18, - "learning_rate": 9.693872767333929e-06, - "loss": 0.0995, + "learning_rate": 1.9709952724946563e-05, + "loss": 0.0085, "step": 110920 }, { "epoch": 5.18, - "learning_rate": 9.693403966058789e-06, - "loss": 0.0321, + "learning_rate": 1.9709484655110543e-05, + "loss": 0.0407, "step": 110925 }, { "epoch": 5.18, - "learning_rate": 9.692935164783649e-06, - "loss": 0.0265, + "learning_rate": 1.9709016585274523e-05, + "loss": 0.0195, "step": 110930 }, { "epoch": 5.18, - "learning_rate": 9.692466363508509e-06, - "loss": 0.0334, + "learning_rate": 1.9708548515438506e-05, + "loss": 0.0296, "step": 110935 }, { "epoch": 5.18, - "learning_rate": 9.69199756223337e-06, - "loss": 0.1348, + "learning_rate": 1.9708080445602486e-05, + "loss": 0.0288, "step": 110940 }, { "epoch": 5.18, - "learning_rate": 9.69152876095823e-06, - "loss": 0.1584, + "learning_rate": 1.9707612375766465e-05, + "loss": 0.0627, "step": 110945 }, { "epoch": 5.18, - "learning_rate": 9.691059959683092e-06, - "loss": 0.1126, + "learning_rate": 1.9707144305930445e-05, + "loss": 0.0905, "step": 110950 }, { "epoch": 5.18, - "learning_rate": 9.690591158407952e-06, - "loss": 0.3161, + "learning_rate": 1.9706676236094425e-05, + "loss": 0.2121, "step": 110955 }, { "epoch": 5.18, - "learning_rate": 9.690122357132812e-06, - "loss": 0.0961, + "learning_rate": 1.9706208166258405e-05, + "loss": 0.0716, "step": 110960 }, { "epoch": 5.18, - "learning_rate": 9.689653555857673e-06, - "loss": 0.0145, + "learning_rate": 1.9705740096422385e-05, + "loss": 0.0658, "step": 110965 }, { "epoch": 5.18, - "learning_rate": 9.689184754582533e-06, - "loss": 0.0359, + "learning_rate": 1.9705272026586368e-05, + "loss": 0.0166, "step": 110970 }, { "epoch": 5.18, - "learning_rate": 9.688715953307393e-06, - "loss": 0.0614, + "learning_rate": 1.9704803956750348e-05, + "loss": 0.0147, "step": 110975 }, { "epoch": 5.18, - "learning_rate": 9.688247152032255e-06, - "loss": 0.0338, + "learning_rate": 1.9704335886914328e-05, + "loss": 0.0395, "step": 110980 }, { "epoch": 5.18, - "learning_rate": 9.687778350757115e-06, - "loss": 0.0604, + "learning_rate": 1.9703867817078307e-05, + "loss": 0.0582, "step": 110985 }, { "epoch": 5.18, - "learning_rate": 9.687309549481976e-06, - "loss": 0.0677, + "learning_rate": 1.970339974724229e-05, + "loss": 0.1089, "step": 110990 }, { "epoch": 5.18, - "learning_rate": 9.686840748206836e-06, - "loss": 0.1014, + "learning_rate": 1.970293167740627e-05, + "loss": 0.2325, "step": 110995 }, { "epoch": 5.18, - "learning_rate": 9.686371946931696e-06, - "loss": 0.1705, + "learning_rate": 1.970246360757025e-05, + "loss": 0.107, "step": 111000 }, { "epoch": 5.18, - "learning_rate": 9.685903145656556e-06, - "loss": 0.1605, + "learning_rate": 1.9701995537734233e-05, + "loss": 0.2474, "step": 111005 }, { "epoch": 5.18, - "learning_rate": 9.685434344381418e-06, - "loss": 0.1118, + "learning_rate": 1.9701527467898213e-05, + "loss": 0.0762, "step": 111010 }, { "epoch": 5.18, - "learning_rate": 9.684965543106278e-06, - "loss": 0.0396, + "learning_rate": 1.970105939806219e-05, + "loss": 0.0072, "step": 111015 }, { "epoch": 5.18, - "learning_rate": 9.68449674183114e-06, - "loss": 0.0123, + "learning_rate": 1.970059132822617e-05, + "loss": 0.0369, "step": 111020 }, { "epoch": 5.18, - "learning_rate": 9.684027940556e-06, - "loss": 0.0372, + "learning_rate": 1.9700123258390153e-05, + "loss": 0.0663, "step": 111025 }, { "epoch": 5.18, - "learning_rate": 9.68355913928086e-06, - "loss": 0.0584, + "learning_rate": 1.9699655188554132e-05, + "loss": 0.0528, "step": 111030 }, { "epoch": 5.18, - "learning_rate": 9.68309033800572e-06, - "loss": 0.0816, + "learning_rate": 1.9699187118718112e-05, + "loss": 0.0774, "step": 111035 }, { "epoch": 5.18, - "learning_rate": 9.68262153673058e-06, - "loss": 0.0772, + "learning_rate": 1.9698719048882092e-05, + "loss": 0.0948, "step": 111040 }, { "epoch": 5.18, - "learning_rate": 9.68215273545544e-06, - "loss": 0.0797, + "learning_rate": 1.9698250979046075e-05, + "loss": 0.1069, "step": 111045 }, { "epoch": 5.18, - "learning_rate": 9.681683934180302e-06, - "loss": 0.1896, + "learning_rate": 1.9697782909210055e-05, + "loss": 0.1404, "step": 111050 }, { "epoch": 5.18, - "learning_rate": 9.681215132905162e-06, - "loss": 0.2032, + "learning_rate": 1.9697314839374035e-05, + "loss": 0.2589, "step": 111055 }, { "epoch": 5.18, - "learning_rate": 9.680746331630024e-06, - "loss": 0.069, + "learning_rate": 1.9696846769538018e-05, + "loss": 0.1131, "step": 111060 }, { "epoch": 5.18, - "learning_rate": 9.680277530354884e-06, - "loss": 0.0224, + "learning_rate": 1.9696378699701998e-05, + "loss": 0.0044, "step": 111065 }, { "epoch": 5.18, - "learning_rate": 9.679808729079744e-06, - "loss": 0.0307, + "learning_rate": 1.9695910629865978e-05, + "loss": 0.0492, "step": 111070 }, { "epoch": 5.18, - "learning_rate": 9.679339927804604e-06, - "loss": 0.0233, + "learning_rate": 1.9695442560029958e-05, + "loss": 0.0269, "step": 111075 }, { "epoch": 5.18, - "learning_rate": 9.678871126529465e-06, - "loss": 0.0791, + "learning_rate": 1.9694974490193937e-05, + "loss": 0.0845, "step": 111080 }, { "epoch": 5.18, - "learning_rate": 9.678402325254325e-06, - "loss": 0.0779, + "learning_rate": 1.9694506420357917e-05, + "loss": 0.0111, "step": 111085 }, { "epoch": 5.18, - "learning_rate": 9.677933523979187e-06, - "loss": 0.05, + "learning_rate": 1.9694038350521897e-05, + "loss": 0.1043, "step": 111090 }, { "epoch": 5.18, - "learning_rate": 9.677464722704047e-06, - "loss": 0.1181, + "learning_rate": 1.9693570280685877e-05, + "loss": 0.0694, "step": 111095 }, { "epoch": 5.18, - "learning_rate": 9.676995921428907e-06, - "loss": 0.1765, + "learning_rate": 1.969310221084986e-05, + "loss": 0.2383, "step": 111100 }, { "epoch": 5.18, - "learning_rate": 9.676527120153768e-06, - "loss": 0.263, + "learning_rate": 1.969263414101384e-05, + "loss": 0.1431, "step": 111105 }, { "epoch": 5.18, - "learning_rate": 9.676058318878628e-06, - "loss": 0.0956, + "learning_rate": 1.969216607117782e-05, + "loss": 0.0978, "step": 111110 }, { "epoch": 5.18, - "learning_rate": 9.675589517603488e-06, - "loss": 0.02, + "learning_rate": 1.96916980013418e-05, + "loss": 0.027, "step": 111115 }, { "epoch": 5.19, - "learning_rate": 9.675120716328348e-06, - "loss": 0.0617, + "learning_rate": 1.9691229931505783e-05, + "loss": 0.0469, "step": 111120 }, { "epoch": 5.19, - "learning_rate": 9.67465191505321e-06, - "loss": 0.0207, + "learning_rate": 1.9690761861669763e-05, + "loss": 0.0079, "step": 111125 }, { "epoch": 5.19, - "learning_rate": 9.674183113778071e-06, - "loss": 0.0165, + "learning_rate": 1.9690293791833742e-05, + "loss": 0.0597, "step": 111130 }, { "epoch": 5.19, - "learning_rate": 9.673714312502931e-06, - "loss": 0.0235, + "learning_rate": 1.9689825721997726e-05, + "loss": 0.0752, "step": 111135 }, { "epoch": 5.19, - "learning_rate": 9.673245511227791e-06, - "loss": 0.0613, + "learning_rate": 1.9689357652161702e-05, + "loss": 0.0729, "step": 111140 }, { "epoch": 5.19, - "learning_rate": 9.672776709952651e-06, - "loss": 0.0705, + "learning_rate": 1.9688889582325682e-05, + "loss": 0.1438, "step": 111145 }, { "epoch": 5.19, - "learning_rate": 9.672307908677513e-06, - "loss": 0.166, + "learning_rate": 1.968842151248966e-05, + "loss": 0.1069, "step": 111150 }, { "epoch": 5.19, - "learning_rate": 9.671839107402373e-06, - "loss": 0.1962, + "learning_rate": 1.9687953442653645e-05, + "loss": 0.2336, "step": 111155 }, { "epoch": 5.19, - "learning_rate": 9.671370306127233e-06, - "loss": 0.1197, + "learning_rate": 1.9687485372817625e-05, + "loss": 0.1066, "step": 111160 }, { "epoch": 5.19, - "learning_rate": 9.670901504852094e-06, - "loss": 0.0283, + "learning_rate": 1.9687017302981604e-05, + "loss": 0.0093, "step": 111165 }, { "epoch": 5.19, - "learning_rate": 9.670432703576954e-06, - "loss": 0.0122, + "learning_rate": 1.9686549233145584e-05, + "loss": 0.0175, "step": 111170 }, { "epoch": 5.19, - "learning_rate": 9.669963902301816e-06, - "loss": 0.0291, + "learning_rate": 1.9686081163309567e-05, + "loss": 0.0336, "step": 111175 }, { "epoch": 5.19, - "learning_rate": 9.669495101026676e-06, - "loss": 0.0279, + "learning_rate": 1.9685613093473547e-05, + "loss": 0.0291, "step": 111180 }, { "epoch": 5.19, - "learning_rate": 9.669026299751536e-06, - "loss": 0.0471, + "learning_rate": 1.9685145023637527e-05, + "loss": 0.0956, "step": 111185 }, { "epoch": 5.19, - "learning_rate": 9.668557498476396e-06, - "loss": 0.0318, + "learning_rate": 1.968467695380151e-05, + "loss": 0.1322, "step": 111190 }, { "epoch": 5.19, - "learning_rate": 9.668088697201257e-06, - "loss": 0.0819, + "learning_rate": 1.968420888396549e-05, + "loss": 0.0762, "step": 111195 }, { "epoch": 5.19, - "learning_rate": 9.667619895926117e-06, - "loss": 0.0862, + "learning_rate": 1.968374081412947e-05, + "loss": 0.1447, "step": 111200 }, { "epoch": 5.19, - "learning_rate": 9.667151094650979e-06, - "loss": 0.2278, + "learning_rate": 1.9683272744293446e-05, + "loss": 0.267, "step": 111205 }, { "epoch": 5.19, - "learning_rate": 9.666682293375839e-06, - "loss": 0.0734, + "learning_rate": 1.968280467445743e-05, + "loss": 0.0827, "step": 111210 }, { "epoch": 5.19, - "learning_rate": 9.666213492100699e-06, - "loss": 0.0034, + "learning_rate": 1.968233660462141e-05, + "loss": 0.0192, "step": 111215 }, { "epoch": 5.19, - "learning_rate": 9.66574469082556e-06, - "loss": 0.0454, + "learning_rate": 1.968186853478539e-05, + "loss": 0.0261, "step": 111220 }, { "epoch": 5.19, - "learning_rate": 9.66527588955042e-06, - "loss": 0.0222, + "learning_rate": 1.968140046494937e-05, + "loss": 0.0283, "step": 111225 }, { "epoch": 5.19, - "learning_rate": 9.66480708827528e-06, - "loss": 0.0421, + "learning_rate": 1.9680932395113352e-05, + "loss": 0.0409, "step": 111230 }, { "epoch": 5.19, - "learning_rate": 9.664338287000142e-06, - "loss": 0.0392, + "learning_rate": 1.9680464325277332e-05, + "loss": 0.0565, "step": 111235 }, { "epoch": 5.19, - "learning_rate": 9.663869485725002e-06, - "loss": 0.0903, + "learning_rate": 1.9679996255441312e-05, + "loss": 0.037, "step": 111240 }, { "epoch": 5.19, - "learning_rate": 9.663400684449863e-06, - "loss": 0.0414, + "learning_rate": 1.9679528185605295e-05, + "loss": 0.082, "step": 111245 }, { "epoch": 5.19, - "learning_rate": 9.662931883174723e-06, - "loss": 0.1924, + "learning_rate": 1.9679060115769275e-05, + "loss": 0.1654, "step": 111250 }, { "epoch": 5.19, - "learning_rate": 9.662463081899583e-06, - "loss": 0.314, + "learning_rate": 1.9678592045933255e-05, + "loss": 0.2135, "step": 111255 }, { "epoch": 5.19, - "learning_rate": 9.661994280624443e-06, - "loss": 0.1274, + "learning_rate": 1.9678123976097235e-05, + "loss": 0.0968, "step": 111260 }, { "epoch": 5.19, - "learning_rate": 9.661525479349305e-06, - "loss": 0.0009, + "learning_rate": 1.9677655906261214e-05, + "loss": 0.0184, "step": 111265 }, { "epoch": 5.19, - "learning_rate": 9.661056678074165e-06, - "loss": 0.0254, + "learning_rate": 1.9677187836425194e-05, + "loss": 0.0273, "step": 111270 }, { "epoch": 5.19, - "learning_rate": 9.660587876799026e-06, - "loss": 0.0338, + "learning_rate": 1.9676719766589174e-05, + "loss": 0.0492, "step": 111275 }, { "epoch": 5.19, - "learning_rate": 9.660119075523886e-06, - "loss": 0.0337, + "learning_rate": 1.9676251696753154e-05, + "loss": 0.0092, "step": 111280 }, { "epoch": 5.19, - "learning_rate": 9.659650274248746e-06, - "loss": 0.0433, + "learning_rate": 1.9675783626917137e-05, + "loss": 0.0195, "step": 111285 }, { "epoch": 5.19, - "learning_rate": 9.659181472973608e-06, - "loss": 0.0439, + "learning_rate": 1.9675315557081117e-05, + "loss": 0.0678, "step": 111290 }, { "epoch": 5.19, - "learning_rate": 9.658712671698468e-06, - "loss": 0.0995, + "learning_rate": 1.9674847487245097e-05, + "loss": 0.0728, "step": 111295 }, { "epoch": 5.19, - "learning_rate": 9.658243870423328e-06, - "loss": 0.0904, + "learning_rate": 1.9674379417409076e-05, + "loss": 0.2535, "step": 111300 }, { "epoch": 5.19, - "learning_rate": 9.65777506914819e-06, - "loss": 0.1863, + "learning_rate": 1.967391134757306e-05, + "loss": 0.426, "step": 111305 }, { "epoch": 5.19, - "learning_rate": 9.65730626787305e-06, - "loss": 0.0394, + "learning_rate": 1.967344327773704e-05, + "loss": 0.0608, "step": 111310 }, { "epoch": 5.19, - "learning_rate": 9.656837466597911e-06, - "loss": 0.026, + "learning_rate": 1.967297520790102e-05, + "loss": 0.0156, "step": 111315 }, { "epoch": 5.19, - "learning_rate": 9.656368665322771e-06, - "loss": 0.0371, + "learning_rate": 1.9672507138065003e-05, + "loss": 0.0193, "step": 111320 }, { "epoch": 5.19, - "learning_rate": 9.655899864047631e-06, - "loss": 0.0225, + "learning_rate": 1.9672039068228982e-05, + "loss": 0.02, "step": 111325 }, { "epoch": 5.19, - "learning_rate": 9.65543106277249e-06, - "loss": 0.0385, + "learning_rate": 1.967157099839296e-05, + "loss": 0.0276, "step": 111330 }, { "epoch": 5.2, - "learning_rate": 9.654962261497352e-06, - "loss": 0.039, + "learning_rate": 1.967110292855694e-05, + "loss": 0.0229, "step": 111335 }, { "epoch": 5.2, - "learning_rate": 9.654493460222212e-06, - "loss": 0.0777, + "learning_rate": 1.9670634858720922e-05, + "loss": 0.0713, "step": 111340 }, { "epoch": 5.2, - "learning_rate": 9.654024658947074e-06, - "loss": 0.1588, + "learning_rate": 1.96701667888849e-05, + "loss": 0.0358, "step": 111345 }, { "epoch": 5.2, - "learning_rate": 9.653555857671934e-06, - "loss": 0.1348, + "learning_rate": 1.966969871904888e-05, + "loss": 0.1086, "step": 111350 }, { "epoch": 5.2, - "learning_rate": 9.653087056396794e-06, - "loss": 0.308, + "learning_rate": 1.966923064921286e-05, + "loss": 0.2482, "step": 111355 }, { "epoch": 5.2, - "learning_rate": 9.652618255121655e-06, - "loss": 0.0975, + "learning_rate": 1.9668762579376844e-05, + "loss": 0.0605, "step": 111360 }, { "epoch": 5.2, - "learning_rate": 9.652149453846515e-06, - "loss": 0.0231, + "learning_rate": 1.9668294509540824e-05, + "loss": 0.0677, "step": 111365 }, { "epoch": 5.2, - "learning_rate": 9.651680652571375e-06, - "loss": 0.0098, + "learning_rate": 1.9667826439704804e-05, + "loss": 0.0161, "step": 111370 }, { "epoch": 5.2, - "learning_rate": 9.651211851296235e-06, - "loss": 0.0357, + "learning_rate": 1.9667358369868787e-05, + "loss": 0.0455, "step": 111375 }, { "epoch": 5.2, - "learning_rate": 9.650743050021097e-06, - "loss": 0.0299, + "learning_rate": 1.9666890300032767e-05, + "loss": 0.0679, "step": 111380 }, { "epoch": 5.2, - "learning_rate": 9.650274248745959e-06, - "loss": 0.1363, + "learning_rate": 1.9666422230196747e-05, + "loss": 0.0935, "step": 111385 }, { "epoch": 5.2, - "learning_rate": 9.649805447470818e-06, - "loss": 0.0921, + "learning_rate": 1.9665954160360727e-05, + "loss": 0.0661, "step": 111390 }, { "epoch": 5.2, - "learning_rate": 9.649336646195678e-06, - "loss": 0.1071, + "learning_rate": 1.9665486090524707e-05, + "loss": 0.1018, "step": 111395 }, { "epoch": 5.2, - "learning_rate": 9.648867844920538e-06, - "loss": 0.189, + "learning_rate": 1.9665018020688686e-05, + "loss": 0.0815, "step": 111400 }, { "epoch": 5.2, - "learning_rate": 9.6483990436454e-06, - "loss": 0.2794, + "learning_rate": 1.9664549950852666e-05, + "loss": 0.2701, "step": 111405 }, { "epoch": 5.2, - "learning_rate": 9.64793024237026e-06, - "loss": 0.119, + "learning_rate": 1.9664081881016646e-05, + "loss": 0.0921, "step": 111410 }, { "epoch": 5.2, - "learning_rate": 9.64746144109512e-06, - "loss": 0.0195, + "learning_rate": 1.966361381118063e-05, + "loss": 0.0212, "step": 111415 }, { "epoch": 5.2, - "learning_rate": 9.646992639819981e-06, - "loss": 0.0185, + "learning_rate": 1.966314574134461e-05, + "loss": 0.0067, "step": 111420 }, { "epoch": 5.2, - "learning_rate": 9.646523838544841e-06, - "loss": 0.0481, + "learning_rate": 1.966267767150859e-05, + "loss": 0.0183, "step": 111425 }, { "epoch": 5.2, - "learning_rate": 9.646055037269703e-06, - "loss": 0.0161, + "learning_rate": 1.9662209601672572e-05, + "loss": 0.047, "step": 111430 }, { "epoch": 5.2, - "learning_rate": 9.645586235994563e-06, - "loss": 0.1384, + "learning_rate": 1.9661741531836552e-05, + "loss": 0.0329, "step": 111435 }, { "epoch": 5.2, - "learning_rate": 9.645117434719423e-06, - "loss": 0.0685, + "learning_rate": 1.9661273462000532e-05, + "loss": 0.0714, "step": 111440 }, { "epoch": 5.2, - "learning_rate": 9.644648633444283e-06, - "loss": 0.1083, + "learning_rate": 1.966080539216451e-05, + "loss": 0.1642, "step": 111445 }, { "epoch": 5.2, - "learning_rate": 9.644179832169144e-06, - "loss": 0.163, + "learning_rate": 1.9660337322328495e-05, + "loss": 0.1374, "step": 111450 }, { "epoch": 5.2, - "learning_rate": 9.643711030894004e-06, - "loss": 0.2136, + "learning_rate": 1.965986925249247e-05, + "loss": 0.3429, "step": 111455 }, { "epoch": 5.2, - "learning_rate": 9.643242229618866e-06, - "loss": 0.0811, + "learning_rate": 1.965940118265645e-05, + "loss": 0.0749, "step": 111460 }, { "epoch": 5.2, - "learning_rate": 9.642773428343726e-06, - "loss": 0.0723, + "learning_rate": 1.965893311282043e-05, + "loss": 0.0049, "step": 111465 }, { "epoch": 5.2, - "learning_rate": 9.642304627068586e-06, - "loss": 0.0389, + "learning_rate": 1.9658465042984414e-05, + "loss": 0.0224, "step": 111470 }, { "epoch": 5.2, - "learning_rate": 9.641835825793448e-06, - "loss": 0.0227, + "learning_rate": 1.9657996973148394e-05, + "loss": 0.0377, "step": 111475 }, { "epoch": 5.2, - "learning_rate": 9.641367024518307e-06, - "loss": 0.035, + "learning_rate": 1.9657528903312374e-05, + "loss": 0.0437, "step": 111480 }, { "epoch": 5.2, - "learning_rate": 9.640898223243167e-06, - "loss": 0.1291, + "learning_rate": 1.9657060833476353e-05, + "loss": 0.0387, "step": 111485 }, { "epoch": 5.2, - "learning_rate": 9.640429421968029e-06, - "loss": 0.0659, + "learning_rate": 1.9656592763640337e-05, + "loss": 0.0173, "step": 111490 }, { "epoch": 5.2, - "learning_rate": 9.639960620692889e-06, - "loss": 0.0662, + "learning_rate": 1.9656124693804316e-05, + "loss": 0.0637, "step": 111495 }, { "epoch": 5.2, - "learning_rate": 9.63949181941775e-06, - "loss": 0.1634, + "learning_rate": 1.9655656623968296e-05, + "loss": 0.0824, "step": 111500 }, { "epoch": 5.2, - "learning_rate": 9.63902301814261e-06, - "loss": 0.205, + "learning_rate": 1.965518855413228e-05, + "loss": 0.3628, "step": 111505 }, { "epoch": 5.2, - "learning_rate": 9.63855421686747e-06, - "loss": 0.1024, + "learning_rate": 1.965472048429626e-05, + "loss": 0.0806, "step": 111510 }, { "epoch": 5.2, - "learning_rate": 9.63808541559233e-06, - "loss": 0.0215, + "learning_rate": 1.965425241446024e-05, + "loss": 0.0205, "step": 111515 }, { "epoch": 5.2, - "learning_rate": 9.637616614317192e-06, - "loss": 0.0243, + "learning_rate": 1.9653784344624216e-05, + "loss": 0.0191, "step": 111520 }, { "epoch": 5.2, - "learning_rate": 9.637147813042052e-06, - "loss": 0.0472, + "learning_rate": 1.96533162747882e-05, + "loss": 0.0597, "step": 111525 }, { "epoch": 5.2, - "learning_rate": 9.636679011766914e-06, - "loss": 0.0452, + "learning_rate": 1.965284820495218e-05, + "loss": 0.0344, "step": 111530 }, { "epoch": 5.2, - "learning_rate": 9.636210210491773e-06, - "loss": 0.0496, + "learning_rate": 1.965238013511616e-05, + "loss": 0.0529, "step": 111535 }, { "epoch": 5.2, - "learning_rate": 9.635741409216633e-06, - "loss": 0.0587, + "learning_rate": 1.9651912065280138e-05, + "loss": 0.0434, "step": 111540 }, { "epoch": 5.2, - "learning_rate": 9.635272607941495e-06, - "loss": 0.0864, + "learning_rate": 1.965144399544412e-05, + "loss": 0.0558, "step": 111545 }, { "epoch": 5.21, - "learning_rate": 9.634803806666355e-06, - "loss": 0.1494, + "learning_rate": 1.96509759256081e-05, + "loss": 0.0847, "step": 111550 }, { "epoch": 5.21, - "learning_rate": 9.634335005391215e-06, - "loss": 0.3651, + "learning_rate": 1.965050785577208e-05, + "loss": 0.377, "step": 111555 }, { "epoch": 5.21, - "learning_rate": 9.633866204116077e-06, - "loss": 0.1454, + "learning_rate": 1.9650039785936064e-05, + "loss": 0.1218, "step": 111560 }, { "epoch": 5.21, - "learning_rate": 9.633397402840936e-06, - "loss": 0.0134, + "learning_rate": 1.9649571716100044e-05, + "loss": 0.0077, "step": 111565 }, { "epoch": 5.21, - "learning_rate": 9.632928601565798e-06, - "loss": 0.0202, + "learning_rate": 1.9649103646264024e-05, + "loss": 0.022, "step": 111570 }, { "epoch": 5.21, - "learning_rate": 9.632459800290658e-06, - "loss": 0.0516, + "learning_rate": 1.9648635576428004e-05, + "loss": 0.0306, "step": 111575 }, { "epoch": 5.21, - "learning_rate": 9.631990999015518e-06, - "loss": 0.0357, + "learning_rate": 1.9648167506591984e-05, + "loss": 0.0459, "step": 111580 }, { "epoch": 5.21, - "learning_rate": 9.631522197740378e-06, - "loss": 0.1427, + "learning_rate": 1.9647699436755963e-05, + "loss": 0.0247, "step": 111585 }, { "epoch": 5.21, - "learning_rate": 9.631053396465238e-06, - "loss": 0.093, + "learning_rate": 1.9647231366919943e-05, + "loss": 0.0693, "step": 111590 }, { "epoch": 5.21, - "learning_rate": 9.6305845951901e-06, - "loss": 0.0727, + "learning_rate": 1.9646763297083923e-05, + "loss": 0.0899, "step": 111595 }, { "epoch": 5.21, - "learning_rate": 9.630115793914961e-06, - "loss": 0.1679, + "learning_rate": 1.9646295227247906e-05, + "loss": 0.1582, "step": 111600 }, { "epoch": 5.21, - "learning_rate": 9.629646992639821e-06, - "loss": 0.3079, + "learning_rate": 1.9645827157411886e-05, + "loss": 0.2881, "step": 111605 }, { "epoch": 5.21, - "learning_rate": 9.629178191364681e-06, - "loss": 0.0678, + "learning_rate": 1.9645359087575866e-05, + "loss": 0.0521, "step": 111610 }, { "epoch": 5.21, - "learning_rate": 9.628709390089543e-06, - "loss": 0.0201, + "learning_rate": 1.964489101773985e-05, + "loss": 0.0215, "step": 111615 }, { "epoch": 5.21, - "learning_rate": 9.628240588814403e-06, - "loss": 0.0482, + "learning_rate": 1.964442294790383e-05, + "loss": 0.0771, "step": 111620 }, { "epoch": 5.21, - "learning_rate": 9.627771787539262e-06, - "loss": 0.0246, + "learning_rate": 1.964395487806781e-05, + "loss": 0.0421, "step": 111625 }, { "epoch": 5.21, - "learning_rate": 9.627302986264122e-06, - "loss": 0.0237, + "learning_rate": 1.964348680823179e-05, + "loss": 0.1172, "step": 111630 }, { "epoch": 5.21, - "learning_rate": 9.626834184988984e-06, - "loss": 0.04, + "learning_rate": 1.9643018738395772e-05, + "loss": 0.0541, "step": 111635 }, { "epoch": 5.21, - "learning_rate": 9.626365383713846e-06, - "loss": 0.0834, + "learning_rate": 1.964255066855975e-05, + "loss": 0.0533, "step": 111640 }, { "epoch": 5.21, - "learning_rate": 9.625896582438706e-06, - "loss": 0.0939, + "learning_rate": 1.9642082598723728e-05, + "loss": 0.1241, "step": 111645 }, { "epoch": 5.21, - "learning_rate": 9.625427781163566e-06, - "loss": 0.1839, + "learning_rate": 1.9641614528887708e-05, + "loss": 0.157, "step": 111650 }, { "epoch": 5.21, - "learning_rate": 9.624958979888425e-06, - "loss": 0.479, + "learning_rate": 1.964114645905169e-05, + "loss": 0.2193, "step": 111655 }, { "epoch": 5.21, - "learning_rate": 9.624490178613287e-06, - "loss": 0.0678, + "learning_rate": 1.964067838921567e-05, + "loss": 0.0819, "step": 111660 }, { "epoch": 5.21, - "learning_rate": 9.624021377338147e-06, - "loss": 0.0189, + "learning_rate": 1.964021031937965e-05, + "loss": 0.029, "step": 111665 }, { "epoch": 5.21, - "learning_rate": 9.623552576063007e-06, - "loss": 0.0767, + "learning_rate": 1.9639742249543634e-05, + "loss": 0.0211, "step": 111670 }, { "epoch": 5.21, - "learning_rate": 9.623083774787869e-06, - "loss": 0.0489, + "learning_rate": 1.9639274179707614e-05, + "loss": 0.0458, "step": 111675 }, { "epoch": 5.21, - "learning_rate": 9.622614973512729e-06, - "loss": 0.0195, + "learning_rate": 1.9638806109871593e-05, + "loss": 0.0307, "step": 111680 }, { "epoch": 5.21, - "learning_rate": 9.62214617223759e-06, - "loss": 0.0491, + "learning_rate": 1.9638338040035573e-05, + "loss": 0.056, "step": 111685 }, { "epoch": 5.21, - "learning_rate": 9.62167737096245e-06, - "loss": 0.0982, + "learning_rate": 1.9637869970199556e-05, + "loss": 0.088, "step": 111690 }, { "epoch": 5.21, - "learning_rate": 9.62120856968731e-06, - "loss": 0.1037, + "learning_rate": 1.9637401900363536e-05, + "loss": 0.0491, "step": 111695 }, { "epoch": 5.21, - "learning_rate": 9.62073976841217e-06, - "loss": 0.2179, + "learning_rate": 1.9636933830527516e-05, + "loss": 0.1082, "step": 111700 }, { "epoch": 5.21, - "learning_rate": 9.620270967137032e-06, - "loss": 0.1734, + "learning_rate": 1.9636465760691496e-05, + "loss": 0.1652, "step": 111705 }, { "epoch": 5.21, - "learning_rate": 9.619802165861892e-06, - "loss": 0.1025, + "learning_rate": 1.9635997690855476e-05, + "loss": 0.0682, "step": 111710 }, { "epoch": 5.21, - "learning_rate": 9.619333364586753e-06, - "loss": 0.0164, + "learning_rate": 1.9635529621019456e-05, + "loss": 0.0708, "step": 111715 }, { "epoch": 5.21, - "learning_rate": 9.618864563311613e-06, - "loss": 0.0755, + "learning_rate": 1.9635061551183435e-05, + "loss": 0.0237, "step": 111720 }, { "epoch": 5.21, - "learning_rate": 9.618395762036473e-06, - "loss": 0.0321, + "learning_rate": 1.9634593481347415e-05, + "loss": 0.0134, "step": 111725 }, { "epoch": 5.21, - "learning_rate": 9.617926960761335e-06, - "loss": 0.016, + "learning_rate": 1.96341254115114e-05, + "loss": 0.0708, "step": 111730 }, { "epoch": 5.21, - "learning_rate": 9.617458159486195e-06, - "loss": 0.0218, + "learning_rate": 1.9633657341675378e-05, + "loss": 0.0396, "step": 111735 }, { "epoch": 5.21, - "learning_rate": 9.616989358211054e-06, - "loss": 0.0938, + "learning_rate": 1.9633189271839358e-05, + "loss": 0.0557, "step": 111740 }, { "epoch": 5.21, - "learning_rate": 9.616520556935916e-06, - "loss": 0.0579, + "learning_rate": 1.963272120200334e-05, + "loss": 0.0624, "step": 111745 }, { "epoch": 5.21, - "learning_rate": 9.616051755660776e-06, - "loss": 0.1434, + "learning_rate": 1.963225313216732e-05, + "loss": 0.0826, "step": 111750 }, { "epoch": 5.21, - "learning_rate": 9.615582954385638e-06, - "loss": 0.291, + "learning_rate": 1.96317850623313e-05, + "loss": 0.2036, "step": 111755 }, { "epoch": 5.21, - "learning_rate": 9.615114153110498e-06, - "loss": 0.0967, + "learning_rate": 1.963131699249528e-05, + "loss": 0.107, "step": 111760 }, { "epoch": 5.22, - "learning_rate": 9.614645351835358e-06, - "loss": 0.0102, + "learning_rate": 1.9630848922659264e-05, + "loss": 0.0192, "step": 111765 }, { "epoch": 5.22, - "learning_rate": 9.614176550560217e-06, - "loss": 0.0649, + "learning_rate": 1.963038085282324e-05, + "loss": 0.0308, "step": 111770 }, { "epoch": 5.22, - "learning_rate": 9.613707749285079e-06, - "loss": 0.0443, + "learning_rate": 1.962991278298722e-05, + "loss": 0.0742, "step": 111775 }, { "epoch": 5.22, - "learning_rate": 9.613238948009939e-06, - "loss": 0.0384, + "learning_rate": 1.96294447131512e-05, + "loss": 0.0563, "step": 111780 }, { "epoch": 5.22, - "learning_rate": 9.6127701467348e-06, - "loss": 0.0649, + "learning_rate": 1.9628976643315183e-05, + "loss": 0.1071, "step": 111785 }, { "epoch": 5.22, - "learning_rate": 9.61230134545966e-06, - "loss": 0.0567, + "learning_rate": 1.9628508573479163e-05, + "loss": 0.0712, "step": 111790 }, { "epoch": 5.22, - "learning_rate": 9.61183254418452e-06, - "loss": 0.0485, + "learning_rate": 1.9628040503643143e-05, + "loss": 0.0505, "step": 111795 }, { "epoch": 5.22, - "learning_rate": 9.611363742909382e-06, - "loss": 0.1323, + "learning_rate": 1.9627572433807126e-05, + "loss": 0.1082, "step": 111800 }, { "epoch": 5.22, - "learning_rate": 9.610894941634242e-06, - "loss": 0.2319, + "learning_rate": 1.9627104363971106e-05, + "loss": 0.1755, "step": 111805 }, { "epoch": 5.22, - "learning_rate": 9.610426140359102e-06, - "loss": 0.0972, + "learning_rate": 1.9626636294135086e-05, + "loss": 0.0665, "step": 111810 }, { "epoch": 5.22, - "learning_rate": 9.609957339083964e-06, - "loss": 0.0731, + "learning_rate": 1.9626168224299065e-05, + "loss": 0.0272, "step": 111815 }, { "epoch": 5.22, - "learning_rate": 9.609488537808824e-06, - "loss": 0.0028, + "learning_rate": 1.962570015446305e-05, + "loss": 0.0528, "step": 111820 }, { "epoch": 5.22, - "learning_rate": 9.609019736533685e-06, - "loss": 0.0447, + "learning_rate": 1.962523208462703e-05, + "loss": 0.0286, "step": 111825 }, { "epoch": 5.22, - "learning_rate": 9.608550935258545e-06, - "loss": 0.0537, + "learning_rate": 1.962476401479101e-05, + "loss": 0.0444, "step": 111830 }, { "epoch": 5.22, - "learning_rate": 9.608082133983405e-06, - "loss": 0.059, + "learning_rate": 1.9624295944954985e-05, + "loss": 0.0083, "step": 111835 }, { "epoch": 5.22, - "learning_rate": 9.607613332708265e-06, - "loss": 0.067, + "learning_rate": 1.9623827875118968e-05, + "loss": 0.056, "step": 111840 }, { "epoch": 5.22, - "learning_rate": 9.607144531433125e-06, - "loss": 0.125, + "learning_rate": 1.9623359805282948e-05, + "loss": 0.0516, "step": 111845 }, { "epoch": 5.22, - "learning_rate": 9.606675730157987e-06, - "loss": 0.0797, + "learning_rate": 1.9622891735446928e-05, + "loss": 0.1149, "step": 111850 }, { "epoch": 5.22, - "learning_rate": 9.606206928882848e-06, - "loss": 0.2286, + "learning_rate": 1.962242366561091e-05, + "loss": 0.2653, "step": 111855 }, { "epoch": 5.22, - "learning_rate": 9.605738127607708e-06, - "loss": 0.1629, + "learning_rate": 1.962195559577489e-05, + "loss": 0.0583, "step": 111860 }, { "epoch": 5.22, - "learning_rate": 9.605269326332568e-06, - "loss": 0.0717, + "learning_rate": 1.962148752593887e-05, + "loss": 0.0272, "step": 111865 }, { "epoch": 5.22, - "learning_rate": 9.60480052505743e-06, - "loss": 0.0218, + "learning_rate": 1.962101945610285e-05, + "loss": 0.0361, "step": 111870 }, { "epoch": 5.22, - "learning_rate": 9.60433172378229e-06, - "loss": 0.04, + "learning_rate": 1.9620551386266833e-05, + "loss": 0.0786, "step": 111875 }, { "epoch": 5.22, - "learning_rate": 9.60386292250715e-06, - "loss": 0.0429, + "learning_rate": 1.9620083316430813e-05, + "loss": 0.0156, "step": 111880 }, { "epoch": 5.22, - "learning_rate": 9.60339412123201e-06, - "loss": 0.0769, + "learning_rate": 1.9619615246594793e-05, + "loss": 0.0496, "step": 111885 }, { "epoch": 5.22, - "learning_rate": 9.602925319956871e-06, - "loss": 0.0637, + "learning_rate": 1.9619147176758773e-05, + "loss": 0.1061, "step": 111890 }, { "epoch": 5.22, - "learning_rate": 9.602456518681733e-06, - "loss": 0.0872, + "learning_rate": 1.9618679106922756e-05, + "loss": 0.2002, "step": 111895 }, { "epoch": 5.22, - "learning_rate": 9.601987717406593e-06, - "loss": 0.1529, + "learning_rate": 1.9618211037086733e-05, + "loss": 0.2066, "step": 111900 }, { "epoch": 5.22, - "learning_rate": 9.601518916131453e-06, - "loss": 0.3044, + "learning_rate": 1.9617742967250712e-05, + "loss": 0.2476, "step": 111905 }, { "epoch": 5.22, - "learning_rate": 9.601050114856313e-06, - "loss": 0.1141, + "learning_rate": 1.9617274897414692e-05, + "loss": 0.0779, "step": 111910 }, { "epoch": 5.22, - "learning_rate": 9.600581313581173e-06, - "loss": 0.0324, + "learning_rate": 1.9616806827578675e-05, + "loss": 0.0107, "step": 111915 }, { "epoch": 5.22, - "learning_rate": 9.600112512306034e-06, - "loss": 0.0232, + "learning_rate": 1.9616338757742655e-05, + "loss": 0.0337, "step": 111920 }, { "epoch": 5.22, - "learning_rate": 9.599643711030894e-06, - "loss": 0.0344, + "learning_rate": 1.9615870687906635e-05, + "loss": 0.0045, "step": 111925 }, { "epoch": 5.22, - "learning_rate": 9.599174909755756e-06, - "loss": 0.0322, + "learning_rate": 1.9615402618070618e-05, + "loss": 0.0407, "step": 111930 }, { "epoch": 5.22, - "learning_rate": 9.598706108480616e-06, - "loss": 0.0338, + "learning_rate": 1.9614934548234598e-05, + "loss": 0.1012, "step": 111935 }, { "epoch": 5.22, - "learning_rate": 9.598237307205477e-06, - "loss": 0.0504, + "learning_rate": 1.9614466478398578e-05, + "loss": 0.0699, "step": 111940 }, { "epoch": 5.22, - "learning_rate": 9.597768505930337e-06, - "loss": 0.0789, + "learning_rate": 1.9613998408562558e-05, + "loss": 0.0735, "step": 111945 }, { "epoch": 5.22, - "learning_rate": 9.597299704655197e-06, - "loss": 0.1561, + "learning_rate": 1.961353033872654e-05, + "loss": 0.1774, "step": 111950 }, { "epoch": 5.22, - "learning_rate": 9.596830903380057e-06, - "loss": 0.2588, + "learning_rate": 1.961306226889052e-05, + "loss": 0.4124, "step": 111955 }, { "epoch": 5.22, - "learning_rate": 9.596362102104919e-06, - "loss": 0.0918, + "learning_rate": 1.9612594199054497e-05, + "loss": 0.1023, "step": 111960 }, { "epoch": 5.22, - "learning_rate": 9.595893300829779e-06, - "loss": 0.017, + "learning_rate": 1.9612126129218477e-05, + "loss": 0.0124, "step": 111965 }, { "epoch": 5.22, - "learning_rate": 9.59542449955464e-06, - "loss": 0.0308, + "learning_rate": 1.961165805938246e-05, + "loss": 0.0339, "step": 111970 }, { "epoch": 5.22, - "learning_rate": 9.5949556982795e-06, - "loss": 0.0633, + "learning_rate": 1.961118998954644e-05, + "loss": 0.043, "step": 111975 }, { "epoch": 5.23, - "learning_rate": 9.59448689700436e-06, - "loss": 0.0678, + "learning_rate": 1.961072191971042e-05, + "loss": 0.0424, "step": 111980 }, { "epoch": 5.23, - "learning_rate": 9.594018095729222e-06, - "loss": 0.058, + "learning_rate": 1.9610253849874403e-05, + "loss": 0.0454, "step": 111985 }, { "epoch": 5.23, - "learning_rate": 9.593549294454082e-06, - "loss": 0.0967, + "learning_rate": 1.9609785780038383e-05, + "loss": 0.0227, "step": 111990 }, { "epoch": 5.23, - "learning_rate": 9.593080493178942e-06, - "loss": 0.0807, + "learning_rate": 1.9609317710202363e-05, + "loss": 0.1189, "step": 111995 }, { "epoch": 5.23, - "learning_rate": 9.592611691903803e-06, - "loss": 0.0845, + "learning_rate": 1.9608849640366342e-05, + "loss": 0.1497, "step": 112000 }, { "epoch": 5.23, - "learning_rate": 9.592142890628663e-06, - "loss": 0.132, + "learning_rate": 1.9608381570530326e-05, + "loss": 0.1821, "step": 112005 }, { "epoch": 5.23, - "learning_rate": 9.591674089353525e-06, - "loss": 0.099, + "learning_rate": 1.9607913500694305e-05, + "loss": 0.0637, "step": 112010 }, { "epoch": 5.23, - "learning_rate": 9.591205288078385e-06, - "loss": 0.0097, + "learning_rate": 1.9607445430858285e-05, + "loss": 0.0312, "step": 112015 }, { "epoch": 5.23, - "learning_rate": 9.590736486803245e-06, - "loss": 0.0306, + "learning_rate": 1.9606977361022265e-05, + "loss": 0.0168, "step": 112020 }, { "epoch": 5.23, - "learning_rate": 9.590267685528105e-06, - "loss": 0.0284, + "learning_rate": 1.9606509291186245e-05, + "loss": 0.0196, "step": 112025 }, { "epoch": 5.23, - "learning_rate": 9.589798884252966e-06, - "loss": 0.0683, + "learning_rate": 1.9606041221350225e-05, + "loss": 0.0181, "step": 112030 }, { "epoch": 5.23, - "learning_rate": 9.589330082977826e-06, - "loss": 0.0548, + "learning_rate": 1.9605573151514205e-05, + "loss": 0.0354, "step": 112035 }, { "epoch": 5.23, - "learning_rate": 9.588861281702688e-06, - "loss": 0.0709, + "learning_rate": 1.9605105081678188e-05, + "loss": 0.0099, "step": 112040 }, { "epoch": 5.23, - "learning_rate": 9.588392480427548e-06, - "loss": 0.0962, + "learning_rate": 1.9604637011842168e-05, + "loss": 0.0787, "step": 112045 }, { "epoch": 5.23, - "learning_rate": 9.587923679152408e-06, - "loss": 0.1094, + "learning_rate": 1.9604168942006147e-05, + "loss": 0.1301, "step": 112050 }, { "epoch": 5.23, - "learning_rate": 9.58745487787727e-06, - "loss": 0.3249, + "learning_rate": 1.9603700872170127e-05, + "loss": 0.2297, "step": 112055 }, { "epoch": 5.23, - "learning_rate": 9.58698607660213e-06, - "loss": 0.0767, + "learning_rate": 1.960323280233411e-05, + "loss": 0.1077, "step": 112060 }, { "epoch": 5.23, - "learning_rate": 9.586517275326989e-06, - "loss": 0.0391, + "learning_rate": 1.960276473249809e-05, + "loss": 0.0131, "step": 112065 }, { "epoch": 5.23, - "learning_rate": 9.58604847405185e-06, - "loss": 0.0196, + "learning_rate": 1.960229666266207e-05, + "loss": 0.015, "step": 112070 }, { "epoch": 5.23, - "learning_rate": 9.58557967277671e-06, - "loss": 0.0382, + "learning_rate": 1.960182859282605e-05, + "loss": 0.031, "step": 112075 }, { "epoch": 5.23, - "learning_rate": 9.585110871501572e-06, - "loss": 0.0314, + "learning_rate": 1.9601360522990033e-05, + "loss": 0.0598, "step": 112080 }, { "epoch": 5.23, - "learning_rate": 9.584642070226432e-06, - "loss": 0.0662, + "learning_rate": 1.9600892453154013e-05, + "loss": 0.0245, "step": 112085 }, { "epoch": 5.23, - "learning_rate": 9.584173268951292e-06, - "loss": 0.0838, + "learning_rate": 1.960042438331799e-05, + "loss": 0.123, "step": 112090 }, { "epoch": 5.23, - "learning_rate": 9.583704467676152e-06, - "loss": 0.0725, + "learning_rate": 1.959995631348197e-05, + "loss": 0.0955, "step": 112095 }, { "epoch": 5.23, - "learning_rate": 9.583235666401012e-06, - "loss": 0.1953, + "learning_rate": 1.9599488243645952e-05, + "loss": 0.0778, "step": 112100 }, { "epoch": 5.23, - "learning_rate": 9.582766865125874e-06, - "loss": 0.2514, + "learning_rate": 1.9599020173809932e-05, + "loss": 0.1973, "step": 112105 }, { "epoch": 5.23, - "learning_rate": 9.582298063850735e-06, - "loss": 0.1253, + "learning_rate": 1.9598552103973912e-05, + "loss": 0.0517, "step": 112110 }, { "epoch": 5.23, - "learning_rate": 9.581829262575595e-06, - "loss": 0.013, + "learning_rate": 1.9598084034137895e-05, + "loss": 0.0407, "step": 112115 }, { "epoch": 5.23, - "learning_rate": 9.581360461300455e-06, - "loss": 0.0103, + "learning_rate": 1.9597615964301875e-05, + "loss": 0.0433, "step": 112120 }, { "epoch": 5.23, - "learning_rate": 9.580891660025317e-06, - "loss": 0.0346, + "learning_rate": 1.9597147894465855e-05, + "loss": 0.0469, "step": 112125 }, { "epoch": 5.23, - "learning_rate": 9.580422858750177e-06, - "loss": 0.0409, + "learning_rate": 1.9596679824629835e-05, + "loss": 0.0289, "step": 112130 }, { "epoch": 5.23, - "learning_rate": 9.579954057475037e-06, - "loss": 0.0676, + "learning_rate": 1.9596211754793818e-05, + "loss": 0.0418, "step": 112135 }, { "epoch": 5.23, - "learning_rate": 9.579485256199897e-06, - "loss": 0.0795, + "learning_rate": 1.9595743684957798e-05, + "loss": 0.0387, "step": 112140 }, { "epoch": 5.23, - "learning_rate": 9.579016454924758e-06, - "loss": 0.125, + "learning_rate": 1.9595275615121777e-05, + "loss": 0.0774, "step": 112145 }, { "epoch": 5.23, - "learning_rate": 9.57854765364962e-06, - "loss": 0.0777, + "learning_rate": 1.9594807545285754e-05, + "loss": 0.1102, "step": 112150 }, { "epoch": 5.23, - "learning_rate": 9.57807885237448e-06, - "loss": 0.1965, + "learning_rate": 1.9594339475449737e-05, + "loss": 0.1914, "step": 112155 }, { "epoch": 5.23, - "learning_rate": 9.57761005109934e-06, - "loss": 0.0961, + "learning_rate": 1.9593871405613717e-05, + "loss": 0.0328, "step": 112160 }, { "epoch": 5.23, - "learning_rate": 9.5771412498242e-06, - "loss": 0.0095, + "learning_rate": 1.9593403335777697e-05, + "loss": 0.0186, "step": 112165 }, { "epoch": 5.23, - "learning_rate": 9.57667244854906e-06, - "loss": 0.0606, + "learning_rate": 1.959293526594168e-05, + "loss": 0.054, "step": 112170 }, { "epoch": 5.23, - "learning_rate": 9.576203647273921e-06, - "loss": 0.0411, + "learning_rate": 1.959246719610566e-05, + "loss": 0.0239, "step": 112175 }, { "epoch": 5.23, - "learning_rate": 9.575734845998781e-06, - "loss": 0.0521, + "learning_rate": 1.959199912626964e-05, + "loss": 0.0354, "step": 112180 }, { "epoch": 5.23, - "learning_rate": 9.575266044723643e-06, - "loss": 0.0854, + "learning_rate": 1.959153105643362e-05, + "loss": 0.0277, "step": 112185 }, { "epoch": 5.23, - "learning_rate": 9.574797243448503e-06, - "loss": 0.0624, + "learning_rate": 1.9591062986597603e-05, + "loss": 0.0406, "step": 112190 }, { "epoch": 5.24, - "learning_rate": 9.574328442173364e-06, - "loss": 0.0915, + "learning_rate": 1.9590594916761582e-05, + "loss": 0.0996, "step": 112195 }, { "epoch": 5.24, - "learning_rate": 9.573859640898224e-06, - "loss": 0.1472, + "learning_rate": 1.9590126846925562e-05, + "loss": 0.1688, "step": 112200 }, { "epoch": 5.24, - "learning_rate": 9.573390839623084e-06, - "loss": 0.2083, + "learning_rate": 1.9589658777089542e-05, + "loss": 0.2485, "step": 112205 }, { "epoch": 5.24, - "learning_rate": 9.572922038347944e-06, - "loss": 0.1117, + "learning_rate": 1.9589190707253525e-05, + "loss": 0.071, "step": 112210 }, { "epoch": 5.24, - "learning_rate": 9.572453237072806e-06, - "loss": 0.0546, + "learning_rate": 1.9588722637417502e-05, + "loss": 0.0275, "step": 112215 }, { "epoch": 5.24, - "learning_rate": 9.571984435797666e-06, - "loss": 0.0196, + "learning_rate": 1.958825456758148e-05, + "loss": 0.0164, "step": 112220 }, { "epoch": 5.24, - "learning_rate": 9.571515634522527e-06, - "loss": 0.0796, + "learning_rate": 1.9587786497745465e-05, + "loss": 0.0407, "step": 112225 }, { "epoch": 5.24, - "learning_rate": 9.571046833247387e-06, - "loss": 0.0926, + "learning_rate": 1.9587318427909445e-05, + "loss": 0.077, "step": 112230 }, { "epoch": 5.24, - "learning_rate": 9.570578031972247e-06, - "loss": 0.04, + "learning_rate": 1.9586850358073424e-05, + "loss": 0.0259, "step": 112235 }, { "epoch": 5.24, - "learning_rate": 9.570109230697107e-06, - "loss": 0.0726, + "learning_rate": 1.9586382288237404e-05, + "loss": 0.0811, "step": 112240 }, { "epoch": 5.24, - "learning_rate": 9.569640429421969e-06, - "loss": 0.0903, + "learning_rate": 1.9585914218401387e-05, + "loss": 0.0865, "step": 112245 }, { "epoch": 5.24, - "learning_rate": 9.569171628146829e-06, - "loss": 0.1231, + "learning_rate": 1.9585446148565367e-05, + "loss": 0.1475, "step": 112250 }, { "epoch": 5.24, - "learning_rate": 9.56870282687169e-06, - "loss": 0.2307, + "learning_rate": 1.9584978078729347e-05, + "loss": 0.19, "step": 112255 }, { "epoch": 5.24, - "learning_rate": 9.56823402559655e-06, - "loss": 0.0727, + "learning_rate": 1.9584510008893327e-05, + "loss": 0.1259, "step": 112260 }, { "epoch": 5.24, - "learning_rate": 9.567765224321412e-06, - "loss": 0.0215, + "learning_rate": 1.958404193905731e-05, + "loss": 0.0136, "step": 112265 }, { "epoch": 5.24, - "learning_rate": 9.567296423046272e-06, - "loss": 0.0273, + "learning_rate": 1.958357386922129e-05, + "loss": 0.012, "step": 112270 }, { "epoch": 5.24, - "learning_rate": 9.566827621771132e-06, - "loss": 0.0588, + "learning_rate": 1.958310579938527e-05, + "loss": 0.1002, "step": 112275 }, { "epoch": 5.24, - "learning_rate": 9.566358820495992e-06, - "loss": 0.0252, + "learning_rate": 1.9582637729549246e-05, + "loss": 0.0339, "step": 112280 }, { "epoch": 5.24, - "learning_rate": 9.565890019220853e-06, - "loss": 0.077, + "learning_rate": 1.958216965971323e-05, + "loss": 0.0468, "step": 112285 }, { "epoch": 5.24, - "learning_rate": 9.565421217945713e-06, - "loss": 0.0925, + "learning_rate": 1.958170158987721e-05, + "loss": 0.0399, "step": 112290 }, { "epoch": 5.24, - "learning_rate": 9.564952416670575e-06, - "loss": 0.0397, + "learning_rate": 1.958123352004119e-05, + "loss": 0.1602, "step": 112295 }, { "epoch": 5.24, - "learning_rate": 9.564483615395435e-06, - "loss": 0.0737, + "learning_rate": 1.9580765450205172e-05, + "loss": 0.11, "step": 112300 }, { "epoch": 5.24, - "learning_rate": 9.564014814120295e-06, - "loss": 0.3351, + "learning_rate": 1.9580297380369152e-05, + "loss": 0.2559, "step": 112305 }, { "epoch": 5.24, - "learning_rate": 9.563546012845156e-06, - "loss": 0.0933, + "learning_rate": 1.9579829310533132e-05, + "loss": 0.1001, "step": 112310 }, { "epoch": 5.24, - "learning_rate": 9.563077211570016e-06, - "loss": 0.0317, + "learning_rate": 1.957936124069711e-05, + "loss": 0.0064, "step": 112315 }, { "epoch": 5.24, - "learning_rate": 9.562608410294876e-06, - "loss": 0.0686, + "learning_rate": 1.9578893170861095e-05, + "loss": 0.0354, "step": 112320 }, { "epoch": 5.24, - "learning_rate": 9.562139609019738e-06, - "loss": 0.0393, + "learning_rate": 1.9578425101025075e-05, + "loss": 0.0245, "step": 112325 }, { "epoch": 5.24, - "learning_rate": 9.561670807744598e-06, - "loss": 0.0416, + "learning_rate": 1.9577957031189054e-05, + "loss": 0.0257, "step": 112330 }, { "epoch": 5.24, - "learning_rate": 9.56120200646946e-06, - "loss": 0.0359, + "learning_rate": 1.9577488961353034e-05, + "loss": 0.0805, "step": 112335 }, { "epoch": 5.24, - "learning_rate": 9.56073320519432e-06, - "loss": 0.0612, + "learning_rate": 1.9577020891517014e-05, + "loss": 0.0582, "step": 112340 }, { "epoch": 5.24, - "learning_rate": 9.56026440391918e-06, - "loss": 0.1432, + "learning_rate": 1.9576552821680994e-05, + "loss": 0.0365, "step": 112345 }, { "epoch": 5.24, - "learning_rate": 9.55979560264404e-06, - "loss": 0.1108, + "learning_rate": 1.9576084751844974e-05, + "loss": 0.1449, "step": 112350 }, { "epoch": 5.24, - "learning_rate": 9.5593268013689e-06, - "loss": 0.1979, + "learning_rate": 1.9575616682008957e-05, + "loss": 0.4486, "step": 112355 }, { "epoch": 5.24, - "learning_rate": 9.55885800009376e-06, - "loss": 0.0787, + "learning_rate": 1.9575148612172937e-05, + "loss": 0.0677, "step": 112360 }, { "epoch": 5.24, - "learning_rate": 9.558389198818622e-06, - "loss": 0.0204, + "learning_rate": 1.9574680542336917e-05, + "loss": 0.0581, "step": 112365 }, { "epoch": 5.24, - "learning_rate": 9.557920397543482e-06, - "loss": 0.0358, + "learning_rate": 1.9574212472500896e-05, + "loss": 0.0427, "step": 112370 }, { "epoch": 5.24, - "learning_rate": 9.557451596268342e-06, - "loss": 0.0349, + "learning_rate": 1.957374440266488e-05, + "loss": 0.0305, "step": 112375 }, { "epoch": 5.24, - "learning_rate": 9.556982794993204e-06, - "loss": 0.0715, + "learning_rate": 1.957327633282886e-05, + "loss": 0.0895, "step": 112380 }, { "epoch": 5.24, - "learning_rate": 9.556513993718064e-06, - "loss": 0.0856, + "learning_rate": 1.957280826299284e-05, + "loss": 0.0425, "step": 112385 }, { "epoch": 5.24, - "learning_rate": 9.556045192442924e-06, - "loss": 0.0288, + "learning_rate": 1.957234019315682e-05, + "loss": 0.0423, "step": 112390 }, { "epoch": 5.24, - "learning_rate": 9.555576391167784e-06, - "loss": 0.0933, + "learning_rate": 1.9571872123320802e-05, + "loss": 0.0906, "step": 112395 }, { "epoch": 5.24, - "learning_rate": 9.555107589892645e-06, - "loss": 0.0958, + "learning_rate": 1.9571404053484782e-05, + "loss": 0.105, "step": 112400 }, { "epoch": 5.24, - "learning_rate": 9.554638788617507e-06, - "loss": 0.1079, + "learning_rate": 1.957093598364876e-05, + "loss": 0.3361, "step": 112405 }, { "epoch": 5.25, - "learning_rate": 9.554169987342367e-06, - "loss": 0.1363, + "learning_rate": 1.9570467913812742e-05, + "loss": 0.0886, "step": 112410 }, { "epoch": 5.25, - "learning_rate": 9.553701186067227e-06, - "loss": 0.0393, + "learning_rate": 1.956999984397672e-05, + "loss": 0.0157, "step": 112415 }, { "epoch": 5.25, - "learning_rate": 9.553232384792087e-06, - "loss": 0.0401, + "learning_rate": 1.95695317741407e-05, + "loss": 0.0182, "step": 112420 }, { "epoch": 5.25, - "learning_rate": 9.552763583516947e-06, - "loss": 0.0343, + "learning_rate": 1.956906370430468e-05, + "loss": 0.0371, "step": 112425 }, { "epoch": 5.25, - "learning_rate": 9.552294782241808e-06, - "loss": 0.0443, + "learning_rate": 1.9568595634468664e-05, + "loss": 0.0467, "step": 112430 }, { "epoch": 5.25, - "learning_rate": 9.55182598096667e-06, - "loss": 0.1155, + "learning_rate": 1.9568127564632644e-05, + "loss": 0.0259, "step": 112435 }, { "epoch": 5.25, - "learning_rate": 9.55135717969153e-06, - "loss": 0.1119, + "learning_rate": 1.9567659494796624e-05, + "loss": 0.0579, "step": 112440 }, { "epoch": 5.25, - "learning_rate": 9.55088837841639e-06, - "loss": 0.0663, + "learning_rate": 1.9567191424960604e-05, + "loss": 0.0569, "step": 112445 }, { "epoch": 5.25, - "learning_rate": 9.550419577141251e-06, - "loss": 0.1516, + "learning_rate": 1.9566723355124587e-05, + "loss": 0.0771, "step": 112450 }, { "epoch": 5.25, - "learning_rate": 9.549950775866111e-06, - "loss": 0.1459, + "learning_rate": 1.9566255285288567e-05, + "loss": 0.3373, "step": 112455 }, { "epoch": 5.25, - "learning_rate": 9.549481974590971e-06, - "loss": 0.0909, + "learning_rate": 1.9565787215452547e-05, + "loss": 0.1083, "step": 112460 }, { "epoch": 5.25, - "learning_rate": 9.549013173315831e-06, - "loss": 0.0203, + "learning_rate": 1.9565319145616526e-05, + "loss": 0.0297, "step": 112465 }, { "epoch": 5.25, - "learning_rate": 9.548544372040693e-06, - "loss": 0.0109, + "learning_rate": 1.9564851075780506e-05, + "loss": 0.0185, "step": 112470 }, { "epoch": 5.25, - "learning_rate": 9.548075570765555e-06, - "loss": 0.0185, + "learning_rate": 1.9564383005944486e-05, + "loss": 0.0379, "step": 112475 }, { "epoch": 5.25, - "learning_rate": 9.547606769490414e-06, - "loss": 0.0329, + "learning_rate": 1.9563914936108466e-05, + "loss": 0.0347, "step": 112480 }, { "epoch": 5.25, - "learning_rate": 9.547137968215274e-06, - "loss": 0.0415, + "learning_rate": 1.956344686627245e-05, + "loss": 0.0441, "step": 112485 }, { "epoch": 5.25, - "learning_rate": 9.546669166940134e-06, - "loss": 0.0309, + "learning_rate": 1.956297879643643e-05, + "loss": 0.0318, "step": 112490 }, { "epoch": 5.25, - "learning_rate": 9.546200365664994e-06, - "loss": 0.0831, + "learning_rate": 1.956251072660041e-05, + "loss": 0.0473, "step": 112495 }, { "epoch": 5.25, - "learning_rate": 9.545731564389856e-06, - "loss": 0.1126, + "learning_rate": 1.956204265676439e-05, + "loss": 0.1756, "step": 112500 }, { "epoch": 5.25, - "learning_rate": 9.545262763114716e-06, - "loss": 0.258, + "learning_rate": 1.9561574586928372e-05, + "loss": 0.2137, "step": 112505 }, { "epoch": 5.25, - "learning_rate": 9.544793961839577e-06, - "loss": 0.071, + "learning_rate": 1.956110651709235e-05, + "loss": 0.0724, "step": 112510 }, { "epoch": 5.25, - "learning_rate": 9.544325160564437e-06, - "loss": 0.0292, + "learning_rate": 1.956063844725633e-05, + "loss": 0.0371, "step": 112515 }, { "epoch": 5.25, - "learning_rate": 9.543856359289299e-06, - "loss": 0.0233, + "learning_rate": 1.956017037742031e-05, + "loss": 0.0332, "step": 112520 }, { "epoch": 5.25, - "learning_rate": 9.543387558014159e-06, - "loss": 0.046, + "learning_rate": 1.9559702307584294e-05, + "loss": 0.0264, "step": 112525 }, { "epoch": 5.25, - "learning_rate": 9.542918756739019e-06, - "loss": 0.0284, + "learning_rate": 1.955923423774827e-05, + "loss": 0.0421, "step": 112530 }, { "epoch": 5.25, - "learning_rate": 9.542449955463879e-06, - "loss": 0.0231, + "learning_rate": 1.955876616791225e-05, + "loss": 0.0344, "step": 112535 }, { "epoch": 5.25, - "learning_rate": 9.54198115418874e-06, - "loss": 0.0682, + "learning_rate": 1.9558298098076234e-05, + "loss": 0.0957, "step": 112540 }, { "epoch": 5.25, - "learning_rate": 9.5415123529136e-06, - "loss": 0.1536, + "learning_rate": 1.9557830028240214e-05, + "loss": 0.081, "step": 112545 }, { "epoch": 5.25, - "learning_rate": 9.541043551638462e-06, - "loss": 0.1765, + "learning_rate": 1.9557361958404194e-05, + "loss": 0.1064, "step": 112550 }, { "epoch": 5.25, - "learning_rate": 9.540574750363322e-06, - "loss": 0.2059, + "learning_rate": 1.9556893888568173e-05, + "loss": 0.3689, "step": 112555 }, { "epoch": 5.25, - "learning_rate": 9.540105949088182e-06, - "loss": 0.0781, + "learning_rate": 1.9556425818732157e-05, + "loss": 0.0779, "step": 112560 }, { "epoch": 5.25, - "learning_rate": 9.539637147813042e-06, - "loss": 0.0169, + "learning_rate": 1.9555957748896136e-05, + "loss": 0.0111, "step": 112565 }, { "epoch": 5.25, - "learning_rate": 9.539168346537903e-06, - "loss": 0.0471, + "learning_rate": 1.9555489679060116e-05, + "loss": 0.0339, "step": 112570 }, { "epoch": 5.25, - "learning_rate": 9.538699545262763e-06, - "loss": 0.0264, + "learning_rate": 1.9555021609224096e-05, + "loss": 0.0756, "step": 112575 }, { "epoch": 5.25, - "learning_rate": 9.538230743987625e-06, - "loss": 0.0753, + "learning_rate": 1.955455353938808e-05, + "loss": 0.0311, "step": 112580 }, { "epoch": 5.25, - "learning_rate": 9.537761942712485e-06, - "loss": 0.0412, + "learning_rate": 1.955408546955206e-05, + "loss": 0.0085, "step": 112585 }, { "epoch": 5.25, - "learning_rate": 9.537293141437347e-06, - "loss": 0.1463, + "learning_rate": 1.955361739971604e-05, + "loss": 0.0244, "step": 112590 }, { "epoch": 5.25, - "learning_rate": 9.536824340162206e-06, - "loss": 0.0818, + "learning_rate": 1.955314932988002e-05, + "loss": 0.1689, "step": 112595 }, { "epoch": 5.25, - "learning_rate": 9.536355538887066e-06, - "loss": 0.1009, + "learning_rate": 1.9552681260044e-05, + "loss": 0.2137, "step": 112600 }, { "epoch": 5.25, - "learning_rate": 9.535886737611926e-06, - "loss": 0.3041, + "learning_rate": 1.955221319020798e-05, + "loss": 0.3035, "step": 112605 }, { "epoch": 5.25, - "learning_rate": 9.535417936336788e-06, - "loss": 0.0742, + "learning_rate": 1.9551745120371958e-05, + "loss": 0.0886, "step": 112610 }, { "epoch": 5.25, - "learning_rate": 9.534949135061648e-06, - "loss": 0.0117, + "learning_rate": 1.955127705053594e-05, + "loss": 0.0256, "step": 112615 }, { "epoch": 5.26, - "learning_rate": 9.53448033378651e-06, - "loss": 0.0098, + "learning_rate": 1.955080898069992e-05, + "loss": 0.0214, "step": 112620 }, { "epoch": 5.26, - "learning_rate": 9.53401153251137e-06, - "loss": 0.0516, + "learning_rate": 1.95503409108639e-05, + "loss": 0.0361, "step": 112625 }, { "epoch": 5.26, - "learning_rate": 9.53354273123623e-06, - "loss": 0.0204, + "learning_rate": 1.954987284102788e-05, + "loss": 0.0185, "step": 112630 }, { "epoch": 5.26, - "learning_rate": 9.533073929961091e-06, - "loss": 0.0442, + "learning_rate": 1.9549404771191864e-05, + "loss": 0.0177, "step": 112635 }, { "epoch": 5.26, - "learning_rate": 9.532605128685951e-06, - "loss": 0.0485, + "learning_rate": 1.9548936701355844e-05, + "loss": 0.0903, "step": 112640 }, { "epoch": 5.26, - "learning_rate": 9.532136327410811e-06, - "loss": 0.1164, + "learning_rate": 1.9548468631519824e-05, + "loss": 0.1136, "step": 112645 }, { "epoch": 5.26, - "learning_rate": 9.531667526135673e-06, - "loss": 0.1315, + "learning_rate": 1.9548000561683807e-05, + "loss": 0.0742, "step": 112650 }, { "epoch": 5.26, - "learning_rate": 9.531198724860532e-06, - "loss": 0.214, + "learning_rate": 1.9547532491847783e-05, + "loss": 0.2375, "step": 112655 }, { "epoch": 5.26, - "learning_rate": 9.530729923585394e-06, - "loss": 0.0651, + "learning_rate": 1.9547064422011763e-05, + "loss": 0.0826, "step": 112660 }, { "epoch": 5.26, - "learning_rate": 9.530261122310254e-06, - "loss": 0.0374, + "learning_rate": 1.9546596352175743e-05, + "loss": 0.0188, "step": 112665 }, { "epoch": 5.26, - "learning_rate": 9.529792321035114e-06, - "loss": 0.0385, + "learning_rate": 1.9546128282339726e-05, + "loss": 0.0134, "step": 112670 }, { "epoch": 5.26, - "learning_rate": 9.529323519759974e-06, - "loss": 0.0665, + "learning_rate": 1.9545660212503706e-05, + "loss": 0.0569, "step": 112675 }, { "epoch": 5.26, - "learning_rate": 9.528854718484834e-06, - "loss": 0.0401, + "learning_rate": 1.9545192142667686e-05, + "loss": 0.0538, "step": 112680 }, { "epoch": 5.26, - "learning_rate": 9.528385917209695e-06, - "loss": 0.0409, + "learning_rate": 1.9544724072831666e-05, + "loss": 0.0633, "step": 112685 }, { "epoch": 5.26, - "learning_rate": 9.527917115934557e-06, - "loss": 0.0688, + "learning_rate": 1.954425600299565e-05, + "loss": 0.0867, "step": 112690 }, { "epoch": 5.26, - "learning_rate": 9.527448314659417e-06, - "loss": 0.0379, + "learning_rate": 1.954378793315963e-05, + "loss": 0.0678, "step": 112695 }, { "epoch": 5.26, - "learning_rate": 9.526979513384277e-06, - "loss": 0.1727, + "learning_rate": 1.954331986332361e-05, + "loss": 0.1252, "step": 112700 }, { "epoch": 5.26, - "learning_rate": 9.526510712109139e-06, - "loss": 0.2757, + "learning_rate": 1.9542851793487588e-05, + "loss": 0.3431, "step": 112705 }, { "epoch": 5.26, - "learning_rate": 9.526041910833999e-06, - "loss": 0.1255, + "learning_rate": 1.954238372365157e-05, + "loss": 0.1395, "step": 112710 }, { "epoch": 5.26, - "learning_rate": 9.525573109558858e-06, - "loss": 0.0101, + "learning_rate": 1.954191565381555e-05, + "loss": 0.0143, "step": 112715 }, { "epoch": 5.26, - "learning_rate": 9.525104308283718e-06, - "loss": 0.0336, + "learning_rate": 1.9541447583979528e-05, + "loss": 0.0029, "step": 112720 }, { "epoch": 5.26, - "learning_rate": 9.52463550700858e-06, - "loss": 0.0769, + "learning_rate": 1.954097951414351e-05, + "loss": 0.0392, "step": 112725 }, { "epoch": 5.26, - "learning_rate": 9.524166705733442e-06, - "loss": 0.0466, + "learning_rate": 1.954051144430749e-05, + "loss": 0.0339, "step": 112730 }, { "epoch": 5.26, - "learning_rate": 9.523697904458302e-06, - "loss": 0.0252, + "learning_rate": 1.954004337447147e-05, + "loss": 0.0461, "step": 112735 }, { "epoch": 5.26, - "learning_rate": 9.523229103183162e-06, - "loss": 0.1031, + "learning_rate": 1.953957530463545e-05, + "loss": 0.0724, "step": 112740 }, { "epoch": 5.26, - "learning_rate": 9.522760301908021e-06, - "loss": 0.0618, + "learning_rate": 1.9539107234799434e-05, + "loss": 0.1139, "step": 112745 }, { "epoch": 5.26, - "learning_rate": 9.522291500632881e-06, - "loss": 0.1581, + "learning_rate": 1.9538639164963413e-05, + "loss": 0.0481, "step": 112750 }, { "epoch": 5.26, - "learning_rate": 9.521822699357743e-06, - "loss": 0.3139, + "learning_rate": 1.9538171095127393e-05, + "loss": 0.2985, "step": 112755 }, { "epoch": 5.26, - "learning_rate": 9.521353898082603e-06, - "loss": 0.1242, + "learning_rate": 1.9537703025291373e-05, + "loss": 0.0937, "step": 112760 }, { "epoch": 5.26, - "learning_rate": 9.520885096807465e-06, - "loss": 0.0239, + "learning_rate": 1.9537234955455356e-05, + "loss": 0.0097, "step": 112765 }, { "epoch": 5.26, - "learning_rate": 9.520416295532324e-06, - "loss": 0.0471, + "learning_rate": 1.9536766885619336e-05, + "loss": 0.0133, "step": 112770 }, { "epoch": 5.26, - "learning_rate": 9.519947494257186e-06, - "loss": 0.057, + "learning_rate": 1.9536298815783316e-05, + "loss": 0.0346, "step": 112775 }, { "epoch": 5.26, - "learning_rate": 9.519478692982046e-06, - "loss": 0.0291, + "learning_rate": 1.95358307459473e-05, + "loss": 0.0275, "step": 112780 }, { "epoch": 5.26, - "learning_rate": 9.519009891706906e-06, - "loss": 0.0251, + "learning_rate": 1.9535362676111275e-05, + "loss": 0.0499, "step": 112785 }, { "epoch": 5.26, - "learning_rate": 9.518541090431766e-06, - "loss": 0.0595, + "learning_rate": 1.9534894606275255e-05, + "loss": 0.0649, "step": 112790 }, { "epoch": 5.26, - "learning_rate": 9.518072289156628e-06, - "loss": 0.0964, + "learning_rate": 1.9534426536439235e-05, + "loss": 0.0683, "step": 112795 }, { "epoch": 5.26, - "learning_rate": 9.517603487881487e-06, - "loss": 0.195, + "learning_rate": 1.9533958466603218e-05, + "loss": 0.1345, "step": 112800 }, { "epoch": 5.26, - "learning_rate": 9.517134686606349e-06, - "loss": 0.3448, + "learning_rate": 1.9533490396767198e-05, + "loss": 0.3015, "step": 112805 }, { "epoch": 5.26, - "learning_rate": 9.516665885331209e-06, - "loss": 0.0959, + "learning_rate": 1.9533022326931178e-05, + "loss": 0.0419, "step": 112810 }, { "epoch": 5.26, - "learning_rate": 9.516197084056069e-06, - "loss": 0.0107, + "learning_rate": 1.9532554257095158e-05, + "loss": 0.0367, "step": 112815 }, { "epoch": 5.26, - "learning_rate": 9.515728282780929e-06, - "loss": 0.0301, + "learning_rate": 1.953208618725914e-05, + "loss": 0.0143, "step": 112820 }, { "epoch": 5.26, - "learning_rate": 9.51525948150579e-06, - "loss": 0.0361, + "learning_rate": 1.953161811742312e-05, + "loss": 0.0249, "step": 112825 }, { "epoch": 5.26, - "learning_rate": 9.51479068023065e-06, - "loss": 0.076, + "learning_rate": 1.95311500475871e-05, + "loss": 0.0797, "step": 112830 }, { "epoch": 5.27, - "learning_rate": 9.514321878955512e-06, - "loss": 0.0261, + "learning_rate": 1.9530681977751084e-05, + "loss": 0.0271, "step": 112835 }, { "epoch": 5.27, - "learning_rate": 9.513853077680372e-06, - "loss": 0.063, + "learning_rate": 1.9530213907915064e-05, + "loss": 0.3989, "step": 112840 }, { "epoch": 5.27, - "learning_rate": 9.513384276405234e-06, - "loss": 0.1227, + "learning_rate": 1.952974583807904e-05, + "loss": 0.0482, "step": 112845 }, { "epoch": 5.27, - "learning_rate": 9.512915475130094e-06, - "loss": 0.0745, + "learning_rate": 1.952927776824302e-05, + "loss": 0.0755, "step": 112850 }, { "epoch": 5.27, - "learning_rate": 9.512446673854954e-06, - "loss": 0.2815, + "learning_rate": 1.9528809698407003e-05, + "loss": 0.3914, "step": 112855 }, { "epoch": 5.27, - "learning_rate": 9.511977872579813e-06, - "loss": 0.0662, + "learning_rate": 1.9528341628570983e-05, + "loss": 0.0659, "step": 112860 }, { "epoch": 5.27, - "learning_rate": 9.511509071304675e-06, - "loss": 0.0319, + "learning_rate": 1.9527873558734963e-05, + "loss": 0.0203, "step": 112865 }, { "epoch": 5.27, - "learning_rate": 9.511040270029535e-06, - "loss": 0.0437, + "learning_rate": 1.9527405488898943e-05, + "loss": 0.023, "step": 112870 }, { "epoch": 5.27, - "learning_rate": 9.510571468754397e-06, - "loss": 0.0295, + "learning_rate": 1.9526937419062926e-05, + "loss": 0.0729, "step": 112875 }, { "epoch": 5.27, - "learning_rate": 9.510102667479257e-06, - "loss": 0.0628, + "learning_rate": 1.9526469349226906e-05, + "loss": 0.0174, "step": 112880 }, { "epoch": 5.27, - "learning_rate": 9.509633866204117e-06, - "loss": 0.0924, + "learning_rate": 1.9526001279390885e-05, + "loss": 0.061, "step": 112885 }, { "epoch": 5.27, - "learning_rate": 9.509165064928976e-06, - "loss": 0.132, + "learning_rate": 1.9525533209554865e-05, + "loss": 0.0296, "step": 112890 }, { "epoch": 5.27, - "learning_rate": 9.508696263653838e-06, - "loss": 0.0339, + "learning_rate": 1.952506513971885e-05, + "loss": 0.0545, "step": 112895 }, { "epoch": 5.27, - "learning_rate": 9.508227462378698e-06, - "loss": 0.0754, + "learning_rate": 1.9524597069882828e-05, + "loss": 0.1424, "step": 112900 }, { "epoch": 5.27, - "learning_rate": 9.50775866110356e-06, - "loss": 0.1831, + "learning_rate": 1.9524129000046808e-05, + "loss": 0.2919, "step": 112905 }, { "epoch": 5.27, - "learning_rate": 9.50728985982842e-06, - "loss": 0.085, + "learning_rate": 1.9523660930210788e-05, + "loss": 0.0627, "step": 112910 }, { "epoch": 5.27, - "learning_rate": 9.506821058553281e-06, - "loss": 0.0107, + "learning_rate": 1.9523192860374768e-05, + "loss": 0.0175, "step": 112915 }, { "epoch": 5.27, - "learning_rate": 9.506352257278141e-06, - "loss": 0.02, + "learning_rate": 1.9522724790538747e-05, + "loss": 0.0329, "step": 112920 }, { "epoch": 5.27, - "learning_rate": 9.505883456003001e-06, - "loss": 0.0387, + "learning_rate": 1.9522256720702727e-05, + "loss": 0.0219, "step": 112925 }, { "epoch": 5.27, - "learning_rate": 9.505414654727861e-06, - "loss": 0.041, + "learning_rate": 1.952178865086671e-05, + "loss": 0.0457, "step": 112930 }, { "epoch": 5.27, - "learning_rate": 9.504945853452721e-06, - "loss": 0.0226, + "learning_rate": 1.952132058103069e-05, + "loss": 0.1068, "step": 112935 }, { "epoch": 5.27, - "learning_rate": 9.504477052177583e-06, - "loss": 0.0996, + "learning_rate": 1.952085251119467e-05, + "loss": 0.0768, "step": 112940 }, { "epoch": 5.27, - "learning_rate": 9.504008250902444e-06, - "loss": 0.0475, + "learning_rate": 1.952038444135865e-05, + "loss": 0.0544, "step": 112945 }, { "epoch": 5.27, - "learning_rate": 9.503539449627304e-06, - "loss": 0.0855, + "learning_rate": 1.9519916371522633e-05, + "loss": 0.1756, "step": 112950 }, { "epoch": 5.27, - "learning_rate": 9.503070648352164e-06, - "loss": 0.2394, + "learning_rate": 1.9519448301686613e-05, + "loss": 0.2249, "step": 112955 }, { "epoch": 5.27, - "learning_rate": 9.502601847077026e-06, - "loss": 0.0788, + "learning_rate": 1.9518980231850593e-05, + "loss": 0.1085, "step": 112960 }, { "epoch": 5.27, - "learning_rate": 9.502133045801886e-06, - "loss": 0.0119, + "learning_rate": 1.9518512162014576e-05, + "loss": 0.0104, "step": 112965 }, { "epoch": 5.27, - "learning_rate": 9.501664244526746e-06, - "loss": 0.026, + "learning_rate": 1.9518044092178552e-05, + "loss": 0.0099, "step": 112970 }, { "epoch": 5.27, - "learning_rate": 9.501195443251605e-06, - "loss": 0.0528, + "learning_rate": 1.9517576022342532e-05, + "loss": 0.0547, "step": 112975 }, { "epoch": 5.27, - "learning_rate": 9.500726641976467e-06, - "loss": 0.0647, + "learning_rate": 1.9517107952506512e-05, + "loss": 0.006, "step": 112980 }, { "epoch": 5.27, - "learning_rate": 9.500257840701329e-06, - "loss": 0.0906, + "learning_rate": 1.9516639882670495e-05, + "loss": 0.0807, "step": 112985 }, { "epoch": 5.27, - "learning_rate": 9.499789039426189e-06, - "loss": 0.0556, + "learning_rate": 1.9516171812834475e-05, + "loss": 0.0682, "step": 112990 }, { "epoch": 5.27, - "learning_rate": 9.499320238151049e-06, - "loss": 0.1014, + "learning_rate": 1.9515703742998455e-05, + "loss": 0.0511, "step": 112995 }, { "epoch": 5.27, - "learning_rate": 9.498851436875909e-06, - "loss": 0.1325, + "learning_rate": 1.9515235673162435e-05, + "loss": 0.1043, "step": 113000 }, { "epoch": 5.27, - "learning_rate": 9.498382635600768e-06, - "loss": 0.2624, + "learning_rate": 1.9514767603326418e-05, + "loss": 0.2172, "step": 113005 }, { "epoch": 5.27, - "learning_rate": 9.49791383432563e-06, - "loss": 0.0816, + "learning_rate": 1.9514299533490398e-05, + "loss": 0.1032, "step": 113010 }, { "epoch": 5.27, - "learning_rate": 9.49744503305049e-06, - "loss": 0.0375, + "learning_rate": 1.9513831463654378e-05, + "loss": 0.0323, "step": 113015 }, { "epoch": 5.27, - "learning_rate": 9.496976231775352e-06, - "loss": 0.0399, + "learning_rate": 1.951336339381836e-05, + "loss": 0.0387, "step": 113020 }, { "epoch": 5.27, - "learning_rate": 9.496507430500212e-06, - "loss": 0.043, + "learning_rate": 1.951289532398234e-05, + "loss": 0.0241, "step": 113025 }, { "epoch": 5.27, - "learning_rate": 9.496038629225073e-06, - "loss": 0.081, + "learning_rate": 1.951242725414632e-05, + "loss": 0.092, "step": 113030 }, { "epoch": 5.27, - "learning_rate": 9.495569827949933e-06, - "loss": 0.1011, + "learning_rate": 1.9511959184310297e-05, + "loss": 0.0376, "step": 113035 }, { "epoch": 5.27, - "learning_rate": 9.495101026674793e-06, - "loss": 0.0112, + "learning_rate": 1.951149111447428e-05, + "loss": 0.0761, "step": 113040 }, { "epoch": 5.27, - "learning_rate": 9.494632225399653e-06, - "loss": 0.0777, + "learning_rate": 1.951102304463826e-05, + "loss": 0.1044, "step": 113045 }, { "epoch": 5.28, - "learning_rate": 9.494163424124515e-06, - "loss": 0.1272, + "learning_rate": 1.951055497480224e-05, + "loss": 0.0979, "step": 113050 }, { "epoch": 5.28, - "learning_rate": 9.493694622849375e-06, - "loss": 0.2898, + "learning_rate": 1.951008690496622e-05, + "loss": 0.1984, "step": 113055 }, { "epoch": 5.28, - "learning_rate": 9.493225821574236e-06, - "loss": 0.0519, + "learning_rate": 1.9509618835130203e-05, + "loss": 0.1093, "step": 113060 }, { "epoch": 5.28, - "learning_rate": 9.492757020299096e-06, - "loss": 0.0454, + "learning_rate": 1.9509150765294183e-05, + "loss": 0.0098, "step": 113065 }, { "epoch": 5.28, - "learning_rate": 9.492288219023956e-06, - "loss": 0.0607, + "learning_rate": 1.9508682695458162e-05, + "loss": 0.0279, "step": 113070 }, { "epoch": 5.28, - "learning_rate": 9.491819417748816e-06, - "loss": 0.0444, + "learning_rate": 1.9508214625622146e-05, + "loss": 0.0105, "step": 113075 }, { "epoch": 5.28, - "learning_rate": 9.491350616473678e-06, - "loss": 0.0473, + "learning_rate": 1.9507746555786125e-05, + "loss": 0.0422, "step": 113080 }, { "epoch": 5.28, - "learning_rate": 9.490881815198538e-06, - "loss": 0.0502, + "learning_rate": 1.9507278485950105e-05, + "loss": 0.0607, "step": 113085 }, { "epoch": 5.28, - "learning_rate": 9.4904130139234e-06, - "loss": 0.0632, + "learning_rate": 1.9506810416114085e-05, + "loss": 0.0423, "step": 113090 }, { "epoch": 5.28, - "learning_rate": 9.489944212648259e-06, - "loss": 0.1565, + "learning_rate": 1.9506342346278068e-05, + "loss": 0.1197, "step": 113095 }, { "epoch": 5.28, - "learning_rate": 9.48947541137312e-06, - "loss": 0.1296, + "learning_rate": 1.9505874276442045e-05, + "loss": 0.1269, "step": 113100 }, { "epoch": 5.28, - "learning_rate": 9.48900661009798e-06, - "loss": 0.306, + "learning_rate": 1.9505406206606024e-05, + "loss": 0.0675, "step": 113105 }, { "epoch": 5.28, - "learning_rate": 9.48853780882284e-06, - "loss": 0.0971, + "learning_rate": 1.9504938136770004e-05, + "loss": 0.0978, "step": 113110 }, { "epoch": 5.28, - "learning_rate": 9.4880690075477e-06, - "loss": 0.0155, + "learning_rate": 1.9504470066933987e-05, + "loss": 0.0127, "step": 113115 }, { "epoch": 5.28, - "learning_rate": 9.487600206272562e-06, - "loss": 0.0806, + "learning_rate": 1.9504001997097967e-05, + "loss": 0.0038, "step": 113120 }, { "epoch": 5.28, - "learning_rate": 9.487131404997422e-06, - "loss": 0.0255, + "learning_rate": 1.9503533927261947e-05, + "loss": 0.0389, "step": 113125 }, { "epoch": 5.28, - "learning_rate": 9.486662603722284e-06, - "loss": 0.0244, + "learning_rate": 1.9503065857425927e-05, + "loss": 0.0935, "step": 113130 }, { "epoch": 5.28, - "learning_rate": 9.486193802447144e-06, - "loss": 0.0816, + "learning_rate": 1.950259778758991e-05, + "loss": 0.0748, "step": 113135 }, { "epoch": 5.28, - "learning_rate": 9.485725001172004e-06, - "loss": 0.0911, + "learning_rate": 1.950212971775389e-05, + "loss": 0.0262, "step": 113140 }, { "epoch": 5.28, - "learning_rate": 9.485256199896864e-06, - "loss": 0.1238, + "learning_rate": 1.950166164791787e-05, + "loss": 0.1161, "step": 113145 }, { "epoch": 5.28, - "learning_rate": 9.484787398621725e-06, - "loss": 0.0755, + "learning_rate": 1.9501193578081853e-05, + "loss": 0.1605, "step": 113150 }, { "epoch": 5.28, - "learning_rate": 9.484318597346585e-06, - "loss": 0.2097, + "learning_rate": 1.9500725508245833e-05, + "loss": 0.3538, "step": 113155 }, { "epoch": 5.28, - "learning_rate": 9.483849796071447e-06, - "loss": 0.0803, + "learning_rate": 1.950025743840981e-05, + "loss": 0.0922, "step": 113160 }, { "epoch": 5.28, - "learning_rate": 9.483380994796307e-06, - "loss": 0.0552, + "learning_rate": 1.949978936857379e-05, + "loss": 0.0414, "step": 113165 }, { "epoch": 5.28, - "learning_rate": 9.482912193521168e-06, - "loss": 0.0221, + "learning_rate": 1.9499321298737772e-05, + "loss": 0.0324, "step": 113170 }, { "epoch": 5.28, - "learning_rate": 9.482443392246028e-06, - "loss": 0.0107, + "learning_rate": 1.9498853228901752e-05, + "loss": 0.0449, "step": 113175 }, { "epoch": 5.28, - "learning_rate": 9.481974590970888e-06, - "loss": 0.0665, + "learning_rate": 1.9498385159065732e-05, + "loss": 0.0201, "step": 113180 }, { "epoch": 5.28, - "learning_rate": 9.481505789695748e-06, - "loss": 0.0372, + "learning_rate": 1.9497917089229712e-05, + "loss": 0.0612, "step": 113185 }, { "epoch": 5.28, - "learning_rate": 9.481036988420608e-06, - "loss": 0.0734, + "learning_rate": 1.9497449019393695e-05, + "loss": 0.074, "step": 113190 }, { "epoch": 5.28, - "learning_rate": 9.48056818714547e-06, - "loss": 0.0895, + "learning_rate": 1.9496980949557675e-05, + "loss": 0.0943, "step": 113195 }, { "epoch": 5.28, - "learning_rate": 9.480099385870331e-06, - "loss": 0.1003, + "learning_rate": 1.9496512879721655e-05, + "loss": 0.2681, "step": 113200 }, { "epoch": 5.28, - "learning_rate": 9.479630584595191e-06, - "loss": 0.2142, + "learning_rate": 1.9496044809885638e-05, + "loss": 0.1637, "step": 113205 }, { "epoch": 5.28, - "learning_rate": 9.479161783320051e-06, - "loss": 0.0662, + "learning_rate": 1.9495576740049618e-05, + "loss": 0.0801, "step": 113210 }, { "epoch": 5.28, - "learning_rate": 9.478692982044911e-06, - "loss": 0.0217, + "learning_rate": 1.9495108670213597e-05, + "loss": 0.0084, "step": 113215 }, { "epoch": 5.28, - "learning_rate": 9.478224180769773e-06, - "loss": 0.0106, + "learning_rate": 1.9494640600377577e-05, + "loss": 0.0187, "step": 113220 }, { "epoch": 5.28, - "learning_rate": 9.477755379494633e-06, - "loss": 0.0723, + "learning_rate": 1.9494172530541557e-05, + "loss": 0.0583, "step": 113225 }, { "epoch": 5.28, - "learning_rate": 9.477286578219493e-06, - "loss": 0.0531, + "learning_rate": 1.9493704460705537e-05, + "loss": 0.0464, "step": 113230 }, { "epoch": 5.28, - "learning_rate": 9.476817776944354e-06, - "loss": 0.055, + "learning_rate": 1.9493236390869517e-05, + "loss": 0.0635, "step": 113235 }, { "epoch": 5.28, - "learning_rate": 9.476348975669216e-06, - "loss": 0.1626, + "learning_rate": 1.9492768321033496e-05, + "loss": 0.0418, "step": 113240 }, { "epoch": 5.28, - "learning_rate": 9.475880174394076e-06, - "loss": 0.0953, + "learning_rate": 1.949230025119748e-05, + "loss": 0.1154, "step": 113245 }, { "epoch": 5.28, - "learning_rate": 9.475411373118936e-06, - "loss": 0.159, + "learning_rate": 1.949183218136146e-05, + "loss": 0.1197, "step": 113250 }, { "epoch": 5.28, - "learning_rate": 9.474942571843796e-06, - "loss": 0.1822, + "learning_rate": 1.949136411152544e-05, + "loss": 0.1797, "step": 113255 }, { "epoch": 5.28, - "learning_rate": 9.474473770568656e-06, - "loss": 0.0671, + "learning_rate": 1.9490896041689423e-05, + "loss": 0.0348, "step": 113260 }, { "epoch": 5.29, - "learning_rate": 9.474004969293517e-06, - "loss": 0.0157, + "learning_rate": 1.9490427971853402e-05, + "loss": 0.0346, "step": 113265 }, { "epoch": 5.29, - "learning_rate": 9.473536168018377e-06, - "loss": 0.0845, + "learning_rate": 1.9489959902017382e-05, + "loss": 0.0274, "step": 113270 }, { "epoch": 5.29, - "learning_rate": 9.473067366743239e-06, - "loss": 0.0454, + "learning_rate": 1.9489491832181362e-05, + "loss": 0.0413, "step": 113275 }, { "epoch": 5.29, - "learning_rate": 9.472598565468099e-06, - "loss": 0.0397, + "learning_rate": 1.9489023762345345e-05, + "loss": 0.034, "step": 113280 }, { "epoch": 5.29, - "learning_rate": 9.47212976419296e-06, - "loss": 0.0304, + "learning_rate": 1.9488555692509325e-05, + "loss": 0.0667, "step": 113285 }, { "epoch": 5.29, - "learning_rate": 9.47166096291782e-06, - "loss": 0.0607, + "learning_rate": 1.94880876226733e-05, + "loss": 0.0976, "step": 113290 }, { "epoch": 5.29, - "learning_rate": 9.47119216164268e-06, - "loss": 0.1027, + "learning_rate": 1.948761955283728e-05, + "loss": 0.072, "step": 113295 }, { "epoch": 5.29, - "learning_rate": 9.47072336036754e-06, - "loss": 0.0812, + "learning_rate": 1.9487151483001264e-05, + "loss": 0.096, "step": 113300 }, { "epoch": 5.29, - "learning_rate": 9.470254559092402e-06, - "loss": 0.2528, + "learning_rate": 1.9486683413165244e-05, + "loss": 0.2783, "step": 113305 }, { "epoch": 5.29, - "learning_rate": 9.469785757817262e-06, - "loss": 0.0729, + "learning_rate": 1.9486215343329224e-05, + "loss": 0.0722, "step": 113310 }, { "epoch": 5.29, - "learning_rate": 9.469316956542123e-06, - "loss": 0.0214, + "learning_rate": 1.9485747273493204e-05, + "loss": 0.0131, "step": 113315 }, { "epoch": 5.29, - "learning_rate": 9.468848155266983e-06, - "loss": 0.044, + "learning_rate": 1.9485279203657187e-05, + "loss": 0.0229, "step": 113320 }, { "epoch": 5.29, - "learning_rate": 9.468379353991843e-06, - "loss": 0.0151, + "learning_rate": 1.9484811133821167e-05, + "loss": 0.0197, "step": 113325 }, { "epoch": 5.29, - "learning_rate": 9.467910552716703e-06, - "loss": 0.0327, + "learning_rate": 1.9484343063985147e-05, + "loss": 0.0571, "step": 113330 }, { "epoch": 5.29, - "learning_rate": 9.467441751441565e-06, - "loss": 0.0605, + "learning_rate": 1.948387499414913e-05, + "loss": 0.0593, "step": 113335 }, { "epoch": 5.29, - "learning_rate": 9.466972950166425e-06, - "loss": 0.1161, + "learning_rate": 1.948340692431311e-05, + "loss": 0.0368, "step": 113340 }, { "epoch": 5.29, - "learning_rate": 9.466504148891286e-06, - "loss": 0.1778, + "learning_rate": 1.948293885447709e-05, + "loss": 0.1464, "step": 113345 }, { "epoch": 5.29, - "learning_rate": 9.466035347616146e-06, - "loss": 0.0627, + "learning_rate": 1.9482470784641066e-05, + "loss": 0.1159, "step": 113350 }, { "epoch": 5.29, - "learning_rate": 9.465566546341008e-06, - "loss": 0.2428, + "learning_rate": 1.948200271480505e-05, + "loss": 0.3171, "step": 113355 }, { "epoch": 5.29, - "learning_rate": 9.465097745065868e-06, - "loss": 0.114, + "learning_rate": 1.948153464496903e-05, + "loss": 0.0913, "step": 113360 }, { "epoch": 5.29, - "learning_rate": 9.464628943790728e-06, - "loss": 0.0497, + "learning_rate": 1.948106657513301e-05, + "loss": 0.0106, "step": 113365 }, { "epoch": 5.29, - "learning_rate": 9.464160142515588e-06, - "loss": 0.0244, + "learning_rate": 1.948059850529699e-05, + "loss": 0.0026, "step": 113370 }, { "epoch": 5.29, - "learning_rate": 9.46369134124045e-06, - "loss": 0.0336, + "learning_rate": 1.9480130435460972e-05, + "loss": 0.0339, "step": 113375 }, { "epoch": 5.29, - "learning_rate": 9.46322253996531e-06, - "loss": 0.0594, + "learning_rate": 1.947966236562495e-05, + "loss": 0.0112, "step": 113380 }, { "epoch": 5.29, - "learning_rate": 9.462753738690171e-06, - "loss": 0.0556, + "learning_rate": 1.947919429578893e-05, + "loss": 0.0315, "step": 113385 }, { "epoch": 5.29, - "learning_rate": 9.46228493741503e-06, - "loss": 0.0888, + "learning_rate": 1.9478726225952915e-05, + "loss": 0.0489, "step": 113390 }, { "epoch": 5.29, - "learning_rate": 9.46181613613989e-06, - "loss": 0.0998, + "learning_rate": 1.9478258156116895e-05, + "loss": 0.0552, "step": 113395 }, { "epoch": 5.29, - "learning_rate": 9.46134733486475e-06, - "loss": 0.0813, + "learning_rate": 1.9477790086280874e-05, + "loss": 0.0628, "step": 113400 }, { "epoch": 5.29, - "learning_rate": 9.460878533589612e-06, - "loss": 0.239, + "learning_rate": 1.9477322016444854e-05, + "loss": 0.141, "step": 113405 }, { "epoch": 5.29, - "learning_rate": 9.460409732314472e-06, - "loss": 0.0837, + "learning_rate": 1.9476853946608837e-05, + "loss": 0.0818, "step": 113410 }, { "epoch": 5.29, - "learning_rate": 9.459940931039334e-06, - "loss": 0.016, + "learning_rate": 1.9476385876772814e-05, + "loss": 0.007, "step": 113415 }, { "epoch": 5.29, - "learning_rate": 9.459472129764194e-06, - "loss": 0.0274, + "learning_rate": 1.9475917806936794e-05, + "loss": 0.0075, "step": 113420 }, { "epoch": 5.29, - "learning_rate": 9.459003328489055e-06, - "loss": 0.0265, + "learning_rate": 1.9475449737100773e-05, + "loss": 0.0702, "step": 113425 }, { "epoch": 5.29, - "learning_rate": 9.458534527213915e-06, - "loss": 0.0475, + "learning_rate": 1.9474981667264757e-05, + "loss": 0.0426, "step": 113430 }, { "epoch": 5.29, - "learning_rate": 9.458065725938775e-06, - "loss": 0.0298, + "learning_rate": 1.9474513597428736e-05, + "loss": 0.0519, "step": 113435 }, { "epoch": 5.29, - "learning_rate": 9.457596924663635e-06, - "loss": 0.096, + "learning_rate": 1.9474045527592716e-05, + "loss": 0.0277, "step": 113440 }, { "epoch": 5.29, - "learning_rate": 9.457128123388495e-06, - "loss": 0.0634, + "learning_rate": 1.94735774577567e-05, + "loss": 0.0925, "step": 113445 }, { "epoch": 5.29, - "learning_rate": 9.456659322113357e-06, - "loss": 0.1662, + "learning_rate": 1.947310938792068e-05, + "loss": 0.0994, "step": 113450 }, { "epoch": 5.29, - "learning_rate": 9.456190520838218e-06, - "loss": 0.1727, + "learning_rate": 1.947264131808466e-05, + "loss": 0.2192, "step": 113455 }, { "epoch": 5.29, - "learning_rate": 9.455721719563078e-06, - "loss": 0.0671, + "learning_rate": 1.947217324824864e-05, + "loss": 0.0977, "step": 113460 }, { "epoch": 5.29, - "learning_rate": 9.455252918287938e-06, - "loss": 0.0211, + "learning_rate": 1.9471705178412622e-05, + "loss": 0.0301, "step": 113465 }, { "epoch": 5.29, - "learning_rate": 9.454784117012798e-06, - "loss": 0.0502, + "learning_rate": 1.9471237108576602e-05, + "loss": 0.0387, "step": 113470 }, { "epoch": 5.29, - "learning_rate": 9.45431531573766e-06, - "loss": 0.0177, + "learning_rate": 1.9470769038740582e-05, + "loss": 0.0233, "step": 113475 }, { "epoch": 5.3, - "learning_rate": 9.45384651446252e-06, - "loss": 0.0675, + "learning_rate": 1.9470300968904558e-05, + "loss": 0.0186, "step": 113480 }, { "epoch": 5.3, - "learning_rate": 9.45337771318738e-06, - "loss": 0.031, + "learning_rate": 1.946983289906854e-05, + "loss": 0.0467, "step": 113485 }, { "epoch": 5.3, - "learning_rate": 9.452908911912241e-06, - "loss": 0.113, + "learning_rate": 1.946936482923252e-05, + "loss": 0.1292, "step": 113490 }, { "epoch": 5.3, - "learning_rate": 9.452440110637103e-06, - "loss": 0.1023, + "learning_rate": 1.94688967593965e-05, + "loss": 0.1369, "step": 113495 }, { "epoch": 5.3, - "learning_rate": 9.451971309361963e-06, - "loss": 0.1159, + "learning_rate": 1.946842868956048e-05, + "loss": 0.1568, "step": 113500 }, { "epoch": 5.3, - "learning_rate": 9.451502508086823e-06, - "loss": 0.2263, + "learning_rate": 1.9467960619724464e-05, + "loss": 0.2017, "step": 113505 }, { "epoch": 5.3, - "learning_rate": 9.451033706811683e-06, - "loss": 0.0983, + "learning_rate": 1.9467492549888444e-05, + "loss": 0.101, "step": 113510 }, { "epoch": 5.3, - "learning_rate": 9.450564905536543e-06, - "loss": 0.0051, + "learning_rate": 1.9467024480052424e-05, + "loss": 0.017, "step": 113515 }, { "epoch": 5.3, - "learning_rate": 9.450096104261404e-06, - "loss": 0.027, + "learning_rate": 1.9466556410216407e-05, + "loss": 0.0235, "step": 113520 }, { "epoch": 5.3, - "learning_rate": 9.449627302986264e-06, - "loss": 0.0367, + "learning_rate": 1.9466088340380387e-05, + "loss": 0.0576, "step": 113525 }, { "epoch": 5.3, - "learning_rate": 9.449158501711126e-06, - "loss": 0.0155, + "learning_rate": 1.9465620270544367e-05, + "loss": 0.0602, "step": 113530 }, { "epoch": 5.3, - "learning_rate": 9.448689700435986e-06, - "loss": 0.0694, + "learning_rate": 1.9465152200708346e-05, + "loss": 0.0423, "step": 113535 }, { "epoch": 5.3, - "learning_rate": 9.448220899160846e-06, - "loss": 0.0864, + "learning_rate": 1.9464684130872326e-05, + "loss": 0.0849, "step": 113540 }, { "epoch": 5.3, - "learning_rate": 9.447752097885707e-06, - "loss": 0.1005, + "learning_rate": 1.9464216061036306e-05, + "loss": 0.1013, "step": 113545 }, { "epoch": 5.3, - "learning_rate": 9.447283296610567e-06, - "loss": 0.1241, + "learning_rate": 1.9463747991200286e-05, + "loss": 0.0708, "step": 113550 }, { "epoch": 5.3, - "learning_rate": 9.446814495335427e-06, - "loss": 0.2432, + "learning_rate": 1.9463279921364266e-05, + "loss": 0.1387, "step": 113555 }, { "epoch": 5.3, - "learning_rate": 9.446345694060289e-06, - "loss": 0.1048, + "learning_rate": 1.946281185152825e-05, + "loss": 0.0904, "step": 113560 }, { "epoch": 5.3, - "learning_rate": 9.445876892785149e-06, - "loss": 0.0231, + "learning_rate": 1.946234378169223e-05, + "loss": 0.0545, "step": 113565 }, { "epoch": 5.3, - "learning_rate": 9.44540809151001e-06, - "loss": 0.0078, + "learning_rate": 1.946187571185621e-05, + "loss": 0.0376, "step": 113570 }, { "epoch": 5.3, - "learning_rate": 9.44493929023487e-06, - "loss": 0.0215, + "learning_rate": 1.946140764202019e-05, + "loss": 0.007, "step": 113575 }, { "epoch": 5.3, - "learning_rate": 9.44447048895973e-06, - "loss": 0.0166, + "learning_rate": 1.946093957218417e-05, + "loss": 0.0606, "step": 113580 }, { "epoch": 5.3, - "learning_rate": 9.44400168768459e-06, - "loss": 0.0405, + "learning_rate": 1.946047150234815e-05, + "loss": 0.0721, "step": 113585 }, { "epoch": 5.3, - "learning_rate": 9.443532886409452e-06, - "loss": 0.0889, + "learning_rate": 1.946000343251213e-05, + "loss": 0.0283, "step": 113590 }, { "epoch": 5.3, - "learning_rate": 9.443064085134312e-06, - "loss": 0.1219, + "learning_rate": 1.9459535362676114e-05, + "loss": 0.0894, "step": 113595 }, { "epoch": 5.3, - "learning_rate": 9.442595283859173e-06, - "loss": 0.0947, + "learning_rate": 1.9459067292840094e-05, + "loss": 0.1034, "step": 113600 }, { "epoch": 5.3, - "learning_rate": 9.442126482584033e-06, - "loss": 0.1704, + "learning_rate": 1.945859922300407e-05, + "loss": 0.2977, "step": 113605 }, { "epoch": 5.3, - "learning_rate": 9.441657681308895e-06, - "loss": 0.1318, + "learning_rate": 1.945813115316805e-05, + "loss": 0.077, "step": 113610 }, { "epoch": 5.3, - "learning_rate": 9.441188880033755e-06, - "loss": 0.0423, + "learning_rate": 1.9457663083332034e-05, + "loss": 0.0122, "step": 113615 }, { "epoch": 5.3, - "learning_rate": 9.440720078758615e-06, - "loss": 0.0138, + "learning_rate": 1.9457195013496013e-05, + "loss": 0.0197, "step": 113620 }, { "epoch": 5.3, - "learning_rate": 9.440251277483475e-06, - "loss": 0.0495, + "learning_rate": 1.9456726943659993e-05, + "loss": 0.0704, "step": 113625 }, { "epoch": 5.3, - "learning_rate": 9.439782476208336e-06, - "loss": 0.0623, + "learning_rate": 1.9456258873823976e-05, + "loss": 0.0479, "step": 113630 }, { "epoch": 5.3, - "learning_rate": 9.439313674933196e-06, - "loss": 0.0298, + "learning_rate": 1.9455790803987956e-05, + "loss": 0.054, "step": 113635 }, { "epoch": 5.3, - "learning_rate": 9.438844873658058e-06, - "loss": 0.0894, + "learning_rate": 1.9455322734151936e-05, + "loss": 0.1483, "step": 113640 }, { "epoch": 5.3, - "learning_rate": 9.438376072382918e-06, - "loss": 0.0652, + "learning_rate": 1.9454854664315916e-05, + "loss": 0.0263, "step": 113645 }, { "epoch": 5.3, - "learning_rate": 9.437907271107778e-06, - "loss": 0.1397, + "learning_rate": 1.94543865944799e-05, + "loss": 0.1169, "step": 113650 }, { "epoch": 5.3, - "learning_rate": 9.437438469832638e-06, - "loss": 0.2945, + "learning_rate": 1.945391852464388e-05, + "loss": 0.163, "step": 113655 }, { "epoch": 5.3, - "learning_rate": 9.4369696685575e-06, - "loss": 0.0713, + "learning_rate": 1.945345045480786e-05, + "loss": 0.1126, "step": 113660 }, { "epoch": 5.3, - "learning_rate": 9.43650086728236e-06, - "loss": 0.0093, + "learning_rate": 1.945298238497184e-05, + "loss": 0.0287, "step": 113665 }, { "epoch": 5.3, - "learning_rate": 9.436032066007221e-06, - "loss": 0.0079, + "learning_rate": 1.945251431513582e-05, + "loss": 0.0136, "step": 113670 }, { "epoch": 5.3, - "learning_rate": 9.435563264732081e-06, - "loss": 0.0536, + "learning_rate": 1.9452046245299798e-05, + "loss": 0.0352, "step": 113675 }, { "epoch": 5.3, - "learning_rate": 9.435094463456943e-06, - "loss": 0.0159, + "learning_rate": 1.9451578175463778e-05, + "loss": 0.0299, "step": 113680 }, { "epoch": 5.3, - "learning_rate": 9.434625662181802e-06, - "loss": 0.0041, + "learning_rate": 1.945111010562776e-05, + "loss": 0.0435, "step": 113685 }, { "epoch": 5.3, - "learning_rate": 9.434156860906662e-06, - "loss": 0.1139, + "learning_rate": 1.945064203579174e-05, + "loss": 0.0586, "step": 113690 }, { "epoch": 5.31, - "learning_rate": 9.433688059631522e-06, - "loss": 0.0644, + "learning_rate": 1.945017396595572e-05, + "loss": 0.0574, "step": 113695 }, { "epoch": 5.31, - "learning_rate": 9.433219258356382e-06, - "loss": 0.1674, + "learning_rate": 1.94497058961197e-05, + "loss": 0.083, "step": 113700 }, { "epoch": 5.31, - "learning_rate": 9.432750457081244e-06, - "loss": 0.173, + "learning_rate": 1.9449237826283684e-05, + "loss": 0.4821, "step": 113705 }, { "epoch": 5.31, - "learning_rate": 9.432281655806106e-06, - "loss": 0.0968, + "learning_rate": 1.9448769756447664e-05, + "loss": 0.1036, "step": 113710 }, { "epoch": 5.31, - "learning_rate": 9.431812854530965e-06, - "loss": 0.01, + "learning_rate": 1.9448301686611644e-05, + "loss": 0.015, "step": 113715 }, { "epoch": 5.31, - "learning_rate": 9.431344053255825e-06, - "loss": 0.0379, + "learning_rate": 1.9447833616775623e-05, + "loss": 0.0086, "step": 113720 }, { "epoch": 5.31, - "learning_rate": 9.430875251980685e-06, - "loss": 0.0285, + "learning_rate": 1.9447365546939607e-05, + "loss": 0.0349, "step": 113725 }, { "epoch": 5.31, - "learning_rate": 9.430406450705547e-06, - "loss": 0.0131, + "learning_rate": 1.9446897477103583e-05, + "loss": 0.0999, "step": 113730 }, { "epoch": 5.31, - "learning_rate": 9.429937649430407e-06, - "loss": 0.0455, + "learning_rate": 1.9446429407267563e-05, + "loss": 0.0824, "step": 113735 }, { "epoch": 5.31, - "learning_rate": 9.429468848155267e-06, - "loss": 0.0469, + "learning_rate": 1.9445961337431543e-05, + "loss": 0.0128, "step": 113740 }, { "epoch": 5.31, - "learning_rate": 9.429000046880128e-06, - "loss": 0.0754, + "learning_rate": 1.9445493267595526e-05, + "loss": 0.0633, "step": 113745 }, { "epoch": 5.31, - "learning_rate": 9.42853124560499e-06, - "loss": 0.1469, + "learning_rate": 1.9445025197759506e-05, + "loss": 0.1275, "step": 113750 }, { "epoch": 5.31, - "learning_rate": 9.42806244432985e-06, - "loss": 0.1817, + "learning_rate": 1.9444557127923485e-05, + "loss": 0.2938, "step": 113755 }, { "epoch": 5.31, - "learning_rate": 9.42759364305471e-06, - "loss": 0.0986, + "learning_rate": 1.944408905808747e-05, + "loss": 0.0871, "step": 113760 }, { "epoch": 5.31, - "learning_rate": 9.42712484177957e-06, - "loss": 0.016, + "learning_rate": 1.944362098825145e-05, + "loss": 0.0137, "step": 113765 }, { "epoch": 5.31, - "learning_rate": 9.42665604050443e-06, - "loss": 0.0096, + "learning_rate": 1.9443152918415428e-05, + "loss": 0.0188, "step": 113770 }, { "epoch": 5.31, - "learning_rate": 9.426187239229291e-06, - "loss": 0.0313, + "learning_rate": 1.9442684848579408e-05, + "loss": 0.0179, "step": 113775 }, { "epoch": 5.31, - "learning_rate": 9.425718437954151e-06, - "loss": 0.0259, + "learning_rate": 1.944221677874339e-05, + "loss": 0.0515, "step": 113780 }, { "epoch": 5.31, - "learning_rate": 9.425249636679013e-06, - "loss": 0.0804, + "learning_rate": 1.944174870890737e-05, + "loss": 0.0632, "step": 113785 }, { "epoch": 5.31, - "learning_rate": 9.424780835403873e-06, - "loss": 0.1046, + "learning_rate": 1.944128063907135e-05, + "loss": 0.1045, "step": 113790 }, { "epoch": 5.31, - "learning_rate": 9.424312034128733e-06, - "loss": 0.178, + "learning_rate": 1.9440812569235327e-05, + "loss": 0.0666, "step": 113795 }, { "epoch": 5.31, - "learning_rate": 9.423843232853594e-06, - "loss": 0.1679, + "learning_rate": 1.944034449939931e-05, + "loss": 0.1257, "step": 113800 }, { "epoch": 5.31, - "learning_rate": 9.423374431578454e-06, - "loss": 0.2853, + "learning_rate": 1.943987642956329e-05, + "loss": 0.4815, "step": 113805 }, { "epoch": 5.31, - "learning_rate": 9.422905630303314e-06, - "loss": 0.0877, + "learning_rate": 1.943940835972727e-05, + "loss": 0.0861, "step": 113810 }, { "epoch": 5.31, - "learning_rate": 9.422436829028176e-06, - "loss": 0.0426, + "learning_rate": 1.9438940289891253e-05, + "loss": 0.0231, "step": 113815 }, { "epoch": 5.31, - "learning_rate": 9.421968027753036e-06, - "loss": 0.0416, + "learning_rate": 1.9438472220055233e-05, + "loss": 0.0318, "step": 113820 }, { "epoch": 5.31, - "learning_rate": 9.421499226477898e-06, - "loss": 0.0281, + "learning_rate": 1.9438004150219213e-05, + "loss": 0.0728, "step": 113825 }, { "epoch": 5.31, - "learning_rate": 9.421030425202757e-06, - "loss": 0.0835, + "learning_rate": 1.9437536080383193e-05, + "loss": 0.02, "step": 113830 }, { "epoch": 5.31, - "learning_rate": 9.420561623927617e-06, - "loss": 0.0932, + "learning_rate": 1.9437068010547176e-05, + "loss": 0.0412, "step": 113835 }, { "epoch": 5.31, - "learning_rate": 9.420092822652477e-06, - "loss": 0.0743, + "learning_rate": 1.9436599940711156e-05, + "loss": 0.0793, "step": 113840 }, { "epoch": 5.31, - "learning_rate": 9.419624021377339e-06, - "loss": 0.0928, + "learning_rate": 1.9436131870875136e-05, + "loss": 0.1266, "step": 113845 }, { "epoch": 5.31, - "learning_rate": 9.419155220102199e-06, - "loss": 0.1587, + "learning_rate": 1.9435663801039116e-05, + "loss": 0.1586, "step": 113850 }, { "epoch": 5.31, - "learning_rate": 9.41868641882706e-06, - "loss": 0.4162, + "learning_rate": 1.9435195731203095e-05, + "loss": 0.3518, "step": 113855 }, { "epoch": 5.31, - "learning_rate": 9.41821761755192e-06, - "loss": 0.0762, + "learning_rate": 1.9434727661367075e-05, + "loss": 0.0888, "step": 113860 }, { "epoch": 5.31, - "learning_rate": 9.41774881627678e-06, - "loss": 0.0396, + "learning_rate": 1.9434259591531055e-05, + "loss": 0.0137, "step": 113865 }, { "epoch": 5.31, - "learning_rate": 9.417280015001642e-06, - "loss": 0.029, + "learning_rate": 1.9433791521695038e-05, + "loss": 0.0324, "step": 113870 }, { "epoch": 5.31, - "learning_rate": 9.416811213726502e-06, - "loss": 0.0259, + "learning_rate": 1.9433323451859018e-05, + "loss": 0.0179, "step": 113875 }, { "epoch": 5.31, - "learning_rate": 9.416342412451362e-06, - "loss": 0.0711, + "learning_rate": 1.9432855382022998e-05, + "loss": 0.0131, "step": 113880 }, { "epoch": 5.31, - "learning_rate": 9.415873611176224e-06, - "loss": 0.0831, + "learning_rate": 1.9432387312186978e-05, + "loss": 0.0453, "step": 113885 }, { "epoch": 5.31, - "learning_rate": 9.415404809901083e-06, - "loss": 0.0397, + "learning_rate": 1.943191924235096e-05, + "loss": 0.1299, "step": 113890 }, { "epoch": 5.31, - "learning_rate": 9.414936008625945e-06, - "loss": 0.1053, + "learning_rate": 1.943145117251494e-05, + "loss": 0.1413, "step": 113895 }, { "epoch": 5.31, - "learning_rate": 9.414467207350805e-06, - "loss": 0.1415, + "learning_rate": 1.943098310267892e-05, + "loss": 0.0479, "step": 113900 }, { "epoch": 5.31, - "learning_rate": 9.413998406075665e-06, - "loss": 0.3402, + "learning_rate": 1.94305150328429e-05, + "loss": 0.2235, "step": 113905 }, { "epoch": 5.32, - "learning_rate": 9.413529604800525e-06, - "loss": 0.0865, + "learning_rate": 1.9430046963006884e-05, + "loss": 0.0907, "step": 113910 }, { "epoch": 5.32, - "learning_rate": 9.413060803525387e-06, - "loss": 0.0285, + "learning_rate": 1.9429578893170863e-05, + "loss": 0.0303, "step": 113915 }, { "epoch": 5.32, - "learning_rate": 9.412592002250246e-06, - "loss": 0.0336, + "learning_rate": 1.942911082333484e-05, + "loss": 0.0146, "step": 113920 }, { "epoch": 5.32, - "learning_rate": 9.412123200975108e-06, - "loss": 0.0218, + "learning_rate": 1.942864275349882e-05, + "loss": 0.0181, "step": 113925 }, { "epoch": 5.32, - "learning_rate": 9.411654399699968e-06, - "loss": 0.0499, + "learning_rate": 1.9428174683662803e-05, + "loss": 0.0183, "step": 113930 }, { "epoch": 5.32, - "learning_rate": 9.41118559842483e-06, - "loss": 0.0519, + "learning_rate": 1.9427706613826783e-05, + "loss": 0.0285, "step": 113935 }, { "epoch": 5.32, - "learning_rate": 9.41071679714969e-06, - "loss": 0.0655, + "learning_rate": 1.9427238543990762e-05, + "loss": 0.0487, "step": 113940 }, { "epoch": 5.32, - "learning_rate": 9.41024799587455e-06, - "loss": 0.069, + "learning_rate": 1.9426770474154746e-05, + "loss": 0.0708, "step": 113945 }, { "epoch": 5.32, - "learning_rate": 9.40977919459941e-06, - "loss": 0.1221, + "learning_rate": 1.9426302404318725e-05, + "loss": 0.0851, "step": 113950 }, { "epoch": 5.32, - "learning_rate": 9.40931039332427e-06, - "loss": 0.2932, + "learning_rate": 1.9425834334482705e-05, + "loss": 0.1962, "step": 113955 }, { "epoch": 5.32, - "learning_rate": 9.408841592049131e-06, - "loss": 0.113, + "learning_rate": 1.9425366264646685e-05, + "loss": 0.0819, "step": 113960 }, { "epoch": 5.32, - "learning_rate": 9.408372790773993e-06, - "loss": 0.0143, + "learning_rate": 1.9424898194810668e-05, + "loss": 0.0123, "step": 113965 }, { "epoch": 5.32, - "learning_rate": 9.407903989498853e-06, - "loss": 0.0245, + "learning_rate": 1.9424430124974648e-05, + "loss": 0.0208, "step": 113970 }, { "epoch": 5.32, - "learning_rate": 9.407435188223712e-06, - "loss": 0.0397, + "learning_rate": 1.9423962055138628e-05, + "loss": 0.0282, "step": 113975 }, { "epoch": 5.32, - "learning_rate": 9.406966386948572e-06, - "loss": 0.0226, + "learning_rate": 1.9423493985302608e-05, + "loss": 0.0403, "step": 113980 }, { "epoch": 5.32, - "learning_rate": 9.406497585673434e-06, - "loss": 0.082, + "learning_rate": 1.9423025915466588e-05, + "loss": 0.0333, "step": 113985 }, { "epoch": 5.32, - "learning_rate": 9.406028784398294e-06, - "loss": 0.0784, + "learning_rate": 1.9422557845630567e-05, + "loss": 0.0335, "step": 113990 }, { "epoch": 5.32, - "learning_rate": 9.405559983123154e-06, - "loss": 0.1963, + "learning_rate": 1.9422089775794547e-05, + "loss": 0.0915, "step": 113995 }, { "epoch": 5.32, - "learning_rate": 9.405091181848016e-06, - "loss": 0.1269, + "learning_rate": 1.942162170595853e-05, + "loss": 0.0665, "step": 114000 }, { "epoch": 5.32, - "learning_rate": 9.404622380572877e-06, - "loss": 0.3431, + "learning_rate": 1.942115363612251e-05, + "loss": 0.1332, "step": 114005 }, { "epoch": 5.32, - "learning_rate": 9.404153579297737e-06, - "loss": 0.083, + "learning_rate": 1.942068556628649e-05, + "loss": 0.1038, "step": 114010 }, { "epoch": 5.32, - "learning_rate": 9.403684778022597e-06, - "loss": 0.0225, + "learning_rate": 1.942021749645047e-05, + "loss": 0.0294, "step": 114015 }, { "epoch": 5.32, - "learning_rate": 9.403215976747457e-06, - "loss": 0.0277, + "learning_rate": 1.9419749426614453e-05, + "loss": 0.005, "step": 114020 }, { "epoch": 5.32, - "learning_rate": 9.402747175472317e-06, - "loss": 0.0106, + "learning_rate": 1.9419281356778433e-05, + "loss": 0.0379, "step": 114025 }, { "epoch": 5.32, - "learning_rate": 9.402278374197179e-06, - "loss": 0.0338, + "learning_rate": 1.9418813286942413e-05, + "loss": 0.0688, "step": 114030 }, { "epoch": 5.32, - "learning_rate": 9.40180957292204e-06, - "loss": 0.0682, + "learning_rate": 1.9418345217106393e-05, + "loss": 0.039, "step": 114035 }, { "epoch": 5.32, - "learning_rate": 9.4013407716469e-06, - "loss": 0.0604, + "learning_rate": 1.9417877147270376e-05, + "loss": 0.0869, "step": 114040 }, { "epoch": 5.32, - "learning_rate": 9.40087197037176e-06, - "loss": 0.0699, + "learning_rate": 1.9417409077434352e-05, + "loss": 0.1327, "step": 114045 }, { "epoch": 5.32, - "learning_rate": 9.40040316909662e-06, - "loss": 0.1606, + "learning_rate": 1.9416941007598332e-05, + "loss": 0.0881, "step": 114050 }, { "epoch": 5.32, - "learning_rate": 9.399934367821482e-06, - "loss": 0.1665, + "learning_rate": 1.9416472937762315e-05, + "loss": 0.2067, "step": 114055 }, { "epoch": 5.32, - "learning_rate": 9.399465566546342e-06, - "loss": 0.0863, + "learning_rate": 1.9416004867926295e-05, + "loss": 0.059, "step": 114060 }, { "epoch": 5.32, - "learning_rate": 9.398996765271201e-06, - "loss": 0.0148, + "learning_rate": 1.9415536798090275e-05, + "loss": 0.0102, "step": 114065 }, { "epoch": 5.32, - "learning_rate": 9.398527963996063e-06, - "loss": 0.0078, + "learning_rate": 1.9415068728254255e-05, + "loss": 0.0159, "step": 114070 }, { "epoch": 5.32, - "learning_rate": 9.398059162720925e-06, - "loss": 0.0433, + "learning_rate": 1.9414600658418238e-05, + "loss": 0.0304, "step": 114075 }, { "epoch": 5.32, - "learning_rate": 9.397590361445785e-06, - "loss": 0.045, + "learning_rate": 1.9414132588582218e-05, + "loss": 0.0133, "step": 114080 }, { "epoch": 5.32, - "learning_rate": 9.397121560170645e-06, - "loss": 0.1152, + "learning_rate": 1.9413664518746197e-05, + "loss": 0.0444, "step": 114085 }, { "epoch": 5.32, - "learning_rate": 9.396652758895505e-06, - "loss": 0.091, + "learning_rate": 1.9413196448910177e-05, + "loss": 0.064, "step": 114090 }, { "epoch": 5.32, - "learning_rate": 9.396183957620364e-06, - "loss": 0.1368, + "learning_rate": 1.941272837907416e-05, + "loss": 0.1364, "step": 114095 }, { "epoch": 5.32, - "learning_rate": 9.395715156345226e-06, - "loss": 0.229, + "learning_rate": 1.941226030923814e-05, + "loss": 0.125, "step": 114100 }, { "epoch": 5.32, - "learning_rate": 9.395246355070086e-06, - "loss": 0.2919, + "learning_rate": 1.941179223940212e-05, + "loss": 0.2696, "step": 114105 }, { "epoch": 5.32, - "learning_rate": 9.394777553794948e-06, - "loss": 0.0889, + "learning_rate": 1.9411324169566097e-05, + "loss": 0.0784, "step": 114110 }, { "epoch": 5.32, - "learning_rate": 9.394308752519808e-06, - "loss": 0.0117, + "learning_rate": 1.941085609973008e-05, + "loss": 0.0009, "step": 114115 }, { "epoch": 5.32, - "learning_rate": 9.393839951244668e-06, - "loss": 0.014, + "learning_rate": 1.941038802989406e-05, + "loss": 0.023, "step": 114120 }, { "epoch": 5.33, - "learning_rate": 9.393371149969529e-06, - "loss": 0.0159, + "learning_rate": 1.940991996005804e-05, + "loss": 0.0116, "step": 114125 }, { "epoch": 5.33, - "learning_rate": 9.392902348694389e-06, - "loss": 0.0107, + "learning_rate": 1.9409451890222023e-05, + "loss": 0.0477, "step": 114130 }, { "epoch": 5.33, - "learning_rate": 9.392433547419249e-06, - "loss": 0.0485, + "learning_rate": 1.9408983820386002e-05, + "loss": 0.0599, "step": 114135 }, { "epoch": 5.33, - "learning_rate": 9.39196474614411e-06, - "loss": 0.107, + "learning_rate": 1.9408515750549982e-05, + "loss": 0.0677, "step": 114140 }, { "epoch": 5.33, - "learning_rate": 9.39149594486897e-06, - "loss": 0.0783, + "learning_rate": 1.9408047680713962e-05, + "loss": 0.1234, "step": 114145 }, { "epoch": 5.33, - "learning_rate": 9.391027143593832e-06, - "loss": 0.1505, + "learning_rate": 1.9407579610877945e-05, + "loss": 0.1925, "step": 114150 }, { "epoch": 5.33, - "learning_rate": 9.390558342318692e-06, - "loss": 0.2627, + "learning_rate": 1.9407111541041925e-05, + "loss": 0.1341, "step": 114155 }, { "epoch": 5.33, - "learning_rate": 9.390089541043552e-06, - "loss": 0.0615, + "learning_rate": 1.9406643471205905e-05, + "loss": 0.0967, "step": 114160 }, { "epoch": 5.33, - "learning_rate": 9.389620739768412e-06, - "loss": 0.0335, + "learning_rate": 1.9406175401369885e-05, + "loss": 0.0307, "step": 114165 }, { "epoch": 5.33, - "learning_rate": 9.389151938493274e-06, - "loss": 0.0297, + "learning_rate": 1.9405707331533868e-05, + "loss": 0.0069, "step": 114170 }, { "epoch": 5.33, - "learning_rate": 9.388683137218134e-06, - "loss": 0.015, + "learning_rate": 1.9405239261697844e-05, + "loss": 0.0268, "step": 114175 }, { "epoch": 5.33, - "learning_rate": 9.388214335942995e-06, - "loss": 0.0447, + "learning_rate": 1.9404771191861824e-05, + "loss": 0.0177, "step": 114180 }, { "epoch": 5.33, - "learning_rate": 9.387745534667855e-06, - "loss": 0.0636, + "learning_rate": 1.9404303122025807e-05, + "loss": 0.0372, "step": 114185 }, { "epoch": 5.33, - "learning_rate": 9.387276733392715e-06, - "loss": 0.0832, + "learning_rate": 1.9403835052189787e-05, + "loss": 0.0875, "step": 114190 }, { "epoch": 5.33, - "learning_rate": 9.386807932117577e-06, - "loss": 0.1224, + "learning_rate": 1.9403366982353767e-05, + "loss": 0.0889, "step": 114195 }, { "epoch": 5.33, - "learning_rate": 9.386339130842437e-06, - "loss": 0.1061, + "learning_rate": 1.9402898912517747e-05, + "loss": 0.1112, "step": 114200 }, { "epoch": 5.33, - "learning_rate": 9.385870329567297e-06, - "loss": 0.297, + "learning_rate": 1.940243084268173e-05, + "loss": 0.2228, "step": 114205 }, { "epoch": 5.33, - "learning_rate": 9.385401528292158e-06, - "loss": 0.0611, + "learning_rate": 1.940196277284571e-05, + "loss": 0.0532, "step": 114210 }, { "epoch": 5.33, - "learning_rate": 9.384932727017018e-06, - "loss": 0.0272, + "learning_rate": 1.940149470300969e-05, + "loss": 0.0621, "step": 114215 }, { "epoch": 5.33, - "learning_rate": 9.38446392574188e-06, - "loss": 0.02, + "learning_rate": 1.940102663317367e-05, + "loss": 0.0193, "step": 114220 }, { "epoch": 5.33, - "learning_rate": 9.38399512446674e-06, - "loss": 0.053, + "learning_rate": 1.9400558563337653e-05, + "loss": 0.0129, "step": 114225 }, { "epoch": 5.33, - "learning_rate": 9.3835263231916e-06, - "loss": 0.0498, + "learning_rate": 1.9400090493501633e-05, + "loss": 0.0261, "step": 114230 }, { "epoch": 5.33, - "learning_rate": 9.38305752191646e-06, - "loss": 0.0682, + "learning_rate": 1.939962242366561e-05, + "loss": 0.0338, "step": 114235 }, { "epoch": 5.33, - "learning_rate": 9.382588720641321e-06, - "loss": 0.0497, + "learning_rate": 1.9399154353829592e-05, + "loss": 0.0738, "step": 114240 }, { "epoch": 5.33, - "learning_rate": 9.382119919366181e-06, - "loss": 0.1241, + "learning_rate": 1.9398686283993572e-05, + "loss": 0.0949, "step": 114245 }, { "epoch": 5.33, - "learning_rate": 9.381651118091043e-06, - "loss": 0.1002, + "learning_rate": 1.9398218214157552e-05, + "loss": 0.1072, "step": 114250 }, { "epoch": 5.33, - "learning_rate": 9.381182316815903e-06, - "loss": 0.2096, + "learning_rate": 1.939775014432153e-05, + "loss": 0.1303, "step": 114255 }, { "epoch": 5.33, - "learning_rate": 9.380713515540764e-06, - "loss": 0.1052, + "learning_rate": 1.9397282074485515e-05, + "loss": 0.068, "step": 114260 }, { "epoch": 5.33, - "learning_rate": 9.380244714265624e-06, - "loss": 0.0849, + "learning_rate": 1.9396814004649495e-05, + "loss": 0.0065, "step": 114265 }, { "epoch": 5.33, - "learning_rate": 9.379775912990484e-06, - "loss": 0.0168, + "learning_rate": 1.9396345934813474e-05, + "loss": 0.0318, "step": 114270 }, { "epoch": 5.33, - "learning_rate": 9.379307111715344e-06, - "loss": 0.042, + "learning_rate": 1.9395877864977454e-05, + "loss": 0.033, "step": 114275 }, { "epoch": 5.33, - "learning_rate": 9.378838310440204e-06, - "loss": 0.0984, + "learning_rate": 1.9395409795141437e-05, + "loss": 0.0369, "step": 114280 }, { "epoch": 5.33, - "learning_rate": 9.378369509165066e-06, - "loss": 0.0469, + "learning_rate": 1.9394941725305417e-05, + "loss": 0.0254, "step": 114285 }, { "epoch": 5.33, - "learning_rate": 9.377900707889927e-06, - "loss": 0.0815, + "learning_rate": 1.9394473655469397e-05, + "loss": 0.1625, "step": 114290 }, { "epoch": 5.33, - "learning_rate": 9.377431906614787e-06, - "loss": 0.1223, + "learning_rate": 1.9394005585633377e-05, + "loss": 0.0843, "step": 114295 }, { "epoch": 5.33, - "learning_rate": 9.376963105339647e-06, - "loss": 0.1178, + "learning_rate": 1.9393537515797357e-05, + "loss": 0.1042, "step": 114300 }, { "epoch": 5.33, - "learning_rate": 9.376494304064507e-06, - "loss": 0.2384, + "learning_rate": 1.9393069445961337e-05, + "loss": 0.1086, "step": 114305 }, { "epoch": 5.33, - "learning_rate": 9.376025502789369e-06, - "loss": 0.1208, + "learning_rate": 1.9392601376125316e-05, + "loss": 0.0848, "step": 114310 }, { "epoch": 5.33, - "learning_rate": 9.375556701514229e-06, - "loss": 0.0195, + "learning_rate": 1.93921333062893e-05, + "loss": 0.0327, "step": 114315 }, { "epoch": 5.33, - "learning_rate": 9.375087900239089e-06, - "loss": 0.0041, + "learning_rate": 1.939166523645328e-05, + "loss": 0.0242, "step": 114320 }, { "epoch": 5.33, - "learning_rate": 9.37461909896395e-06, - "loss": 0.0317, + "learning_rate": 1.939119716661726e-05, + "loss": 0.0362, "step": 114325 }, { "epoch": 5.33, - "learning_rate": 9.374150297688812e-06, - "loss": 0.0554, + "learning_rate": 1.939072909678124e-05, + "loss": 0.0249, "step": 114330 }, { "epoch": 5.34, - "learning_rate": 9.373681496413672e-06, - "loss": 0.0392, + "learning_rate": 1.9390261026945222e-05, + "loss": 0.074, "step": 114335 }, { "epoch": 5.34, - "learning_rate": 9.373212695138532e-06, - "loss": 0.0486, + "learning_rate": 1.9389792957109202e-05, + "loss": 0.0657, "step": 114340 }, { "epoch": 5.34, - "learning_rate": 9.372743893863392e-06, - "loss": 0.0901, + "learning_rate": 1.9389324887273182e-05, + "loss": 0.0832, "step": 114345 }, { "epoch": 5.34, - "learning_rate": 9.372275092588252e-06, - "loss": 0.0522, + "learning_rate": 1.938885681743716e-05, + "loss": 0.1187, "step": 114350 }, { "epoch": 5.34, - "learning_rate": 9.371806291313113e-06, - "loss": 0.185, + "learning_rate": 1.9388388747601145e-05, + "loss": 0.3402, "step": 114355 }, { "epoch": 5.34, - "learning_rate": 9.371337490037973e-06, - "loss": 0.1031, + "learning_rate": 1.938792067776512e-05, + "loss": 0.0758, "step": 114360 }, { "epoch": 5.34, - "learning_rate": 9.370868688762835e-06, - "loss": 0.0166, + "learning_rate": 1.93874526079291e-05, + "loss": 0.0045, "step": 114365 }, { "epoch": 5.34, - "learning_rate": 9.370399887487695e-06, - "loss": 0.0315, + "learning_rate": 1.9386984538093084e-05, + "loss": 0.014, "step": 114370 }, { "epoch": 5.34, - "learning_rate": 9.369931086212555e-06, - "loss": 0.0119, + "learning_rate": 1.9386516468257064e-05, + "loss": 0.0347, "step": 114375 }, { "epoch": 5.34, - "learning_rate": 9.369462284937416e-06, - "loss": 0.0221, + "learning_rate": 1.9386048398421044e-05, + "loss": 0.027, "step": 114380 }, { "epoch": 5.34, - "learning_rate": 9.368993483662276e-06, - "loss": 0.0637, + "learning_rate": 1.9385580328585024e-05, + "loss": 0.0454, "step": 114385 }, { "epoch": 5.34, - "learning_rate": 9.368524682387136e-06, - "loss": 0.0771, + "learning_rate": 1.9385112258749007e-05, + "loss": 0.0704, "step": 114390 }, { "epoch": 5.34, - "learning_rate": 9.368055881111998e-06, - "loss": 0.1146, + "learning_rate": 1.9384644188912987e-05, + "loss": 0.1058, "step": 114395 }, { "epoch": 5.34, - "learning_rate": 9.367587079836858e-06, - "loss": 0.0959, + "learning_rate": 1.9384176119076967e-05, + "loss": 0.146, "step": 114400 }, { "epoch": 5.34, - "learning_rate": 9.36711827856172e-06, - "loss": 0.2199, + "learning_rate": 1.9383708049240946e-05, + "loss": 0.333, "step": 114405 }, { "epoch": 5.34, - "learning_rate": 9.36664947728658e-06, - "loss": 0.0978, + "learning_rate": 1.938323997940493e-05, + "loss": 0.0996, "step": 114410 }, { "epoch": 5.34, - "learning_rate": 9.36618067601144e-06, - "loss": 0.0288, + "learning_rate": 1.938277190956891e-05, + "loss": 0.0212, "step": 114415 }, { "epoch": 5.34, - "learning_rate": 9.365711874736299e-06, - "loss": 0.0175, + "learning_rate": 1.938230383973289e-05, + "loss": 0.0021, "step": 114420 }, { "epoch": 5.34, - "learning_rate": 9.36524307346116e-06, - "loss": 0.0106, + "learning_rate": 1.938183576989687e-05, + "loss": 0.0367, "step": 114425 }, { "epoch": 5.34, - "learning_rate": 9.36477427218602e-06, - "loss": 0.0693, + "learning_rate": 1.938136770006085e-05, + "loss": 0.0355, "step": 114430 }, { "epoch": 5.34, - "learning_rate": 9.364305470910882e-06, - "loss": 0.0705, + "learning_rate": 1.938089963022483e-05, + "loss": 0.0317, "step": 114435 }, { "epoch": 5.34, - "learning_rate": 9.363836669635742e-06, - "loss": 0.0334, + "learning_rate": 1.938043156038881e-05, + "loss": 0.0948, "step": 114440 }, { "epoch": 5.34, - "learning_rate": 9.363367868360602e-06, - "loss": 0.0699, + "learning_rate": 1.9379963490552792e-05, + "loss": 0.0427, "step": 114445 }, { "epoch": 5.34, - "learning_rate": 9.362899067085464e-06, - "loss": 0.1493, + "learning_rate": 1.937949542071677e-05, + "loss": 0.148, "step": 114450 }, { "epoch": 5.34, - "learning_rate": 9.362430265810324e-06, - "loss": 0.2789, + "learning_rate": 1.937902735088075e-05, + "loss": 0.231, "step": 114455 }, { "epoch": 5.34, - "learning_rate": 9.361961464535184e-06, - "loss": 0.1214, + "learning_rate": 1.937855928104473e-05, + "loss": 0.0875, "step": 114460 }, { "epoch": 5.34, - "learning_rate": 9.361492663260045e-06, - "loss": 0.026, + "learning_rate": 1.9378091211208714e-05, + "loss": 0.025, "step": 114465 }, { "epoch": 5.34, - "learning_rate": 9.361023861984905e-06, - "loss": 0.0459, + "learning_rate": 1.9377623141372694e-05, + "loss": 0.0428, "step": 114470 }, { "epoch": 5.34, - "learning_rate": 9.360555060709767e-06, - "loss": 0.0145, + "learning_rate": 1.9377155071536674e-05, + "loss": 0.0694, "step": 114475 }, { "epoch": 5.34, - "learning_rate": 9.360086259434627e-06, - "loss": 0.0163, + "learning_rate": 1.9376687001700657e-05, + "loss": 0.011, "step": 114480 }, { "epoch": 5.34, - "learning_rate": 9.359617458159487e-06, - "loss": 0.0233, + "learning_rate": 1.9376218931864637e-05, + "loss": 0.0826, "step": 114485 }, { "epoch": 5.34, - "learning_rate": 9.359148656884347e-06, - "loss": 0.0748, + "learning_rate": 1.9375750862028614e-05, + "loss": 0.034, "step": 114490 }, { "epoch": 5.34, - "learning_rate": 9.358679855609208e-06, - "loss": 0.061, + "learning_rate": 1.9375282792192593e-05, + "loss": 0.1517, "step": 114495 }, { "epoch": 5.34, - "learning_rate": 9.358211054334068e-06, - "loss": 0.1492, + "learning_rate": 1.9374814722356577e-05, + "loss": 0.1662, "step": 114500 }, { "epoch": 5.34, - "learning_rate": 9.35774225305893e-06, - "loss": 0.2068, + "learning_rate": 1.9374346652520556e-05, + "loss": 0.1466, "step": 114505 }, { "epoch": 5.34, - "learning_rate": 9.35727345178379e-06, - "loss": 0.0973, + "learning_rate": 1.9373878582684536e-05, + "loss": 0.1002, "step": 114510 }, { "epoch": 5.34, - "learning_rate": 9.35680465050865e-06, - "loss": 0.0106, + "learning_rate": 1.9373410512848516e-05, + "loss": 0.0223, "step": 114515 }, { "epoch": 5.34, - "learning_rate": 9.356335849233511e-06, - "loss": 0.0407, + "learning_rate": 1.93729424430125e-05, + "loss": 0.0389, "step": 114520 }, { "epoch": 5.34, - "learning_rate": 9.355867047958371e-06, - "loss": 0.0181, + "learning_rate": 1.937247437317648e-05, + "loss": 0.0293, "step": 114525 }, { "epoch": 5.34, - "learning_rate": 9.355398246683231e-06, - "loss": 0.0255, + "learning_rate": 1.937200630334046e-05, + "loss": 0.0422, "step": 114530 }, { "epoch": 5.34, - "learning_rate": 9.354929445408091e-06, - "loss": 0.0751, + "learning_rate": 1.937153823350444e-05, + "loss": 0.0347, "step": 114535 }, { "epoch": 5.34, - "learning_rate": 9.354460644132953e-06, - "loss": 0.0798, + "learning_rate": 1.9371070163668422e-05, + "loss": 0.0868, "step": 114540 }, { "epoch": 5.34, - "learning_rate": 9.353991842857814e-06, - "loss": 0.1056, + "learning_rate": 1.93706020938324e-05, + "loss": 0.095, "step": 114545 }, { "epoch": 5.35, - "learning_rate": 9.353523041582674e-06, - "loss": 0.1526, + "learning_rate": 1.9370134023996378e-05, + "loss": 0.0906, "step": 114550 }, { "epoch": 5.35, - "learning_rate": 9.353054240307534e-06, - "loss": 0.2644, + "learning_rate": 1.936966595416036e-05, + "loss": 0.3408, "step": 114555 }, { "epoch": 5.35, - "learning_rate": 9.352585439032394e-06, - "loss": 0.1278, + "learning_rate": 1.936919788432434e-05, + "loss": 0.0562, "step": 114560 }, { "epoch": 5.35, - "learning_rate": 9.352116637757256e-06, - "loss": 0.0298, + "learning_rate": 1.936872981448832e-05, + "loss": 0.0593, "step": 114565 }, { "epoch": 5.35, - "learning_rate": 9.351647836482116e-06, - "loss": 0.0571, + "learning_rate": 1.93682617446523e-05, + "loss": 0.0279, "step": 114570 }, { "epoch": 5.35, - "learning_rate": 9.351179035206976e-06, - "loss": 0.0371, + "learning_rate": 1.9367793674816284e-05, + "loss": 0.0404, "step": 114575 }, { "epoch": 5.35, - "learning_rate": 9.350710233931837e-06, - "loss": 0.0532, + "learning_rate": 1.9367325604980264e-05, + "loss": 0.0357, "step": 114580 }, { "epoch": 5.35, - "learning_rate": 9.350241432656697e-06, - "loss": 0.0496, + "learning_rate": 1.9366857535144244e-05, + "loss": 0.0355, "step": 114585 }, { "epoch": 5.35, - "learning_rate": 9.349772631381559e-06, - "loss": 0.0985, + "learning_rate": 1.9366389465308223e-05, + "loss": 0.0608, "step": 114590 }, { "epoch": 5.35, - "learning_rate": 9.349303830106419e-06, - "loss": 0.0448, + "learning_rate": 1.9365921395472207e-05, + "loss": 0.0603, "step": 114595 }, { "epoch": 5.35, - "learning_rate": 9.348835028831279e-06, - "loss": 0.1443, + "learning_rate": 1.9365453325636186e-05, + "loss": 0.1458, "step": 114600 }, { "epoch": 5.35, - "learning_rate": 9.348366227556139e-06, - "loss": 0.3268, + "learning_rate": 1.9364985255800166e-05, + "loss": 0.2216, "step": 114605 }, { "epoch": 5.35, - "learning_rate": 9.347897426281e-06, - "loss": 0.0559, + "learning_rate": 1.936451718596415e-05, + "loss": 0.0968, "step": 114610 }, { "epoch": 5.35, - "learning_rate": 9.34742862500586e-06, - "loss": 0.0113, + "learning_rate": 1.9364049116128126e-05, + "loss": 0.0294, "step": 114615 }, { "epoch": 5.35, - "learning_rate": 9.346959823730722e-06, - "loss": 0.0224, + "learning_rate": 1.9363581046292106e-05, + "loss": 0.0676, "step": 114620 }, { "epoch": 5.35, - "learning_rate": 9.346491022455582e-06, - "loss": 0.0267, + "learning_rate": 1.9363112976456086e-05, + "loss": 0.0463, "step": 114625 }, { "epoch": 5.35, - "learning_rate": 9.346022221180442e-06, - "loss": 0.015, + "learning_rate": 1.936264490662007e-05, + "loss": 0.02, "step": 114630 }, { "epoch": 5.35, - "learning_rate": 9.345553419905303e-06, - "loss": 0.0351, + "learning_rate": 1.936217683678405e-05, + "loss": 0.1006, "step": 114635 }, { "epoch": 5.35, - "learning_rate": 9.345084618630163e-06, - "loss": 0.1382, + "learning_rate": 1.936170876694803e-05, + "loss": 0.061, "step": 114640 }, { "epoch": 5.35, - "learning_rate": 9.344615817355023e-06, - "loss": 0.0968, + "learning_rate": 1.9361240697112008e-05, + "loss": 0.0895, "step": 114645 }, { "epoch": 5.35, - "learning_rate": 9.344147016079885e-06, - "loss": 0.0777, + "learning_rate": 1.936077262727599e-05, + "loss": 0.1764, "step": 114650 }, { "epoch": 5.35, - "learning_rate": 9.343678214804745e-06, - "loss": 0.1628, + "learning_rate": 1.936030455743997e-05, + "loss": 0.1664, "step": 114655 }, { "epoch": 5.35, - "learning_rate": 9.343209413529606e-06, - "loss": 0.0776, + "learning_rate": 1.935983648760395e-05, + "loss": 0.1144, "step": 114660 }, { "epoch": 5.35, - "learning_rate": 9.342740612254466e-06, - "loss": 0.0164, + "learning_rate": 1.9359368417767934e-05, + "loss": 0.0232, "step": 114665 }, { "epoch": 5.35, - "learning_rate": 9.342271810979326e-06, - "loss": 0.0275, + "learning_rate": 1.9358900347931914e-05, + "loss": 0.0115, "step": 114670 }, { "epoch": 5.35, - "learning_rate": 9.341803009704186e-06, - "loss": 0.0146, + "learning_rate": 1.9358432278095894e-05, + "loss": 0.0224, "step": 114675 }, { "epoch": 5.35, - "learning_rate": 9.341334208429048e-06, - "loss": 0.0084, + "learning_rate": 1.935796420825987e-05, + "loss": 0.0798, "step": 114680 }, { "epoch": 5.35, - "learning_rate": 9.340865407153908e-06, - "loss": 0.0953, + "learning_rate": 1.9357496138423854e-05, + "loss": 0.0186, "step": 114685 }, { "epoch": 5.35, - "learning_rate": 9.34039660587877e-06, - "loss": 0.0912, + "learning_rate": 1.9357028068587833e-05, + "loss": 0.0999, "step": 114690 }, { "epoch": 5.35, - "learning_rate": 9.33992780460363e-06, - "loss": 0.0587, + "learning_rate": 1.9356559998751813e-05, + "loss": 0.0806, "step": 114695 }, { "epoch": 5.35, - "learning_rate": 9.33945900332849e-06, - "loss": 0.1331, + "learning_rate": 1.9356091928915793e-05, + "loss": 0.0791, "step": 114700 }, { "epoch": 5.35, - "learning_rate": 9.338990202053351e-06, - "loss": 0.2067, + "learning_rate": 1.9355623859079776e-05, + "loss": 0.3477, "step": 114705 }, { "epoch": 5.35, - "learning_rate": 9.33852140077821e-06, - "loss": 0.0927, + "learning_rate": 1.9355155789243756e-05, + "loss": 0.0533, "step": 114710 }, { "epoch": 5.35, - "learning_rate": 9.33805259950307e-06, - "loss": 0.0254, + "learning_rate": 1.9354687719407736e-05, + "loss": 0.0047, "step": 114715 }, { "epoch": 5.35, - "learning_rate": 9.337583798227932e-06, - "loss": 0.0118, + "learning_rate": 1.9354219649571716e-05, + "loss": 0.0113, "step": 114720 }, { "epoch": 5.35, - "learning_rate": 9.337114996952792e-06, - "loss": 0.0869, + "learning_rate": 1.93537515797357e-05, + "loss": 0.0247, "step": 114725 }, { "epoch": 5.35, - "learning_rate": 9.336646195677654e-06, - "loss": 0.044, + "learning_rate": 1.935328350989968e-05, + "loss": 0.0285, "step": 114730 }, { "epoch": 5.35, - "learning_rate": 9.336177394402514e-06, - "loss": 0.0341, + "learning_rate": 1.935281544006366e-05, + "loss": 0.0359, "step": 114735 }, { "epoch": 5.35, - "learning_rate": 9.335708593127374e-06, - "loss": 0.0953, + "learning_rate": 1.9352347370227638e-05, + "loss": 0.0889, "step": 114740 }, { "epoch": 5.35, - "learning_rate": 9.335239791852234e-06, - "loss": 0.0679, + "learning_rate": 1.9351879300391618e-05, + "loss": 0.0418, "step": 114745 }, { "epoch": 5.35, - "learning_rate": 9.334770990577095e-06, - "loss": 0.1204, + "learning_rate": 1.9351411230555598e-05, + "loss": 0.1064, "step": 114750 }, { "epoch": 5.35, - "learning_rate": 9.334302189301955e-06, - "loss": 0.2114, + "learning_rate": 1.9350943160719578e-05, + "loss": 0.2327, "step": 114755 }, { "epoch": 5.35, - "learning_rate": 9.333833388026817e-06, - "loss": 0.0945, + "learning_rate": 1.935047509088356e-05, + "loss": 0.0671, "step": 114760 }, { "epoch": 5.36, - "learning_rate": 9.333364586751677e-06, - "loss": 0.0432, + "learning_rate": 1.935000702104754e-05, + "loss": 0.0348, "step": 114765 }, { "epoch": 5.36, - "learning_rate": 9.332895785476537e-06, - "loss": 0.0217, + "learning_rate": 1.934953895121152e-05, + "loss": 0.0478, "step": 114770 }, { "epoch": 5.36, - "learning_rate": 9.332426984201398e-06, - "loss": 0.0191, + "learning_rate": 1.93490708813755e-05, + "loss": 0.0237, "step": 114775 }, { "epoch": 5.36, - "learning_rate": 9.331958182926258e-06, - "loss": 0.0675, + "learning_rate": 1.9348602811539484e-05, + "loss": 0.0376, "step": 114780 }, { "epoch": 5.36, - "learning_rate": 9.331489381651118e-06, - "loss": 0.0683, + "learning_rate": 1.9348134741703463e-05, + "loss": 0.0051, "step": 114785 }, { "epoch": 5.36, - "learning_rate": 9.331020580375978e-06, - "loss": 0.1145, + "learning_rate": 1.9347666671867443e-05, + "loss": 0.0733, "step": 114790 }, { "epoch": 5.36, - "learning_rate": 9.33055177910084e-06, - "loss": 0.1264, + "learning_rate": 1.9347198602031426e-05, + "loss": 0.0528, "step": 114795 }, { "epoch": 5.36, - "learning_rate": 9.330082977825701e-06, - "loss": 0.2337, + "learning_rate": 1.9346730532195406e-05, + "loss": 0.084, "step": 114800 }, { "epoch": 5.36, - "learning_rate": 9.329614176550561e-06, - "loss": 0.1942, + "learning_rate": 1.9346262462359383e-05, + "loss": 0.2569, "step": 114805 }, { "epoch": 5.36, - "learning_rate": 9.329145375275421e-06, - "loss": 0.0612, + "learning_rate": 1.9345794392523363e-05, + "loss": 0.0864, "step": 114810 }, { "epoch": 5.36, - "learning_rate": 9.328676574000281e-06, - "loss": 0.0136, + "learning_rate": 1.9345326322687346e-05, + "loss": 0.0133, "step": 114815 }, { "epoch": 5.36, - "learning_rate": 9.328207772725143e-06, - "loss": 0.028, + "learning_rate": 1.9344858252851326e-05, + "loss": 0.0053, "step": 114820 }, { "epoch": 5.36, - "learning_rate": 9.327738971450003e-06, - "loss": 0.0378, + "learning_rate": 1.9344390183015305e-05, + "loss": 0.0094, "step": 114825 }, { "epoch": 5.36, - "learning_rate": 9.327270170174863e-06, - "loss": 0.0168, + "learning_rate": 1.9343922113179285e-05, + "loss": 0.0748, "step": 114830 }, { "epoch": 5.36, - "learning_rate": 9.326801368899724e-06, - "loss": 0.0449, + "learning_rate": 1.934345404334327e-05, + "loss": 0.0709, "step": 114835 }, { "epoch": 5.36, - "learning_rate": 9.326332567624584e-06, - "loss": 0.0549, + "learning_rate": 1.9342985973507248e-05, + "loss": 0.099, "step": 114840 }, { "epoch": 5.36, - "learning_rate": 9.325863766349446e-06, - "loss": 0.1014, + "learning_rate": 1.9342517903671228e-05, + "loss": 0.0611, "step": 114845 }, { "epoch": 5.36, - "learning_rate": 9.325394965074306e-06, - "loss": 0.1465, + "learning_rate": 1.934204983383521e-05, + "loss": 0.1526, "step": 114850 }, { "epoch": 5.36, - "learning_rate": 9.324926163799166e-06, - "loss": 0.2417, + "learning_rate": 1.934158176399919e-05, + "loss": 0.3086, "step": 114855 }, { "epoch": 5.36, - "learning_rate": 9.324457362524026e-06, - "loss": 0.1064, + "learning_rate": 1.934111369416317e-05, + "loss": 0.0683, "step": 114860 }, { "epoch": 5.36, - "learning_rate": 9.323988561248887e-06, - "loss": 0.019, + "learning_rate": 1.934064562432715e-05, + "loss": 0.0017, "step": 114865 }, { "epoch": 5.36, - "learning_rate": 9.323519759973747e-06, - "loss": 0.0058, + "learning_rate": 1.934017755449113e-05, + "loss": 0.0212, "step": 114870 }, { "epoch": 5.36, - "learning_rate": 9.323050958698609e-06, - "loss": 0.0566, + "learning_rate": 1.933970948465511e-05, + "loss": 0.0507, "step": 114875 }, { "epoch": 5.36, - "learning_rate": 9.322582157423469e-06, - "loss": 0.0552, + "learning_rate": 1.933924141481909e-05, + "loss": 0.0499, "step": 114880 }, { "epoch": 5.36, - "learning_rate": 9.322113356148329e-06, - "loss": 0.0886, + "learning_rate": 1.933877334498307e-05, + "loss": 0.0249, "step": 114885 }, { "epoch": 5.36, - "learning_rate": 9.32164455487319e-06, - "loss": 0.0713, + "learning_rate": 1.9338305275147053e-05, + "loss": 0.1109, "step": 114890 }, { "epoch": 5.36, - "learning_rate": 9.32117575359805e-06, - "loss": 0.0696, + "learning_rate": 1.9337837205311033e-05, + "loss": 0.1286, "step": 114895 }, { "epoch": 5.36, - "learning_rate": 9.32070695232291e-06, - "loss": 0.1268, + "learning_rate": 1.9337369135475013e-05, + "loss": 0.1148, "step": 114900 }, { "epoch": 5.36, - "learning_rate": 9.320238151047772e-06, - "loss": 0.1688, + "learning_rate": 1.9336901065638993e-05, + "loss": 0.2124, "step": 114905 }, { "epoch": 5.36, - "learning_rate": 9.319769349772632e-06, - "loss": 0.1378, + "learning_rate": 1.9336432995802976e-05, + "loss": 0.11, "step": 114910 }, { "epoch": 5.36, - "learning_rate": 9.319300548497494e-06, - "loss": 0.0341, + "learning_rate": 1.9335964925966956e-05, + "loss": 0.0322, "step": 114915 }, { "epoch": 5.36, - "learning_rate": 9.318831747222353e-06, - "loss": 0.0469, + "learning_rate": 1.9335496856130935e-05, + "loss": 0.0139, "step": 114920 }, { "epoch": 5.36, - "learning_rate": 9.318362945947213e-06, - "loss": 0.2669, + "learning_rate": 1.933502878629492e-05, + "loss": 0.0157, "step": 114925 }, { "epoch": 5.36, - "learning_rate": 9.317894144672073e-06, - "loss": 0.0702, + "learning_rate": 1.9334560716458895e-05, + "loss": 0.0253, "step": 114930 }, { "epoch": 5.36, - "learning_rate": 9.317425343396935e-06, - "loss": 0.0559, + "learning_rate": 1.9334092646622875e-05, + "loss": 0.0567, "step": 114935 }, { "epoch": 5.36, - "learning_rate": 9.316956542121795e-06, - "loss": 0.0952, + "learning_rate": 1.9333624576786855e-05, + "loss": 0.0586, "step": 114940 }, { "epoch": 5.36, - "learning_rate": 9.316487740846657e-06, - "loss": 0.0795, + "learning_rate": 1.9333156506950838e-05, + "loss": 0.1176, "step": 114945 }, { "epoch": 5.36, - "learning_rate": 9.316018939571516e-06, - "loss": 0.0878, + "learning_rate": 1.9332688437114818e-05, + "loss": 0.0809, "step": 114950 }, { "epoch": 5.36, - "learning_rate": 9.315550138296376e-06, - "loss": 0.2821, + "learning_rate": 1.9332220367278798e-05, + "loss": 0.3458, "step": 114955 }, { "epoch": 5.36, - "learning_rate": 9.315081337021238e-06, - "loss": 0.0704, + "learning_rate": 1.9331752297442777e-05, + "loss": 0.1178, "step": 114960 }, { "epoch": 5.36, - "learning_rate": 9.314612535746098e-06, - "loss": 0.025, + "learning_rate": 1.933128422760676e-05, + "loss": 0.0332, "step": 114965 }, { "epoch": 5.36, - "learning_rate": 9.314143734470958e-06, - "loss": 0.0578, + "learning_rate": 1.933081615777074e-05, + "loss": 0.0516, "step": 114970 }, { "epoch": 5.36, - "learning_rate": 9.31367493319582e-06, - "loss": 0.0231, + "learning_rate": 1.933034808793472e-05, + "loss": 0.0173, "step": 114975 }, { "epoch": 5.37, - "learning_rate": 9.31320613192068e-06, - "loss": 0.0272, + "learning_rate": 1.9329880018098703e-05, + "loss": 0.0847, "step": 114980 }, { "epoch": 5.37, - "learning_rate": 9.312737330645541e-06, - "loss": 0.0615, + "learning_rate": 1.9329411948262683e-05, + "loss": 0.0112, "step": 114985 }, { "epoch": 5.37, - "learning_rate": 9.312268529370401e-06, - "loss": 0.0578, + "learning_rate": 1.9328943878426663e-05, + "loss": 0.0431, "step": 114990 }, { "epoch": 5.37, - "learning_rate": 9.311799728095261e-06, - "loss": 0.0761, + "learning_rate": 1.932847580859064e-05, + "loss": 0.0551, "step": 114995 }, { "epoch": 5.37, - "learning_rate": 9.311330926820121e-06, - "loss": 0.113, + "learning_rate": 1.9328007738754623e-05, + "loss": 0.0958, "step": 115000 }, { "epoch": 5.37, - "learning_rate": 9.310862125544982e-06, - "loss": 0.2349, + "learning_rate": 1.9327539668918603e-05, + "loss": 0.1632, "step": 115005 }, { "epoch": 5.37, - "learning_rate": 9.310393324269842e-06, - "loss": 0.054, + "learning_rate": 1.9327071599082582e-05, + "loss": 0.0958, "step": 115010 }, { "epoch": 5.37, - "learning_rate": 9.309924522994704e-06, - "loss": 0.0151, + "learning_rate": 1.9326603529246562e-05, + "loss": 0.0133, "step": 115015 }, { "epoch": 5.37, - "learning_rate": 9.309455721719564e-06, - "loss": 0.0222, + "learning_rate": 1.9326135459410545e-05, + "loss": 0.0516, "step": 115020 }, { "epoch": 5.37, - "learning_rate": 9.308986920444424e-06, - "loss": 0.0473, + "learning_rate": 1.9325667389574525e-05, + "loss": 0.0459, "step": 115025 }, { "epoch": 5.37, - "learning_rate": 9.308518119169286e-06, - "loss": 0.0624, + "learning_rate": 1.9325199319738505e-05, + "loss": 0.0161, "step": 115030 }, { "epoch": 5.37, - "learning_rate": 9.308049317894145e-06, - "loss": 0.0183, + "learning_rate": 1.9324731249902488e-05, + "loss": 0.0452, "step": 115035 }, { "epoch": 5.37, - "learning_rate": 9.307580516619005e-06, - "loss": 0.0816, + "learning_rate": 1.9324263180066468e-05, + "loss": 0.2175, "step": 115040 }, { "epoch": 5.37, - "learning_rate": 9.307111715343865e-06, - "loss": 0.0798, + "learning_rate": 1.9323795110230448e-05, + "loss": 0.0583, "step": 115045 }, { "epoch": 5.37, - "learning_rate": 9.306642914068727e-06, - "loss": 0.0944, + "learning_rate": 1.9323327040394428e-05, + "loss": 0.1616, "step": 115050 }, { "epoch": 5.37, - "learning_rate": 9.306174112793589e-06, - "loss": 0.1977, + "learning_rate": 1.9322858970558407e-05, + "loss": 0.262, "step": 115055 }, { "epoch": 5.37, - "learning_rate": 9.305705311518449e-06, - "loss": 0.0654, + "learning_rate": 1.9322390900722387e-05, + "loss": 0.0776, "step": 115060 }, { "epoch": 5.37, - "learning_rate": 9.305236510243308e-06, - "loss": 0.037, + "learning_rate": 1.9321922830886367e-05, + "loss": 0.0069, "step": 115065 }, { "epoch": 5.37, - "learning_rate": 9.304767708968168e-06, - "loss": 0.024, + "learning_rate": 1.9321454761050347e-05, + "loss": 0.0186, "step": 115070 }, { "epoch": 5.37, - "learning_rate": 9.30429890769303e-06, - "loss": 0.0241, + "learning_rate": 1.932098669121433e-05, + "loss": 0.0184, "step": 115075 }, { "epoch": 5.37, - "learning_rate": 9.30383010641789e-06, - "loss": 0.0673, + "learning_rate": 1.932051862137831e-05, + "loss": 0.0542, "step": 115080 }, { "epoch": 5.37, - "learning_rate": 9.30336130514275e-06, - "loss": 0.0382, + "learning_rate": 1.932005055154229e-05, + "loss": 0.0463, "step": 115085 }, { "epoch": 5.37, - "learning_rate": 9.302892503867612e-06, - "loss": 0.0528, + "learning_rate": 1.9319582481706273e-05, + "loss": 0.0583, "step": 115090 }, { "epoch": 5.37, - "learning_rate": 9.302423702592471e-06, - "loss": 0.0695, + "learning_rate": 1.9319114411870253e-05, + "loss": 0.1179, "step": 115095 }, { "epoch": 5.37, - "learning_rate": 9.301954901317333e-06, - "loss": 0.1737, + "learning_rate": 1.9318646342034233e-05, + "loss": 0.1142, "step": 115100 }, { "epoch": 5.37, - "learning_rate": 9.301486100042193e-06, - "loss": 0.2837, + "learning_rate": 1.9318178272198212e-05, + "loss": 0.3191, "step": 115105 }, { "epoch": 5.37, - "learning_rate": 9.301017298767053e-06, - "loss": 0.1077, + "learning_rate": 1.9317710202362196e-05, + "loss": 0.0728, "step": 115110 }, { "epoch": 5.37, - "learning_rate": 9.300548497491913e-06, - "loss": 0.0204, + "learning_rate": 1.9317242132526175e-05, + "loss": 0.0078, "step": 115115 }, { "epoch": 5.37, - "learning_rate": 9.300079696216775e-06, - "loss": 0.0247, + "learning_rate": 1.9316774062690152e-05, + "loss": 0.0181, "step": 115120 }, { "epoch": 5.37, - "learning_rate": 9.299610894941634e-06, - "loss": 0.0332, + "learning_rate": 1.931630599285413e-05, + "loss": 0.0243, "step": 115125 }, { "epoch": 5.37, - "learning_rate": 9.299142093666496e-06, - "loss": 0.024, + "learning_rate": 1.9315837923018115e-05, + "loss": 0.0127, "step": 115130 }, { "epoch": 5.37, - "learning_rate": 9.298673292391356e-06, - "loss": 0.0302, + "learning_rate": 1.9315369853182095e-05, + "loss": 0.0176, "step": 115135 }, { "epoch": 5.37, - "learning_rate": 9.298204491116216e-06, - "loss": 0.0295, + "learning_rate": 1.9314901783346075e-05, + "loss": 0.0466, "step": 115140 }, { "epoch": 5.37, - "learning_rate": 9.297735689841078e-06, - "loss": 0.0819, + "learning_rate": 1.9314433713510054e-05, + "loss": 0.0386, "step": 115145 }, { "epoch": 5.37, - "learning_rate": 9.297266888565938e-06, - "loss": 0.2515, + "learning_rate": 1.9313965643674038e-05, + "loss": 0.1209, "step": 115150 }, { "epoch": 5.37, - "learning_rate": 9.296798087290797e-06, - "loss": 0.1222, + "learning_rate": 1.9313497573838017e-05, + "loss": 0.2447, "step": 115155 }, { "epoch": 5.37, - "learning_rate": 9.296329286015659e-06, - "loss": 0.1292, + "learning_rate": 1.9313029504001997e-05, + "loss": 0.0806, "step": 115160 }, { "epoch": 5.37, - "learning_rate": 9.295860484740519e-06, - "loss": 0.0048, + "learning_rate": 1.931256143416598e-05, + "loss": 0.0165, "step": 115165 }, { "epoch": 5.37, - "learning_rate": 9.29539168346538e-06, - "loss": 0.0249, + "learning_rate": 1.931209336432996e-05, + "loss": 0.0471, "step": 115170 }, { "epoch": 5.37, - "learning_rate": 9.29492288219024e-06, - "loss": 0.0074, + "learning_rate": 1.931162529449394e-05, + "loss": 0.0142, "step": 115175 }, { "epoch": 5.37, - "learning_rate": 9.2944540809151e-06, - "loss": 0.0519, + "learning_rate": 1.931115722465792e-05, + "loss": 0.0351, "step": 115180 }, { "epoch": 5.37, - "learning_rate": 9.29398527963996e-06, - "loss": 0.0533, + "learning_rate": 1.93106891548219e-05, + "loss": 0.0215, "step": 115185 }, { "epoch": 5.37, - "learning_rate": 9.293516478364822e-06, - "loss": 0.0761, + "learning_rate": 1.931022108498588e-05, + "loss": 0.0377, "step": 115190 }, { "epoch": 5.38, - "learning_rate": 9.293047677089682e-06, - "loss": 0.0822, + "learning_rate": 1.930975301514986e-05, + "loss": 0.1335, "step": 115195 }, { "epoch": 5.38, - "learning_rate": 9.292578875814544e-06, - "loss": 0.1226, + "learning_rate": 1.930928494531384e-05, + "loss": 0.093, "step": 115200 }, { "epoch": 5.38, - "learning_rate": 9.292110074539404e-06, - "loss": 0.2269, + "learning_rate": 1.9308816875477822e-05, + "loss": 0.301, "step": 115205 }, { "epoch": 5.38, - "learning_rate": 9.291641273264263e-06, - "loss": 0.0806, + "learning_rate": 1.9308348805641802e-05, + "loss": 0.1177, "step": 115210 }, { "epoch": 5.38, - "learning_rate": 9.291172471989125e-06, - "loss": 0.0104, + "learning_rate": 1.9307880735805782e-05, + "loss": 0.0161, "step": 115215 }, { "epoch": 5.38, - "learning_rate": 9.290703670713985e-06, - "loss": 0.0315, + "learning_rate": 1.9307412665969765e-05, + "loss": 0.0248, "step": 115220 }, { "epoch": 5.38, - "learning_rate": 9.290234869438845e-06, - "loss": 0.0244, + "learning_rate": 1.9306944596133745e-05, + "loss": 0.0548, "step": 115225 }, { "epoch": 5.38, - "learning_rate": 9.289766068163707e-06, - "loss": 0.0268, + "learning_rate": 1.9306476526297725e-05, + "loss": 0.0123, "step": 115230 }, { "epoch": 5.38, - "learning_rate": 9.289297266888567e-06, - "loss": 0.0722, + "learning_rate": 1.9306008456461705e-05, + "loss": 0.0286, "step": 115235 }, { "epoch": 5.38, - "learning_rate": 9.288828465613428e-06, - "loss": 0.0555, + "learning_rate": 1.9305540386625688e-05, + "loss": 0.0614, "step": 115240 }, { "epoch": 5.38, - "learning_rate": 9.288359664338288e-06, - "loss": 0.0795, + "learning_rate": 1.9305072316789664e-05, + "loss": 0.0888, "step": 115245 }, { "epoch": 5.38, - "learning_rate": 9.287890863063148e-06, - "loss": 0.103, + "learning_rate": 1.9304604246953644e-05, + "loss": 0.1086, "step": 115250 }, { "epoch": 5.38, - "learning_rate": 9.287422061788008e-06, - "loss": 0.2388, + "learning_rate": 1.9304136177117624e-05, + "loss": 0.359, "step": 115255 }, { "epoch": 5.38, - "learning_rate": 9.28695326051287e-06, - "loss": 0.0502, + "learning_rate": 1.9303668107281607e-05, + "loss": 0.0897, "step": 115260 }, { "epoch": 5.38, - "learning_rate": 9.28648445923773e-06, - "loss": 0.0116, + "learning_rate": 1.9303200037445587e-05, + "loss": 0.0305, "step": 115265 }, { "epoch": 5.38, - "learning_rate": 9.286015657962591e-06, - "loss": 0.0104, + "learning_rate": 1.9302731967609567e-05, + "loss": 0.0325, "step": 115270 }, { "epoch": 5.38, - "learning_rate": 9.285546856687451e-06, - "loss": 0.0171, + "learning_rate": 1.930226389777355e-05, + "loss": 0.0381, "step": 115275 }, { "epoch": 5.38, - "learning_rate": 9.285078055412311e-06, - "loss": 0.0773, + "learning_rate": 1.930179582793753e-05, + "loss": 0.0282, "step": 115280 }, { "epoch": 5.38, - "learning_rate": 9.284609254137173e-06, - "loss": 0.0577, + "learning_rate": 1.930132775810151e-05, + "loss": 0.0597, "step": 115285 }, { "epoch": 5.38, - "learning_rate": 9.284140452862033e-06, - "loss": 0.1274, + "learning_rate": 1.930085968826549e-05, + "loss": 0.0311, "step": 115290 }, { "epoch": 5.38, - "learning_rate": 9.283671651586893e-06, - "loss": 0.0628, + "learning_rate": 1.9300391618429473e-05, + "loss": 0.1179, "step": 115295 }, { "epoch": 5.38, - "learning_rate": 9.283202850311752e-06, - "loss": 0.1598, + "learning_rate": 1.9299923548593452e-05, + "loss": 0.1499, "step": 115300 }, { "epoch": 5.38, - "learning_rate": 9.282734049036614e-06, - "loss": 0.2426, + "learning_rate": 1.9299455478757432e-05, + "loss": 0.348, "step": 115305 }, { "epoch": 5.38, - "learning_rate": 9.282265247761476e-06, - "loss": 0.0816, + "learning_rate": 1.929898740892141e-05, + "loss": 0.1238, "step": 115310 }, { "epoch": 5.38, - "learning_rate": 9.281796446486336e-06, - "loss": 0.0366, + "learning_rate": 1.9298519339085392e-05, + "loss": 0.0182, "step": 115315 }, { "epoch": 5.38, - "learning_rate": 9.281327645211196e-06, - "loss": 0.0605, + "learning_rate": 1.929805126924937e-05, + "loss": 0.0099, "step": 115320 }, { "epoch": 5.38, - "learning_rate": 9.280858843936056e-06, - "loss": 0.0465, + "learning_rate": 1.929758319941335e-05, + "loss": 0.0296, "step": 115325 }, { "epoch": 5.38, - "learning_rate": 9.280390042660917e-06, - "loss": 0.0186, + "learning_rate": 1.929711512957733e-05, + "loss": 0.0226, "step": 115330 }, { "epoch": 5.38, - "learning_rate": 9.279921241385777e-06, - "loss": 0.0607, + "learning_rate": 1.9296647059741315e-05, + "loss": 0.0469, "step": 115335 }, { "epoch": 5.38, - "learning_rate": 9.279452440110637e-06, - "loss": 0.0337, + "learning_rate": 1.9296178989905294e-05, + "loss": 0.0626, "step": 115340 }, { "epoch": 5.38, - "learning_rate": 9.278983638835499e-06, - "loss": 0.0774, + "learning_rate": 1.9295710920069274e-05, + "loss": 0.1224, "step": 115345 }, { "epoch": 5.38, - "learning_rate": 9.278514837560359e-06, - "loss": 0.2092, + "learning_rate": 1.9295242850233257e-05, + "loss": 0.1286, "step": 115350 }, { "epoch": 5.38, - "learning_rate": 9.27804603628522e-06, - "loss": 0.2044, + "learning_rate": 1.9294774780397237e-05, + "loss": 0.228, "step": 115355 }, { "epoch": 5.38, - "learning_rate": 9.27757723501008e-06, - "loss": 0.0962, + "learning_rate": 1.9294306710561217e-05, + "loss": 0.0829, "step": 115360 }, { "epoch": 5.38, - "learning_rate": 9.27710843373494e-06, - "loss": 0.0477, + "learning_rate": 1.9293838640725197e-05, + "loss": 0.0037, "step": 115365 }, { "epoch": 5.38, - "learning_rate": 9.2766396324598e-06, - "loss": 0.0161, + "learning_rate": 1.929337057088918e-05, + "loss": 0.0124, "step": 115370 }, { "epoch": 5.38, - "learning_rate": 9.276170831184662e-06, - "loss": 0.0636, + "learning_rate": 1.9292902501053156e-05, + "loss": 0.0097, "step": 115375 }, { "epoch": 5.38, - "learning_rate": 9.275702029909522e-06, - "loss": 0.0689, + "learning_rate": 1.9292434431217136e-05, + "loss": 0.0252, "step": 115380 }, { "epoch": 5.38, - "learning_rate": 9.275233228634383e-06, - "loss": 0.0186, + "learning_rate": 1.9291966361381116e-05, + "loss": 0.0471, "step": 115385 }, { "epoch": 5.38, - "learning_rate": 9.274764427359243e-06, - "loss": 0.0609, + "learning_rate": 1.92914982915451e-05, + "loss": 0.0837, "step": 115390 }, { "epoch": 5.38, - "learning_rate": 9.274295626084103e-06, - "loss": 0.1017, + "learning_rate": 1.929103022170908e-05, + "loss": 0.0603, "step": 115395 }, { "epoch": 5.38, - "learning_rate": 9.273826824808965e-06, - "loss": 0.1622, + "learning_rate": 1.929056215187306e-05, + "loss": 0.1717, "step": 115400 }, { "epoch": 5.38, - "learning_rate": 9.273358023533825e-06, - "loss": 0.2766, + "learning_rate": 1.9290094082037042e-05, + "loss": 0.224, "step": 115405 }, { "epoch": 5.39, - "learning_rate": 9.272889222258685e-06, - "loss": 0.0953, + "learning_rate": 1.9289626012201022e-05, + "loss": 0.0846, "step": 115410 }, { "epoch": 5.39, - "learning_rate": 9.272420420983546e-06, - "loss": 0.0288, + "learning_rate": 1.9289157942365002e-05, + "loss": 0.0242, "step": 115415 }, { "epoch": 5.39, - "learning_rate": 9.271951619708406e-06, - "loss": 0.0236, + "learning_rate": 1.928868987252898e-05, + "loss": 0.0615, "step": 115420 }, { "epoch": 5.39, - "learning_rate": 9.271482818433268e-06, - "loss": 0.0767, + "learning_rate": 1.9288221802692965e-05, + "loss": 0.0081, "step": 115425 }, { "epoch": 5.39, - "learning_rate": 9.271014017158128e-06, - "loss": 0.0352, + "learning_rate": 1.9287753732856945e-05, + "loss": 0.0513, "step": 115430 }, { "epoch": 5.39, - "learning_rate": 9.270545215882988e-06, - "loss": 0.041, + "learning_rate": 1.928728566302092e-05, + "loss": 0.0603, "step": 115435 }, { "epoch": 5.39, - "learning_rate": 9.270076414607848e-06, - "loss": 0.0729, + "learning_rate": 1.92868175931849e-05, + "loss": 0.0916, "step": 115440 }, { "epoch": 5.39, - "learning_rate": 9.26960761333271e-06, - "loss": 0.2183, + "learning_rate": 1.9286349523348884e-05, + "loss": 0.1174, "step": 115445 }, { "epoch": 5.39, - "learning_rate": 9.269138812057569e-06, - "loss": 0.1521, + "learning_rate": 1.9285881453512864e-05, + "loss": 0.0765, "step": 115450 }, { "epoch": 5.39, - "learning_rate": 9.26867001078243e-06, - "loss": 0.3255, + "learning_rate": 1.9285413383676844e-05, + "loss": 0.3793, "step": 115455 }, { "epoch": 5.39, - "learning_rate": 9.26820120950729e-06, - "loss": 0.0753, + "learning_rate": 1.9284945313840827e-05, + "loss": 0.0543, "step": 115460 }, { "epoch": 5.39, - "learning_rate": 9.26773240823215e-06, - "loss": 0.0186, + "learning_rate": 1.9284477244004807e-05, + "loss": 0.0108, "step": 115465 }, { "epoch": 5.39, - "learning_rate": 9.267263606957012e-06, - "loss": 0.0167, + "learning_rate": 1.9284009174168787e-05, + "loss": 0.0256, "step": 115470 }, { "epoch": 5.39, - "learning_rate": 9.266794805681872e-06, - "loss": 0.0298, + "learning_rate": 1.9283541104332766e-05, + "loss": 0.0353, "step": 115475 }, { "epoch": 5.39, - "learning_rate": 9.266326004406732e-06, - "loss": 0.0519, + "learning_rate": 1.928307303449675e-05, + "loss": 0.0422, "step": 115480 }, { "epoch": 5.39, - "learning_rate": 9.265857203131594e-06, - "loss": 0.0462, + "learning_rate": 1.928260496466073e-05, + "loss": 0.0459, "step": 115485 }, { "epoch": 5.39, - "learning_rate": 9.265388401856454e-06, - "loss": 0.0959, + "learning_rate": 1.928213689482471e-05, + "loss": 0.0442, "step": 115490 }, { "epoch": 5.39, - "learning_rate": 9.264919600581315e-06, - "loss": 0.1059, + "learning_rate": 1.928166882498869e-05, + "loss": 0.1222, "step": 115495 }, { "epoch": 5.39, - "learning_rate": 9.264450799306175e-06, - "loss": 0.1796, + "learning_rate": 1.928120075515267e-05, + "loss": 0.0685, "step": 115500 }, { "epoch": 5.39, - "learning_rate": 9.263981998031035e-06, - "loss": 0.3054, + "learning_rate": 1.928073268531665e-05, + "loss": 0.2461, "step": 115505 }, { "epoch": 5.39, - "learning_rate": 9.263513196755895e-06, - "loss": 0.0721, + "learning_rate": 1.928026461548063e-05, + "loss": 0.0858, "step": 115510 }, { "epoch": 5.39, - "learning_rate": 9.263044395480757e-06, - "loss": 0.034, + "learning_rate": 1.9279796545644608e-05, + "loss": 0.0089, "step": 115515 }, { "epoch": 5.39, - "learning_rate": 9.262575594205617e-06, - "loss": 0.0156, + "learning_rate": 1.927932847580859e-05, + "loss": 0.0312, "step": 115520 }, { "epoch": 5.39, - "learning_rate": 9.262106792930478e-06, - "loss": 0.0337, + "learning_rate": 1.927886040597257e-05, + "loss": 0.0311, "step": 115525 }, { "epoch": 5.39, - "learning_rate": 9.261637991655338e-06, - "loss": 0.0689, + "learning_rate": 1.927839233613655e-05, + "loss": 0.03, "step": 115530 }, { "epoch": 5.39, - "learning_rate": 9.261169190380198e-06, - "loss": 0.0289, + "learning_rate": 1.9277924266300534e-05, + "loss": 0.0834, "step": 115535 }, { "epoch": 5.39, - "learning_rate": 9.26070038910506e-06, - "loss": 0.0624, + "learning_rate": 1.9277456196464514e-05, + "loss": 0.0531, "step": 115540 }, { "epoch": 5.39, - "learning_rate": 9.26023158782992e-06, - "loss": 0.0794, + "learning_rate": 1.9276988126628494e-05, + "loss": 0.0663, "step": 115545 }, { "epoch": 5.39, - "learning_rate": 9.25976278655478e-06, - "loss": 0.1929, + "learning_rate": 1.9276520056792474e-05, + "loss": 0.1145, "step": 115550 }, { "epoch": 5.39, - "learning_rate": 9.25929398527964e-06, - "loss": 0.2632, + "learning_rate": 1.9276051986956457e-05, + "loss": 0.2266, "step": 115555 }, { "epoch": 5.39, - "learning_rate": 9.258825184004501e-06, - "loss": 0.0855, + "learning_rate": 1.9275583917120433e-05, + "loss": 0.1259, "step": 115560 }, { "epoch": 5.39, - "learning_rate": 9.258356382729363e-06, - "loss": 0.0589, + "learning_rate": 1.9275115847284413e-05, + "loss": 0.002, "step": 115565 }, { "epoch": 5.39, - "learning_rate": 9.257887581454223e-06, - "loss": 0.0279, + "learning_rate": 1.9274647777448393e-05, + "loss": 0.0322, "step": 115570 }, { "epoch": 5.39, - "learning_rate": 9.257418780179083e-06, - "loss": 0.0866, + "learning_rate": 1.9274179707612376e-05, + "loss": 0.0107, "step": 115575 }, { "epoch": 5.39, - "learning_rate": 9.256949978903943e-06, - "loss": 0.0568, + "learning_rate": 1.9273711637776356e-05, + "loss": 0.084, "step": 115580 }, { "epoch": 5.39, - "learning_rate": 9.256481177628804e-06, - "loss": 0.0675, + "learning_rate": 1.9273243567940336e-05, + "loss": 0.0729, "step": 115585 }, { "epoch": 5.39, - "learning_rate": 9.256012376353664e-06, - "loss": 0.0751, + "learning_rate": 1.927277549810432e-05, + "loss": 0.0864, "step": 115590 }, { "epoch": 5.39, - "learning_rate": 9.255543575078524e-06, - "loss": 0.1376, + "learning_rate": 1.92723074282683e-05, + "loss": 0.0968, "step": 115595 }, { "epoch": 5.39, - "learning_rate": 9.255074773803386e-06, - "loss": 0.0757, + "learning_rate": 1.927183935843228e-05, + "loss": 0.1291, "step": 115600 }, { "epoch": 5.39, - "learning_rate": 9.254605972528246e-06, - "loss": 0.2745, + "learning_rate": 1.927137128859626e-05, + "loss": 0.3638, "step": 115605 }, { "epoch": 5.39, - "learning_rate": 9.254137171253107e-06, - "loss": 0.1076, + "learning_rate": 1.9270903218760242e-05, + "loss": 0.062, "step": 115610 }, { "epoch": 5.39, - "learning_rate": 9.253668369977967e-06, - "loss": 0.0244, + "learning_rate": 1.927043514892422e-05, + "loss": 0.0206, "step": 115615 }, { "epoch": 5.39, - "learning_rate": 9.253199568702827e-06, - "loss": 0.0174, + "learning_rate": 1.92699670790882e-05, + "loss": 0.0069, "step": 115620 }, { "epoch": 5.4, - "learning_rate": 9.252730767427687e-06, - "loss": 0.0122, + "learning_rate": 1.9269499009252178e-05, + "loss": 0.0421, "step": 115625 }, { "epoch": 5.4, - "learning_rate": 9.252261966152549e-06, - "loss": 0.0616, + "learning_rate": 1.926903093941616e-05, + "loss": 0.0397, "step": 115630 }, { "epoch": 5.4, - "learning_rate": 9.25179316487741e-06, - "loss": 0.067, + "learning_rate": 1.926856286958014e-05, + "loss": 0.0654, "step": 115635 }, { "epoch": 5.4, - "learning_rate": 9.25132436360227e-06, - "loss": 0.0677, + "learning_rate": 1.926809479974412e-05, + "loss": 0.029, "step": 115640 }, { "epoch": 5.4, - "learning_rate": 9.25085556232713e-06, - "loss": 0.1675, + "learning_rate": 1.9267626729908104e-05, + "loss": 0.0759, "step": 115645 }, { "epoch": 5.4, - "learning_rate": 9.25038676105199e-06, - "loss": 0.1313, + "learning_rate": 1.9267158660072084e-05, + "loss": 0.1203, "step": 115650 }, { "epoch": 5.4, - "learning_rate": 9.249917959776852e-06, - "loss": 0.3149, + "learning_rate": 1.9266690590236064e-05, + "loss": 0.3228, "step": 115655 }, { "epoch": 5.4, - "learning_rate": 9.249449158501712e-06, - "loss": 0.0798, + "learning_rate": 1.9266222520400043e-05, + "loss": 0.085, "step": 115660 }, { "epoch": 5.4, - "learning_rate": 9.248980357226572e-06, - "loss": 0.0121, + "learning_rate": 1.9265754450564027e-05, + "loss": 0.004, "step": 115665 }, { "epoch": 5.4, - "learning_rate": 9.248511555951433e-06, - "loss": 0.0124, + "learning_rate": 1.9265286380728006e-05, + "loss": 0.038, "step": 115670 }, { "epoch": 5.4, - "learning_rate": 9.248042754676293e-06, - "loss": 0.0375, + "learning_rate": 1.9264818310891986e-05, + "loss": 0.0515, "step": 115675 }, { "epoch": 5.4, - "learning_rate": 9.247573953401155e-06, - "loss": 0.0716, + "learning_rate": 1.9264350241055966e-05, + "loss": 0.0478, "step": 115680 }, { "epoch": 5.4, - "learning_rate": 9.247105152126015e-06, - "loss": 0.0306, + "learning_rate": 1.926388217121995e-05, + "loss": 0.0396, "step": 115685 }, { "epoch": 5.4, - "learning_rate": 9.246636350850875e-06, - "loss": 0.0712, + "learning_rate": 1.9263414101383926e-05, + "loss": 0.084, "step": 115690 }, { "epoch": 5.4, - "learning_rate": 9.246167549575735e-06, - "loss": 0.0705, + "learning_rate": 1.9262946031547905e-05, + "loss": 0.1472, "step": 115695 }, { "epoch": 5.4, - "learning_rate": 9.245698748300596e-06, - "loss": 0.2335, + "learning_rate": 1.9262477961711885e-05, + "loss": 0.1258, "step": 115700 }, { "epoch": 5.4, - "learning_rate": 9.245229947025456e-06, - "loss": 0.162, + "learning_rate": 1.926200989187587e-05, + "loss": 0.2471, "step": 115705 }, { "epoch": 5.4, - "learning_rate": 9.244761145750318e-06, - "loss": 0.1534, + "learning_rate": 1.9261541822039848e-05, + "loss": 0.0837, "step": 115710 }, { "epoch": 5.4, - "learning_rate": 9.244292344475178e-06, - "loss": 0.0317, + "learning_rate": 1.9261073752203828e-05, + "loss": 0.0243, "step": 115715 }, { "epoch": 5.4, - "learning_rate": 9.243823543200038e-06, - "loss": 0.0415, + "learning_rate": 1.926060568236781e-05, + "loss": 0.0452, "step": 115720 }, { "epoch": 5.4, - "learning_rate": 9.2433547419249e-06, - "loss": 0.0914, + "learning_rate": 1.926013761253179e-05, + "loss": 0.0316, "step": 115725 }, { "epoch": 5.4, - "learning_rate": 9.24288594064976e-06, - "loss": 0.0543, + "learning_rate": 1.925966954269577e-05, + "loss": 0.0246, "step": 115730 }, { "epoch": 5.4, - "learning_rate": 9.24241713937462e-06, - "loss": 0.1089, + "learning_rate": 1.925920147285975e-05, + "loss": 0.0569, "step": 115735 }, { "epoch": 5.4, - "learning_rate": 9.24194833809948e-06, - "loss": 0.1185, + "learning_rate": 1.9258733403023734e-05, + "loss": 0.0793, "step": 115740 }, { "epoch": 5.4, - "learning_rate": 9.24147953682434e-06, - "loss": 0.0879, + "learning_rate": 1.9258265333187714e-05, + "loss": 0.0215, "step": 115745 }, { "epoch": 5.4, - "learning_rate": 9.241010735549202e-06, - "loss": 0.0873, + "learning_rate": 1.925779726335169e-05, + "loss": 0.1102, "step": 115750 }, { "epoch": 5.4, - "learning_rate": 9.240541934274062e-06, - "loss": 0.1795, + "learning_rate": 1.925732919351567e-05, + "loss": 0.1949, "step": 115755 }, { "epoch": 5.4, - "learning_rate": 9.240073132998922e-06, - "loss": 0.0995, + "learning_rate": 1.9256861123679653e-05, + "loss": 0.0976, "step": 115760 }, { "epoch": 5.4, - "learning_rate": 9.239604331723782e-06, - "loss": 0.0146, + "learning_rate": 1.9256393053843633e-05, + "loss": 0.0547, "step": 115765 }, { "epoch": 5.4, - "learning_rate": 9.239135530448644e-06, - "loss": 0.0322, + "learning_rate": 1.9255924984007613e-05, + "loss": 0.0232, "step": 115770 }, { "epoch": 5.4, - "learning_rate": 9.238666729173504e-06, - "loss": 0.0422, + "learning_rate": 1.9255456914171596e-05, + "loss": 0.0138, "step": 115775 }, { "epoch": 5.4, - "learning_rate": 9.238197927898365e-06, - "loss": 0.0357, + "learning_rate": 1.9254988844335576e-05, + "loss": 0.0832, "step": 115780 }, { "epoch": 5.4, - "learning_rate": 9.237729126623225e-06, - "loss": 0.0477, + "learning_rate": 1.9254520774499556e-05, + "loss": 0.0495, "step": 115785 }, { "epoch": 5.4, - "learning_rate": 9.237260325348085e-06, + "learning_rate": 1.9254052704663536e-05, "loss": 0.0564, "step": 115790 }, { "epoch": 5.4, - "learning_rate": 9.236791524072947e-06, - "loss": 0.1207, + "learning_rate": 1.925358463482752e-05, + "loss": 0.0898, "step": 115795 }, { "epoch": 5.4, - "learning_rate": 9.236322722797807e-06, - "loss": 0.1893, + "learning_rate": 1.92531165649915e-05, + "loss": 0.057, "step": 115800 }, { "epoch": 5.4, - "learning_rate": 9.235853921522667e-06, - "loss": 0.3479, + "learning_rate": 1.925264849515548e-05, + "loss": 0.2426, "step": 115805 }, { "epoch": 5.4, - "learning_rate": 9.235385120247528e-06, - "loss": 0.0796, + "learning_rate": 1.9252180425319458e-05, + "loss": 0.0992, "step": 115810 }, { "epoch": 5.4, - "learning_rate": 9.234916318972388e-06, - "loss": 0.0142, + "learning_rate": 1.9251712355483438e-05, + "loss": 0.0035, "step": 115815 }, { "epoch": 5.4, - "learning_rate": 9.23444751769725e-06, - "loss": 0.0344, + "learning_rate": 1.9251244285647418e-05, + "loss": 0.0061, "step": 115820 }, { "epoch": 5.4, - "learning_rate": 9.23397871642211e-06, - "loss": 0.0469, + "learning_rate": 1.9250776215811398e-05, + "loss": 0.0263, "step": 115825 }, { "epoch": 5.4, - "learning_rate": 9.23350991514697e-06, - "loss": 0.0217, + "learning_rate": 1.925030814597538e-05, + "loss": 0.0072, "step": 115830 }, { "epoch": 5.41, - "learning_rate": 9.23304111387183e-06, - "loss": 0.013, + "learning_rate": 1.924984007613936e-05, + "loss": 0.0509, "step": 115835 }, { "epoch": 5.41, - "learning_rate": 9.232572312596691e-06, - "loss": 0.0609, + "learning_rate": 1.924937200630334e-05, + "loss": 0.0396, "step": 115840 }, { "epoch": 5.41, - "learning_rate": 9.232103511321551e-06, - "loss": 0.036, + "learning_rate": 1.924890393646732e-05, + "loss": 0.0926, "step": 115845 }, { "epoch": 5.41, - "learning_rate": 9.231634710046413e-06, - "loss": 0.1611, + "learning_rate": 1.9248435866631303e-05, + "loss": 0.2505, "step": 115850 }, { "epoch": 5.41, - "learning_rate": 9.231165908771273e-06, - "loss": 0.255, + "learning_rate": 1.9247967796795283e-05, + "loss": 0.0985, "step": 115855 }, { "epoch": 5.41, - "learning_rate": 9.230697107496133e-06, - "loss": 0.0942, + "learning_rate": 1.9247499726959263e-05, + "loss": 0.1139, "step": 115860 }, { "epoch": 5.41, - "learning_rate": 9.230228306220994e-06, - "loss": 0.0159, + "learning_rate": 1.9247031657123243e-05, + "loss": 0.0082, "step": 115865 }, { "epoch": 5.41, - "learning_rate": 9.229759504945854e-06, - "loss": 0.0605, + "learning_rate": 1.9246563587287226e-05, + "loss": 0.0278, "step": 115870 }, { "epoch": 5.41, - "learning_rate": 9.229290703670714e-06, - "loss": 0.0643, + "learning_rate": 1.9246095517451206e-05, + "loss": 0.0069, "step": 115875 }, { "epoch": 5.41, - "learning_rate": 9.228821902395574e-06, - "loss": 0.0388, + "learning_rate": 1.9245627447615182e-05, + "loss": 0.0023, "step": 115880 }, { "epoch": 5.41, - "learning_rate": 9.228353101120436e-06, - "loss": 0.0995, + "learning_rate": 1.9245159377779166e-05, + "loss": 0.0305, "step": 115885 }, { "epoch": 5.41, - "learning_rate": 9.227884299845297e-06, - "loss": 0.0793, + "learning_rate": 1.9244691307943145e-05, + "loss": 0.1068, "step": 115890 }, { "epoch": 5.41, - "learning_rate": 9.227415498570157e-06, - "loss": 0.072, + "learning_rate": 1.9244223238107125e-05, + "loss": 0.1124, "step": 115895 }, { "epoch": 5.41, - "learning_rate": 9.226946697295017e-06, - "loss": 0.1288, + "learning_rate": 1.9243755168271105e-05, + "loss": 0.0499, "step": 115900 }, { "epoch": 5.41, - "learning_rate": 9.226477896019877e-06, - "loss": 0.1152, + "learning_rate": 1.9243287098435088e-05, + "loss": 0.105, "step": 115905 }, { "epoch": 5.41, - "learning_rate": 9.226009094744739e-06, - "loss": 0.1025, + "learning_rate": 1.9242819028599068e-05, + "loss": 0.1144, "step": 115910 }, { "epoch": 5.41, - "learning_rate": 9.225540293469599e-06, - "loss": 0.013, + "learning_rate": 1.9242350958763048e-05, + "loss": 0.0225, "step": 115915 }, { "epoch": 5.41, - "learning_rate": 9.225071492194459e-06, - "loss": 0.0465, + "learning_rate": 1.9241882888927028e-05, + "loss": 0.0297, "step": 115920 }, { "epoch": 5.41, - "learning_rate": 9.22460269091932e-06, - "loss": 0.0597, + "learning_rate": 1.924141481909101e-05, + "loss": 0.0218, "step": 115925 }, { "epoch": 5.41, - "learning_rate": 9.22413388964418e-06, - "loss": 0.0344, + "learning_rate": 1.924094674925499e-05, + "loss": 0.0022, "step": 115930 }, { "epoch": 5.41, - "learning_rate": 9.223665088369042e-06, - "loss": 0.0679, + "learning_rate": 1.924047867941897e-05, + "loss": 0.0342, "step": 115935 }, { "epoch": 5.41, - "learning_rate": 9.223196287093902e-06, - "loss": 0.0547, + "learning_rate": 1.9240010609582947e-05, + "loss": 0.0425, "step": 115940 }, { "epoch": 5.41, - "learning_rate": 9.222727485818762e-06, - "loss": 0.1239, + "learning_rate": 1.923954253974693e-05, + "loss": 0.0479, "step": 115945 }, { "epoch": 5.41, - "learning_rate": 9.222258684543622e-06, - "loss": 0.0895, + "learning_rate": 1.923907446991091e-05, + "loss": 0.1293, "step": 115950 }, { "epoch": 5.41, - "learning_rate": 9.221789883268483e-06, - "loss": 0.2333, + "learning_rate": 1.923860640007489e-05, + "loss": 0.1726, "step": 115955 }, { "epoch": 5.41, - "learning_rate": 9.221321081993343e-06, - "loss": 0.1021, + "learning_rate": 1.9238138330238873e-05, + "loss": 0.072, "step": 115960 }, { "epoch": 5.41, - "learning_rate": 9.220852280718205e-06, - "loss": 0.0264, + "learning_rate": 1.9237670260402853e-05, + "loss": 0.0108, "step": 115965 }, { "epoch": 5.41, - "learning_rate": 9.220383479443065e-06, - "loss": 0.0386, + "learning_rate": 1.9237202190566833e-05, + "loss": 0.0387, "step": 115970 }, { "epoch": 5.41, - "learning_rate": 9.219914678167925e-06, - "loss": 0.0404, + "learning_rate": 1.9236734120730812e-05, + "loss": 0.0095, "step": 115975 }, { "epoch": 5.41, - "learning_rate": 9.219445876892786e-06, - "loss": 0.072, + "learning_rate": 1.9236266050894796e-05, + "loss": 0.06, "step": 115980 }, { "epoch": 5.41, - "learning_rate": 9.218977075617646e-06, - "loss": 0.0505, + "learning_rate": 1.9235797981058776e-05, + "loss": 0.0179, "step": 115985 }, { "epoch": 5.41, - "learning_rate": 9.218508274342506e-06, - "loss": 0.078, + "learning_rate": 1.9235329911222755e-05, + "loss": 0.1946, "step": 115990 }, { "epoch": 5.41, - "learning_rate": 9.218039473067368e-06, - "loss": 0.066, + "learning_rate": 1.9234861841386735e-05, + "loss": 0.0688, "step": 115995 }, { "epoch": 5.41, - "learning_rate": 9.217570671792228e-06, - "loss": 0.0833, + "learning_rate": 1.923439377155072e-05, + "loss": 0.1848, "step": 116000 }, { "epoch": 5.41, - "learning_rate": 9.21710187051709e-06, - "loss": 0.2808, + "learning_rate": 1.9233925701714695e-05, + "loss": 0.4445, "step": 116005 }, { "epoch": 5.41, - "learning_rate": 9.21663306924195e-06, - "loss": 0.0745, + "learning_rate": 1.9233457631878675e-05, + "loss": 0.095, "step": 116010 }, { "epoch": 5.41, - "learning_rate": 9.21616426796681e-06, - "loss": 0.0026, + "learning_rate": 1.9232989562042658e-05, + "loss": 0.0334, "step": 116015 }, { "epoch": 5.41, - "learning_rate": 9.21569546669167e-06, - "loss": 0.0254, + "learning_rate": 1.9232521492206638e-05, + "loss": 0.0033, "step": 116020 }, { "epoch": 5.41, - "learning_rate": 9.215226665416531e-06, - "loss": 0.0114, + "learning_rate": 1.9232053422370617e-05, + "loss": 0.049, "step": 116025 }, { "epoch": 5.41, - "learning_rate": 9.214757864141391e-06, - "loss": 0.0599, + "learning_rate": 1.9231585352534597e-05, + "loss": 0.0628, "step": 116030 }, { "epoch": 5.41, - "learning_rate": 9.214289062866252e-06, - "loss": 0.0853, + "learning_rate": 1.923111728269858e-05, + "loss": 0.1035, "step": 116035 }, { "epoch": 5.41, - "learning_rate": 9.213820261591112e-06, - "loss": 0.1054, + "learning_rate": 1.923064921286256e-05, + "loss": 0.0857, "step": 116040 }, { "epoch": 5.41, - "learning_rate": 9.213351460315972e-06, - "loss": 0.0686, + "learning_rate": 1.923018114302654e-05, + "loss": 0.0449, "step": 116045 }, { "epoch": 5.42, - "learning_rate": 9.212882659040834e-06, - "loss": 0.1496, + "learning_rate": 1.922971307319052e-05, + "loss": 0.0349, "step": 116050 }, { "epoch": 5.42, - "learning_rate": 9.212413857765694e-06, - "loss": 0.1966, + "learning_rate": 1.9229245003354503e-05, + "loss": 0.1458, "step": 116055 }, { "epoch": 5.42, - "learning_rate": 9.211945056490554e-06, - "loss": 0.0831, + "learning_rate": 1.9228776933518483e-05, + "loss": 0.1143, "step": 116060 }, { "epoch": 5.42, - "learning_rate": 9.211476255215415e-06, - "loss": 0.0283, + "learning_rate": 1.9228308863682463e-05, + "loss": 0.0053, "step": 116065 }, { "epoch": 5.42, - "learning_rate": 9.211007453940275e-06, - "loss": 0.0312, + "learning_rate": 1.9227840793846443e-05, + "loss": 0.0051, "step": 116070 }, { "epoch": 5.42, - "learning_rate": 9.210538652665137e-06, - "loss": 0.0415, + "learning_rate": 1.9227372724010422e-05, + "loss": 0.0174, "step": 116075 }, { "epoch": 5.42, - "learning_rate": 9.210069851389997e-06, - "loss": 0.0367, + "learning_rate": 1.9226904654174402e-05, + "loss": 0.0273, "step": 116080 }, { "epoch": 5.42, - "learning_rate": 9.209601050114857e-06, - "loss": 0.047, + "learning_rate": 1.9226436584338382e-05, + "loss": 0.0736, "step": 116085 }, { "epoch": 5.42, - "learning_rate": 9.209132248839717e-06, - "loss": 0.0951, + "learning_rate": 1.9225968514502365e-05, + "loss": 0.0746, "step": 116090 }, { "epoch": 5.42, - "learning_rate": 9.208663447564578e-06, - "loss": 0.1382, + "learning_rate": 1.9225500444666345e-05, + "loss": 0.1022, "step": 116095 }, { "epoch": 5.42, - "learning_rate": 9.208194646289438e-06, - "loss": 0.0589, + "learning_rate": 1.9225032374830325e-05, + "loss": 0.0789, "step": 116100 }, { "epoch": 5.42, - "learning_rate": 9.2077258450143e-06, - "loss": 0.2927, + "learning_rate": 1.9224564304994305e-05, + "loss": 0.227, "step": 116105 }, { "epoch": 5.42, - "learning_rate": 9.20725704373916e-06, - "loss": 0.1071, + "learning_rate": 1.9224096235158288e-05, + "loss": 0.0722, "step": 116110 }, { "epoch": 5.42, - "learning_rate": 9.20678824246402e-06, - "loss": 0.0023, + "learning_rate": 1.9223628165322268e-05, + "loss": 0.0409, "step": 116115 }, { "epoch": 5.42, - "learning_rate": 9.206319441188882e-06, - "loss": 0.0098, + "learning_rate": 1.9223160095486248e-05, + "loss": 0.0223, "step": 116120 }, { "epoch": 5.42, - "learning_rate": 9.205850639913741e-06, - "loss": 0.022, + "learning_rate": 1.9222692025650227e-05, + "loss": 0.0509, "step": 116125 }, { "epoch": 5.42, - "learning_rate": 9.205381838638601e-06, - "loss": 0.011, + "learning_rate": 1.9222223955814207e-05, + "loss": 0.0515, "step": 116130 }, { "epoch": 5.42, - "learning_rate": 9.204913037363461e-06, - "loss": 0.0545, + "learning_rate": 1.9221755885978187e-05, + "loss": 0.0152, "step": 116135 }, { "epoch": 5.42, - "learning_rate": 9.204444236088323e-06, - "loss": 0.0523, + "learning_rate": 1.9221287816142167e-05, + "loss": 0.0306, "step": 116140 }, { "epoch": 5.42, - "learning_rate": 9.203975434813185e-06, - "loss": 0.1376, + "learning_rate": 1.922081974630615e-05, + "loss": 0.0802, "step": 116145 }, { "epoch": 5.42, - "learning_rate": 9.203506633538045e-06, - "loss": 0.0733, + "learning_rate": 1.922035167647013e-05, + "loss": 0.0656, "step": 116150 }, { "epoch": 5.42, - "learning_rate": 9.203037832262904e-06, - "loss": 0.2183, + "learning_rate": 1.921988360663411e-05, + "loss": 0.1901, "step": 116155 }, { "epoch": 5.42, - "learning_rate": 9.202569030987764e-06, - "loss": 0.1051, + "learning_rate": 1.921941553679809e-05, + "loss": 0.0827, "step": 116160 }, { "epoch": 5.42, - "learning_rate": 9.202100229712626e-06, - "loss": 0.0106, + "learning_rate": 1.9218947466962073e-05, + "loss": 0.0101, "step": 116165 }, { "epoch": 5.42, - "learning_rate": 9.201631428437486e-06, - "loss": 0.0229, + "learning_rate": 1.9218479397126052e-05, + "loss": 0.0016, "step": 116170 }, { "epoch": 5.42, - "learning_rate": 9.201162627162346e-06, - "loss": 0.0153, + "learning_rate": 1.9218011327290032e-05, + "loss": 0.0456, "step": 116175 }, { "epoch": 5.42, - "learning_rate": 9.200693825887208e-06, - "loss": 0.0523, + "learning_rate": 1.9217543257454012e-05, + "loss": 0.0905, "step": 116180 }, { "epoch": 5.42, - "learning_rate": 9.200225024612067e-06, - "loss": 0.0914, + "learning_rate": 1.9217075187617995e-05, + "loss": 0.044, "step": 116185 }, { "epoch": 5.42, - "learning_rate": 9.199756223336929e-06, - "loss": 0.0384, + "learning_rate": 1.9216607117781975e-05, + "loss": 0.1197, "step": 116190 }, { "epoch": 5.42, - "learning_rate": 9.199287422061789e-06, - "loss": 0.1598, + "learning_rate": 1.921613904794595e-05, + "loss": 0.0709, "step": 116195 }, { "epoch": 5.42, - "learning_rate": 9.198818620786649e-06, - "loss": 0.162, + "learning_rate": 1.9215670978109935e-05, + "loss": 0.0324, "step": 116200 }, { "epoch": 5.42, - "learning_rate": 9.198349819511509e-06, - "loss": 0.2778, + "learning_rate": 1.9215202908273915e-05, + "loss": 0.2368, "step": 116205 }, { "epoch": 5.42, - "learning_rate": 9.19788101823637e-06, - "loss": 0.1308, + "learning_rate": 1.9214734838437894e-05, + "loss": 0.1007, "step": 116210 }, { "epoch": 5.42, - "learning_rate": 9.19741221696123e-06, - "loss": 0.003, + "learning_rate": 1.9214266768601874e-05, + "loss": 0.0383, "step": 116215 }, { "epoch": 5.42, - "learning_rate": 9.196943415686092e-06, - "loss": 0.0076, + "learning_rate": 1.9213798698765857e-05, + "loss": 0.0187, "step": 116220 }, { "epoch": 5.42, - "learning_rate": 9.196474614410952e-06, - "loss": 0.0455, + "learning_rate": 1.9213330628929837e-05, + "loss": 0.0373, "step": 116225 }, { "epoch": 5.42, - "learning_rate": 9.196005813135812e-06, - "loss": 0.0403, + "learning_rate": 1.9212862559093817e-05, + "loss": 0.05, "step": 116230 }, { "epoch": 5.42, - "learning_rate": 9.195537011860674e-06, - "loss": 0.1364, + "learning_rate": 1.9212394489257797e-05, + "loss": 0.0787, "step": 116235 }, { "epoch": 5.42, - "learning_rate": 9.195068210585533e-06, - "loss": 0.0602, + "learning_rate": 1.921192641942178e-05, + "loss": 0.0677, "step": 116240 }, { "epoch": 5.42, - "learning_rate": 9.194599409310393e-06, - "loss": 0.1067, + "learning_rate": 1.921145834958576e-05, + "loss": 0.0357, "step": 116245 }, { "epoch": 5.42, - "learning_rate": 9.194130608035255e-06, - "loss": 0.0487, + "learning_rate": 1.921099027974974e-05, + "loss": 0.0939, "step": 116250 }, { "epoch": 5.42, - "learning_rate": 9.193661806760115e-06, - "loss": 0.2984, + "learning_rate": 1.921052220991372e-05, + "loss": 0.1714, "step": 116255 }, { "epoch": 5.42, - "learning_rate": 9.193193005484977e-06, - "loss": 0.0508, + "learning_rate": 1.92100541400777e-05, + "loss": 0.0758, "step": 116260 }, { "epoch": 5.43, - "learning_rate": 9.192724204209837e-06, - "loss": 0.0357, + "learning_rate": 1.920958607024168e-05, + "loss": 0.015, "step": 116265 }, { "epoch": 5.43, - "learning_rate": 9.192255402934696e-06, - "loss": 0.0355, + "learning_rate": 1.920911800040566e-05, + "loss": 0.0435, "step": 116270 }, { "epoch": 5.43, - "learning_rate": 9.191786601659556e-06, - "loss": 0.0768, + "learning_rate": 1.9208649930569642e-05, + "loss": 0.0223, "step": 116275 }, { "epoch": 5.43, - "learning_rate": 9.191317800384418e-06, - "loss": 0.0354, + "learning_rate": 1.9208181860733622e-05, + "loss": 0.0298, "step": 116280 }, { "epoch": 5.43, - "learning_rate": 9.190848999109278e-06, - "loss": 0.0547, + "learning_rate": 1.9207713790897602e-05, + "loss": 0.0857, "step": 116285 }, { "epoch": 5.43, - "learning_rate": 9.19038019783414e-06, - "loss": 0.0764, + "learning_rate": 1.920724572106158e-05, + "loss": 0.0398, "step": 116290 }, { "epoch": 5.43, - "learning_rate": 9.189911396559e-06, - "loss": 0.1464, + "learning_rate": 1.9206777651225565e-05, + "loss": 0.102, "step": 116295 }, { "epoch": 5.43, - "learning_rate": 9.18944259528386e-06, - "loss": 0.2362, + "learning_rate": 1.9206309581389545e-05, + "loss": 0.0937, "step": 116300 }, { "epoch": 5.43, - "learning_rate": 9.188973794008721e-06, - "loss": 0.2482, + "learning_rate": 1.9205841511553525e-05, + "loss": 0.1997, "step": 116305 }, { "epoch": 5.43, - "learning_rate": 9.188504992733581e-06, - "loss": 0.0858, + "learning_rate": 1.9205373441717504e-05, + "loss": 0.0918, "step": 116310 }, { "epoch": 5.43, - "learning_rate": 9.188036191458441e-06, - "loss": 0.0186, + "learning_rate": 1.9204905371881488e-05, + "loss": 0.0066, "step": 116315 }, { "epoch": 5.43, - "learning_rate": 9.187567390183303e-06, - "loss": 0.0696, + "learning_rate": 1.9204437302045464e-05, + "loss": 0.0287, "step": 116320 }, { "epoch": 5.43, - "learning_rate": 9.187098588908163e-06, - "loss": 0.0147, + "learning_rate": 1.9203969232209444e-05, + "loss": 0.0265, "step": 116325 }, { "epoch": 5.43, - "learning_rate": 9.186629787633024e-06, - "loss": 0.0497, + "learning_rate": 1.9203501162373427e-05, + "loss": 0.0401, "step": 116330 }, { "epoch": 5.43, - "learning_rate": 9.186160986357884e-06, - "loss": 0.0755, + "learning_rate": 1.9203033092537407e-05, + "loss": 0.061, "step": 116335 }, { "epoch": 5.43, - "learning_rate": 9.185692185082744e-06, - "loss": 0.0475, + "learning_rate": 1.9202565022701387e-05, + "loss": 0.069, "step": 116340 }, { "epoch": 5.43, - "learning_rate": 9.185223383807604e-06, - "loss": 0.0793, + "learning_rate": 1.9202096952865366e-05, + "loss": 0.0969, "step": 116345 }, { "epoch": 5.43, - "learning_rate": 9.184754582532466e-06, - "loss": 0.1906, + "learning_rate": 1.920162888302935e-05, + "loss": 0.0624, "step": 116350 }, { "epoch": 5.43, - "learning_rate": 9.184285781257326e-06, - "loss": 0.1359, + "learning_rate": 1.920116081319333e-05, + "loss": 0.2164, "step": 116355 }, { "epoch": 5.43, - "learning_rate": 9.183816979982187e-06, - "loss": 0.1095, + "learning_rate": 1.920069274335731e-05, + "loss": 0.0809, "step": 116360 }, { "epoch": 5.43, - "learning_rate": 9.183348178707047e-06, - "loss": 0.0105, + "learning_rate": 1.920022467352129e-05, + "loss": 0.0275, "step": 116365 }, { "epoch": 5.43, - "learning_rate": 9.182879377431907e-06, - "loss": 0.0149, + "learning_rate": 1.9199756603685272e-05, + "loss": 0.0345, "step": 116370 }, { "epoch": 5.43, - "learning_rate": 9.182410576156769e-06, - "loss": 0.0284, + "learning_rate": 1.9199288533849252e-05, + "loss": 0.063, "step": 116375 }, { "epoch": 5.43, - "learning_rate": 9.181941774881629e-06, - "loss": 0.0464, + "learning_rate": 1.9198820464013232e-05, + "loss": 0.0352, "step": 116380 }, { "epoch": 5.43, - "learning_rate": 9.181472973606489e-06, - "loss": 0.063, + "learning_rate": 1.9198352394177212e-05, + "loss": 0.0335, "step": 116385 }, { "epoch": 5.43, - "learning_rate": 9.181004172331348e-06, - "loss": 0.0717, + "learning_rate": 1.919788432434119e-05, + "loss": 0.0565, "step": 116390 }, { "epoch": 5.43, - "learning_rate": 9.18053537105621e-06, - "loss": 0.0738, + "learning_rate": 1.919741625450517e-05, + "loss": 0.0598, "step": 116395 }, { "epoch": 5.43, - "learning_rate": 9.180066569781072e-06, - "loss": 0.1674, + "learning_rate": 1.919694818466915e-05, + "loss": 0.1137, "step": 116400 }, { "epoch": 5.43, - "learning_rate": 9.179597768505932e-06, - "loss": 0.313, + "learning_rate": 1.9196480114833134e-05, + "loss": 0.2412, "step": 116405 }, { "epoch": 5.43, - "learning_rate": 9.179128967230792e-06, - "loss": 0.1044, + "learning_rate": 1.9196012044997114e-05, + "loss": 0.078, "step": 116410 }, { "epoch": 5.43, - "learning_rate": 9.178660165955651e-06, - "loss": 0.0382, + "learning_rate": 1.9195543975161094e-05, + "loss": 0.0072, "step": 116415 }, { "epoch": 5.43, - "learning_rate": 9.178191364680513e-06, - "loss": 0.025, + "learning_rate": 1.9195075905325074e-05, + "loss": 0.0092, "step": 116420 }, { "epoch": 5.43, - "learning_rate": 9.177722563405373e-06, - "loss": 0.0901, + "learning_rate": 1.9194607835489057e-05, + "loss": 0.0488, "step": 116425 }, { "epoch": 5.43, - "learning_rate": 9.177253762130233e-06, - "loss": 0.0137, + "learning_rate": 1.9194139765653037e-05, + "loss": 0.0712, "step": 116430 }, { "epoch": 5.43, - "learning_rate": 9.176784960855095e-06, - "loss": 0.0536, + "learning_rate": 1.9193671695817017e-05, + "loss": 0.0676, "step": 116435 }, { "epoch": 5.43, - "learning_rate": 9.176316159579955e-06, - "loss": 0.0708, + "learning_rate": 1.9193203625981e-05, + "loss": 0.0525, "step": 116440 }, { "epoch": 5.43, - "learning_rate": 9.175847358304816e-06, - "loss": 0.0868, + "learning_rate": 1.9192735556144976e-05, + "loss": 0.0793, "step": 116445 }, { "epoch": 5.43, - "learning_rate": 9.175378557029676e-06, - "loss": 0.1771, + "learning_rate": 1.9192267486308956e-05, + "loss": 0.0736, "step": 116450 }, { "epoch": 5.43, - "learning_rate": 9.174909755754536e-06, - "loss": 0.2273, + "learning_rate": 1.9191799416472936e-05, + "loss": 0.3909, "step": 116455 }, { "epoch": 5.43, - "learning_rate": 9.174440954479396e-06, - "loss": 0.0751, + "learning_rate": 1.919133134663692e-05, + "loss": 0.1178, "step": 116460 }, { "epoch": 5.43, - "learning_rate": 9.173972153204258e-06, - "loss": 0.031, + "learning_rate": 1.91908632768009e-05, + "loss": 0.0191, "step": 116465 }, { "epoch": 5.43, - "learning_rate": 9.173503351929118e-06, - "loss": 0.0354, + "learning_rate": 1.919039520696488e-05, + "loss": 0.0247, "step": 116470 }, { "epoch": 5.43, - "learning_rate": 9.17303455065398e-06, - "loss": 0.039, + "learning_rate": 1.918992713712886e-05, + "loss": 0.0161, "step": 116475 }, { "epoch": 5.44, - "learning_rate": 9.172565749378839e-06, - "loss": 0.0647, + "learning_rate": 1.9189459067292842e-05, + "loss": 0.0267, "step": 116480 }, { "epoch": 5.44, - "learning_rate": 9.172096948103699e-06, - "loss": 0.0597, + "learning_rate": 1.918899099745682e-05, + "loss": 0.0738, "step": 116485 }, { "epoch": 5.44, - "learning_rate": 9.17162814682856e-06, - "loss": 0.1435, + "learning_rate": 1.91885229276208e-05, + "loss": 0.0854, "step": 116490 }, { "epoch": 5.44, - "learning_rate": 9.17115934555342e-06, - "loss": 0.0807, + "learning_rate": 1.9188054857784785e-05, + "loss": 0.0888, "step": 116495 }, { "epoch": 5.44, - "learning_rate": 9.17069054427828e-06, - "loss": 0.083, + "learning_rate": 1.9187586787948764e-05, + "loss": 0.1925, "step": 116500 }, { "epoch": 5.44, - "learning_rate": 9.170221743003142e-06, - "loss": 0.2154, + "learning_rate": 1.9187118718112744e-05, + "loss": 0.3213, "step": 116505 }, { "epoch": 5.44, - "learning_rate": 9.169752941728002e-06, - "loss": 0.0853, + "learning_rate": 1.918665064827672e-05, + "loss": 0.0731, "step": 116510 }, { "epoch": 5.44, - "learning_rate": 9.169284140452864e-06, - "loss": 0.0037, + "learning_rate": 1.9186182578440704e-05, + "loss": 0.0477, "step": 116515 }, { "epoch": 5.44, - "learning_rate": 9.168815339177724e-06, - "loss": 0.0478, + "learning_rate": 1.9185714508604684e-05, + "loss": 0.0797, "step": 116520 }, { "epoch": 5.44, - "learning_rate": 9.168346537902584e-06, - "loss": 0.007, + "learning_rate": 1.9185246438768664e-05, + "loss": 0.0344, "step": 116525 }, { "epoch": 5.44, - "learning_rate": 9.167877736627444e-06, - "loss": 0.0273, + "learning_rate": 1.9184778368932643e-05, + "loss": 0.0707, "step": 116530 }, { "epoch": 5.44, - "learning_rate": 9.167408935352305e-06, - "loss": 0.0458, + "learning_rate": 1.9184310299096627e-05, + "loss": 0.0359, "step": 116535 }, { "epoch": 5.44, - "learning_rate": 9.166940134077165e-06, - "loss": 0.0494, + "learning_rate": 1.9183842229260606e-05, + "loss": 0.0387, "step": 116540 }, { "epoch": 5.44, - "learning_rate": 9.166471332802027e-06, - "loss": 0.1669, + "learning_rate": 1.9183374159424586e-05, + "loss": 0.1406, "step": 116545 }, { "epoch": 5.44, - "learning_rate": 9.166002531526887e-06, - "loss": 0.1877, + "learning_rate": 1.9182906089588566e-05, + "loss": 0.1518, "step": 116550 }, { "epoch": 5.44, - "learning_rate": 9.165533730251747e-06, - "loss": 0.1898, + "learning_rate": 1.918243801975255e-05, + "loss": 0.2566, "step": 116555 }, { "epoch": 5.44, - "learning_rate": 9.165064928976608e-06, - "loss": 0.1039, + "learning_rate": 1.918196994991653e-05, + "loss": 0.0618, "step": 116560 }, { "epoch": 5.44, - "learning_rate": 9.164596127701468e-06, - "loss": 0.0243, + "learning_rate": 1.918150188008051e-05, + "loss": 0.0174, "step": 116565 }, { "epoch": 5.44, - "learning_rate": 9.164127326426328e-06, - "loss": 0.0459, + "learning_rate": 1.9181033810244492e-05, + "loss": 0.0588, "step": 116570 }, { "epoch": 5.44, - "learning_rate": 9.16365852515119e-06, - "loss": 0.0197, + "learning_rate": 1.918056574040847e-05, + "loss": 0.0075, "step": 116575 }, { "epoch": 5.44, - "learning_rate": 9.16318972387605e-06, - "loss": 0.0444, + "learning_rate": 1.918009767057245e-05, + "loss": 0.0158, "step": 116580 }, { "epoch": 5.44, - "learning_rate": 9.162720922600911e-06, - "loss": 0.0993, + "learning_rate": 1.9179629600736428e-05, + "loss": 0.035, "step": 116585 }, { "epoch": 5.44, - "learning_rate": 9.162252121325771e-06, - "loss": 0.0553, + "learning_rate": 1.917916153090041e-05, + "loss": 0.0521, "step": 116590 }, { "epoch": 5.44, - "learning_rate": 9.161783320050631e-06, - "loss": 0.092, + "learning_rate": 1.917869346106439e-05, + "loss": 0.1153, "step": 116595 }, { "epoch": 5.44, - "learning_rate": 9.161314518775491e-06, - "loss": 0.1327, + "learning_rate": 1.917822539122837e-05, + "loss": 0.0847, "step": 116600 }, { "epoch": 5.44, - "learning_rate": 9.160845717500353e-06, - "loss": 0.4196, + "learning_rate": 1.917775732139235e-05, + "loss": 0.1852, "step": 116605 }, { "epoch": 5.44, - "learning_rate": 9.160376916225213e-06, - "loss": 0.0726, + "learning_rate": 1.9177289251556334e-05, + "loss": 0.0671, "step": 116610 }, { "epoch": 5.44, - "learning_rate": 9.159908114950074e-06, - "loss": 0.0287, + "learning_rate": 1.9176821181720314e-05, + "loss": 0.0017, "step": 116615 }, { "epoch": 5.44, - "learning_rate": 9.159439313674934e-06, - "loss": 0.0228, + "learning_rate": 1.9176353111884294e-05, + "loss": 0.0152, "step": 116620 }, { "epoch": 5.44, - "learning_rate": 9.158970512399794e-06, - "loss": 0.0309, + "learning_rate": 1.9175885042048277e-05, + "loss": 0.063, "step": 116625 }, { "epoch": 5.44, - "learning_rate": 9.158501711124656e-06, - "loss": 0.0304, + "learning_rate": 1.9175416972212257e-05, + "loss": 0.0332, "step": 116630 }, { "epoch": 5.44, - "learning_rate": 9.158032909849516e-06, - "loss": 0.0922, + "learning_rate": 1.9174948902376233e-05, + "loss": 0.0207, "step": 116635 }, { "epoch": 5.44, - "learning_rate": 9.157564108574376e-06, - "loss": 0.0225, + "learning_rate": 1.9174480832540213e-05, + "loss": 0.0436, "step": 116640 }, { "epoch": 5.44, - "learning_rate": 9.157095307299236e-06, - "loss": 0.1203, + "learning_rate": 1.9174012762704196e-05, + "loss": 0.1007, "step": 116645 }, { "epoch": 5.44, - "learning_rate": 9.156626506024097e-06, - "loss": 0.0971, + "learning_rate": 1.9173544692868176e-05, + "loss": 0.1077, "step": 116650 }, { "epoch": 5.44, - "learning_rate": 9.156157704748959e-06, - "loss": 0.1463, + "learning_rate": 1.9173076623032156e-05, + "loss": 0.2269, "step": 116655 }, { "epoch": 5.44, - "learning_rate": 9.155688903473819e-06, - "loss": 0.0572, + "learning_rate": 1.9172608553196136e-05, + "loss": 0.1536, "step": 116660 }, { "epoch": 5.44, - "learning_rate": 9.155220102198679e-06, - "loss": 0.0106, + "learning_rate": 1.917214048336012e-05, + "loss": 0.0122, "step": 116665 }, { "epoch": 5.44, - "learning_rate": 9.154751300923539e-06, - "loss": 0.0578, + "learning_rate": 1.91716724135241e-05, + "loss": 0.0275, "step": 116670 }, { "epoch": 5.44, - "learning_rate": 9.1542824996484e-06, - "loss": 0.0596, + "learning_rate": 1.917120434368808e-05, + "loss": 0.0363, "step": 116675 }, { "epoch": 5.44, - "learning_rate": 9.15381369837326e-06, - "loss": 0.0749, + "learning_rate": 1.917073627385206e-05, + "loss": 0.0624, "step": 116680 }, { "epoch": 5.44, - "learning_rate": 9.15334489709812e-06, - "loss": 0.0369, + "learning_rate": 1.917026820401604e-05, + "loss": 0.0916, "step": 116685 }, { "epoch": 5.44, - "learning_rate": 9.152876095822982e-06, - "loss": 0.0394, + "learning_rate": 1.916980013418002e-05, + "loss": 0.0711, "step": 116690 }, { "epoch": 5.45, - "learning_rate": 9.152407294547842e-06, - "loss": 0.0472, + "learning_rate": 1.9169332064344e-05, + "loss": 0.0408, "step": 116695 }, { "epoch": 5.45, - "learning_rate": 9.151938493272703e-06, - "loss": 0.1548, + "learning_rate": 1.916886399450798e-05, + "loss": 0.0741, "step": 116700 }, { "epoch": 5.45, - "learning_rate": 9.151469691997563e-06, - "loss": 0.1738, + "learning_rate": 1.916839592467196e-05, + "loss": 0.4412, "step": 116705 }, { "epoch": 5.45, - "learning_rate": 9.151000890722423e-06, - "loss": 0.0621, + "learning_rate": 1.916792785483594e-05, + "loss": 0.0736, "step": 116710 }, { "epoch": 5.45, - "learning_rate": 9.150532089447283e-06, - "loss": 0.0224, + "learning_rate": 1.916745978499992e-05, + "loss": 0.0211, "step": 116715 }, { "epoch": 5.45, - "learning_rate": 9.150063288172145e-06, - "loss": 0.039, + "learning_rate": 1.9166991715163904e-05, + "loss": 0.0092, "step": 116720 }, { "epoch": 5.45, - "learning_rate": 9.149594486897005e-06, - "loss": 0.092, + "learning_rate": 1.9166523645327883e-05, + "loss": 0.0263, "step": 116725 }, { "epoch": 5.45, - "learning_rate": 9.149125685621866e-06, - "loss": 0.0237, + "learning_rate": 1.9166055575491863e-05, + "loss": 0.0343, "step": 116730 }, { "epoch": 5.45, - "learning_rate": 9.148656884346726e-06, - "loss": 0.0104, + "learning_rate": 1.9165587505655843e-05, + "loss": 0.0672, "step": 116735 }, { "epoch": 5.45, - "learning_rate": 9.148188083071586e-06, - "loss": 0.0593, + "learning_rate": 1.9165119435819826e-05, + "loss": 0.1053, "step": 116740 }, { "epoch": 5.45, - "learning_rate": 9.147719281796448e-06, - "loss": 0.0793, + "learning_rate": 1.9164651365983806e-05, + "loss": 0.0931, "step": 116745 }, { "epoch": 5.45, - "learning_rate": 9.147250480521308e-06, - "loss": 0.1443, + "learning_rate": 1.9164183296147786e-05, + "loss": 0.1015, "step": 116750 }, { "epoch": 5.45, - "learning_rate": 9.146781679246168e-06, - "loss": 0.2635, + "learning_rate": 1.916371522631177e-05, + "loss": 0.1604, "step": 116755 }, { "epoch": 5.45, - "learning_rate": 9.14631287797103e-06, - "loss": 0.0993, + "learning_rate": 1.916324715647575e-05, + "loss": 0.0641, "step": 116760 }, { "epoch": 5.45, - "learning_rate": 9.14584407669589e-06, - "loss": 0.0315, + "learning_rate": 1.9162779086639725e-05, + "loss": 0.0241, "step": 116765 }, { "epoch": 5.45, - "learning_rate": 9.14537527542075e-06, - "loss": 0.013, + "learning_rate": 1.9162311016803705e-05, + "loss": 0.0386, "step": 116770 }, { "epoch": 5.45, - "learning_rate": 9.14490647414561e-06, - "loss": 0.0402, + "learning_rate": 1.916184294696769e-05, + "loss": 0.0155, "step": 116775 }, { "epoch": 5.45, - "learning_rate": 9.14443767287047e-06, - "loss": 0.0324, + "learning_rate": 1.9161374877131668e-05, + "loss": 0.0726, "step": 116780 }, { "epoch": 5.45, - "learning_rate": 9.14396887159533e-06, - "loss": 0.1033, + "learning_rate": 1.9160906807295648e-05, + "loss": 0.0347, "step": 116785 }, { "epoch": 5.45, - "learning_rate": 9.143500070320192e-06, - "loss": 0.1152, + "learning_rate": 1.9160438737459628e-05, + "loss": 0.072, "step": 116790 }, { "epoch": 5.45, - "learning_rate": 9.143031269045052e-06, - "loss": 0.0728, + "learning_rate": 1.915997066762361e-05, + "loss": 0.0842, "step": 116795 }, { "epoch": 5.45, - "learning_rate": 9.142562467769914e-06, - "loss": 0.1286, + "learning_rate": 1.915950259778759e-05, + "loss": 0.1394, "step": 116800 }, { "epoch": 5.45, - "learning_rate": 9.142093666494774e-06, - "loss": 0.2861, + "learning_rate": 1.915903452795157e-05, + "loss": 0.2499, "step": 116805 }, { "epoch": 5.45, - "learning_rate": 9.141624865219634e-06, - "loss": 0.0378, + "learning_rate": 1.9158566458115554e-05, + "loss": 0.1269, "step": 116810 }, { "epoch": 5.45, - "learning_rate": 9.141156063944495e-06, - "loss": 0.0287, + "learning_rate": 1.9158098388279534e-05, + "loss": 0.0552, "step": 116815 }, { "epoch": 5.45, - "learning_rate": 9.140687262669355e-06, - "loss": 0.0178, + "learning_rate": 1.9157630318443513e-05, + "loss": 0.029, "step": 116820 }, { "epoch": 5.45, - "learning_rate": 9.140218461394215e-06, - "loss": 0.0658, + "learning_rate": 1.915716224860749e-05, + "loss": 0.0347, "step": 116825 }, { "epoch": 5.45, - "learning_rate": 9.139749660119077e-06, - "loss": 0.0793, + "learning_rate": 1.9156694178771473e-05, + "loss": 0.0764, "step": 116830 }, { "epoch": 5.45, - "learning_rate": 9.139280858843937e-06, - "loss": 0.0655, + "learning_rate": 1.9156226108935453e-05, + "loss": 0.0307, "step": 116835 }, { "epoch": 5.45, - "learning_rate": 9.138812057568798e-06, - "loss": 0.0172, + "learning_rate": 1.9155758039099433e-05, + "loss": 0.0566, "step": 116840 }, { "epoch": 5.45, - "learning_rate": 9.138343256293658e-06, - "loss": 0.0489, + "learning_rate": 1.9155289969263413e-05, + "loss": 0.0814, "step": 116845 }, { "epoch": 5.45, - "learning_rate": 9.137874455018518e-06, - "loss": 0.0778, + "learning_rate": 1.9154821899427396e-05, + "loss": 0.1702, "step": 116850 }, { "epoch": 5.45, - "learning_rate": 9.137405653743378e-06, - "loss": 0.2516, + "learning_rate": 1.9154353829591376e-05, + "loss": 0.2405, "step": 116855 }, { "epoch": 5.45, - "learning_rate": 9.136936852468238e-06, - "loss": 0.0835, + "learning_rate": 1.9153885759755355e-05, + "loss": 0.088, "step": 116860 }, { "epoch": 5.45, - "learning_rate": 9.1364680511931e-06, - "loss": 0.0218, + "learning_rate": 1.915341768991934e-05, + "loss": 0.0093, "step": 116865 }, { "epoch": 5.45, - "learning_rate": 9.135999249917961e-06, - "loss": 0.008, + "learning_rate": 1.915294962008332e-05, + "loss": 0.0108, "step": 116870 }, { "epoch": 5.45, - "learning_rate": 9.135530448642821e-06, - "loss": 0.0475, + "learning_rate": 1.9152481550247298e-05, + "loss": 0.0395, "step": 116875 }, { "epoch": 5.45, - "learning_rate": 9.135061647367681e-06, - "loss": 0.0898, + "learning_rate": 1.9152013480411278e-05, + "loss": 0.0386, "step": 116880 }, { "epoch": 5.45, - "learning_rate": 9.134592846092543e-06, - "loss": 0.0721, + "learning_rate": 1.915154541057526e-05, + "loss": 0.0687, "step": 116885 }, { "epoch": 5.45, - "learning_rate": 9.134124044817403e-06, - "loss": 0.0941, + "learning_rate": 1.9151077340739238e-05, + "loss": 0.0783, "step": 116890 }, { "epoch": 5.45, - "learning_rate": 9.133655243542263e-06, - "loss": 0.1063, + "learning_rate": 1.9150609270903218e-05, + "loss": 0.1345, "step": 116895 }, { "epoch": 5.45, - "learning_rate": 9.133186442267123e-06, - "loss": 0.1315, + "learning_rate": 1.9150141201067197e-05, + "loss": 0.2226, "step": 116900 }, { "epoch": 5.45, - "learning_rate": 9.132717640991984e-06, - "loss": 0.2335, + "learning_rate": 1.914967313123118e-05, + "loss": 0.1678, "step": 116905 }, { "epoch": 5.46, - "learning_rate": 9.132248839716846e-06, - "loss": 0.1309, + "learning_rate": 1.914920506139516e-05, + "loss": 0.0323, "step": 116910 }, { "epoch": 5.46, - "learning_rate": 9.131780038441706e-06, - "loss": 0.0258, + "learning_rate": 1.914873699155914e-05, + "loss": 0.0118, "step": 116915 }, { "epoch": 5.46, - "learning_rate": 9.131311237166566e-06, - "loss": 0.0251, + "learning_rate": 1.914826892172312e-05, + "loss": 0.0033, "step": 116920 }, { "epoch": 5.46, - "learning_rate": 9.130842435891426e-06, - "loss": 0.0224, + "learning_rate": 1.9147800851887103e-05, + "loss": 0.0809, "step": 116925 }, { "epoch": 5.46, - "learning_rate": 9.130373634616287e-06, - "loss": 0.0258, + "learning_rate": 1.9147332782051083e-05, + "loss": 0.024, "step": 116930 }, { "epoch": 5.46, - "learning_rate": 9.129904833341147e-06, - "loss": 0.0572, + "learning_rate": 1.9146864712215063e-05, + "loss": 0.0435, "step": 116935 }, { "epoch": 5.46, - "learning_rate": 9.129436032066007e-06, - "loss": 0.0915, + "learning_rate": 1.9146396642379046e-05, + "loss": 0.0765, "step": 116940 }, { "epoch": 5.46, - "learning_rate": 9.128967230790869e-06, - "loss": 0.1512, + "learning_rate": 1.9145928572543026e-05, + "loss": 0.0625, "step": 116945 }, { "epoch": 5.46, - "learning_rate": 9.128498429515729e-06, - "loss": 0.1106, + "learning_rate": 1.9145460502707002e-05, + "loss": 0.1266, "step": 116950 }, { "epoch": 5.46, - "learning_rate": 9.12802962824059e-06, - "loss": 0.2726, + "learning_rate": 1.9144992432870982e-05, + "loss": 0.3016, "step": 116955 }, { "epoch": 5.46, - "learning_rate": 9.12756082696545e-06, - "loss": 0.0828, + "learning_rate": 1.9144524363034965e-05, + "loss": 0.08, "step": 116960 }, { "epoch": 5.46, - "learning_rate": 9.12709202569031e-06, - "loss": 0.0127, + "learning_rate": 1.9144056293198945e-05, + "loss": 0.0209, "step": 116965 }, { "epoch": 5.46, - "learning_rate": 9.12662322441517e-06, - "loss": 0.0186, + "learning_rate": 1.9143588223362925e-05, + "loss": 0.0198, "step": 116970 }, { "epoch": 5.46, - "learning_rate": 9.126154423140032e-06, - "loss": 0.083, + "learning_rate": 1.9143120153526905e-05, + "loss": 0.0267, "step": 116975 }, { "epoch": 5.46, - "learning_rate": 9.125685621864892e-06, - "loss": 0.054, + "learning_rate": 1.9142652083690888e-05, + "loss": 0.03, "step": 116980 }, { "epoch": 5.46, - "learning_rate": 9.125216820589753e-06, - "loss": 0.0462, + "learning_rate": 1.9142184013854868e-05, + "loss": 0.0353, "step": 116985 }, { "epoch": 5.46, - "learning_rate": 9.124748019314613e-06, - "loss": 0.1243, + "learning_rate": 1.9141715944018848e-05, + "loss": 0.0708, "step": 116990 }, { "epoch": 5.46, - "learning_rate": 9.124279218039473e-06, - "loss": 0.0958, + "learning_rate": 1.914124787418283e-05, + "loss": 0.1666, "step": 116995 }, { "epoch": 5.46, - "learning_rate": 9.123810416764335e-06, - "loss": 0.1079, + "learning_rate": 1.914077980434681e-05, + "loss": 0.1101, "step": 117000 }, { "epoch": 5.46, - "learning_rate": 9.123341615489195e-06, - "loss": 0.2976, + "learning_rate": 1.914031173451079e-05, + "loss": 0.3747, "step": 117005 }, { "epoch": 5.46, - "learning_rate": 9.122872814214055e-06, - "loss": 0.0609, + "learning_rate": 1.913984366467477e-05, + "loss": 0.1002, "step": 117010 }, { "epoch": 5.46, - "learning_rate": 9.122404012938916e-06, - "loss": 0.0105, + "learning_rate": 1.913937559483875e-05, + "loss": 0.0369, "step": 117015 }, { "epoch": 5.46, - "learning_rate": 9.121935211663776e-06, - "loss": 0.0371, + "learning_rate": 1.913890752500273e-05, + "loss": 0.0509, "step": 117020 }, { "epoch": 5.46, - "learning_rate": 9.121466410388638e-06, - "loss": 0.0454, + "learning_rate": 1.913843945516671e-05, + "loss": 0.0238, "step": 117025 }, { "epoch": 5.46, - "learning_rate": 9.120997609113498e-06, - "loss": 0.0467, + "learning_rate": 1.913797138533069e-05, + "loss": 0.0645, "step": 117030 }, { "epoch": 5.46, - "learning_rate": 9.120528807838358e-06, - "loss": 0.0835, + "learning_rate": 1.9137503315494673e-05, + "loss": 0.0266, "step": 117035 }, { "epoch": 5.46, - "learning_rate": 9.120060006563218e-06, - "loss": 0.0955, + "learning_rate": 1.9137035245658653e-05, + "loss": 0.0762, "step": 117040 }, { "epoch": 5.46, - "learning_rate": 9.11959120528808e-06, - "loss": 0.0882, + "learning_rate": 1.9136567175822632e-05, + "loss": 0.1404, "step": 117045 }, { "epoch": 5.46, - "learning_rate": 9.11912240401294e-06, - "loss": 0.1594, + "learning_rate": 1.9136099105986616e-05, + "loss": 0.1069, "step": 117050 }, { "epoch": 5.46, - "learning_rate": 9.118653602737801e-06, - "loss": 0.3315, + "learning_rate": 1.9135631036150595e-05, + "loss": 0.2779, "step": 117055 }, { "epoch": 5.46, - "learning_rate": 9.118184801462661e-06, - "loss": 0.0627, + "learning_rate": 1.9135162966314575e-05, + "loss": 0.0953, "step": 117060 }, { "epoch": 5.46, - "learning_rate": 9.11771600018752e-06, - "loss": 0.026, + "learning_rate": 1.9134694896478555e-05, + "loss": 0.0139, "step": 117065 }, { "epoch": 5.46, - "learning_rate": 9.117247198912382e-06, - "loss": 0.044, + "learning_rate": 1.9134226826642538e-05, + "loss": 0.0315, "step": 117070 }, { "epoch": 5.46, - "learning_rate": 9.116778397637242e-06, - "loss": 0.0445, + "learning_rate": 1.9133758756806518e-05, + "loss": 0.024, "step": 117075 }, { "epoch": 5.46, - "learning_rate": 9.116309596362102e-06, - "loss": 0.0142, + "learning_rate": 1.9133290686970494e-05, + "loss": 0.0143, "step": 117080 }, { "epoch": 5.46, - "learning_rate": 9.115840795086964e-06, - "loss": 0.0726, + "learning_rate": 1.9132822617134474e-05, + "loss": 0.0723, "step": 117085 }, { "epoch": 5.46, - "learning_rate": 9.115371993811824e-06, - "loss": 0.0767, + "learning_rate": 1.9132354547298458e-05, + "loss": 0.1913, "step": 117090 }, { "epoch": 5.46, - "learning_rate": 9.114903192536685e-06, - "loss": 0.0375, + "learning_rate": 1.9131886477462437e-05, + "loss": 0.0484, "step": 117095 }, { "epoch": 5.46, - "learning_rate": 9.114434391261545e-06, - "loss": 0.0965, + "learning_rate": 1.9131418407626417e-05, + "loss": 0.0649, "step": 117100 }, { "epoch": 5.46, - "learning_rate": 9.113965589986405e-06, - "loss": 0.1898, + "learning_rate": 1.9130950337790397e-05, + "loss": 0.2398, "step": 117105 }, { "epoch": 5.46, - "learning_rate": 9.113496788711265e-06, - "loss": 0.1177, + "learning_rate": 1.913048226795438e-05, + "loss": 0.0703, "step": 117110 }, { "epoch": 5.46, - "learning_rate": 9.113027987436125e-06, - "loss": 0.0073, + "learning_rate": 1.913001419811836e-05, + "loss": 0.0553, "step": 117115 }, { "epoch": 5.46, - "learning_rate": 9.112559186160987e-06, - "loss": 0.0104, + "learning_rate": 1.912954612828234e-05, + "loss": 0.0189, "step": 117120 }, { "epoch": 5.47, - "learning_rate": 9.112090384885848e-06, - "loss": 0.028, + "learning_rate": 1.9129078058446323e-05, + "loss": 0.0161, "step": 117125 }, { "epoch": 5.47, - "learning_rate": 9.111621583610708e-06, - "loss": 0.04, + "learning_rate": 1.9128609988610303e-05, + "loss": 0.0419, "step": 117130 }, { "epoch": 5.47, - "learning_rate": 9.111152782335568e-06, - "loss": 0.1221, + "learning_rate": 1.9128141918774283e-05, + "loss": 0.055, "step": 117135 }, { "epoch": 5.47, - "learning_rate": 9.11068398106043e-06, - "loss": 0.096, + "learning_rate": 1.912767384893826e-05, + "loss": 0.0414, "step": 117140 }, { "epoch": 5.47, - "learning_rate": 9.11021517978529e-06, - "loss": 0.1009, + "learning_rate": 1.9127205779102242e-05, + "loss": 0.1555, "step": 117145 }, { "epoch": 5.47, - "learning_rate": 9.10974637851015e-06, - "loss": 0.1518, + "learning_rate": 1.9126737709266222e-05, + "loss": 0.1174, "step": 117150 }, { "epoch": 5.47, - "learning_rate": 9.10927757723501e-06, - "loss": 0.2811, + "learning_rate": 1.9126269639430202e-05, + "loss": 0.1412, "step": 117155 }, { "epoch": 5.47, - "learning_rate": 9.108808775959871e-06, - "loss": 0.115, + "learning_rate": 1.9125801569594182e-05, + "loss": 0.1287, "step": 117160 }, { "epoch": 5.47, - "learning_rate": 9.108339974684733e-06, - "loss": 0.0091, + "learning_rate": 1.9125333499758165e-05, + "loss": 0.009, "step": 117165 }, { "epoch": 5.47, - "learning_rate": 9.107871173409593e-06, - "loss": 0.0232, + "learning_rate": 1.9124865429922145e-05, + "loss": 0.0129, "step": 117170 }, { "epoch": 5.47, - "learning_rate": 9.107402372134453e-06, - "loss": 0.0351, + "learning_rate": 1.9124397360086125e-05, + "loss": 0.0542, "step": 117175 }, { "epoch": 5.47, - "learning_rate": 9.106933570859313e-06, - "loss": 0.0305, + "learning_rate": 1.9123929290250108e-05, + "loss": 0.0655, "step": 117180 }, { "epoch": 5.47, - "learning_rate": 9.106464769584173e-06, - "loss": 0.0483, + "learning_rate": 1.9123461220414088e-05, + "loss": 0.0824, "step": 117185 }, { "epoch": 5.47, - "learning_rate": 9.105995968309034e-06, - "loss": 0.0334, + "learning_rate": 1.9122993150578067e-05, + "loss": 0.0862, "step": 117190 }, { "epoch": 5.47, - "learning_rate": 9.105527167033896e-06, - "loss": 0.2288, + "learning_rate": 1.9122525080742047e-05, + "loss": 0.0869, "step": 117195 }, { "epoch": 5.47, - "learning_rate": 9.105058365758756e-06, - "loss": 0.2077, + "learning_rate": 1.912205701090603e-05, + "loss": 0.1392, "step": 117200 }, { "epoch": 5.47, - "learning_rate": 9.104589564483616e-06, - "loss": 0.1651, + "learning_rate": 1.9121588941070007e-05, + "loss": 0.1877, "step": 117205 }, { "epoch": 5.47, - "learning_rate": 9.104120763208478e-06, - "loss": 0.0927, + "learning_rate": 1.9121120871233987e-05, + "loss": 0.0671, "step": 117210 }, { "epoch": 5.47, - "learning_rate": 9.103651961933337e-06, - "loss": 0.0123, + "learning_rate": 1.9120652801397967e-05, + "loss": 0.016, "step": 117215 }, { "epoch": 5.47, - "learning_rate": 9.103183160658197e-06, - "loss": 0.0319, + "learning_rate": 1.912018473156195e-05, + "loss": 0.0251, "step": 117220 }, { "epoch": 5.47, - "learning_rate": 9.102714359383057e-06, - "loss": 0.0097, + "learning_rate": 1.911971666172593e-05, + "loss": 0.0392, "step": 117225 }, { "epoch": 5.47, - "learning_rate": 9.102245558107919e-06, - "loss": 0.0599, + "learning_rate": 1.911924859188991e-05, + "loss": 0.0179, "step": 117230 }, { "epoch": 5.47, - "learning_rate": 9.10177675683278e-06, - "loss": 0.0524, + "learning_rate": 1.9118780522053893e-05, + "loss": 0.111, "step": 117235 }, { "epoch": 5.47, - "learning_rate": 9.10130795555764e-06, - "loss": 0.0268, + "learning_rate": 1.9118312452217872e-05, + "loss": 0.0961, "step": 117240 }, { "epoch": 5.47, - "learning_rate": 9.1008391542825e-06, - "loss": 0.0347, + "learning_rate": 1.9117844382381852e-05, + "loss": 0.0812, "step": 117245 }, { "epoch": 5.47, - "learning_rate": 9.10037035300736e-06, - "loss": 0.1514, + "learning_rate": 1.9117376312545832e-05, + "loss": 0.0726, "step": 117250 }, { "epoch": 5.47, - "learning_rate": 9.099901551732222e-06, - "loss": 0.2839, + "learning_rate": 1.9116908242709815e-05, + "loss": 0.2716, "step": 117255 }, { "epoch": 5.47, - "learning_rate": 9.099432750457082e-06, - "loss": 0.096, + "learning_rate": 1.9116440172873795e-05, + "loss": 0.0979, "step": 117260 }, { "epoch": 5.47, - "learning_rate": 9.098963949181942e-06, - "loss": 0.0182, + "learning_rate": 1.9115972103037775e-05, + "loss": 0.0305, "step": 117265 }, { "epoch": 5.47, - "learning_rate": 9.098495147906803e-06, - "loss": 0.0209, + "learning_rate": 1.911550403320175e-05, + "loss": 0.0216, "step": 117270 }, { "epoch": 5.47, - "learning_rate": 9.098026346631663e-06, - "loss": 0.0526, + "learning_rate": 1.9115035963365734e-05, + "loss": 0.0408, "step": 117275 }, { "epoch": 5.47, - "learning_rate": 9.097557545356525e-06, - "loss": 0.0539, + "learning_rate": 1.9114567893529714e-05, + "loss": 0.0707, "step": 117280 }, { "epoch": 5.47, - "learning_rate": 9.097088744081385e-06, - "loss": 0.03, + "learning_rate": 1.9114099823693694e-05, + "loss": 0.0445, "step": 117285 }, { "epoch": 5.47, - "learning_rate": 9.096619942806245e-06, - "loss": 0.0632, + "learning_rate": 1.9113631753857677e-05, + "loss": 0.076, "step": 117290 }, { "epoch": 5.47, - "learning_rate": 9.096151141531105e-06, - "loss": 0.0609, + "learning_rate": 1.9113163684021657e-05, + "loss": 0.0911, "step": 117295 }, { "epoch": 5.47, - "learning_rate": 9.095682340255966e-06, - "loss": 0.1301, + "learning_rate": 1.9112695614185637e-05, + "loss": 0.1306, "step": 117300 }, { "epoch": 5.47, - "learning_rate": 9.095213538980826e-06, - "loss": 0.2285, + "learning_rate": 1.9112227544349617e-05, + "loss": 0.331, "step": 117305 }, { "epoch": 5.47, - "learning_rate": 9.094744737705688e-06, - "loss": 0.0555, + "learning_rate": 1.91117594745136e-05, + "loss": 0.0688, "step": 117310 }, { "epoch": 5.47, - "learning_rate": 9.094275936430548e-06, - "loss": 0.0197, + "learning_rate": 1.911129140467758e-05, + "loss": 0.0014, "step": 117315 }, { "epoch": 5.47, - "learning_rate": 9.093807135155408e-06, - "loss": 0.0174, + "learning_rate": 1.911082333484156e-05, + "loss": 0.029, "step": 117320 }, { "epoch": 5.47, - "learning_rate": 9.09333833388027e-06, - "loss": 0.0456, + "learning_rate": 1.911035526500554e-05, + "loss": 0.0016, "step": 117325 }, { "epoch": 5.47, - "learning_rate": 9.09286953260513e-06, - "loss": 0.0767, + "learning_rate": 1.910988719516952e-05, + "loss": 0.0235, "step": 117330 }, { "epoch": 5.48, - "learning_rate": 9.09240073132999e-06, - "loss": 0.0456, + "learning_rate": 1.91094191253335e-05, + "loss": 0.033, "step": 117335 }, { "epoch": 5.48, - "learning_rate": 9.091931930054851e-06, - "loss": 0.0557, + "learning_rate": 1.910895105549748e-05, + "loss": 0.0273, "step": 117340 }, { "epoch": 5.48, - "learning_rate": 9.091463128779711e-06, - "loss": 0.0988, + "learning_rate": 1.910848298566146e-05, + "loss": 0.1092, "step": 117345 }, { "epoch": 5.48, - "learning_rate": 9.090994327504573e-06, - "loss": 0.136, + "learning_rate": 1.9108014915825442e-05, + "loss": 0.0741, "step": 117350 }, { "epoch": 5.48, - "learning_rate": 9.090525526229433e-06, - "loss": 0.2792, + "learning_rate": 1.9107546845989422e-05, + "loss": 0.1742, "step": 117355 }, { "epoch": 5.48, - "learning_rate": 9.090056724954292e-06, - "loss": 0.1056, + "learning_rate": 1.91070787761534e-05, + "loss": 0.0668, "step": 117360 }, { "epoch": 5.48, - "learning_rate": 9.089587923679152e-06, - "loss": 0.0502, + "learning_rate": 1.9106610706317385e-05, + "loss": 0.0607, "step": 117365 }, { "epoch": 5.48, - "learning_rate": 9.089119122404014e-06, - "loss": 0.0183, + "learning_rate": 1.9106142636481365e-05, + "loss": 0.026, "step": 117370 }, { "epoch": 5.48, - "learning_rate": 9.088650321128874e-06, - "loss": 0.0585, + "learning_rate": 1.9105674566645344e-05, + "loss": 0.0253, "step": 117375 }, { "epoch": 5.48, - "learning_rate": 9.088181519853736e-06, - "loss": 0.0823, + "learning_rate": 1.9105206496809324e-05, + "loss": 0.0285, "step": 117380 }, { "epoch": 5.48, - "learning_rate": 9.087712718578596e-06, - "loss": 0.0631, + "learning_rate": 1.9104738426973307e-05, + "loss": 0.0355, "step": 117385 }, { "epoch": 5.48, - "learning_rate": 9.087243917303455e-06, - "loss": 0.0589, + "learning_rate": 1.9104270357137287e-05, + "loss": 0.1421, "step": 117390 }, { "epoch": 5.48, - "learning_rate": 9.086775116028317e-06, - "loss": 0.1048, + "learning_rate": 1.9103802287301264e-05, + "loss": 0.056, "step": 117395 }, { "epoch": 5.48, - "learning_rate": 9.086306314753177e-06, - "loss": 0.0903, + "learning_rate": 1.9103334217465243e-05, + "loss": 0.0837, "step": 117400 }, { "epoch": 5.48, - "learning_rate": 9.085837513478037e-06, - "loss": 0.3029, + "learning_rate": 1.9102866147629227e-05, + "loss": 0.2194, "step": 117405 }, { "epoch": 5.48, - "learning_rate": 9.085368712202899e-06, - "loss": 0.0612, + "learning_rate": 1.9102398077793207e-05, + "loss": 0.1174, "step": 117410 }, { "epoch": 5.48, - "learning_rate": 9.084899910927759e-06, - "loss": 0.0061, + "learning_rate": 1.9101930007957186e-05, + "loss": 0.0041, "step": 117415 }, { "epoch": 5.48, - "learning_rate": 9.08443110965262e-06, - "loss": 0.03, + "learning_rate": 1.910146193812117e-05, + "loss": 0.0059, "step": 117420 }, { "epoch": 5.48, - "learning_rate": 9.08396230837748e-06, - "loss": 0.0294, + "learning_rate": 1.910099386828515e-05, + "loss": 0.0139, "step": 117425 }, { "epoch": 5.48, - "learning_rate": 9.08349350710234e-06, - "loss": 0.0096, + "learning_rate": 1.910052579844913e-05, + "loss": 0.0884, "step": 117430 }, { "epoch": 5.48, - "learning_rate": 9.0830247058272e-06, - "loss": 0.0802, + "learning_rate": 1.910005772861311e-05, + "loss": 0.0308, "step": 117435 }, { "epoch": 5.48, - "learning_rate": 9.08255590455206e-06, - "loss": 0.0496, + "learning_rate": 1.9099589658777092e-05, + "loss": 0.0655, "step": 117440 }, { "epoch": 5.48, - "learning_rate": 9.082087103276921e-06, - "loss": 0.0654, + "learning_rate": 1.9099121588941072e-05, + "loss": 0.0604, "step": 117445 }, { "epoch": 5.48, - "learning_rate": 9.081618302001783e-06, - "loss": 0.0739, + "learning_rate": 1.9098653519105052e-05, + "loss": 0.1693, "step": 117450 }, { "epoch": 5.48, - "learning_rate": 9.081149500726643e-06, - "loss": 0.2911, + "learning_rate": 1.909818544926903e-05, + "loss": 0.2158, "step": 117455 }, { "epoch": 5.48, - "learning_rate": 9.080680699451503e-06, - "loss": 0.0475, + "learning_rate": 1.909771737943301e-05, + "loss": 0.1033, "step": 117460 }, { "epoch": 5.48, - "learning_rate": 9.080211898176365e-06, - "loss": 0.0105, + "learning_rate": 1.909724930959699e-05, + "loss": 0.0101, "step": 117465 }, { "epoch": 5.48, - "learning_rate": 9.079743096901225e-06, - "loss": 0.0383, + "learning_rate": 1.909678123976097e-05, + "loss": 0.0291, "step": 117470 }, { "epoch": 5.48, - "learning_rate": 9.079274295626084e-06, - "loss": 0.0248, + "learning_rate": 1.9096313169924954e-05, + "loss": 0.0161, "step": 117475 }, { "epoch": 5.48, - "learning_rate": 9.078805494350944e-06, - "loss": 0.1112, + "learning_rate": 1.9095845100088934e-05, + "loss": 0.0252, "step": 117480 }, { "epoch": 5.48, - "learning_rate": 9.078336693075806e-06, - "loss": 0.0732, + "learning_rate": 1.9095377030252914e-05, + "loss": 0.098, "step": 117485 }, { "epoch": 5.48, - "learning_rate": 9.077867891800668e-06, - "loss": 0.0085, + "learning_rate": 1.9094908960416894e-05, + "loss": 0.091, "step": 117490 }, { "epoch": 5.48, - "learning_rate": 9.077399090525528e-06, - "loss": 0.0933, + "learning_rate": 1.9094440890580877e-05, + "loss": 0.0822, "step": 117495 }, { "epoch": 5.48, - "learning_rate": 9.076930289250388e-06, - "loss": 0.2581, + "learning_rate": 1.9093972820744857e-05, + "loss": 0.09, "step": 117500 }, { "epoch": 5.48, - "learning_rate": 9.076461487975247e-06, - "loss": 0.2955, + "learning_rate": 1.9093504750908837e-05, + "loss": 0.256, "step": 117505 }, { "epoch": 5.48, - "learning_rate": 9.075992686700107e-06, - "loss": 0.0904, + "learning_rate": 1.9093036681072816e-05, + "loss": 0.0614, "step": 117510 }, { "epoch": 5.48, - "learning_rate": 9.075523885424969e-06, - "loss": 0.0153, + "learning_rate": 1.90925686112368e-05, + "loss": 0.0165, "step": 117515 }, { "epoch": 5.48, - "learning_rate": 9.075055084149829e-06, - "loss": 0.0351, + "learning_rate": 1.9092100541400776e-05, + "loss": 0.1022, "step": 117520 }, { "epoch": 5.48, - "learning_rate": 9.07458628287469e-06, - "loss": 0.0174, + "learning_rate": 1.9091632471564756e-05, + "loss": 0.0213, "step": 117525 }, { "epoch": 5.48, - "learning_rate": 9.07411748159955e-06, - "loss": 0.0614, + "learning_rate": 1.9091164401728736e-05, + "loss": 0.0203, "step": 117530 }, { "epoch": 5.48, - "learning_rate": 9.073648680324412e-06, - "loss": 0.0452, + "learning_rate": 1.909069633189272e-05, + "loss": 0.0697, "step": 117535 }, { "epoch": 5.48, - "learning_rate": 9.073179879049272e-06, - "loss": 0.0734, + "learning_rate": 1.90902282620567e-05, + "loss": 0.034, "step": 117540 }, { "epoch": 5.48, - "learning_rate": 9.072711077774132e-06, - "loss": 0.0717, + "learning_rate": 1.908976019222068e-05, + "loss": 0.0989, "step": 117545 }, { "epoch": 5.49, - "learning_rate": 9.072242276498992e-06, - "loss": 0.1025, + "learning_rate": 1.9089292122384662e-05, + "loss": 0.1165, "step": 117550 }, { "epoch": 5.49, - "learning_rate": 9.071773475223854e-06, - "loss": 0.3364, + "learning_rate": 1.908882405254864e-05, + "loss": 0.2304, "step": 117555 }, { "epoch": 5.49, - "learning_rate": 9.071304673948714e-06, - "loss": 0.1132, + "learning_rate": 1.908835598271262e-05, + "loss": 0.0668, "step": 117560 }, { "epoch": 5.49, - "learning_rate": 9.070835872673575e-06, - "loss": 0.0179, + "learning_rate": 1.90878879128766e-05, + "loss": 0.032, "step": 117565 }, { "epoch": 5.49, - "learning_rate": 9.070367071398435e-06, - "loss": 0.0326, + "learning_rate": 1.9087419843040584e-05, + "loss": 0.0244, "step": 117570 }, { "epoch": 5.49, - "learning_rate": 9.069898270123295e-06, - "loss": 0.0388, + "learning_rate": 1.9086951773204564e-05, + "loss": 0.036, "step": 117575 }, { "epoch": 5.49, - "learning_rate": 9.069429468848157e-06, - "loss": 0.0443, + "learning_rate": 1.9086483703368544e-05, + "loss": 0.0364, "step": 117580 }, { "epoch": 5.49, - "learning_rate": 9.068960667573017e-06, - "loss": 0.0358, + "learning_rate": 1.908601563353252e-05, + "loss": 0.0111, "step": 117585 }, { "epoch": 5.49, - "learning_rate": 9.068491866297877e-06, - "loss": 0.0711, + "learning_rate": 1.9085547563696504e-05, + "loss": 0.0624, "step": 117590 }, { "epoch": 5.49, - "learning_rate": 9.068023065022738e-06, - "loss": 0.0632, + "learning_rate": 1.9085079493860483e-05, + "loss": 0.0848, "step": 117595 }, { "epoch": 5.49, - "learning_rate": 9.067554263747598e-06, - "loss": 0.1156, + "learning_rate": 1.9084611424024463e-05, + "loss": 0.2029, "step": 117600 }, { "epoch": 5.49, - "learning_rate": 9.06708546247246e-06, - "loss": 0.2085, + "learning_rate": 1.9084143354188447e-05, + "loss": 0.4404, "step": 117605 }, { "epoch": 5.49, - "learning_rate": 9.06661666119732e-06, - "loss": 0.0711, + "learning_rate": 1.9083675284352426e-05, + "loss": 0.0617, "step": 117610 }, { "epoch": 5.49, - "learning_rate": 9.06614785992218e-06, - "loss": 0.0034, + "learning_rate": 1.9083207214516406e-05, + "loss": 0.0206, "step": 117615 }, { "epoch": 5.49, - "learning_rate": 9.06567905864704e-06, - "loss": 0.0243, + "learning_rate": 1.9082739144680386e-05, + "loss": 0.0072, "step": 117620 }, { "epoch": 5.49, - "learning_rate": 9.065210257371901e-06, - "loss": 0.0108, + "learning_rate": 1.908227107484437e-05, + "loss": 0.0475, "step": 117625 }, { "epoch": 5.49, - "learning_rate": 9.064741456096761e-06, - "loss": 0.0748, + "learning_rate": 1.908180300500835e-05, + "loss": 0.0869, "step": 117630 }, { "epoch": 5.49, - "learning_rate": 9.064272654821623e-06, - "loss": 0.03, + "learning_rate": 1.908133493517233e-05, + "loss": 0.0293, "step": 117635 }, { "epoch": 5.49, - "learning_rate": 9.063803853546483e-06, - "loss": 0.0365, + "learning_rate": 1.908086686533631e-05, + "loss": 0.0259, "step": 117640 }, { "epoch": 5.49, - "learning_rate": 9.063335052271343e-06, - "loss": 0.1389, + "learning_rate": 1.908039879550029e-05, + "loss": 0.0658, "step": 117645 }, { "epoch": 5.49, - "learning_rate": 9.062866250996204e-06, - "loss": 0.1147, + "learning_rate": 1.9079930725664268e-05, + "loss": 0.0955, "step": 117650 }, { "epoch": 5.49, - "learning_rate": 9.062397449721064e-06, - "loss": 0.2702, + "learning_rate": 1.9079462655828248e-05, + "loss": 0.07, "step": 117655 }, { "epoch": 5.49, - "learning_rate": 9.061928648445924e-06, - "loss": 0.0691, + "learning_rate": 1.907899458599223e-05, + "loss": 0.0905, "step": 117660 }, { "epoch": 5.49, - "learning_rate": 9.061459847170786e-06, - "loss": 0.0078, + "learning_rate": 1.907852651615621e-05, + "loss": 0.012, "step": 117665 }, { "epoch": 5.49, - "learning_rate": 9.060991045895646e-06, - "loss": 0.042, + "learning_rate": 1.907805844632019e-05, + "loss": 0.0931, "step": 117670 }, { "epoch": 5.49, - "learning_rate": 9.060522244620507e-06, - "loss": 0.027, + "learning_rate": 1.907759037648417e-05, + "loss": 0.0164, "step": 117675 }, { "epoch": 5.49, - "learning_rate": 9.060053443345367e-06, - "loss": 0.0195, + "learning_rate": 1.9077122306648154e-05, + "loss": 0.0094, "step": 117680 }, { "epoch": 5.49, - "learning_rate": 9.059584642070227e-06, - "loss": 0.0368, + "learning_rate": 1.9076654236812134e-05, + "loss": 0.069, "step": 117685 }, { "epoch": 5.49, - "learning_rate": 9.059115840795087e-06, - "loss": 0.0591, + "learning_rate": 1.9076186166976114e-05, + "loss": 0.0773, "step": 117690 }, { "epoch": 5.49, - "learning_rate": 9.058647039519947e-06, - "loss": 0.0506, + "learning_rate": 1.9075718097140093e-05, + "loss": 0.0798, "step": 117695 }, { "epoch": 5.49, - "learning_rate": 9.058178238244809e-06, + "learning_rate": 1.9075250027304077e-05, "loss": 0.1628, "step": 117700 }, { "epoch": 5.49, - "learning_rate": 9.05770943696967e-06, - "loss": 0.2574, + "learning_rate": 1.9074781957468056e-05, + "loss": 0.1159, "step": 117705 }, { "epoch": 5.49, - "learning_rate": 9.05724063569453e-06, - "loss": 0.1073, + "learning_rate": 1.9074313887632033e-05, + "loss": 0.1401, "step": 117710 }, { "epoch": 5.49, - "learning_rate": 9.05677183441939e-06, - "loss": 0.0114, + "learning_rate": 1.9073845817796013e-05, + "loss": 0.0091, "step": 117715 }, { "epoch": 5.49, - "learning_rate": 9.056303033144252e-06, - "loss": 0.0324, + "learning_rate": 1.9073377747959996e-05, + "loss": 0.0207, "step": 117720 }, { "epoch": 5.49, - "learning_rate": 9.055834231869112e-06, - "loss": 0.0311, + "learning_rate": 1.9072909678123976e-05, + "loss": 0.0291, "step": 117725 }, { "epoch": 5.49, - "learning_rate": 9.055365430593972e-06, - "loss": 0.0418, + "learning_rate": 1.9072441608287955e-05, + "loss": 0.1226, "step": 117730 }, { "epoch": 5.49, - "learning_rate": 9.054896629318832e-06, - "loss": 0.0694, + "learning_rate": 1.907197353845194e-05, + "loss": 0.0456, "step": 117735 }, { "epoch": 5.49, - "learning_rate": 9.054427828043693e-06, - "loss": 0.0549, + "learning_rate": 1.907150546861592e-05, + "loss": 0.0438, "step": 117740 }, { "epoch": 5.49, - "learning_rate": 9.053959026768555e-06, - "loss": 0.1364, + "learning_rate": 1.90710373987799e-05, + "loss": 0.0663, "step": 117745 }, { "epoch": 5.49, - "learning_rate": 9.053490225493415e-06, - "loss": 0.2643, + "learning_rate": 1.9070569328943878e-05, + "loss": 0.148, "step": 117750 }, { "epoch": 5.49, - "learning_rate": 9.053021424218275e-06, - "loss": 0.238, + "learning_rate": 1.907010125910786e-05, + "loss": 0.2146, "step": 117755 }, { "epoch": 5.49, - "learning_rate": 9.052552622943135e-06, - "loss": 0.117, + "learning_rate": 1.906963318927184e-05, + "loss": 0.1094, "step": 117760 }, { "epoch": 5.5, - "learning_rate": 9.052083821667995e-06, - "loss": 0.0348, + "learning_rate": 1.906916511943582e-05, + "loss": 0.0087, "step": 117765 }, { "epoch": 5.5, - "learning_rate": 9.051615020392856e-06, - "loss": 0.03, + "learning_rate": 1.90686970495998e-05, + "loss": 0.0434, "step": 117770 }, { "epoch": 5.5, - "learning_rate": 9.051146219117716e-06, - "loss": 0.0098, + "learning_rate": 1.906822897976378e-05, + "loss": 0.0187, "step": 117775 }, { "epoch": 5.5, - "learning_rate": 9.050677417842578e-06, - "loss": 0.0294, + "learning_rate": 1.906776090992776e-05, + "loss": 0.0442, "step": 117780 }, { "epoch": 5.5, - "learning_rate": 9.050208616567438e-06, - "loss": 0.0775, + "learning_rate": 1.906729284009174e-05, + "loss": 0.0248, "step": 117785 }, { "epoch": 5.5, - "learning_rate": 9.0497398152923e-06, - "loss": 0.0293, + "learning_rate": 1.9066824770255723e-05, + "loss": 0.1032, "step": 117790 }, { "epoch": 5.5, - "learning_rate": 9.04927101401716e-06, - "loss": 0.0429, + "learning_rate": 1.9066356700419703e-05, + "loss": 0.0751, "step": 117795 }, { "epoch": 5.5, - "learning_rate": 9.048802212742019e-06, - "loss": 0.0892, + "learning_rate": 1.9065888630583683e-05, + "loss": 0.1226, "step": 117800 }, { "epoch": 5.5, - "learning_rate": 9.048333411466879e-06, - "loss": 0.2232, + "learning_rate": 1.9065420560747663e-05, + "loss": 0.3544, "step": 117805 }, { "epoch": 5.5, - "learning_rate": 9.04786461019174e-06, - "loss": 0.0891, + "learning_rate": 1.9064952490911646e-05, + "loss": 0.0929, "step": 117810 }, { "epoch": 5.5, - "learning_rate": 9.0473958089166e-06, - "loss": 0.0125, + "learning_rate": 1.9064484421075626e-05, + "loss": 0.0182, "step": 117815 }, { "epoch": 5.5, - "learning_rate": 9.046927007641462e-06, - "loss": 0.0346, + "learning_rate": 1.9064016351239606e-05, + "loss": 0.009, "step": 117820 }, { "epoch": 5.5, - "learning_rate": 9.046458206366322e-06, - "loss": 0.0493, + "learning_rate": 1.9063548281403586e-05, + "loss": 0.0033, "step": 117825 }, { "epoch": 5.5, - "learning_rate": 9.045989405091182e-06, - "loss": 0.0253, + "learning_rate": 1.906308021156757e-05, + "loss": 0.0345, "step": 117830 }, { "epoch": 5.5, - "learning_rate": 9.045520603816042e-06, - "loss": 0.1131, + "learning_rate": 1.9062612141731545e-05, + "loss": 0.0271, "step": 117835 }, { "epoch": 5.5, - "learning_rate": 9.045051802540904e-06, - "loss": 0.0498, + "learning_rate": 1.9062144071895525e-05, + "loss": 0.0876, "step": 117840 }, { "epoch": 5.5, - "learning_rate": 9.044583001265764e-06, - "loss": 0.0709, + "learning_rate": 1.9061676002059508e-05, + "loss": 0.1297, "step": 117845 }, { "epoch": 5.5, - "learning_rate": 9.044114199990625e-06, - "loss": 0.1391, + "learning_rate": 1.9061207932223488e-05, + "loss": 0.2326, "step": 117850 }, { "epoch": 5.5, - "learning_rate": 9.043645398715485e-06, - "loss": 0.3, + "learning_rate": 1.9060739862387468e-05, + "loss": 0.1254, "step": 117855 }, { "epoch": 5.5, - "learning_rate": 9.043176597440347e-06, - "loss": 0.0846, + "learning_rate": 1.9060271792551448e-05, + "loss": 0.0794, "step": 117860 }, { "epoch": 5.5, - "learning_rate": 9.042707796165207e-06, - "loss": 0.0147, + "learning_rate": 1.905980372271543e-05, + "loss": 0.0214, "step": 117865 }, { "epoch": 5.5, - "learning_rate": 9.042238994890067e-06, - "loss": 0.0097, + "learning_rate": 1.905933565287941e-05, + "loss": 0.0617, "step": 117870 }, { "epoch": 5.5, - "learning_rate": 9.041770193614927e-06, - "loss": 0.0322, + "learning_rate": 1.905886758304339e-05, + "loss": 0.0706, "step": 117875 }, { "epoch": 5.5, - "learning_rate": 9.041301392339788e-06, - "loss": 0.0329, + "learning_rate": 1.905839951320737e-05, + "loss": 0.0394, "step": 117880 }, { "epoch": 5.5, - "learning_rate": 9.040832591064648e-06, - "loss": 0.0597, + "learning_rate": 1.9057931443371354e-05, + "loss": 0.0302, "step": 117885 }, { "epoch": 5.5, - "learning_rate": 9.04036378978951e-06, - "loss": 0.0466, + "learning_rate": 1.9057463373535333e-05, + "loss": 0.1372, "step": 117890 }, { "epoch": 5.5, - "learning_rate": 9.03989498851437e-06, - "loss": 0.1337, + "learning_rate": 1.9056995303699313e-05, + "loss": 0.0704, "step": 117895 }, { "epoch": 5.5, - "learning_rate": 9.03942618723923e-06, - "loss": 0.2394, + "learning_rate": 1.9056527233863293e-05, + "loss": 0.1122, "step": 117900 }, { "epoch": 5.5, - "learning_rate": 9.038957385964091e-06, - "loss": 0.1768, + "learning_rate": 1.9056059164027273e-05, + "loss": 0.3621, "step": 117905 }, { "epoch": 5.5, - "learning_rate": 9.038488584688951e-06, - "loss": 0.0882, + "learning_rate": 1.9055591094191253e-05, + "loss": 0.0956, "step": 117910 }, { "epoch": 5.5, - "learning_rate": 9.038019783413811e-06, - "loss": 0.0069, + "learning_rate": 1.9055123024355232e-05, + "loss": 0.0113, "step": 117915 }, { "epoch": 5.5, - "learning_rate": 9.037550982138673e-06, - "loss": 0.006, + "learning_rate": 1.9054654954519216e-05, + "loss": 0.0409, "step": 117920 }, { "epoch": 5.5, - "learning_rate": 9.037082180863533e-06, - "loss": 0.0353, + "learning_rate": 1.9054186884683195e-05, + "loss": 0.0183, "step": 117925 }, { "epoch": 5.5, - "learning_rate": 9.036613379588394e-06, - "loss": 0.0485, + "learning_rate": 1.9053718814847175e-05, + "loss": 0.0476, "step": 117930 }, { "epoch": 5.5, - "learning_rate": 9.036144578313254e-06, - "loss": 0.0365, + "learning_rate": 1.9053250745011155e-05, + "loss": 0.0819, "step": 117935 }, { "epoch": 5.5, - "learning_rate": 9.035675777038114e-06, - "loss": 0.0727, + "learning_rate": 1.905278267517514e-05, + "loss": 0.067, "step": 117940 }, { "epoch": 5.5, - "learning_rate": 9.035206975762974e-06, - "loss": 0.0762, + "learning_rate": 1.9052314605339118e-05, + "loss": 0.0881, "step": 117945 }, { "epoch": 5.5, - "learning_rate": 9.034738174487834e-06, - "loss": 0.1576, + "learning_rate": 1.9051846535503098e-05, + "loss": 0.1324, "step": 117950 }, { "epoch": 5.5, - "learning_rate": 9.034269373212696e-06, - "loss": 0.3668, + "learning_rate": 1.9051378465667078e-05, + "loss": 0.28, "step": 117955 }, { "epoch": 5.5, - "learning_rate": 9.033800571937557e-06, - "loss": 0.0905, + "learning_rate": 1.905091039583106e-05, + "loss": 0.079, "step": 117960 }, { "epoch": 5.5, - "learning_rate": 9.033331770662417e-06, - "loss": 0.0329, + "learning_rate": 1.9050442325995037e-05, + "loss": 0.0213, "step": 117965 }, { "epoch": 5.5, - "learning_rate": 9.032862969387277e-06, - "loss": 0.0353, + "learning_rate": 1.9049974256159017e-05, + "loss": 0.0156, "step": 117970 }, { "epoch": 5.5, - "learning_rate": 9.032394168112139e-06, - "loss": 0.0238, + "learning_rate": 1.9049506186323e-05, + "loss": 0.0416, "step": 117975 }, { "epoch": 5.51, - "learning_rate": 9.031925366836999e-06, - "loss": 0.0486, + "learning_rate": 1.904903811648698e-05, + "loss": 0.063, "step": 117980 }, { "epoch": 5.51, - "learning_rate": 9.031456565561859e-06, - "loss": 0.0265, + "learning_rate": 1.904857004665096e-05, + "loss": 0.0696, "step": 117985 }, { "epoch": 5.51, - "learning_rate": 9.030987764286719e-06, - "loss": 0.0996, + "learning_rate": 1.904810197681494e-05, + "loss": 0.0351, "step": 117990 }, { "epoch": 5.51, - "learning_rate": 9.03051896301158e-06, - "loss": 0.1073, + "learning_rate": 1.9047633906978923e-05, + "loss": 0.0716, "step": 117995 }, { "epoch": 5.51, - "learning_rate": 9.030050161736442e-06, - "loss": 0.1165, + "learning_rate": 1.9047165837142903e-05, + "loss": 0.0963, "step": 118000 }, { "epoch": 5.51, - "learning_rate": 9.029581360461302e-06, - "loss": 0.1257, + "learning_rate": 1.9046697767306883e-05, + "loss": 0.3328, "step": 118005 }, { "epoch": 5.51, - "learning_rate": 9.029112559186162e-06, - "loss": 0.0794, + "learning_rate": 1.9046229697470863e-05, + "loss": 0.0967, "step": 118010 }, { "epoch": 5.51, - "learning_rate": 9.028643757911022e-06, - "loss": 0.0192, + "learning_rate": 1.9045761627634846e-05, + "loss": 0.0014, "step": 118015 }, { "epoch": 5.51, - "learning_rate": 9.028174956635882e-06, - "loss": 0.076, + "learning_rate": 1.9045293557798826e-05, + "loss": 0.0228, "step": 118020 }, { "epoch": 5.51, - "learning_rate": 9.027706155360743e-06, - "loss": 0.051, + "learning_rate": 1.9044825487962802e-05, + "loss": 0.0565, "step": 118025 }, { "epoch": 5.51, - "learning_rate": 9.027237354085603e-06, - "loss": 0.0137, + "learning_rate": 1.9044357418126785e-05, + "loss": 0.0242, "step": 118030 }, { "epoch": 5.51, - "learning_rate": 9.026768552810465e-06, - "loss": 0.0475, + "learning_rate": 1.9043889348290765e-05, + "loss": 0.0398, "step": 118035 }, { "epoch": 5.51, - "learning_rate": 9.026299751535325e-06, - "loss": 0.0611, + "learning_rate": 1.9043421278454745e-05, + "loss": 0.0598, "step": 118040 }, { "epoch": 5.51, - "learning_rate": 9.025830950260186e-06, - "loss": 0.073, + "learning_rate": 1.9042953208618725e-05, + "loss": 0.1262, "step": 118045 }, { "epoch": 5.51, - "learning_rate": 9.025362148985046e-06, - "loss": 0.1931, + "learning_rate": 1.9042485138782708e-05, + "loss": 0.121, "step": 118050 }, { "epoch": 5.51, - "learning_rate": 9.024893347709906e-06, - "loss": 0.3707, + "learning_rate": 1.9042017068946688e-05, + "loss": 0.1326, "step": 118055 }, { "epoch": 5.51, - "learning_rate": 9.024424546434766e-06, - "loss": 0.0777, + "learning_rate": 1.9041548999110668e-05, + "loss": 0.0769, "step": 118060 }, { "epoch": 5.51, - "learning_rate": 9.023955745159628e-06, - "loss": 0.009, + "learning_rate": 1.9041080929274647e-05, + "loss": 0.0151, "step": 118065 }, { "epoch": 5.51, - "learning_rate": 9.023486943884488e-06, - "loss": 0.0333, + "learning_rate": 1.904061285943863e-05, + "loss": 0.0758, "step": 118070 }, { "epoch": 5.51, - "learning_rate": 9.02301814260935e-06, - "loss": 0.0302, + "learning_rate": 1.904014478960261e-05, + "loss": 0.0622, "step": 118075 }, { "epoch": 5.51, - "learning_rate": 9.02254934133421e-06, - "loss": 0.0393, + "learning_rate": 1.903967671976659e-05, + "loss": 0.0661, "step": 118080 }, { "epoch": 5.51, - "learning_rate": 9.02208054005907e-06, - "loss": 0.017, + "learning_rate": 1.9039208649930573e-05, + "loss": 0.0272, "step": 118085 }, { "epoch": 5.51, - "learning_rate": 9.02161173878393e-06, - "loss": 0.1199, + "learning_rate": 1.903874058009455e-05, + "loss": 0.0734, "step": 118090 }, { "epoch": 5.51, - "learning_rate": 9.02114293750879e-06, - "loss": 0.1088, + "learning_rate": 1.903827251025853e-05, + "loss": 0.0774, "step": 118095 }, { "epoch": 5.51, - "learning_rate": 9.02067413623365e-06, - "loss": 0.0759, + "learning_rate": 1.903780444042251e-05, + "loss": 0.1086, "step": 118100 }, { "epoch": 5.51, - "learning_rate": 9.020205334958512e-06, - "loss": 0.1405, + "learning_rate": 1.9037336370586493e-05, + "loss": 0.2229, "step": 118105 }, { "epoch": 5.51, - "learning_rate": 9.019736533683372e-06, - "loss": 0.1169, + "learning_rate": 1.9036868300750472e-05, + "loss": 0.0856, "step": 118110 }, { "epoch": 5.51, - "learning_rate": 9.019267732408234e-06, - "loss": 0.0121, + "learning_rate": 1.9036400230914452e-05, + "loss": 0.0232, "step": 118115 }, { "epoch": 5.51, - "learning_rate": 9.018798931133094e-06, - "loss": 0.0092, + "learning_rate": 1.9035932161078432e-05, + "loss": 0.047, "step": 118120 }, { "epoch": 5.51, - "learning_rate": 9.018330129857954e-06, - "loss": 0.0396, + "learning_rate": 1.9035464091242415e-05, + "loss": 0.0373, "step": 118125 }, { "epoch": 5.51, - "learning_rate": 9.017861328582814e-06, - "loss": 0.0708, + "learning_rate": 1.9034996021406395e-05, + "loss": 0.0417, "step": 118130 }, { "epoch": 5.51, - "learning_rate": 9.017392527307675e-06, - "loss": 0.0936, + "learning_rate": 1.9034527951570375e-05, + "loss": 0.0469, "step": 118135 }, { "epoch": 5.51, - "learning_rate": 9.016923726032535e-06, - "loss": 0.1028, + "learning_rate": 1.9034059881734355e-05, + "loss": 0.0799, "step": 118140 }, { "epoch": 5.51, - "learning_rate": 9.016454924757397e-06, - "loss": 0.0856, + "learning_rate": 1.9033591811898338e-05, + "loss": 0.0824, "step": 118145 }, { "epoch": 5.51, - "learning_rate": 9.015986123482257e-06, - "loss": 0.1314, + "learning_rate": 1.9033123742062318e-05, + "loss": 0.1326, "step": 118150 }, { "epoch": 5.51, - "learning_rate": 9.015517322207117e-06, - "loss": 0.1874, + "learning_rate": 1.9032655672226294e-05, + "loss": 0.1745, "step": 118155 }, { "epoch": 5.51, - "learning_rate": 9.015048520931977e-06, - "loss": 0.0952, + "learning_rate": 1.9032187602390277e-05, + "loss": 0.1036, "step": 118160 }, { "epoch": 5.51, - "learning_rate": 9.014579719656838e-06, - "loss": 0.0187, + "learning_rate": 1.9031719532554257e-05, + "loss": 0.0248, "step": 118165 }, { "epoch": 5.51, - "learning_rate": 9.014110918381698e-06, - "loss": 0.0288, + "learning_rate": 1.9031251462718237e-05, + "loss": 0.0236, "step": 118170 }, { "epoch": 5.51, - "learning_rate": 9.01364211710656e-06, - "loss": 0.0635, + "learning_rate": 1.9030783392882217e-05, + "loss": 0.0266, "step": 118175 }, { "epoch": 5.51, - "learning_rate": 9.01317331583142e-06, - "loss": 0.0122, + "learning_rate": 1.90303153230462e-05, + "loss": 0.0603, "step": 118180 }, { "epoch": 5.51, - "learning_rate": 9.012704514556281e-06, - "loss": 0.0228, + "learning_rate": 1.902984725321018e-05, + "loss": 0.0535, "step": 118185 }, { "epoch": 5.51, - "learning_rate": 9.012235713281141e-06, - "loss": 0.0817, + "learning_rate": 1.902937918337416e-05, + "loss": 0.0347, "step": 118190 }, { "epoch": 5.52, - "learning_rate": 9.011766912006001e-06, - "loss": 0.0529, + "learning_rate": 1.902891111353814e-05, + "loss": 0.0639, "step": 118195 }, { "epoch": 5.52, - "learning_rate": 9.011298110730861e-06, - "loss": 0.0817, + "learning_rate": 1.9028443043702123e-05, + "loss": 0.0297, "step": 118200 }, { "epoch": 5.52, - "learning_rate": 9.010829309455721e-06, - "loss": 0.2346, + "learning_rate": 1.9027974973866103e-05, + "loss": 0.2137, "step": 118205 }, { "epoch": 5.52, - "learning_rate": 9.010360508180583e-06, - "loss": 0.1077, + "learning_rate": 1.9027506904030082e-05, + "loss": 0.0839, "step": 118210 }, { "epoch": 5.52, - "learning_rate": 9.009891706905444e-06, - "loss": 0.0193, + "learning_rate": 1.9027038834194062e-05, + "loss": 0.0107, "step": 118215 }, { "epoch": 5.52, - "learning_rate": 9.009422905630304e-06, - "loss": 0.0043, + "learning_rate": 1.9026570764358042e-05, + "loss": 0.0252, "step": 118220 }, { "epoch": 5.52, - "learning_rate": 9.008954104355164e-06, - "loss": 0.0164, + "learning_rate": 1.9026102694522022e-05, + "loss": 0.0312, "step": 118225 }, { "epoch": 5.52, - "learning_rate": 9.008485303080026e-06, - "loss": 0.0112, + "learning_rate": 1.9025634624686e-05, + "loss": 0.0945, "step": 118230 }, { "epoch": 5.52, - "learning_rate": 9.008016501804886e-06, - "loss": 0.0962, + "learning_rate": 1.9025166554849985e-05, + "loss": 0.0332, "step": 118235 }, { "epoch": 5.52, - "learning_rate": 9.007547700529746e-06, - "loss": 0.0899, + "learning_rate": 1.9024698485013965e-05, + "loss": 0.0176, "step": 118240 }, { "epoch": 5.52, - "learning_rate": 9.007078899254606e-06, - "loss": 0.1472, + "learning_rate": 1.9024230415177944e-05, + "loss": 0.0354, "step": 118245 }, { "epoch": 5.52, - "learning_rate": 9.006610097979467e-06, - "loss": 0.1357, + "learning_rate": 1.9023762345341924e-05, + "loss": 0.1426, "step": 118250 }, { "epoch": 5.52, - "learning_rate": 9.006141296704329e-06, - "loss": 0.2271, + "learning_rate": 1.9023294275505908e-05, + "loss": 0.1658, "step": 118255 }, { "epoch": 5.52, - "learning_rate": 9.005672495429189e-06, - "loss": 0.0837, + "learning_rate": 1.9022826205669887e-05, + "loss": 0.0621, "step": 118260 }, { "epoch": 5.52, - "learning_rate": 9.005203694154049e-06, - "loss": 0.0414, + "learning_rate": 1.9022358135833867e-05, + "loss": 0.0127, "step": 118265 }, { "epoch": 5.52, - "learning_rate": 9.004734892878909e-06, - "loss": 0.0226, + "learning_rate": 1.902189006599785e-05, + "loss": 0.0269, "step": 118270 }, { "epoch": 5.52, - "learning_rate": 9.004266091603769e-06, - "loss": 0.0147, + "learning_rate": 1.902142199616183e-05, + "loss": 0.0429, "step": 118275 }, { "epoch": 5.52, - "learning_rate": 9.00379729032863e-06, - "loss": 0.0312, + "learning_rate": 1.9020953926325807e-05, + "loss": 0.0456, "step": 118280 }, { "epoch": 5.52, - "learning_rate": 9.00332848905349e-06, - "loss": 0.0936, + "learning_rate": 1.9020485856489786e-05, + "loss": 0.0643, "step": 118285 }, { "epoch": 5.52, - "learning_rate": 9.002859687778352e-06, - "loss": 0.0318, + "learning_rate": 1.902001778665377e-05, + "loss": 0.1011, "step": 118290 }, { "epoch": 5.52, - "learning_rate": 9.002390886503212e-06, - "loss": 0.0717, + "learning_rate": 1.901954971681775e-05, + "loss": 0.1314, "step": 118295 }, { "epoch": 5.52, - "learning_rate": 9.001922085228073e-06, - "loss": 0.215, + "learning_rate": 1.901908164698173e-05, + "loss": 0.0739, "step": 118300 }, { "epoch": 5.52, - "learning_rate": 9.001453283952933e-06, - "loss": 0.1793, + "learning_rate": 1.901861357714571e-05, + "loss": 0.3465, "step": 118305 }, { "epoch": 5.52, - "learning_rate": 9.000984482677793e-06, - "loss": 0.0935, + "learning_rate": 1.9018145507309692e-05, + "loss": 0.0945, "step": 118310 }, { "epoch": 5.52, - "learning_rate": 9.000515681402653e-06, - "loss": 0.0068, + "learning_rate": 1.9017677437473672e-05, + "loss": 0.0203, "step": 118315 }, { "epoch": 5.52, - "learning_rate": 9.000046880127515e-06, - "loss": 0.0364, + "learning_rate": 1.9017209367637652e-05, + "loss": 0.0294, "step": 118320 }, { "epoch": 5.52, - "learning_rate": 8.999578078852375e-06, - "loss": 0.0403, + "learning_rate": 1.9016741297801632e-05, + "loss": 0.0492, "step": 118325 }, { "epoch": 5.52, - "learning_rate": 8.999109277577236e-06, - "loss": 0.0216, + "learning_rate": 1.9016273227965615e-05, + "loss": 0.0303, "step": 118330 }, { "epoch": 5.52, - "learning_rate": 8.998640476302096e-06, - "loss": 0.078, + "learning_rate": 1.9015805158129595e-05, + "loss": 0.0249, "step": 118335 }, { "epoch": 5.52, - "learning_rate": 8.998171675026956e-06, - "loss": 0.0285, + "learning_rate": 1.901533708829357e-05, + "loss": 0.0569, "step": 118340 }, { "epoch": 5.52, - "learning_rate": 8.997702873751816e-06, - "loss": 0.0792, + "learning_rate": 1.9014869018457554e-05, + "loss": 0.17, "step": 118345 }, { "epoch": 5.52, - "learning_rate": 8.997234072476678e-06, - "loss": 0.1787, + "learning_rate": 1.9014400948621534e-05, + "loss": 0.1303, "step": 118350 }, { "epoch": 5.52, - "learning_rate": 8.996765271201538e-06, - "loss": 0.2465, + "learning_rate": 1.9013932878785514e-05, + "loss": 0.3844, "step": 118355 }, { "epoch": 5.52, - "learning_rate": 8.9962964699264e-06, - "loss": 0.0738, + "learning_rate": 1.9013464808949494e-05, + "loss": 0.0425, "step": 118360 }, { "epoch": 5.52, - "learning_rate": 8.99582766865126e-06, - "loss": 0.0195, + "learning_rate": 1.9012996739113477e-05, + "loss": 0.0047, "step": 118365 }, { "epoch": 5.52, - "learning_rate": 8.995358867376121e-06, - "loss": 0.0263, + "learning_rate": 1.9012528669277457e-05, + "loss": 0.0457, "step": 118370 }, { "epoch": 5.52, - "learning_rate": 8.994890066100981e-06, - "loss": 0.036, + "learning_rate": 1.9012060599441437e-05, + "loss": 0.0179, "step": 118375 }, { "epoch": 5.52, - "learning_rate": 8.994421264825841e-06, - "loss": 0.0656, + "learning_rate": 1.9011592529605416e-05, + "loss": 0.0255, "step": 118380 }, { "epoch": 5.52, - "learning_rate": 8.9939524635507e-06, - "loss": 0.0338, + "learning_rate": 1.90111244597694e-05, + "loss": 0.0636, "step": 118385 }, { "epoch": 5.52, - "learning_rate": 8.993483662275562e-06, - "loss": 0.0731, + "learning_rate": 1.901065638993338e-05, + "loss": 0.0798, "step": 118390 }, { "epoch": 5.52, - "learning_rate": 8.993014861000422e-06, - "loss": 0.053, + "learning_rate": 1.901018832009736e-05, + "loss": 0.081, "step": 118395 }, { "epoch": 5.52, - "learning_rate": 8.992546059725284e-06, - "loss": 0.112, + "learning_rate": 1.9009720250261343e-05, + "loss": 0.0952, "step": 118400 }, { "epoch": 5.52, - "learning_rate": 8.992077258450144e-06, - "loss": 0.3096, + "learning_rate": 1.900925218042532e-05, + "loss": 0.2107, "step": 118405 }, { "epoch": 5.53, - "learning_rate": 8.991608457175004e-06, - "loss": 0.1022, + "learning_rate": 1.90087841105893e-05, + "loss": 0.0784, "step": 118410 }, { "epoch": 5.53, - "learning_rate": 8.991139655899864e-06, - "loss": 0.0286, + "learning_rate": 1.900831604075328e-05, + "loss": 0.0162, "step": 118415 }, { "epoch": 5.53, - "learning_rate": 8.990670854624725e-06, - "loss": 0.0188, + "learning_rate": 1.9007847970917262e-05, + "loss": 0.0474, "step": 118420 }, { "epoch": 5.53, - "learning_rate": 8.990202053349585e-06, - "loss": 0.0351, + "learning_rate": 1.900737990108124e-05, + "loss": 0.0494, "step": 118425 }, { "epoch": 5.53, - "learning_rate": 8.989733252074447e-06, - "loss": 0.0425, + "learning_rate": 1.900691183124522e-05, + "loss": 0.0467, "step": 118430 }, { "epoch": 5.53, - "learning_rate": 8.989264450799307e-06, - "loss": 0.0298, + "learning_rate": 1.90064437614092e-05, + "loss": 0.0431, "step": 118435 }, { "epoch": 5.53, - "learning_rate": 8.988795649524169e-06, - "loss": 0.0784, + "learning_rate": 1.9005975691573184e-05, + "loss": 0.0235, "step": 118440 }, { "epoch": 5.53, - "learning_rate": 8.988326848249028e-06, - "loss": 0.0851, + "learning_rate": 1.9005507621737164e-05, + "loss": 0.0467, "step": 118445 }, { "epoch": 5.53, - "learning_rate": 8.987858046973888e-06, - "loss": 0.1939, + "learning_rate": 1.9005039551901144e-05, + "loss": 0.1016, "step": 118450 }, { "epoch": 5.53, - "learning_rate": 8.987389245698748e-06, - "loss": 0.2064, + "learning_rate": 1.9004571482065127e-05, + "loss": 0.3486, "step": 118455 }, { "epoch": 5.53, - "learning_rate": 8.986920444423608e-06, - "loss": 0.0914, + "learning_rate": 1.9004103412229107e-05, + "loss": 0.0894, "step": 118460 }, { "epoch": 5.53, - "learning_rate": 8.98645164314847e-06, - "loss": 0.0483, + "learning_rate": 1.9003635342393087e-05, + "loss": 0.0164, "step": 118465 }, { "epoch": 5.53, - "learning_rate": 8.985982841873332e-06, - "loss": 0.0414, + "learning_rate": 1.9003167272557063e-05, + "loss": 0.0645, "step": 118470 }, { "epoch": 5.53, - "learning_rate": 8.985514040598191e-06, - "loss": 0.0352, + "learning_rate": 1.9002699202721047e-05, + "loss": 0.0225, "step": 118475 }, { "epoch": 5.53, - "learning_rate": 8.985045239323051e-06, - "loss": 0.0173, + "learning_rate": 1.9002231132885026e-05, + "loss": 0.0762, "step": 118480 }, { "epoch": 5.53, - "learning_rate": 8.984576438047911e-06, - "loss": 0.0764, + "learning_rate": 1.9001763063049006e-05, + "loss": 0.0606, "step": 118485 }, { "epoch": 5.53, - "learning_rate": 8.984107636772773e-06, - "loss": 0.041, + "learning_rate": 1.9001294993212986e-05, + "loss": 0.0741, "step": 118490 }, { "epoch": 5.53, - "learning_rate": 8.983638835497633e-06, - "loss": 0.1722, + "learning_rate": 1.900082692337697e-05, + "loss": 0.0851, "step": 118495 }, { "epoch": 5.53, - "learning_rate": 8.983170034222493e-06, - "loss": 0.0928, + "learning_rate": 1.900035885354095e-05, + "loss": 0.1397, "step": 118500 }, { "epoch": 5.53, - "learning_rate": 8.982701232947354e-06, - "loss": 0.214, + "learning_rate": 1.899989078370493e-05, + "loss": 0.1489, "step": 118505 }, { "epoch": 5.53, - "learning_rate": 8.982232431672216e-06, - "loss": 0.0932, + "learning_rate": 1.8999422713868912e-05, + "loss": 0.0658, "step": 118510 }, { "epoch": 5.53, - "learning_rate": 8.981763630397076e-06, - "loss": 0.0044, + "learning_rate": 1.8998954644032892e-05, + "loss": 0.027, "step": 118515 }, { "epoch": 5.53, - "learning_rate": 8.981294829121936e-06, - "loss": 0.011, + "learning_rate": 1.8998486574196872e-05, + "loss": 0.0296, "step": 118520 }, { "epoch": 5.53, - "learning_rate": 8.980826027846796e-06, - "loss": 0.0426, + "learning_rate": 1.899801850436085e-05, + "loss": 0.0515, "step": 118525 }, { "epoch": 5.53, - "learning_rate": 8.980357226571656e-06, - "loss": 0.0454, + "learning_rate": 1.899755043452483e-05, + "loss": 0.0913, "step": 118530 }, { "epoch": 5.53, - "learning_rate": 8.979888425296517e-06, - "loss": 0.0751, + "learning_rate": 1.899708236468881e-05, + "loss": 0.0522, "step": 118535 }, { "epoch": 5.53, - "learning_rate": 8.979419624021377e-06, - "loss": 0.036, + "learning_rate": 1.899661429485279e-05, + "loss": 0.1012, "step": 118540 }, { "epoch": 5.53, - "learning_rate": 8.978950822746239e-06, - "loss": 0.0523, + "learning_rate": 1.899614622501677e-05, + "loss": 0.0641, "step": 118545 }, { "epoch": 5.53, - "learning_rate": 8.978482021471099e-06, - "loss": 0.1403, + "learning_rate": 1.8995678155180754e-05, + "loss": 0.1089, "step": 118550 }, { "epoch": 5.53, - "learning_rate": 8.97801322019596e-06, - "loss": 0.2436, + "learning_rate": 1.8995210085344734e-05, + "loss": 0.4135, "step": 118555 }, { "epoch": 5.53, - "learning_rate": 8.97754441892082e-06, - "loss": 0.0777, + "learning_rate": 1.8994742015508714e-05, + "loss": 0.0662, "step": 118560 }, { "epoch": 5.53, - "learning_rate": 8.97707561764568e-06, - "loss": 0.0383, + "learning_rate": 1.8994273945672693e-05, + "loss": 0.0299, "step": 118565 }, { "epoch": 5.53, - "learning_rate": 8.97660681637054e-06, - "loss": 0.0064, + "learning_rate": 1.8993805875836677e-05, + "loss": 0.0174, "step": 118570 }, { "epoch": 5.53, - "learning_rate": 8.976138015095402e-06, - "loss": 0.0224, + "learning_rate": 1.8993337806000656e-05, + "loss": 0.0247, "step": 118575 }, { "epoch": 5.53, - "learning_rate": 8.975669213820262e-06, - "loss": 0.0683, + "learning_rate": 1.8992869736164636e-05, + "loss": 0.0388, "step": 118580 }, { "epoch": 5.53, - "learning_rate": 8.975200412545124e-06, - "loss": 0.0962, + "learning_rate": 1.899240166632862e-05, + "loss": 0.0753, "step": 118585 }, { "epoch": 5.53, - "learning_rate": 8.974731611269984e-06, - "loss": 0.0764, + "learning_rate": 1.89919335964926e-05, + "loss": 0.1002, "step": 118590 }, { "epoch": 5.53, - "learning_rate": 8.974262809994843e-06, - "loss": 0.1239, + "learning_rate": 1.8991465526656576e-05, + "loss": 0.0463, "step": 118595 }, { "epoch": 5.53, - "learning_rate": 8.973794008719703e-06, - "loss": 0.1363, + "learning_rate": 1.8990997456820556e-05, + "loss": 0.1232, "step": 118600 }, { "epoch": 5.53, - "learning_rate": 8.973325207444565e-06, - "loss": 0.3418, + "learning_rate": 1.899052938698454e-05, + "loss": 0.1634, "step": 118605 }, { "epoch": 5.53, - "learning_rate": 8.972856406169425e-06, - "loss": 0.1037, + "learning_rate": 1.899006131714852e-05, + "loss": 0.0789, "step": 118610 }, { "epoch": 5.53, - "learning_rate": 8.972387604894287e-06, - "loss": 0.0136, + "learning_rate": 1.89895932473125e-05, + "loss": 0.0493, "step": 118615 }, { "epoch": 5.53, - "learning_rate": 8.971918803619147e-06, - "loss": 0.0594, + "learning_rate": 1.8989125177476478e-05, + "loss": 0.0027, "step": 118620 }, { "epoch": 5.54, - "learning_rate": 8.971450002344008e-06, - "loss": 0.085, + "learning_rate": 1.898865710764046e-05, + "loss": 0.0283, "step": 118625 }, { "epoch": 5.54, - "learning_rate": 8.970981201068868e-06, - "loss": 0.0128, + "learning_rate": 1.898818903780444e-05, + "loss": 0.0477, "step": 118630 }, { "epoch": 5.54, - "learning_rate": 8.970512399793728e-06, - "loss": 0.0437, + "learning_rate": 1.898772096796842e-05, + "loss": 0.0637, "step": 118635 }, { "epoch": 5.54, - "learning_rate": 8.970043598518588e-06, - "loss": 0.0369, + "learning_rate": 1.8987252898132404e-05, + "loss": 0.0225, "step": 118640 }, { "epoch": 5.54, - "learning_rate": 8.96957479724345e-06, - "loss": 0.0458, + "learning_rate": 1.8986784828296384e-05, + "loss": 0.0609, "step": 118645 }, { "epoch": 5.54, - "learning_rate": 8.96910599596831e-06, - "loss": 0.1238, + "learning_rate": 1.8986316758460364e-05, + "loss": 0.1569, "step": 118650 }, { "epoch": 5.54, - "learning_rate": 8.968637194693171e-06, - "loss": 0.3535, + "learning_rate": 1.8985848688624344e-05, + "loss": 0.3318, "step": 118655 }, { "epoch": 5.54, - "learning_rate": 8.968168393418031e-06, - "loss": 0.0945, + "learning_rate": 1.8985380618788324e-05, + "loss": 0.0444, "step": 118660 }, { "epoch": 5.54, - "learning_rate": 8.967699592142891e-06, - "loss": 0.0082, + "learning_rate": 1.8984912548952303e-05, + "loss": 0.0174, "step": 118665 }, { "epoch": 5.54, - "learning_rate": 8.967230790867751e-06, - "loss": 0.0147, + "learning_rate": 1.8984444479116283e-05, + "loss": 0.0235, "step": 118670 }, { "epoch": 5.54, - "learning_rate": 8.966761989592613e-06, - "loss": 0.0342, + "learning_rate": 1.8983976409280263e-05, + "loss": 0.0254, "step": 118675 }, { "epoch": 5.54, - "learning_rate": 8.966293188317472e-06, - "loss": 0.0249, + "learning_rate": 1.8983508339444246e-05, + "loss": 0.0424, "step": 118680 }, { "epoch": 5.54, - "learning_rate": 8.965824387042334e-06, - "loss": 0.0516, + "learning_rate": 1.8983040269608226e-05, + "loss": 0.0585, "step": 118685 }, { "epoch": 5.54, - "learning_rate": 8.965355585767194e-06, - "loss": 0.0765, + "learning_rate": 1.8982572199772206e-05, + "loss": 0.0319, "step": 118690 }, { "epoch": 5.54, - "learning_rate": 8.964886784492056e-06, - "loss": 0.0704, + "learning_rate": 1.898210412993619e-05, + "loss": 0.0731, "step": 118695 }, { "epoch": 5.54, - "learning_rate": 8.964417983216916e-06, - "loss": 0.0762, + "learning_rate": 1.898163606010017e-05, + "loss": 0.0982, "step": 118700 }, { "epoch": 5.54, - "learning_rate": 8.963949181941776e-06, - "loss": 0.1745, + "learning_rate": 1.898116799026415e-05, + "loss": 0.244, "step": 118705 }, { "epoch": 5.54, - "learning_rate": 8.963480380666635e-06, - "loss": 0.0458, + "learning_rate": 1.898069992042813e-05, + "loss": 0.0856, "step": 118710 }, { "epoch": 5.54, - "learning_rate": 8.963011579391495e-06, - "loss": 0.0193, + "learning_rate": 1.8980231850592112e-05, + "loss": 0.0498, "step": 118715 }, { "epoch": 5.54, - "learning_rate": 8.962542778116357e-06, - "loss": 0.0207, + "learning_rate": 1.8979763780756088e-05, + "loss": 0.0208, "step": 118720 }, { "epoch": 5.54, - "learning_rate": 8.962073976841219e-06, - "loss": 0.0127, + "learning_rate": 1.8979295710920068e-05, + "loss": 0.0245, "step": 118725 }, { "epoch": 5.54, - "learning_rate": 8.961605175566079e-06, - "loss": 0.0751, + "learning_rate": 1.8978827641084048e-05, + "loss": 0.0373, "step": 118730 }, { "epoch": 5.54, - "learning_rate": 8.961136374290939e-06, - "loss": 0.0819, + "learning_rate": 1.897835957124803e-05, + "loss": 0.0867, "step": 118735 }, { "epoch": 5.54, - "learning_rate": 8.960667573015798e-06, - "loss": 0.0334, + "learning_rate": 1.897789150141201e-05, + "loss": 0.0541, "step": 118740 }, { "epoch": 5.54, - "learning_rate": 8.96019877174066e-06, - "loss": 0.0319, + "learning_rate": 1.897742343157599e-05, + "loss": 0.1111, "step": 118745 }, { "epoch": 5.54, - "learning_rate": 8.95972997046552e-06, - "loss": 0.1115, + "learning_rate": 1.897695536173997e-05, + "loss": 0.1708, "step": 118750 }, { "epoch": 5.54, - "learning_rate": 8.95926116919038e-06, - "loss": 0.2242, + "learning_rate": 1.8976487291903954e-05, + "loss": 0.1262, "step": 118755 }, { "epoch": 5.54, - "learning_rate": 8.958792367915242e-06, - "loss": 0.0686, + "learning_rate": 1.8976019222067933e-05, + "loss": 0.0692, "step": 118760 }, { "epoch": 5.54, - "learning_rate": 8.958323566640103e-06, - "loss": 0.0192, + "learning_rate": 1.8975551152231913e-05, + "loss": 0.0227, "step": 118765 }, { "epoch": 5.54, - "learning_rate": 8.957854765364963e-06, - "loss": 0.0131, + "learning_rate": 1.8975083082395896e-05, + "loss": 0.0206, "step": 118770 }, { "epoch": 5.54, - "learning_rate": 8.957385964089823e-06, - "loss": 0.0177, + "learning_rate": 1.8974615012559876e-05, + "loss": 0.0444, "step": 118775 }, { "epoch": 5.54, - "learning_rate": 8.956917162814683e-06, - "loss": 0.0392, + "learning_rate": 1.8974146942723856e-05, + "loss": 0.0397, "step": 118780 }, { "epoch": 5.54, - "learning_rate": 8.956448361539543e-06, - "loss": 0.0632, + "learning_rate": 1.8973678872887833e-05, + "loss": 0.0547, "step": 118785 }, { "epoch": 5.54, - "learning_rate": 8.955979560264405e-06, - "loss": 0.063, + "learning_rate": 1.8973210803051816e-05, + "loss": 0.0733, "step": 118790 }, { "epoch": 5.54, - "learning_rate": 8.955510758989266e-06, - "loss": 0.0734, + "learning_rate": 1.8972742733215796e-05, + "loss": 0.0677, "step": 118795 }, { "epoch": 5.54, - "learning_rate": 8.955041957714126e-06, - "loss": 0.115, + "learning_rate": 1.8972274663379775e-05, + "loss": 0.0692, "step": 118800 }, { "epoch": 5.54, - "learning_rate": 8.954573156438986e-06, - "loss": 0.382, + "learning_rate": 1.8971806593543755e-05, + "loss": 0.2168, "step": 118805 }, { "epoch": 5.54, - "learning_rate": 8.954104355163846e-06, - "loss": 0.096, + "learning_rate": 1.897133852370774e-05, + "loss": 0.0894, "step": 118810 }, { "epoch": 5.54, - "learning_rate": 8.953635553888708e-06, - "loss": 0.0527, + "learning_rate": 1.8970870453871718e-05, + "loss": 0.0067, "step": 118815 }, { "epoch": 5.54, - "learning_rate": 8.953166752613568e-06, - "loss": 0.0471, + "learning_rate": 1.8970402384035698e-05, + "loss": 0.0183, "step": 118820 }, { "epoch": 5.54, - "learning_rate": 8.952697951338428e-06, - "loss": 0.016, + "learning_rate": 1.896993431419968e-05, + "loss": 0.018, "step": 118825 }, { "epoch": 5.54, - "learning_rate": 8.952229150063289e-06, - "loss": 0.0083, + "learning_rate": 1.896946624436366e-05, + "loss": 0.053, "step": 118830 }, { "epoch": 5.55, - "learning_rate": 8.95176034878815e-06, - "loss": 0.0139, + "learning_rate": 1.896899817452764e-05, + "loss": 0.0298, "step": 118835 }, { "epoch": 5.55, - "learning_rate": 8.95129154751301e-06, - "loss": 0.0698, + "learning_rate": 1.896853010469162e-05, + "loss": 0.0487, "step": 118840 }, { "epoch": 5.55, - "learning_rate": 8.95082274623787e-06, - "loss": 0.1555, + "learning_rate": 1.89680620348556e-05, + "loss": 0.0875, "step": 118845 }, { "epoch": 5.55, - "learning_rate": 8.95035394496273e-06, - "loss": 0.1776, + "learning_rate": 1.896759396501958e-05, + "loss": 0.0609, "step": 118850 }, { "epoch": 5.55, - "learning_rate": 8.94988514368759e-06, - "loss": 0.2482, + "learning_rate": 1.896712589518356e-05, + "loss": 0.3467, "step": 118855 }, { "epoch": 5.55, - "learning_rate": 8.949416342412452e-06, - "loss": 0.058, + "learning_rate": 1.896665782534754e-05, + "loss": 0.0913, "step": 118860 }, { "epoch": 5.55, - "learning_rate": 8.948947541137312e-06, - "loss": 0.0413, + "learning_rate": 1.8966189755511523e-05, + "loss": 0.0136, "step": 118865 }, { "epoch": 5.55, - "learning_rate": 8.948478739862174e-06, - "loss": 0.0371, + "learning_rate": 1.8965721685675503e-05, + "loss": 0.0021, "step": 118870 }, { "epoch": 5.55, - "learning_rate": 8.948009938587034e-06, - "loss": 0.0266, + "learning_rate": 1.8965253615839483e-05, + "loss": 0.0154, "step": 118875 }, { "epoch": 5.55, - "learning_rate": 8.947541137311894e-06, - "loss": 0.0696, + "learning_rate": 1.8964785546003466e-05, + "loss": 0.0745, "step": 118880 }, { "epoch": 5.55, - "learning_rate": 8.947072336036755e-06, - "loss": 0.0454, + "learning_rate": 1.8964317476167446e-05, + "loss": 0.066, "step": 118885 }, { "epoch": 5.55, - "learning_rate": 8.946603534761615e-06, - "loss": 0.0662, + "learning_rate": 1.8963849406331426e-05, + "loss": 0.0574, "step": 118890 }, { "epoch": 5.55, - "learning_rate": 8.946134733486475e-06, - "loss": 0.0683, + "learning_rate": 1.8963381336495405e-05, + "loss": 0.0847, "step": 118895 }, { "epoch": 5.55, - "learning_rate": 8.945665932211337e-06, - "loss": 0.1362, + "learning_rate": 1.896291326665939e-05, + "loss": 0.1175, "step": 118900 }, { "epoch": 5.55, - "learning_rate": 8.945197130936197e-06, - "loss": 0.2939, + "learning_rate": 1.896244519682337e-05, + "loss": 0.4303, "step": 118905 }, { "epoch": 5.55, - "learning_rate": 8.944728329661058e-06, - "loss": 0.0642, + "learning_rate": 1.8961977126987345e-05, + "loss": 0.0591, "step": 118910 }, { "epoch": 5.55, - "learning_rate": 8.944259528385918e-06, - "loss": 0.0081, + "learning_rate": 1.8961509057151325e-05, + "loss": 0.0387, "step": 118915 }, { "epoch": 5.55, - "learning_rate": 8.943790727110778e-06, - "loss": 0.0234, + "learning_rate": 1.8961040987315308e-05, + "loss": 0.0407, "step": 118920 }, { "epoch": 5.55, - "learning_rate": 8.943321925835638e-06, - "loss": 0.0548, + "learning_rate": 1.8960572917479288e-05, + "loss": 0.0193, "step": 118925 }, { "epoch": 5.55, - "learning_rate": 8.9428531245605e-06, - "loss": 0.0376, + "learning_rate": 1.8960104847643268e-05, + "loss": 0.0449, "step": 118930 }, { "epoch": 5.55, - "learning_rate": 8.94238432328536e-06, - "loss": 0.0412, + "learning_rate": 1.8959636777807247e-05, + "loss": 0.1099, "step": 118935 }, { "epoch": 5.55, - "learning_rate": 8.941915522010221e-06, - "loss": 0.0984, + "learning_rate": 1.895916870797123e-05, + "loss": 0.1271, "step": 118940 }, { "epoch": 5.55, - "learning_rate": 8.941446720735081e-06, - "loss": 0.1271, + "learning_rate": 1.895870063813521e-05, + "loss": 0.104, "step": 118945 }, { "epoch": 5.55, - "learning_rate": 8.940977919459943e-06, - "loss": 0.1437, + "learning_rate": 1.895823256829919e-05, + "loss": 0.1213, "step": 118950 }, { "epoch": 5.55, - "learning_rate": 8.940509118184803e-06, - "loss": 0.3483, + "learning_rate": 1.8957764498463173e-05, + "loss": 0.2085, "step": 118955 }, { "epoch": 5.55, - "learning_rate": 8.940040316909663e-06, - "loss": 0.108, + "learning_rate": 1.8957296428627153e-05, + "loss": 0.0794, "step": 118960 }, { "epoch": 5.55, - "learning_rate": 8.939571515634523e-06, - "loss": 0.0048, + "learning_rate": 1.8956828358791133e-05, + "loss": 0.014, "step": 118965 }, { "epoch": 5.55, - "learning_rate": 8.939102714359384e-06, - "loss": 0.0253, + "learning_rate": 1.8956360288955113e-05, + "loss": 0.0333, "step": 118970 }, { "epoch": 5.55, - "learning_rate": 8.938633913084244e-06, - "loss": 0.0193, + "learning_rate": 1.8955892219119093e-05, + "loss": 0.0181, "step": 118975 }, { "epoch": 5.55, - "learning_rate": 8.938165111809106e-06, - "loss": 0.0452, + "learning_rate": 1.8955424149283073e-05, + "loss": 0.0412, "step": 118980 }, { "epoch": 5.55, - "learning_rate": 8.937696310533966e-06, - "loss": 0.0451, + "learning_rate": 1.8954956079447052e-05, + "loss": 0.0328, "step": 118985 }, { "epoch": 5.55, - "learning_rate": 8.937227509258826e-06, - "loss": 0.0293, + "learning_rate": 1.8954488009611032e-05, + "loss": 0.0331, "step": 118990 }, { "epoch": 5.55, - "learning_rate": 8.936758707983686e-06, - "loss": 0.0949, + "learning_rate": 1.8954019939775015e-05, + "loss": 0.0379, "step": 118995 }, { "epoch": 5.55, - "learning_rate": 8.936289906708547e-06, - "loss": 0.1551, + "learning_rate": 1.8953551869938995e-05, + "loss": 0.1175, "step": 119000 }, { "epoch": 5.55, - "learning_rate": 8.935821105433407e-06, - "loss": 0.2035, + "learning_rate": 1.8953083800102975e-05, + "loss": 0.2089, "step": 119005 }, { "epoch": 5.55, - "learning_rate": 8.935352304158269e-06, - "loss": 0.0941, + "learning_rate": 1.8952615730266958e-05, + "loss": 0.1049, "step": 119010 }, { "epoch": 5.55, - "learning_rate": 8.934883502883129e-06, - "loss": 0.0319, + "learning_rate": 1.8952147660430938e-05, + "loss": 0.0293, "step": 119015 }, { "epoch": 5.55, - "learning_rate": 8.93441470160799e-06, - "loss": 0.0326, + "learning_rate": 1.8951679590594918e-05, + "loss": 0.0176, "step": 119020 }, { "epoch": 5.55, - "learning_rate": 8.93394590033285e-06, - "loss": 0.03, + "learning_rate": 1.8951211520758898e-05, + "loss": 0.0509, "step": 119025 }, { "epoch": 5.55, - "learning_rate": 8.93347709905771e-06, - "loss": 0.0669, + "learning_rate": 1.895074345092288e-05, + "loss": 0.0579, "step": 119030 }, { "epoch": 5.55, - "learning_rate": 8.93300829778257e-06, - "loss": 0.0728, + "learning_rate": 1.8950275381086857e-05, + "loss": 0.0313, "step": 119035 }, { "epoch": 5.55, - "learning_rate": 8.93253949650743e-06, - "loss": 0.0458, + "learning_rate": 1.8949807311250837e-05, + "loss": 0.0678, "step": 119040 }, { "epoch": 5.55, - "learning_rate": 8.932070695232292e-06, - "loss": 0.0452, + "learning_rate": 1.8949339241414817e-05, + "loss": 0.1092, "step": 119045 }, { "epoch": 5.56, - "learning_rate": 8.931601893957153e-06, - "loss": 0.1481, + "learning_rate": 1.89488711715788e-05, + "loss": 0.2095, "step": 119050 }, { "epoch": 5.56, - "learning_rate": 8.931133092682013e-06, - "loss": 0.1082, + "learning_rate": 1.894840310174278e-05, + "loss": 0.3191, "step": 119055 }, { "epoch": 5.56, - "learning_rate": 8.930664291406873e-06, - "loss": 0.0817, + "learning_rate": 1.894793503190676e-05, + "loss": 0.0703, "step": 119060 }, { "epoch": 5.56, - "learning_rate": 8.930195490131733e-06, - "loss": 0.014, + "learning_rate": 1.8947466962070743e-05, + "loss": 0.0117, "step": 119065 }, { "epoch": 5.56, - "learning_rate": 8.929726688856595e-06, - "loss": 0.0166, + "learning_rate": 1.8946998892234723e-05, + "loss": 0.0175, "step": 119070 }, { "epoch": 5.56, - "learning_rate": 8.929257887581455e-06, - "loss": 0.108, + "learning_rate": 1.8946530822398703e-05, + "loss": 0.0432, "step": 119075 }, { "epoch": 5.56, - "learning_rate": 8.928789086306315e-06, - "loss": 0.0569, + "learning_rate": 1.8946062752562682e-05, + "loss": 0.0565, "step": 119080 }, { "epoch": 5.56, - "learning_rate": 8.928320285031176e-06, - "loss": 0.0772, + "learning_rate": 1.8945594682726666e-05, + "loss": 0.077, "step": 119085 }, { "epoch": 5.56, - "learning_rate": 8.927851483756038e-06, - "loss": 0.1137, + "learning_rate": 1.8945126612890645e-05, + "loss": 0.0677, "step": 119090 }, { "epoch": 5.56, - "learning_rate": 8.927382682480898e-06, - "loss": 0.0515, + "learning_rate": 1.8944658543054625e-05, + "loss": 0.0436, "step": 119095 }, { "epoch": 5.56, - "learning_rate": 8.926913881205758e-06, - "loss": 0.1035, + "learning_rate": 1.8944190473218602e-05, + "loss": 0.2265, "step": 119100 }, { "epoch": 5.56, - "learning_rate": 8.926445079930618e-06, - "loss": 0.3041, + "learning_rate": 1.8943722403382585e-05, + "loss": 0.2239, "step": 119105 }, { "epoch": 5.56, - "learning_rate": 8.925976278655478e-06, - "loss": 0.0632, + "learning_rate": 1.8943254333546565e-05, + "loss": 0.0703, "step": 119110 }, { "epoch": 5.56, - "learning_rate": 8.92550747738034e-06, - "loss": 0.0048, + "learning_rate": 1.8942786263710545e-05, + "loss": 0.0475, "step": 119115 }, { "epoch": 5.56, - "learning_rate": 8.9250386761052e-06, - "loss": 0.0092, + "learning_rate": 1.8942318193874524e-05, + "loss": 0.0246, "step": 119120 }, { "epoch": 5.56, - "learning_rate": 8.92456987483006e-06, - "loss": 0.0181, + "learning_rate": 1.8941850124038508e-05, + "loss": 0.056, "step": 119125 }, { "epoch": 5.56, - "learning_rate": 8.92410107355492e-06, - "loss": 0.0392, + "learning_rate": 1.8941382054202487e-05, + "loss": 0.0218, "step": 119130 }, { "epoch": 5.56, - "learning_rate": 8.92363227227978e-06, - "loss": 0.0244, + "learning_rate": 1.8940913984366467e-05, + "loss": 0.0601, "step": 119135 }, { "epoch": 5.56, - "learning_rate": 8.923163471004642e-06, - "loss": 0.1567, + "learning_rate": 1.894044591453045e-05, + "loss": 0.086, "step": 119140 }, { "epoch": 5.56, - "learning_rate": 8.922694669729502e-06, - "loss": 0.1097, + "learning_rate": 1.893997784469443e-05, + "loss": 0.1039, "step": 119145 }, { "epoch": 5.56, - "learning_rate": 8.922225868454362e-06, - "loss": 0.1723, + "learning_rate": 1.893950977485841e-05, + "loss": 0.1163, "step": 119150 }, { "epoch": 5.56, - "learning_rate": 8.921757067179224e-06, - "loss": 0.1702, + "learning_rate": 1.893904170502239e-05, + "loss": 0.3951, "step": 119155 }, { "epoch": 5.56, - "learning_rate": 8.921288265904084e-06, - "loss": 0.1058, + "learning_rate": 1.8938573635186373e-05, + "loss": 0.0961, "step": 119160 }, { "epoch": 5.56, - "learning_rate": 8.920819464628945e-06, - "loss": 0.0066, + "learning_rate": 1.893810556535035e-05, + "loss": 0.0323, "step": 119165 }, { "epoch": 5.56, - "learning_rate": 8.920350663353805e-06, - "loss": 0.0194, + "learning_rate": 1.893763749551433e-05, + "loss": 0.0515, "step": 119170 }, { "epoch": 5.56, - "learning_rate": 8.919881862078665e-06, - "loss": 0.0695, + "learning_rate": 1.893716942567831e-05, + "loss": 0.0184, "step": 119175 }, { "epoch": 5.56, - "learning_rate": 8.919413060803525e-06, - "loss": 0.0344, + "learning_rate": 1.8936701355842292e-05, + "loss": 0.046, "step": 119180 }, { "epoch": 5.56, - "learning_rate": 8.918944259528387e-06, - "loss": 0.0596, + "learning_rate": 1.8936233286006272e-05, + "loss": 0.0506, "step": 119185 }, { "epoch": 5.56, - "learning_rate": 8.918475458253247e-06, - "loss": 0.0486, + "learning_rate": 1.8935765216170252e-05, + "loss": 0.0474, "step": 119190 }, { "epoch": 5.56, - "learning_rate": 8.918006656978108e-06, - "loss": 0.1368, + "learning_rate": 1.8935297146334235e-05, + "loss": 0.158, "step": 119195 }, { "epoch": 5.56, - "learning_rate": 8.917537855702968e-06, - "loss": 0.07, + "learning_rate": 1.8934829076498215e-05, + "loss": 0.1957, "step": 119200 }, { "epoch": 5.56, - "learning_rate": 8.917069054427828e-06, - "loss": 0.2711, + "learning_rate": 1.8934361006662195e-05, + "loss": 0.167, "step": 119205 }, { "epoch": 5.56, - "learning_rate": 8.91660025315269e-06, - "loss": 0.0825, + "learning_rate": 1.8933892936826175e-05, + "loss": 0.0579, "step": 119210 }, { "epoch": 5.56, - "learning_rate": 8.91613145187755e-06, - "loss": 0.0351, + "learning_rate": 1.8933424866990158e-05, + "loss": 0.027, "step": 119215 }, { "epoch": 5.56, - "learning_rate": 8.91566265060241e-06, - "loss": 0.0046, + "learning_rate": 1.8932956797154138e-05, + "loss": 0.0258, "step": 119220 }, { "epoch": 5.56, - "learning_rate": 8.915193849327271e-06, - "loss": 0.0309, + "learning_rate": 1.8932488727318114e-05, + "loss": 0.0243, "step": 119225 }, { "epoch": 5.56, - "learning_rate": 8.914725048052131e-06, - "loss": 0.0641, + "learning_rate": 1.8932020657482094e-05, + "loss": 0.034, "step": 119230 }, { "epoch": 5.56, - "learning_rate": 8.914256246776993e-06, - "loss": 0.0537, + "learning_rate": 1.8931552587646077e-05, + "loss": 0.3255, "step": 119235 }, { "epoch": 5.56, - "learning_rate": 8.913787445501853e-06, - "loss": 0.0363, + "learning_rate": 1.8931084517810057e-05, + "loss": 0.0423, "step": 119240 }, { "epoch": 5.56, - "learning_rate": 8.913318644226713e-06, - "loss": 0.1055, + "learning_rate": 1.8930616447974037e-05, + "loss": 0.064, "step": 119245 }, { "epoch": 5.56, - "learning_rate": 8.912849842951573e-06, - "loss": 0.0776, + "learning_rate": 1.893014837813802e-05, + "loss": 0.0977, "step": 119250 }, { "epoch": 5.56, - "learning_rate": 8.912381041676434e-06, - "loss": 0.4027, + "learning_rate": 1.8929680308302e-05, + "loss": 0.2537, "step": 119255 }, { "epoch": 5.56, - "learning_rate": 8.911912240401294e-06, - "loss": 0.0697, + "learning_rate": 1.892921223846598e-05, + "loss": 0.1032, "step": 119260 }, { "epoch": 5.57, - "learning_rate": 8.911443439126156e-06, - "loss": 0.0494, + "learning_rate": 1.892874416862996e-05, + "loss": 0.0349, "step": 119265 }, { "epoch": 5.57, - "learning_rate": 8.910974637851016e-06, - "loss": 0.0272, + "learning_rate": 1.8928276098793943e-05, + "loss": 0.0203, "step": 119270 }, { "epoch": 5.57, - "learning_rate": 8.910505836575877e-06, - "loss": 0.0611, + "learning_rate": 1.8927808028957922e-05, + "loss": 0.0046, "step": 119275 }, { "epoch": 5.57, - "learning_rate": 8.910037035300737e-06, - "loss": 0.0331, + "learning_rate": 1.8927339959121902e-05, + "loss": 0.064, "step": 119280 }, { "epoch": 5.57, - "learning_rate": 8.909568234025597e-06, - "loss": 0.0405, + "learning_rate": 1.8926871889285882e-05, + "loss": 0.0439, "step": 119285 }, { "epoch": 5.57, - "learning_rate": 8.909099432750457e-06, - "loss": 0.0437, + "learning_rate": 1.8926403819449862e-05, + "loss": 0.0247, "step": 119290 }, { "epoch": 5.57, - "learning_rate": 8.908630631475317e-06, - "loss": 0.1561, + "learning_rate": 1.8925935749613842e-05, + "loss": 0.0843, "step": 119295 }, { "epoch": 5.57, - "learning_rate": 8.908161830200179e-06, - "loss": 0.1048, + "learning_rate": 1.892546767977782e-05, + "loss": 0.1241, "step": 119300 }, { "epoch": 5.57, - "learning_rate": 8.90769302892504e-06, - "loss": 0.2812, + "learning_rate": 1.8924999609941805e-05, + "loss": 0.281, "step": 119305 }, { "epoch": 5.57, - "learning_rate": 8.9072242276499e-06, - "loss": 0.0859, + "learning_rate": 1.8924531540105785e-05, + "loss": 0.1011, "step": 119310 }, { "epoch": 5.57, - "learning_rate": 8.90675542637476e-06, - "loss": 0.0017, + "learning_rate": 1.8924063470269764e-05, + "loss": 0.004, "step": 119315 }, { "epoch": 5.57, - "learning_rate": 8.90628662509962e-06, - "loss": 0.0544, + "learning_rate": 1.8923595400433744e-05, + "loss": 0.0416, "step": 119320 }, { "epoch": 5.57, - "learning_rate": 8.905817823824482e-06, - "loss": 0.0277, + "learning_rate": 1.8923127330597727e-05, + "loss": 0.0211, "step": 119325 }, { "epoch": 5.57, - "learning_rate": 8.905349022549342e-06, - "loss": 0.0157, + "learning_rate": 1.8922659260761707e-05, + "loss": 0.0298, "step": 119330 }, { "epoch": 5.57, - "learning_rate": 8.904880221274202e-06, - "loss": 0.0515, + "learning_rate": 1.8922191190925687e-05, + "loss": 0.0145, "step": 119335 }, { "epoch": 5.57, - "learning_rate": 8.904411419999063e-06, - "loss": 0.0454, + "learning_rate": 1.8921723121089667e-05, + "loss": 0.0622, "step": 119340 }, { "epoch": 5.57, - "learning_rate": 8.903942618723925e-06, - "loss": 0.0546, + "learning_rate": 1.892125505125365e-05, + "loss": 0.1162, "step": 119345 }, { "epoch": 5.57, - "learning_rate": 8.903473817448785e-06, - "loss": 0.1691, + "learning_rate": 1.892078698141763e-05, + "loss": 0.0753, "step": 119350 }, { "epoch": 5.57, - "learning_rate": 8.903005016173645e-06, - "loss": 0.1981, + "learning_rate": 1.8920318911581606e-05, + "loss": 0.3898, "step": 119355 }, { "epoch": 5.57, - "learning_rate": 8.902536214898505e-06, - "loss": 0.081, + "learning_rate": 1.8919850841745586e-05, + "loss": 0.1015, "step": 119360 }, { "epoch": 5.57, - "learning_rate": 8.902067413623365e-06, - "loss": 0.0271, + "learning_rate": 1.891938277190957e-05, + "loss": 0.0314, "step": 119365 }, { "epoch": 5.57, - "learning_rate": 8.901598612348226e-06, - "loss": 0.0371, + "learning_rate": 1.891891470207355e-05, + "loss": 0.0321, "step": 119370 }, { "epoch": 5.57, - "learning_rate": 8.901129811073086e-06, - "loss": 0.0177, + "learning_rate": 1.891844663223753e-05, + "loss": 0.0329, "step": 119375 }, { "epoch": 5.57, - "learning_rate": 8.900661009797948e-06, - "loss": 0.0369, + "learning_rate": 1.8917978562401512e-05, + "loss": 0.0473, "step": 119380 }, { "epoch": 5.57, - "learning_rate": 8.900192208522808e-06, - "loss": 0.0608, + "learning_rate": 1.8917510492565492e-05, + "loss": 0.0654, "step": 119385 }, { "epoch": 5.57, - "learning_rate": 8.899723407247668e-06, - "loss": 0.0897, + "learning_rate": 1.8917042422729472e-05, + "loss": 0.081, "step": 119390 }, { "epoch": 5.57, - "learning_rate": 8.89925460597253e-06, - "loss": 0.0944, + "learning_rate": 1.891657435289345e-05, + "loss": 0.1043, "step": 119395 }, { "epoch": 5.57, - "learning_rate": 8.89878580469739e-06, - "loss": 0.155, + "learning_rate": 1.8916106283057435e-05, + "loss": 0.1253, "step": 119400 }, { "epoch": 5.57, - "learning_rate": 8.89831700342225e-06, - "loss": 0.2665, + "learning_rate": 1.8915638213221415e-05, + "loss": 0.2419, "step": 119405 }, { "epoch": 5.57, - "learning_rate": 8.897848202147111e-06, - "loss": 0.093, + "learning_rate": 1.8915170143385394e-05, + "loss": 0.0869, "step": 119410 }, { "epoch": 5.57, - "learning_rate": 8.89737940087197e-06, - "loss": 0.0143, + "learning_rate": 1.891470207354937e-05, + "loss": 0.0039, "step": 119415 }, { "epoch": 5.57, - "learning_rate": 8.896910599596832e-06, - "loss": 0.0394, + "learning_rate": 1.8914234003713354e-05, + "loss": 0.0097, "step": 119420 }, { "epoch": 5.57, - "learning_rate": 8.896441798321692e-06, - "loss": 0.0132, + "learning_rate": 1.8913765933877334e-05, + "loss": 0.0226, "step": 119425 }, { "epoch": 5.57, - "learning_rate": 8.895972997046552e-06, - "loss": 0.1483, + "learning_rate": 1.8913297864041314e-05, + "loss": 0.049, "step": 119430 }, { "epoch": 5.57, - "learning_rate": 8.895504195771412e-06, - "loss": 0.0879, + "learning_rate": 1.8912829794205297e-05, + "loss": 0.0979, "step": 119435 }, { "epoch": 5.57, - "learning_rate": 8.895035394496274e-06, - "loss": 0.1169, + "learning_rate": 1.8912361724369277e-05, + "loss": 0.0834, "step": 119440 }, { "epoch": 5.57, - "learning_rate": 8.894566593221134e-06, - "loss": 0.1324, + "learning_rate": 1.8911893654533257e-05, + "loss": 0.0772, "step": 119445 }, { "epoch": 5.57, - "learning_rate": 8.894097791945995e-06, - "loss": 0.1016, + "learning_rate": 1.8911425584697236e-05, + "loss": 0.0788, "step": 119450 }, { "epoch": 5.57, - "learning_rate": 8.893628990670855e-06, - "loss": 0.1703, + "learning_rate": 1.891095751486122e-05, + "loss": 0.2304, "step": 119455 }, { "epoch": 5.57, - "learning_rate": 8.893160189395715e-06, - "loss": 0.0924, + "learning_rate": 1.89104894450252e-05, + "loss": 0.1418, "step": 119460 }, { "epoch": 5.57, - "learning_rate": 8.892691388120577e-06, - "loss": 0.0025, + "learning_rate": 1.891002137518918e-05, + "loss": 0.0063, "step": 119465 }, { "epoch": 5.57, - "learning_rate": 8.892222586845437e-06, - "loss": 0.0196, + "learning_rate": 1.890955330535316e-05, + "loss": 0.012, "step": 119470 }, { "epoch": 5.57, - "learning_rate": 8.891753785570297e-06, - "loss": 0.0232, + "learning_rate": 1.8909085235517142e-05, + "loss": 0.0161, "step": 119475 }, { "epoch": 5.58, - "learning_rate": 8.891284984295158e-06, - "loss": 0.0389, + "learning_rate": 1.890861716568112e-05, + "loss": 0.0212, "step": 119480 }, { "epoch": 5.58, - "learning_rate": 8.890816183020018e-06, - "loss": 0.0425, + "learning_rate": 1.89081490958451e-05, + "loss": 0.0744, "step": 119485 }, { "epoch": 5.58, - "learning_rate": 8.89034738174488e-06, - "loss": 0.1137, + "learning_rate": 1.8907681026009082e-05, + "loss": 0.0403, "step": 119490 }, { "epoch": 5.58, - "learning_rate": 8.88987858046974e-06, - "loss": 0.1783, + "learning_rate": 1.890721295617306e-05, + "loss": 0.1331, "step": 119495 }, { "epoch": 5.58, - "learning_rate": 8.8894097791946e-06, - "loss": 0.0974, + "learning_rate": 1.890674488633704e-05, + "loss": 0.1111, "step": 119500 }, { "epoch": 5.58, - "learning_rate": 8.88894097791946e-06, - "loss": 0.198, + "learning_rate": 1.890627681650102e-05, + "loss": 0.1851, "step": 119505 }, { "epoch": 5.58, - "learning_rate": 8.888472176644321e-06, - "loss": 0.0909, + "learning_rate": 1.8905808746665004e-05, + "loss": 0.0496, "step": 119510 }, { "epoch": 5.58, - "learning_rate": 8.888003375369181e-06, - "loss": 0.012, + "learning_rate": 1.8905340676828984e-05, + "loss": 0.021, "step": 119515 }, { "epoch": 5.58, - "learning_rate": 8.887534574094043e-06, - "loss": 0.0203, + "learning_rate": 1.8904872606992964e-05, + "loss": 0.0384, "step": 119520 }, { "epoch": 5.58, - "learning_rate": 8.887065772818903e-06, - "loss": 0.0815, + "learning_rate": 1.8904404537156944e-05, + "loss": 0.04, "step": 119525 }, { "epoch": 5.58, - "learning_rate": 8.886596971543763e-06, - "loss": 0.0327, + "learning_rate": 1.8903936467320927e-05, + "loss": 0.058, "step": 119530 }, { "epoch": 5.58, - "learning_rate": 8.886128170268624e-06, - "loss": 0.1361, + "learning_rate": 1.8903468397484907e-05, + "loss": 0.0826, "step": 119535 }, { "epoch": 5.58, - "learning_rate": 8.885659368993484e-06, - "loss": 0.0887, + "learning_rate": 1.8903000327648887e-05, + "loss": 0.0512, "step": 119540 }, { "epoch": 5.58, - "learning_rate": 8.885190567718344e-06, - "loss": 0.1282, + "learning_rate": 1.8902532257812863e-05, + "loss": 0.0909, "step": 119545 }, { "epoch": 5.58, - "learning_rate": 8.884721766443204e-06, - "loss": 0.1388, + "learning_rate": 1.8902064187976846e-05, + "loss": 0.1345, "step": 119550 }, { "epoch": 5.58, - "learning_rate": 8.884252965168066e-06, - "loss": 0.1713, + "learning_rate": 1.8901596118140826e-05, + "loss": 0.188, "step": 119555 }, { "epoch": 5.58, - "learning_rate": 8.883784163892928e-06, - "loss": 0.1051, + "learning_rate": 1.8901128048304806e-05, + "loss": 0.0837, "step": 119560 }, { "epoch": 5.58, - "learning_rate": 8.883315362617787e-06, - "loss": 0.0221, + "learning_rate": 1.890065997846879e-05, + "loss": 0.0105, "step": 119565 }, { "epoch": 5.58, - "learning_rate": 8.882846561342647e-06, - "loss": 0.015, + "learning_rate": 1.890019190863277e-05, + "loss": 0.0197, "step": 119570 }, { "epoch": 5.58, - "learning_rate": 8.882377760067507e-06, - "loss": 0.0384, + "learning_rate": 1.889972383879675e-05, + "loss": 0.036, "step": 119575 }, { "epoch": 5.58, - "learning_rate": 8.881908958792369e-06, - "loss": 0.0231, + "learning_rate": 1.889925576896073e-05, + "loss": 0.0588, "step": 119580 }, { "epoch": 5.58, - "learning_rate": 8.881440157517229e-06, - "loss": 0.0742, + "learning_rate": 1.8898787699124712e-05, + "loss": 0.046, "step": 119585 }, { "epoch": 5.58, - "learning_rate": 8.880971356242089e-06, - "loss": 0.0545, + "learning_rate": 1.889831962928869e-05, + "loss": 0.1735, "step": 119590 }, { "epoch": 5.58, - "learning_rate": 8.88050255496695e-06, - "loss": 0.0861, + "learning_rate": 1.889785155945267e-05, + "loss": 0.0887, "step": 119595 }, { "epoch": 5.58, - "learning_rate": 8.880033753691812e-06, - "loss": 0.1021, + "learning_rate": 1.889738348961665e-05, + "loss": 0.0914, "step": 119600 }, { "epoch": 5.58, - "learning_rate": 8.879564952416672e-06, - "loss": 0.2736, + "learning_rate": 1.889691541978063e-05, + "loss": 0.3387, "step": 119605 }, { "epoch": 5.58, - "learning_rate": 8.879096151141532e-06, - "loss": 0.0721, + "learning_rate": 1.889644734994461e-05, + "loss": 0.0788, "step": 119610 }, { "epoch": 5.58, - "learning_rate": 8.878627349866392e-06, - "loss": 0.0225, + "learning_rate": 1.889597928010859e-05, + "loss": 0.0357, "step": 119615 }, { "epoch": 5.58, - "learning_rate": 8.878158548591252e-06, - "loss": 0.0206, + "learning_rate": 1.8895511210272574e-05, + "loss": 0.0029, "step": 119620 }, { "epoch": 5.58, - "learning_rate": 8.877689747316113e-06, - "loss": 0.0357, + "learning_rate": 1.8895043140436554e-05, + "loss": 0.0517, "step": 119625 }, { "epoch": 5.58, - "learning_rate": 8.877220946040973e-06, - "loss": 0.0207, + "learning_rate": 1.8894575070600534e-05, + "loss": 0.0532, "step": 119630 }, { "epoch": 5.58, - "learning_rate": 8.876752144765835e-06, - "loss": 0.0811, + "learning_rate": 1.8894107000764513e-05, + "loss": 0.0737, "step": 119635 }, { "epoch": 5.58, - "learning_rate": 8.876283343490695e-06, - "loss": 0.0978, + "learning_rate": 1.8893638930928497e-05, + "loss": 0.0716, "step": 119640 }, { "epoch": 5.58, - "learning_rate": 8.875814542215555e-06, - "loss": 0.0754, + "learning_rate": 1.8893170861092476e-05, + "loss": 0.134, "step": 119645 }, { "epoch": 5.58, - "learning_rate": 8.875345740940417e-06, - "loss": 0.0835, + "learning_rate": 1.8892702791256456e-05, + "loss": 0.0868, "step": 119650 }, { "epoch": 5.58, - "learning_rate": 8.874876939665276e-06, - "loss": 0.166, + "learning_rate": 1.8892234721420436e-05, + "loss": 0.3507, "step": 119655 }, { "epoch": 5.58, - "learning_rate": 8.874408138390136e-06, - "loss": 0.0593, + "learning_rate": 1.889176665158442e-05, + "loss": 0.0992, "step": 119660 }, { "epoch": 5.58, - "learning_rate": 8.873939337114998e-06, - "loss": 0.0188, + "learning_rate": 1.88912985817484e-05, + "loss": 0.0194, "step": 119665 }, { "epoch": 5.58, - "learning_rate": 8.873470535839858e-06, - "loss": 0.0288, + "learning_rate": 1.8890830511912375e-05, + "loss": 0.0149, "step": 119670 }, { "epoch": 5.58, - "learning_rate": 8.87300173456472e-06, - "loss": 0.0542, + "learning_rate": 1.889036244207636e-05, + "loss": 0.0282, "step": 119675 }, { "epoch": 5.58, - "learning_rate": 8.87253293328958e-06, + "learning_rate": 1.888989437224034e-05, "loss": 0.0496, "step": 119680 }, { "epoch": 5.58, - "learning_rate": 8.87206413201444e-06, - "loss": 0.0314, + "learning_rate": 1.888942630240432e-05, + "loss": 0.0434, "step": 119685 }, { "epoch": 5.58, - "learning_rate": 8.8715953307393e-06, - "loss": 0.0603, + "learning_rate": 1.8888958232568298e-05, + "loss": 0.0898, "step": 119690 }, { "epoch": 5.59, - "learning_rate": 8.871126529464161e-06, - "loss": 0.1009, + "learning_rate": 1.888849016273228e-05, + "loss": 0.0416, "step": 119695 }, { "epoch": 5.59, - "learning_rate": 8.870657728189021e-06, - "loss": 0.1461, + "learning_rate": 1.888802209289626e-05, + "loss": 0.1091, "step": 119700 }, { "epoch": 5.59, - "learning_rate": 8.870188926913883e-06, - "loss": 0.2816, + "learning_rate": 1.888755402306024e-05, + "loss": 0.1714, "step": 119705 }, { "epoch": 5.59, - "learning_rate": 8.869720125638742e-06, - "loss": 0.0806, + "learning_rate": 1.888708595322422e-05, + "loss": 0.0788, "step": 119710 }, { "epoch": 5.59, - "learning_rate": 8.869251324363602e-06, - "loss": 0.019, + "learning_rate": 1.8886617883388204e-05, + "loss": 0.0018, "step": 119715 }, { "epoch": 5.59, - "learning_rate": 8.868782523088464e-06, - "loss": 0.0486, + "learning_rate": 1.8886149813552184e-05, + "loss": 0.0663, "step": 119720 }, { "epoch": 5.59, - "learning_rate": 8.868313721813324e-06, - "loss": 0.0425, + "learning_rate": 1.8885681743716164e-05, + "loss": 0.0243, "step": 119725 }, { "epoch": 5.59, - "learning_rate": 8.867844920538184e-06, - "loss": 0.0858, + "learning_rate": 1.888521367388014e-05, + "loss": 0.0248, "step": 119730 }, { "epoch": 5.59, - "learning_rate": 8.867376119263046e-06, - "loss": 0.0285, + "learning_rate": 1.8884745604044123e-05, + "loss": 0.0459, "step": 119735 }, { "epoch": 5.59, - "learning_rate": 8.866907317987905e-06, - "loss": 0.0522, + "learning_rate": 1.8884277534208103e-05, + "loss": 0.1057, "step": 119740 }, { "epoch": 5.59, - "learning_rate": 8.866438516712767e-06, - "loss": 0.0483, + "learning_rate": 1.8883809464372083e-05, + "loss": 0.0945, "step": 119745 }, { "epoch": 5.59, - "learning_rate": 8.865969715437627e-06, - "loss": 0.136, + "learning_rate": 1.8883341394536066e-05, + "loss": 0.1188, "step": 119750 }, { "epoch": 5.59, - "learning_rate": 8.865500914162487e-06, - "loss": 0.2257, + "learning_rate": 1.8882873324700046e-05, + "loss": 0.2639, "step": 119755 }, { "epoch": 5.59, - "learning_rate": 8.865032112887347e-06, - "loss": 0.0746, + "learning_rate": 1.8882405254864026e-05, + "loss": 0.0627, "step": 119760 }, { "epoch": 5.59, - "learning_rate": 8.864563311612209e-06, - "loss": 0.0402, + "learning_rate": 1.8881937185028006e-05, + "loss": 0.0338, "step": 119765 }, { "epoch": 5.59, - "learning_rate": 8.864094510337068e-06, - "loss": 0.009, + "learning_rate": 1.888146911519199e-05, + "loss": 0.0531, "step": 119770 }, { "epoch": 5.59, - "learning_rate": 8.86362570906193e-06, - "loss": 0.019, + "learning_rate": 1.888100104535597e-05, + "loss": 0.0347, "step": 119775 }, { "epoch": 5.59, - "learning_rate": 8.86315690778679e-06, - "loss": 0.0348, + "learning_rate": 1.888053297551995e-05, + "loss": 0.0702, "step": 119780 }, { "epoch": 5.59, - "learning_rate": 8.86268810651165e-06, - "loss": 0.0352, + "learning_rate": 1.8880064905683928e-05, + "loss": 0.0288, "step": 119785 }, { "epoch": 5.59, - "learning_rate": 8.862219305236512e-06, - "loss": 0.0701, + "learning_rate": 1.887959683584791e-05, + "loss": 0.0429, "step": 119790 }, { "epoch": 5.59, - "learning_rate": 8.861750503961372e-06, - "loss": 0.0804, + "learning_rate": 1.8879128766011888e-05, + "loss": 0.1011, "step": 119795 }, { "epoch": 5.59, - "learning_rate": 8.861281702686231e-06, - "loss": 0.1885, + "learning_rate": 1.8878660696175868e-05, + "loss": 0.0813, "step": 119800 }, { "epoch": 5.59, - "learning_rate": 8.860812901411091e-06, - "loss": 0.2723, + "learning_rate": 1.887819262633985e-05, + "loss": 0.1929, "step": 119805 }, { "epoch": 5.59, - "learning_rate": 8.860344100135953e-06, - "loss": 0.0655, + "learning_rate": 1.887772455650383e-05, + "loss": 0.1139, "step": 119810 }, { "epoch": 5.59, - "learning_rate": 8.859875298860815e-06, - "loss": 0.0128, + "learning_rate": 1.887725648666781e-05, + "loss": 0.028, "step": 119815 }, { "epoch": 5.59, - "learning_rate": 8.859406497585675e-06, - "loss": 0.0367, + "learning_rate": 1.887678841683179e-05, + "loss": 0.0131, "step": 119820 }, { "epoch": 5.59, - "learning_rate": 8.858937696310535e-06, - "loss": 0.0403, + "learning_rate": 1.8876320346995774e-05, + "loss": 0.0283, "step": 119825 }, { "epoch": 5.59, - "learning_rate": 8.858468895035394e-06, - "loss": 0.0554, + "learning_rate": 1.8875852277159753e-05, + "loss": 0.0257, "step": 119830 }, { "epoch": 5.59, - "learning_rate": 8.858000093760256e-06, - "loss": 0.0495, + "learning_rate": 1.8875384207323733e-05, + "loss": 0.0436, "step": 119835 }, { "epoch": 5.59, - "learning_rate": 8.857531292485116e-06, - "loss": 0.0872, + "learning_rate": 1.8874916137487713e-05, + "loss": 0.0507, "step": 119840 }, { "epoch": 5.59, - "learning_rate": 8.857062491209976e-06, - "loss": 0.0816, + "learning_rate": 1.8874448067651696e-05, + "loss": 0.099, "step": 119845 }, { "epoch": 5.59, - "learning_rate": 8.856593689934838e-06, - "loss": 0.0809, + "learning_rate": 1.8873979997815676e-05, + "loss": 0.1385, "step": 119850 }, { "epoch": 5.59, - "learning_rate": 8.856124888659698e-06, - "loss": 0.2087, + "learning_rate": 1.8873511927979656e-05, + "loss": 0.1359, "step": 119855 }, { "epoch": 5.59, - "learning_rate": 8.855656087384559e-06, - "loss": 0.1141, + "learning_rate": 1.8873043858143636e-05, + "loss": 0.0702, "step": 119860 }, { "epoch": 5.59, - "learning_rate": 8.855187286109419e-06, - "loss": 0.0173, + "learning_rate": 1.8872575788307615e-05, + "loss": 0.0137, "step": 119865 }, { "epoch": 5.59, - "learning_rate": 8.854718484834279e-06, - "loss": 0.0073, + "learning_rate": 1.8872107718471595e-05, + "loss": 0.0066, "step": 119870 }, { "epoch": 5.59, - "learning_rate": 8.854249683559139e-06, - "loss": 0.0335, + "learning_rate": 1.8871639648635575e-05, + "loss": 0.0199, "step": 119875 }, { "epoch": 5.59, - "learning_rate": 8.853780882284e-06, - "loss": 0.0421, + "learning_rate": 1.887117157879956e-05, + "loss": 0.0143, "step": 119880 }, { "epoch": 5.59, - "learning_rate": 8.85331208100886e-06, - "loss": 0.0388, + "learning_rate": 1.8870703508963538e-05, + "loss": 0.0902, "step": 119885 }, { "epoch": 5.59, - "learning_rate": 8.852843279733722e-06, - "loss": 0.0666, + "learning_rate": 1.8870235439127518e-05, + "loss": 0.0887, "step": 119890 }, { "epoch": 5.59, - "learning_rate": 8.852374478458582e-06, - "loss": 0.047, + "learning_rate": 1.8869767369291498e-05, + "loss": 0.107, "step": 119895 }, { "epoch": 5.59, - "learning_rate": 8.851905677183442e-06, - "loss": 0.0245, + "learning_rate": 1.886929929945548e-05, + "loss": 0.1275, "step": 119900 }, { "epoch": 5.59, - "learning_rate": 8.851436875908304e-06, - "loss": 0.2843, + "learning_rate": 1.886883122961946e-05, + "loss": 0.1876, "step": 119905 }, { "epoch": 5.6, - "learning_rate": 8.850968074633164e-06, - "loss": 0.1261, + "learning_rate": 1.886836315978344e-05, + "loss": 0.0976, "step": 119910 }, { "epoch": 5.6, - "learning_rate": 8.850499273358023e-06, - "loss": 0.0316, + "learning_rate": 1.8867895089947424e-05, + "loss": 0.0502, "step": 119915 }, { "epoch": 5.6, - "learning_rate": 8.850030472082885e-06, - "loss": 0.0323, + "learning_rate": 1.88674270201114e-05, + "loss": 0.015, "step": 119920 }, { "epoch": 5.6, - "learning_rate": 8.849561670807745e-06, - "loss": 0.0421, + "learning_rate": 1.886695895027538e-05, + "loss": 0.0338, "step": 119925 }, { "epoch": 5.6, - "learning_rate": 8.849092869532607e-06, - "loss": 0.0284, + "learning_rate": 1.886649088043936e-05, + "loss": 0.0337, "step": 119930 }, { "epoch": 5.6, - "learning_rate": 8.848624068257467e-06, - "loss": 0.1214, + "learning_rate": 1.8866022810603343e-05, + "loss": 0.0736, "step": 119935 }, { "epoch": 5.6, - "learning_rate": 8.848155266982327e-06, - "loss": 0.0307, + "learning_rate": 1.8865554740767323e-05, + "loss": 0.0409, "step": 119940 }, { "epoch": 5.6, - "learning_rate": 8.847686465707186e-06, - "loss": 0.0841, + "learning_rate": 1.8865086670931303e-05, + "loss": 0.0849, "step": 119945 }, { "epoch": 5.6, - "learning_rate": 8.847217664432048e-06, - "loss": 0.1378, + "learning_rate": 1.8864618601095283e-05, + "loss": 0.2122, "step": 119950 }, { "epoch": 5.6, - "learning_rate": 8.846748863156908e-06, - "loss": 0.2313, + "learning_rate": 1.8864150531259266e-05, + "loss": 0.1422, "step": 119955 }, { "epoch": 5.6, - "learning_rate": 8.84628006188177e-06, - "loss": 0.0613, + "learning_rate": 1.8863682461423246e-05, + "loss": 0.0657, "step": 119960 }, { "epoch": 5.6, - "learning_rate": 8.84581126060663e-06, - "loss": 0.0208, + "learning_rate": 1.8863214391587225e-05, + "loss": 0.0201, "step": 119965 }, { "epoch": 5.6, - "learning_rate": 8.84534245933149e-06, - "loss": 0.0379, + "learning_rate": 1.8862746321751205e-05, + "loss": 0.0323, "step": 119970 }, { "epoch": 5.6, - "learning_rate": 8.844873658056351e-06, - "loss": 0.0749, + "learning_rate": 1.886227825191519e-05, + "loss": 0.027, "step": 119975 }, { "epoch": 5.6, - "learning_rate": 8.844404856781211e-06, - "loss": 0.0433, + "learning_rate": 1.8861810182079168e-05, + "loss": 0.005, "step": 119980 }, { "epoch": 5.6, - "learning_rate": 8.843936055506071e-06, - "loss": 0.0779, + "learning_rate": 1.8861342112243145e-05, + "loss": 0.0629, "step": 119985 }, { "epoch": 5.6, - "learning_rate": 8.843467254230933e-06, - "loss": 0.0543, + "learning_rate": 1.8860874042407128e-05, + "loss": 0.0415, "step": 119990 }, { "epoch": 5.6, - "learning_rate": 8.842998452955793e-06, - "loss": 0.1193, + "learning_rate": 1.8860405972571108e-05, + "loss": 0.0974, "step": 119995 }, { "epoch": 5.6, - "learning_rate": 8.842529651680654e-06, - "loss": 0.0715, + "learning_rate": 1.8859937902735087e-05, + "loss": 0.1274, "step": 120000 }, { "epoch": 5.6, - "learning_rate": 8.842060850405514e-06, - "loss": 0.2445, + "learning_rate": 1.8859469832899067e-05, + "loss": 0.2085, "step": 120005 }, { "epoch": 5.6, - "learning_rate": 8.841592049130374e-06, - "loss": 0.1004, + "learning_rate": 1.885900176306305e-05, + "loss": 0.0971, "step": 120010 }, { "epoch": 5.6, - "learning_rate": 8.841123247855234e-06, - "loss": 0.0173, + "learning_rate": 1.885853369322703e-05, + "loss": 0.0274, "step": 120015 }, { "epoch": 5.6, - "learning_rate": 8.840654446580096e-06, - "loss": 0.0236, + "learning_rate": 1.885806562339101e-05, + "loss": 0.0178, "step": 120020 }, { "epoch": 5.6, - "learning_rate": 8.840185645304956e-06, - "loss": 0.0354, + "learning_rate": 1.885759755355499e-05, + "loss": 0.0141, "step": 120025 }, { "epoch": 5.6, - "learning_rate": 8.839716844029817e-06, - "loss": 0.0385, + "learning_rate": 1.8857129483718973e-05, + "loss": 0.0349, "step": 120030 }, { "epoch": 5.6, - "learning_rate": 8.839248042754677e-06, - "loss": 0.0291, + "learning_rate": 1.8856661413882953e-05, + "loss": 0.0233, "step": 120035 }, { "epoch": 5.6, - "learning_rate": 8.838779241479537e-06, - "loss": 0.0931, + "learning_rate": 1.8856193344046933e-05, + "loss": 0.0743, "step": 120040 }, { "epoch": 5.6, - "learning_rate": 8.838310440204399e-06, - "loss": 0.1174, + "learning_rate": 1.8855725274210913e-05, + "loss": 0.0753, "step": 120045 }, { "epoch": 5.6, - "learning_rate": 8.837841638929259e-06, - "loss": 0.1647, + "learning_rate": 1.8855257204374892e-05, + "loss": 0.0903, "step": 120050 }, { "epoch": 5.6, - "learning_rate": 8.837372837654119e-06, - "loss": 0.1759, + "learning_rate": 1.8854789134538872e-05, + "loss": 0.081, "step": 120055 }, { "epoch": 5.6, - "learning_rate": 8.836904036378978e-06, - "loss": 0.1054, + "learning_rate": 1.8854321064702852e-05, + "loss": 0.0984, "step": 120060 }, { "epoch": 5.6, - "learning_rate": 8.83643523510384e-06, - "loss": 0.0341, + "learning_rate": 1.8853852994866835e-05, + "loss": 0.035, "step": 120065 }, { "epoch": 5.6, - "learning_rate": 8.835966433828702e-06, - "loss": 0.0125, + "learning_rate": 1.8853384925030815e-05, + "loss": 0.0242, "step": 120070 }, { "epoch": 5.6, - "learning_rate": 8.835497632553562e-06, - "loss": 0.0664, + "learning_rate": 1.8852916855194795e-05, + "loss": 0.0575, "step": 120075 }, { "epoch": 5.6, - "learning_rate": 8.835028831278422e-06, - "loss": 0.0394, + "learning_rate": 1.8852448785358775e-05, + "loss": 0.0311, "step": 120080 }, { "epoch": 5.6, - "learning_rate": 8.834560030003282e-06, + "learning_rate": 1.8851980715522758e-05, "loss": 0.0754, "step": 120085 }, { "epoch": 5.6, - "learning_rate": 8.834091228728143e-06, - "loss": 0.038, + "learning_rate": 1.8851512645686738e-05, + "loss": 0.0205, "step": 120090 }, { "epoch": 5.6, - "learning_rate": 8.833622427453003e-06, - "loss": 0.0584, + "learning_rate": 1.8851044575850718e-05, + "loss": 0.0902, "step": 120095 }, { "epoch": 5.6, - "learning_rate": 8.833153626177863e-06, - "loss": 0.1227, + "learning_rate": 1.88505765060147e-05, + "loss": 0.1371, "step": 120100 }, { "epoch": 5.6, - "learning_rate": 8.832684824902725e-06, - "loss": 0.2044, + "learning_rate": 1.885010843617868e-05, + "loss": 0.1828, "step": 120105 }, { "epoch": 5.6, - "learning_rate": 8.832216023627585e-06, - "loss": 0.0853, + "learning_rate": 1.8849640366342657e-05, + "loss": 0.063, "step": 120110 }, { "epoch": 5.6, - "learning_rate": 8.831747222352446e-06, - "loss": 0.0226, + "learning_rate": 1.8849172296506637e-05, + "loss": 0.0343, "step": 120115 }, { "epoch": 5.6, - "learning_rate": 8.831278421077306e-06, - "loss": 0.0363, + "learning_rate": 1.884870422667062e-05, + "loss": 0.003, "step": 120120 }, { "epoch": 5.61, - "learning_rate": 8.830809619802166e-06, - "loss": 0.0535, + "learning_rate": 1.88482361568346e-05, + "loss": 0.0101, "step": 120125 }, { "epoch": 5.61, - "learning_rate": 8.830340818527026e-06, - "loss": 0.0198, + "learning_rate": 1.884776808699858e-05, + "loss": 0.0323, "step": 120130 }, { "epoch": 5.61, - "learning_rate": 8.829872017251888e-06, - "loss": 0.073, + "learning_rate": 1.884730001716256e-05, + "loss": 0.0501, "step": 120135 }, { "epoch": 5.61, - "learning_rate": 8.829403215976748e-06, - "loss": 0.0491, + "learning_rate": 1.8846831947326543e-05, + "loss": 0.0792, "step": 120140 }, { "epoch": 5.61, - "learning_rate": 8.82893441470161e-06, - "loss": 0.1289, + "learning_rate": 1.8846363877490523e-05, + "loss": 0.0437, "step": 120145 }, { "epoch": 5.61, - "learning_rate": 8.82846561342647e-06, - "loss": 0.1145, + "learning_rate": 1.8845895807654502e-05, + "loss": 0.0588, "step": 120150 }, { "epoch": 5.61, - "learning_rate": 8.827996812151329e-06, - "loss": 0.3176, + "learning_rate": 1.8845427737818482e-05, + "loss": 0.2882, "step": 120155 }, { "epoch": 5.61, - "learning_rate": 8.82752801087619e-06, - "loss": 0.0561, + "learning_rate": 1.8844959667982465e-05, + "loss": 0.1062, "step": 120160 }, { "epoch": 5.61, - "learning_rate": 8.82705920960105e-06, - "loss": 0.0329, + "learning_rate": 1.8844491598146445e-05, + "loss": 0.0384, "step": 120165 }, { "epoch": 5.61, - "learning_rate": 8.82659040832591e-06, - "loss": 0.0153, + "learning_rate": 1.8844023528310425e-05, + "loss": 0.0138, "step": 120170 }, { "epoch": 5.61, - "learning_rate": 8.826121607050772e-06, - "loss": 0.0214, + "learning_rate": 1.8843555458474405e-05, + "loss": 0.0778, "step": 120175 }, { "epoch": 5.61, - "learning_rate": 8.825652805775632e-06, - "loss": 0.0454, + "learning_rate": 1.8843087388638385e-05, + "loss": 0.0341, "step": 120180 }, { "epoch": 5.61, - "learning_rate": 8.825184004500494e-06, - "loss": 0.0659, + "learning_rate": 1.8842619318802364e-05, + "loss": 0.0299, "step": 120185 }, { "epoch": 5.61, - "learning_rate": 8.824715203225354e-06, - "loss": 0.0598, + "learning_rate": 1.8842151248966344e-05, + "loss": 0.0699, "step": 120190 }, { "epoch": 5.61, - "learning_rate": 8.824246401950214e-06, - "loss": 0.0828, + "learning_rate": 1.8841683179130327e-05, + "loss": 0.0686, "step": 120195 }, { "epoch": 5.61, - "learning_rate": 8.823777600675074e-06, - "loss": 0.1036, + "learning_rate": 1.8841215109294307e-05, + "loss": 0.1264, "step": 120200 }, { "epoch": 5.61, - "learning_rate": 8.823308799399935e-06, - "loss": 0.1692, + "learning_rate": 1.8840747039458287e-05, + "loss": 0.1429, "step": 120205 }, { "epoch": 5.61, - "learning_rate": 8.822839998124795e-06, - "loss": 0.0779, + "learning_rate": 1.8840278969622267e-05, + "loss": 0.0734, "step": 120210 }, { "epoch": 5.61, - "learning_rate": 8.822371196849657e-06, - "loss": 0.0143, + "learning_rate": 1.883981089978625e-05, + "loss": 0.0026, "step": 120215 }, { "epoch": 5.61, - "learning_rate": 8.821902395574517e-06, - "loss": 0.0251, + "learning_rate": 1.883934282995023e-05, + "loss": 0.0089, "step": 120220 }, { "epoch": 5.61, - "learning_rate": 8.821433594299377e-06, - "loss": 0.0392, + "learning_rate": 1.883887476011421e-05, + "loss": 0.0463, "step": 120225 }, { "epoch": 5.61, - "learning_rate": 8.820964793024238e-06, - "loss": 0.0599, + "learning_rate": 1.8838406690278193e-05, + "loss": 0.0355, "step": 120230 }, { "epoch": 5.61, - "learning_rate": 8.820495991749098e-06, - "loss": 0.0332, + "learning_rate": 1.883793862044217e-05, + "loss": 0.0378, "step": 120235 }, { "epoch": 5.61, - "learning_rate": 8.820027190473958e-06, - "loss": 0.0763, + "learning_rate": 1.883747055060615e-05, + "loss": 0.109, "step": 120240 }, { "epoch": 5.61, - "learning_rate": 8.81955838919882e-06, - "loss": 0.0837, + "learning_rate": 1.883700248077013e-05, + "loss": 0.0648, "step": 120245 }, { "epoch": 5.61, - "learning_rate": 8.81908958792368e-06, - "loss": 0.1332, + "learning_rate": 1.8836534410934112e-05, + "loss": 0.1534, "step": 120250 }, { "epoch": 5.61, - "learning_rate": 8.818620786648541e-06, - "loss": 0.2816, + "learning_rate": 1.8836066341098092e-05, + "loss": 0.2537, "step": 120255 }, { "epoch": 5.61, - "learning_rate": 8.818151985373401e-06, - "loss": 0.0975, + "learning_rate": 1.8835598271262072e-05, + "loss": 0.0976, "step": 120260 }, { "epoch": 5.61, - "learning_rate": 8.817683184098261e-06, - "loss": 0.0202, + "learning_rate": 1.8835130201426052e-05, + "loss": 0.02, "step": 120265 }, { "epoch": 5.61, - "learning_rate": 8.817214382823121e-06, - "loss": 0.0069, + "learning_rate": 1.8834662131590035e-05, + "loss": 0.0574, "step": 120270 }, { "epoch": 5.61, - "learning_rate": 8.816745581547983e-06, - "loss": 0.0596, + "learning_rate": 1.8834194061754015e-05, + "loss": 0.0297, "step": 120275 }, { "epoch": 5.61, - "learning_rate": 8.816276780272843e-06, - "loss": 0.0557, + "learning_rate": 1.8833725991917995e-05, + "loss": 0.0492, "step": 120280 }, { "epoch": 5.61, - "learning_rate": 8.815807978997704e-06, - "loss": 0.0752, + "learning_rate": 1.8833257922081978e-05, + "loss": 0.0155, "step": 120285 }, { "epoch": 5.61, - "learning_rate": 8.815339177722564e-06, - "loss": 0.0758, + "learning_rate": 1.8832789852245958e-05, + "loss": 0.0341, "step": 120290 }, { "epoch": 5.61, - "learning_rate": 8.814870376447424e-06, - "loss": 0.0373, + "learning_rate": 1.8832321782409937e-05, + "loss": 0.039, "step": 120295 }, { "epoch": 5.61, - "learning_rate": 8.814401575172286e-06, - "loss": 0.1627, + "learning_rate": 1.8831853712573914e-05, + "loss": 0.1376, "step": 120300 }, { "epoch": 5.61, - "learning_rate": 8.813932773897146e-06, - "loss": 0.3789, + "learning_rate": 1.8831385642737897e-05, + "loss": 0.1482, "step": 120305 }, { "epoch": 5.61, - "learning_rate": 8.813463972622006e-06, + "learning_rate": 1.8830917572901877e-05, "loss": 0.0673, "step": 120310 }, { "epoch": 5.61, - "learning_rate": 8.812995171346866e-06, - "loss": 0.0227, + "learning_rate": 1.8830449503065857e-05, + "loss": 0.0191, "step": 120315 }, { "epoch": 5.61, - "learning_rate": 8.812526370071727e-06, - "loss": 0.0275, + "learning_rate": 1.8829981433229836e-05, + "loss": 0.0193, "step": 120320 }, { "epoch": 5.61, - "learning_rate": 8.812057568796589e-06, - "loss": 0.0843, + "learning_rate": 1.882951336339382e-05, + "loss": 0.0169, "step": 120325 }, { "epoch": 5.61, - "learning_rate": 8.811588767521449e-06, - "loss": 0.0238, + "learning_rate": 1.88290452935578e-05, + "loss": 0.0454, "step": 120330 }, { "epoch": 5.61, - "learning_rate": 8.811119966246309e-06, - "loss": 0.0491, + "learning_rate": 1.882857722372178e-05, + "loss": 0.0916, "step": 120335 }, { "epoch": 5.62, - "learning_rate": 8.810651164971169e-06, - "loss": 0.0876, + "learning_rate": 1.882810915388576e-05, + "loss": 0.1172, "step": 120340 }, { "epoch": 5.62, - "learning_rate": 8.81018236369603e-06, - "loss": 0.2452, + "learning_rate": 1.8827641084049742e-05, + "loss": 0.0621, "step": 120345 }, { "epoch": 5.62, - "learning_rate": 8.80971356242089e-06, - "loss": 0.1704, + "learning_rate": 1.8827173014213722e-05, + "loss": 0.1475, "step": 120350 }, { "epoch": 5.62, - "learning_rate": 8.80924476114575e-06, - "loss": 0.1962, + "learning_rate": 1.8826704944377702e-05, + "loss": 0.2018, "step": 120355 }, { "epoch": 5.62, - "learning_rate": 8.808775959870612e-06, - "loss": 0.109, + "learning_rate": 1.8826236874541685e-05, + "loss": 0.0621, "step": 120360 }, { "epoch": 5.62, - "learning_rate": 8.808307158595472e-06, - "loss": 0.028, + "learning_rate": 1.882576880470566e-05, + "loss": 0.0064, "step": 120365 }, { "epoch": 5.62, - "learning_rate": 8.807838357320333e-06, - "loss": 0.0145, + "learning_rate": 1.882530073486964e-05, + "loss": 0.0507, "step": 120370 }, { "epoch": 5.62, - "learning_rate": 8.807369556045193e-06, - "loss": 0.0325, + "learning_rate": 1.882483266503362e-05, + "loss": 0.0473, "step": 120375 }, { "epoch": 5.62, - "learning_rate": 8.806900754770053e-06, - "loss": 0.0495, + "learning_rate": 1.8824364595197604e-05, + "loss": 0.0136, "step": 120380 }, { "epoch": 5.62, - "learning_rate": 8.806431953494913e-06, - "loss": 0.0622, + "learning_rate": 1.8823896525361584e-05, + "loss": 0.0611, "step": 120385 }, { "epoch": 5.62, - "learning_rate": 8.805963152219775e-06, - "loss": 0.0602, + "learning_rate": 1.8823428455525564e-05, + "loss": 0.0614, "step": 120390 }, { "epoch": 5.62, - "learning_rate": 8.805494350944636e-06, - "loss": 0.0733, + "learning_rate": 1.8822960385689544e-05, + "loss": 0.0576, "step": 120395 }, { "epoch": 5.62, - "learning_rate": 8.805025549669496e-06, - "loss": 0.102, + "learning_rate": 1.8822492315853527e-05, + "loss": 0.1005, "step": 120400 }, { "epoch": 5.62, - "learning_rate": 8.804556748394356e-06, - "loss": 0.1688, + "learning_rate": 1.8822024246017507e-05, + "loss": 0.319, "step": 120405 }, { "epoch": 5.62, - "learning_rate": 8.804087947119216e-06, - "loss": 0.1227, + "learning_rate": 1.8821556176181487e-05, + "loss": 0.0975, "step": 120410 }, { "epoch": 5.62, - "learning_rate": 8.803619145844078e-06, - "loss": 0.0216, + "learning_rate": 1.882108810634547e-05, + "loss": 0.0087, "step": 120415 }, { "epoch": 5.62, - "learning_rate": 8.803150344568938e-06, - "loss": 0.0512, + "learning_rate": 1.882062003650945e-05, + "loss": 0.0273, "step": 120420 }, { "epoch": 5.62, - "learning_rate": 8.802681543293798e-06, - "loss": 0.0906, + "learning_rate": 1.8820151966673426e-05, + "loss": 0.0137, "step": 120425 }, { "epoch": 5.62, - "learning_rate": 8.80221274201866e-06, - "loss": 0.058, + "learning_rate": 1.8819683896837406e-05, + "loss": 0.0199, "step": 120430 }, { "epoch": 5.62, - "learning_rate": 8.80174394074352e-06, - "loss": 0.0543, + "learning_rate": 1.881921582700139e-05, + "loss": 0.0478, "step": 120435 }, { "epoch": 5.62, - "learning_rate": 8.801275139468381e-06, - "loss": 0.0677, + "learning_rate": 1.881874775716537e-05, + "loss": 0.0341, "step": 120440 }, { "epoch": 5.62, - "learning_rate": 8.80080633819324e-06, - "loss": 0.1163, + "learning_rate": 1.881827968732935e-05, + "loss": 0.0708, "step": 120445 }, { "epoch": 5.62, - "learning_rate": 8.8003375369181e-06, - "loss": 0.0707, + "learning_rate": 1.881781161749333e-05, + "loss": 0.1398, "step": 120450 }, { "epoch": 5.62, - "learning_rate": 8.79986873564296e-06, - "loss": 0.1521, + "learning_rate": 1.8817343547657312e-05, + "loss": 0.1351, "step": 120455 }, { "epoch": 5.62, - "learning_rate": 8.799399934367822e-06, - "loss": 0.1123, + "learning_rate": 1.8816875477821292e-05, + "loss": 0.0988, "step": 120460 }, { "epoch": 5.62, - "learning_rate": 8.798931133092682e-06, - "loss": 0.007, + "learning_rate": 1.881640740798527e-05, + "loss": 0.0176, "step": 120465 }, { "epoch": 5.62, - "learning_rate": 8.798462331817544e-06, - "loss": 0.0445, + "learning_rate": 1.8815939338149255e-05, + "loss": 0.025, "step": 120470 }, { "epoch": 5.62, - "learning_rate": 8.797993530542404e-06, - "loss": 0.0482, + "learning_rate": 1.8815471268313235e-05, + "loss": 0.0368, "step": 120475 }, { "epoch": 5.62, - "learning_rate": 8.797524729267264e-06, - "loss": 0.0576, + "learning_rate": 1.8815003198477214e-05, + "loss": 0.0138, "step": 120480 }, { "epoch": 5.62, - "learning_rate": 8.797055927992125e-06, - "loss": 0.0592, + "learning_rate": 1.8814535128641194e-05, + "loss": 0.0642, "step": 120485 }, { "epoch": 5.62, - "learning_rate": 8.796587126716985e-06, - "loss": 0.0581, + "learning_rate": 1.8814067058805174e-05, + "loss": 0.0705, "step": 120490 }, { "epoch": 5.62, - "learning_rate": 8.796118325441845e-06, - "loss": 0.1706, + "learning_rate": 1.8813598988969154e-05, + "loss": 0.0866, "step": 120495 }, { "epoch": 5.62, - "learning_rate": 8.795649524166707e-06, - "loss": 0.0427, + "learning_rate": 1.8813130919133134e-05, + "loss": 0.0554, "step": 120500 }, { "epoch": 5.62, - "learning_rate": 8.795180722891567e-06, - "loss": 0.1339, + "learning_rate": 1.8812662849297113e-05, + "loss": 0.2841, "step": 120505 }, { "epoch": 5.62, - "learning_rate": 8.794711921616428e-06, - "loss": 0.1533, + "learning_rate": 1.8812194779461097e-05, + "loss": 0.0993, "step": 120510 }, { "epoch": 5.62, - "learning_rate": 8.794243120341288e-06, - "loss": 0.0617, + "learning_rate": 1.8811726709625076e-05, + "loss": 0.0064, "step": 120515 }, { "epoch": 5.62, - "learning_rate": 8.793774319066148e-06, - "loss": 0.0421, + "learning_rate": 1.8811258639789056e-05, + "loss": 0.0532, "step": 120520 }, { "epoch": 5.62, - "learning_rate": 8.793305517791008e-06, - "loss": 0.0331, + "learning_rate": 1.8810790569953036e-05, + "loss": 0.0596, "step": 120525 }, { "epoch": 5.62, - "learning_rate": 8.79283671651587e-06, - "loss": 0.0872, + "learning_rate": 1.881032250011702e-05, + "loss": 0.0262, "step": 120530 }, { "epoch": 5.62, - "learning_rate": 8.79236791524073e-06, - "loss": 0.0362, + "learning_rate": 1.8809854430281e-05, + "loss": 0.0456, "step": 120535 }, { "epoch": 5.62, - "learning_rate": 8.791899113965591e-06, - "loss": 0.0676, + "learning_rate": 1.880938636044498e-05, + "loss": 0.0897, "step": 120540 }, { "epoch": 5.62, - "learning_rate": 8.791430312690451e-06, - "loss": 0.0603, + "learning_rate": 1.8808918290608962e-05, + "loss": 0.1132, "step": 120545 }, { "epoch": 5.63, - "learning_rate": 8.790961511415311e-06, - "loss": 0.1401, + "learning_rate": 1.8808450220772942e-05, + "loss": 0.3046, "step": 120550 }, { "epoch": 5.63, - "learning_rate": 8.790492710140173e-06, - "loss": 0.408, + "learning_rate": 1.880798215093692e-05, + "loss": 0.3111, "step": 120555 }, { "epoch": 5.63, - "learning_rate": 8.790023908865033e-06, - "loss": 0.086, + "learning_rate": 1.8807514081100898e-05, + "loss": 0.0733, "step": 120560 }, { "epoch": 5.63, - "learning_rate": 8.789555107589893e-06, - "loss": 0.009, + "learning_rate": 1.880704601126488e-05, + "loss": 0.0854, "step": 120565 }, { "epoch": 5.63, - "learning_rate": 8.789086306314754e-06, - "loss": 0.0105, + "learning_rate": 1.880657794142886e-05, + "loss": 0.0424, "step": 120570 }, { "epoch": 5.63, - "learning_rate": 8.788617505039614e-06, - "loss": 0.0204, + "learning_rate": 1.880610987159284e-05, + "loss": 0.0572, "step": 120575 }, { "epoch": 5.63, - "learning_rate": 8.788148703764476e-06, - "loss": 0.0137, + "learning_rate": 1.880564180175682e-05, + "loss": 0.0525, "step": 120580 }, { "epoch": 5.63, - "learning_rate": 8.787679902489336e-06, - "loss": 0.0386, + "learning_rate": 1.8805173731920804e-05, + "loss": 0.0623, "step": 120585 }, { "epoch": 5.63, - "learning_rate": 8.787211101214196e-06, - "loss": 0.1609, + "learning_rate": 1.8804705662084784e-05, + "loss": 0.0583, "step": 120590 }, { "epoch": 5.63, - "learning_rate": 8.786742299939056e-06, - "loss": 0.1054, + "learning_rate": 1.8804237592248764e-05, + "loss": 0.117, "step": 120595 }, { "epoch": 5.63, - "learning_rate": 8.786273498663917e-06, - "loss": 0.1123, + "learning_rate": 1.8803769522412747e-05, + "loss": 0.1971, "step": 120600 }, { "epoch": 5.63, - "learning_rate": 8.785804697388777e-06, - "loss": 0.4036, + "learning_rate": 1.8803301452576727e-05, + "loss": 0.3374, "step": 120605 }, { "epoch": 5.63, - "learning_rate": 8.785335896113639e-06, - "loss": 0.0696, + "learning_rate": 1.8802833382740707e-05, + "loss": 0.0902, "step": 120610 }, { "epoch": 5.63, - "learning_rate": 8.784867094838499e-06, - "loss": 0.0098, + "learning_rate": 1.8802365312904683e-05, + "loss": 0.0179, "step": 120615 }, { "epoch": 5.63, - "learning_rate": 8.784398293563359e-06, - "loss": 0.0668, + "learning_rate": 1.8801897243068666e-05, + "loss": 0.0427, "step": 120620 }, { "epoch": 5.63, - "learning_rate": 8.78392949228822e-06, - "loss": 0.025, + "learning_rate": 1.8801429173232646e-05, + "loss": 0.0344, "step": 120625 }, { "epoch": 5.63, - "learning_rate": 8.78346069101308e-06, - "loss": 0.0302, + "learning_rate": 1.8800961103396626e-05, + "loss": 0.0097, "step": 120630 }, { "epoch": 5.63, - "learning_rate": 8.78299188973794e-06, - "loss": 0.0711, + "learning_rate": 1.8800493033560606e-05, + "loss": 0.0768, "step": 120635 }, { "epoch": 5.63, - "learning_rate": 8.7825230884628e-06, - "loss": 0.0719, + "learning_rate": 1.880002496372459e-05, + "loss": 0.0828, "step": 120640 }, { "epoch": 5.63, - "learning_rate": 8.782054287187662e-06, - "loss": 0.1177, + "learning_rate": 1.879955689388857e-05, + "loss": 0.0948, "step": 120645 }, { "epoch": 5.63, - "learning_rate": 8.781585485912524e-06, - "loss": 0.0952, + "learning_rate": 1.879908882405255e-05, + "loss": 0.1453, "step": 120650 }, { "epoch": 5.63, - "learning_rate": 8.781116684637383e-06, - "loss": 0.2589, + "learning_rate": 1.879862075421653e-05, + "loss": 0.216, "step": 120655 }, { "epoch": 5.63, - "learning_rate": 8.780647883362243e-06, - "loss": 0.1077, + "learning_rate": 1.879815268438051e-05, + "loss": 0.0939, "step": 120660 }, { "epoch": 5.63, - "learning_rate": 8.780179082087103e-06, - "loss": 0.0141, + "learning_rate": 1.879768461454449e-05, + "loss": 0.012, "step": 120665 }, { "epoch": 5.63, - "learning_rate": 8.779710280811965e-06, - "loss": 0.0121, + "learning_rate": 1.879721654470847e-05, + "loss": 0.0218, "step": 120670 }, { "epoch": 5.63, - "learning_rate": 8.779241479536825e-06, - "loss": 0.0531, + "learning_rate": 1.8796748474872454e-05, + "loss": 0.0257, "step": 120675 }, { "epoch": 5.63, - "learning_rate": 8.778772678261685e-06, - "loss": 0.0406, + "learning_rate": 1.879628040503643e-05, + "loss": 0.0281, "step": 120680 }, { "epoch": 5.63, - "learning_rate": 8.778303876986546e-06, - "loss": 0.0293, + "learning_rate": 1.879581233520041e-05, + "loss": 0.0494, "step": 120685 }, { "epoch": 5.63, - "learning_rate": 8.777835075711406e-06, - "loss": 0.0547, + "learning_rate": 1.879534426536439e-05, + "loss": 0.1003, "step": 120690 }, { "epoch": 5.63, - "learning_rate": 8.777366274436268e-06, - "loss": 0.0778, + "learning_rate": 1.8794876195528374e-05, + "loss": 0.0717, "step": 120695 }, { "epoch": 5.63, - "learning_rate": 8.776897473161128e-06, - "loss": 0.202, + "learning_rate": 1.8794408125692353e-05, + "loss": 0.1786, "step": 120700 }, { "epoch": 5.63, - "learning_rate": 8.776428671885988e-06, - "loss": 0.1561, + "learning_rate": 1.8793940055856333e-05, + "loss": 0.2206, "step": 120705 }, { "epoch": 5.63, - "learning_rate": 8.775959870610848e-06, - "loss": 0.1044, + "learning_rate": 1.8793471986020316e-05, + "loss": 0.0828, "step": 120710 }, { "epoch": 5.63, - "learning_rate": 8.77549106933571e-06, - "loss": 0.0201, + "learning_rate": 1.8793003916184296e-05, + "loss": 0.0705, "step": 120715 }, { "epoch": 5.63, - "learning_rate": 8.77502226806057e-06, - "loss": 0.0178, + "learning_rate": 1.8792535846348276e-05, + "loss": 0.033, "step": 120720 }, { "epoch": 5.63, - "learning_rate": 8.774553466785431e-06, - "loss": 0.023, + "learning_rate": 1.8792067776512256e-05, + "loss": 0.0256, "step": 120725 }, { "epoch": 5.63, - "learning_rate": 8.774084665510291e-06, - "loss": 0.0474, + "learning_rate": 1.879159970667624e-05, + "loss": 0.0249, "step": 120730 }, { "epoch": 5.63, - "learning_rate": 8.773615864235151e-06, - "loss": 0.08, + "learning_rate": 1.879113163684022e-05, + "loss": 0.0509, "step": 120735 }, { "epoch": 5.63, - "learning_rate": 8.773147062960012e-06, - "loss": 0.0566, + "learning_rate": 1.87906635670042e-05, + "loss": 0.0195, "step": 120740 }, { "epoch": 5.63, - "learning_rate": 8.772678261684872e-06, - "loss": 0.1142, + "learning_rate": 1.8790195497168175e-05, + "loss": 0.1077, "step": 120745 }, { "epoch": 5.63, - "learning_rate": 8.772209460409732e-06, - "loss": 0.1128, + "learning_rate": 1.878972742733216e-05, + "loss": 0.1794, "step": 120750 }, { "epoch": 5.63, - "learning_rate": 8.771740659134594e-06, - "loss": 0.3239, + "learning_rate": 1.8789259357496138e-05, + "loss": 0.1985, "step": 120755 }, { "epoch": 5.63, - "learning_rate": 8.771271857859454e-06, - "loss": 0.0961, + "learning_rate": 1.8788791287660118e-05, + "loss": 0.0742, "step": 120760 }, { "epoch": 5.64, - "learning_rate": 8.770803056584316e-06, - "loss": 0.0186, + "learning_rate": 1.8788323217824098e-05, + "loss": 0.0192, "step": 120765 }, { "epoch": 5.64, - "learning_rate": 8.770334255309175e-06, - "loss": 0.0067, + "learning_rate": 1.878785514798808e-05, + "loss": 0.0282, "step": 120770 }, { "epoch": 5.64, - "learning_rate": 8.769865454034035e-06, - "loss": 0.0287, + "learning_rate": 1.878738707815206e-05, + "loss": 0.0483, "step": 120775 }, { "epoch": 5.64, - "learning_rate": 8.769396652758895e-06, - "loss": 0.036, + "learning_rate": 1.878691900831604e-05, + "loss": 0.0243, "step": 120780 }, { "epoch": 5.64, - "learning_rate": 8.768927851483757e-06, - "loss": 0.0441, + "learning_rate": 1.8786450938480024e-05, + "loss": 0.0335, "step": 120785 }, { "epoch": 5.64, - "learning_rate": 8.768459050208617e-06, - "loss": 0.0816, + "learning_rate": 1.8785982868644004e-05, + "loss": 0.0519, "step": 120790 }, { "epoch": 5.64, - "learning_rate": 8.767990248933479e-06, - "loss": 0.0845, + "learning_rate": 1.8785514798807984e-05, + "loss": 0.1148, "step": 120795 }, { "epoch": 5.64, - "learning_rate": 8.767521447658338e-06, - "loss": 0.1825, + "learning_rate": 1.8785046728971963e-05, + "loss": 0.0935, "step": 120800 }, { "epoch": 5.64, - "learning_rate": 8.767052646383198e-06, - "loss": 0.2885, + "learning_rate": 1.8784578659135943e-05, + "loss": 0.275, "step": 120805 }, { "epoch": 5.64, - "learning_rate": 8.76658384510806e-06, - "loss": 0.066, + "learning_rate": 1.8784110589299923e-05, + "loss": 0.0934, "step": 120810 }, { "epoch": 5.64, - "learning_rate": 8.76611504383292e-06, - "loss": 0.0268, + "learning_rate": 1.8783642519463903e-05, + "loss": 0.0219, "step": 120815 }, { "epoch": 5.64, - "learning_rate": 8.76564624255778e-06, - "loss": 0.021, + "learning_rate": 1.8783174449627883e-05, + "loss": 0.0559, "step": 120820 }, { "epoch": 5.64, - "learning_rate": 8.765177441282642e-06, - "loss": 0.0076, + "learning_rate": 1.8782706379791866e-05, + "loss": 0.0597, "step": 120825 }, { "epoch": 5.64, - "learning_rate": 8.764708640007501e-06, - "loss": 0.0261, + "learning_rate": 1.8782238309955846e-05, + "loss": 0.028, "step": 120830 }, { "epoch": 5.64, - "learning_rate": 8.764239838732363e-06, - "loss": 0.0538, + "learning_rate": 1.8781770240119825e-05, + "loss": 0.0331, "step": 120835 }, { "epoch": 5.64, - "learning_rate": 8.763771037457223e-06, - "loss": 0.2024, + "learning_rate": 1.878130217028381e-05, + "loss": 0.0267, "step": 120840 }, { "epoch": 5.64, - "learning_rate": 8.763302236182083e-06, - "loss": 0.1074, + "learning_rate": 1.878083410044779e-05, + "loss": 0.1039, "step": 120845 }, { "epoch": 5.64, - "learning_rate": 8.762833434906943e-06, - "loss": 0.0938, + "learning_rate": 1.8780366030611768e-05, + "loss": 0.1586, "step": 120850 }, { "epoch": 5.64, - "learning_rate": 8.762364633631805e-06, - "loss": 0.3917, + "learning_rate": 1.8779897960775748e-05, + "loss": 0.188, "step": 120855 }, { "epoch": 5.64, - "learning_rate": 8.761895832356664e-06, - "loss": 0.1177, + "learning_rate": 1.877942989093973e-05, + "loss": 0.0842, "step": 120860 }, { "epoch": 5.64, - "learning_rate": 8.761427031081526e-06, - "loss": 0.0216, + "learning_rate": 1.877896182110371e-05, + "loss": 0.012, "step": 120865 }, { "epoch": 5.64, - "learning_rate": 8.760958229806386e-06, - "loss": 0.008, + "learning_rate": 1.8778493751267688e-05, + "loss": 0.0388, "step": 120870 }, { "epoch": 5.64, - "learning_rate": 8.760489428531246e-06, - "loss": 0.013, + "learning_rate": 1.8778025681431667e-05, + "loss": 0.0318, "step": 120875 }, { "epoch": 5.64, - "learning_rate": 8.760020627256108e-06, - "loss": 0.0699, + "learning_rate": 1.877755761159565e-05, + "loss": 0.0576, "step": 120880 }, { "epoch": 5.64, - "learning_rate": 8.759551825980967e-06, - "loss": 0.0934, + "learning_rate": 1.877708954175963e-05, + "loss": 0.0876, "step": 120885 }, { "epoch": 5.64, - "learning_rate": 8.759083024705827e-06, - "loss": 0.1244, + "learning_rate": 1.877662147192361e-05, + "loss": 0.0616, "step": 120890 }, { "epoch": 5.64, - "learning_rate": 8.758614223430687e-06, - "loss": 0.0992, + "learning_rate": 1.8776153402087593e-05, + "loss": 0.0956, "step": 120895 }, { "epoch": 5.64, - "learning_rate": 8.758145422155549e-06, - "loss": 0.127, + "learning_rate": 1.8775685332251573e-05, + "loss": 0.1531, "step": 120900 }, { "epoch": 5.64, - "learning_rate": 8.75767662088041e-06, - "loss": 0.1656, + "learning_rate": 1.8775217262415553e-05, + "loss": 0.2597, "step": 120905 }, { "epoch": 5.64, - "learning_rate": 8.75720781960527e-06, - "loss": 0.068, + "learning_rate": 1.8774749192579533e-05, + "loss": 0.0719, "step": 120910 }, { "epoch": 5.64, - "learning_rate": 8.75673901833013e-06, - "loss": 0.0229, + "learning_rate": 1.8774281122743516e-05, + "loss": 0.0203, "step": 120915 }, { "epoch": 5.64, - "learning_rate": 8.75627021705499e-06, - "loss": 0.0325, + "learning_rate": 1.8773813052907496e-05, + "loss": 0.0513, "step": 120920 }, { "epoch": 5.64, - "learning_rate": 8.755801415779852e-06, - "loss": 0.0386, + "learning_rate": 1.8773344983071476e-05, + "loss": 0.0443, "step": 120925 }, { "epoch": 5.64, - "learning_rate": 8.755332614504712e-06, - "loss": 0.0399, + "learning_rate": 1.8772876913235456e-05, + "loss": 0.0563, "step": 120930 }, { "epoch": 5.64, - "learning_rate": 8.754863813229572e-06, - "loss": 0.1117, + "learning_rate": 1.8772408843399435e-05, + "loss": 0.0239, "step": 120935 }, { "epoch": 5.64, - "learning_rate": 8.754395011954434e-06, - "loss": 0.0591, + "learning_rate": 1.8771940773563415e-05, + "loss": 0.0744, "step": 120940 }, { "epoch": 5.64, - "learning_rate": 8.753926210679293e-06, - "loss": 0.0365, + "learning_rate": 1.8771472703727395e-05, + "loss": 0.0838, "step": 120945 }, { "epoch": 5.64, - "learning_rate": 8.753457409404155e-06, - "loss": 0.1393, + "learning_rate": 1.8771004633891375e-05, + "loss": 0.0627, "step": 120950 }, { "epoch": 5.64, - "learning_rate": 8.752988608129015e-06, - "loss": 0.2302, + "learning_rate": 1.8770536564055358e-05, + "loss": 0.2828, "step": 120955 }, { "epoch": 5.64, - "learning_rate": 8.752519806853875e-06, - "loss": 0.1134, + "learning_rate": 1.8770068494219338e-05, + "loss": 0.0612, "step": 120960 }, { "epoch": 5.64, - "learning_rate": 8.752051005578735e-06, - "loss": 0.0075, + "learning_rate": 1.8769600424383318e-05, + "loss": 0.0606, "step": 120965 }, { "epoch": 5.64, - "learning_rate": 8.751582204303597e-06, - "loss": 0.0238, + "learning_rate": 1.87691323545473e-05, + "loss": 0.014, "step": 120970 }, { "epoch": 5.64, - "learning_rate": 8.751113403028456e-06, - "loss": 0.0522, + "learning_rate": 1.876866428471128e-05, + "loss": 0.0107, "step": 120975 }, { "epoch": 5.65, - "learning_rate": 8.750644601753318e-06, - "loss": 0.0661, + "learning_rate": 1.876819621487526e-05, + "loss": 0.0385, "step": 120980 }, { "epoch": 5.65, - "learning_rate": 8.750175800478178e-06, - "loss": 0.0672, + "learning_rate": 1.876772814503924e-05, + "loss": 0.0542, "step": 120985 }, { "epoch": 5.65, - "learning_rate": 8.749706999203038e-06, - "loss": 0.019, + "learning_rate": 1.8767260075203224e-05, + "loss": 0.0777, "step": 120990 }, { "epoch": 5.65, - "learning_rate": 8.7492381979279e-06, - "loss": 0.0414, + "learning_rate": 1.87667920053672e-05, + "loss": 0.1134, "step": 120995 }, { "epoch": 5.65, - "learning_rate": 8.74876939665276e-06, - "loss": 0.0741, + "learning_rate": 1.876632393553118e-05, + "loss": 0.1055, "step": 121000 }, { "epoch": 5.65, - "learning_rate": 8.74830059537762e-06, - "loss": 0.1203, + "learning_rate": 1.876585586569516e-05, + "loss": 0.2188, "step": 121005 }, { "epoch": 5.65, - "learning_rate": 8.747831794102481e-06, - "loss": 0.0929, + "learning_rate": 1.8765387795859143e-05, + "loss": 0.0585, "step": 121010 }, { "epoch": 5.65, - "learning_rate": 8.747362992827341e-06, - "loss": 0.021, + "learning_rate": 1.8764919726023123e-05, + "loss": 0.0288, "step": 121015 }, { "epoch": 5.65, - "learning_rate": 8.746894191552203e-06, - "loss": 0.031, + "learning_rate": 1.8764451656187102e-05, + "loss": 0.0231, "step": 121020 }, { "epoch": 5.65, - "learning_rate": 8.746425390277063e-06, - "loss": 0.0166, + "learning_rate": 1.8763983586351086e-05, + "loss": 0.0502, "step": 121025 }, { "epoch": 5.65, - "learning_rate": 8.745956589001923e-06, - "loss": 0.0182, + "learning_rate": 1.8763515516515065e-05, + "loss": 0.0206, "step": 121030 }, { "epoch": 5.65, - "learning_rate": 8.745487787726782e-06, - "loss": 0.0552, + "learning_rate": 1.8763047446679045e-05, + "loss": 0.0643, "step": 121035 }, { "epoch": 5.65, - "learning_rate": 8.745018986451644e-06, - "loss": 0.0744, + "learning_rate": 1.8762579376843025e-05, + "loss": 0.0576, "step": 121040 }, { "epoch": 5.65, - "learning_rate": 8.744550185176504e-06, - "loss": 0.1044, + "learning_rate": 1.8762111307007008e-05, + "loss": 0.0638, "step": 121045 }, { "epoch": 5.65, - "learning_rate": 8.744081383901366e-06, - "loss": 0.1489, + "learning_rate": 1.8761643237170988e-05, + "loss": 0.1182, "step": 121050 }, { "epoch": 5.65, - "learning_rate": 8.743612582626226e-06, - "loss": 0.2528, + "learning_rate": 1.8761175167334968e-05, + "loss": 0.3559, "step": 121055 }, { "epoch": 5.65, - "learning_rate": 8.743143781351086e-06, - "loss": 0.1242, + "learning_rate": 1.8760707097498944e-05, + "loss": 0.0832, "step": 121060 }, { "epoch": 5.65, - "learning_rate": 8.742674980075947e-06, - "loss": 0.0168, + "learning_rate": 1.8760239027662928e-05, + "loss": 0.0073, "step": 121065 }, { "epoch": 5.65, - "learning_rate": 8.742206178800807e-06, - "loss": 0.0694, + "learning_rate": 1.8759770957826907e-05, + "loss": 0.0331, "step": 121070 }, { "epoch": 5.65, - "learning_rate": 8.741737377525667e-06, - "loss": 0.0196, + "learning_rate": 1.8759302887990887e-05, + "loss": 0.0127, "step": 121075 }, { "epoch": 5.65, - "learning_rate": 8.741268576250529e-06, - "loss": 0.0649, + "learning_rate": 1.875883481815487e-05, + "loss": 0.0211, "step": 121080 }, { "epoch": 5.65, - "learning_rate": 8.740799774975389e-06, - "loss": 0.0702, + "learning_rate": 1.875836674831885e-05, + "loss": 0.1742, "step": 121085 }, { "epoch": 5.65, - "learning_rate": 8.74033097370025e-06, - "loss": 0.1481, + "learning_rate": 1.875789867848283e-05, + "loss": 0.0458, "step": 121090 }, { "epoch": 5.65, - "learning_rate": 8.73986217242511e-06, - "loss": 0.0995, + "learning_rate": 1.875743060864681e-05, + "loss": 0.1068, "step": 121095 }, { "epoch": 5.65, - "learning_rate": 8.73939337114997e-06, - "loss": 0.0291, + "learning_rate": 1.8756962538810793e-05, + "loss": 0.1189, "step": 121100 }, { "epoch": 5.65, - "learning_rate": 8.73892456987483e-06, - "loss": 0.2236, + "learning_rate": 1.8756494468974773e-05, + "loss": 0.2919, "step": 121105 }, { "epoch": 5.65, - "learning_rate": 8.738455768599692e-06, - "loss": 0.0785, + "learning_rate": 1.8756026399138753e-05, + "loss": 0.0858, "step": 121110 }, { "epoch": 5.65, - "learning_rate": 8.737986967324552e-06, - "loss": 0.0299, + "learning_rate": 1.8755558329302733e-05, + "loss": 0.0235, "step": 121115 }, { "epoch": 5.65, - "learning_rate": 8.737518166049413e-06, - "loss": 0.02, + "learning_rate": 1.8755090259466712e-05, + "loss": 0.0323, "step": 121120 }, { "epoch": 5.65, - "learning_rate": 8.737049364774273e-06, - "loss": 0.0413, + "learning_rate": 1.8754622189630692e-05, + "loss": 0.0392, "step": 121125 }, { "epoch": 5.65, - "learning_rate": 8.736580563499133e-06, - "loss": 0.0468, + "learning_rate": 1.8754154119794672e-05, + "loss": 0.0321, "step": 121130 }, { "epoch": 5.65, - "learning_rate": 8.736111762223995e-06, - "loss": 0.037, + "learning_rate": 1.8753686049958652e-05, + "loss": 0.0849, "step": 121135 }, { "epoch": 5.65, - "learning_rate": 8.735642960948855e-06, - "loss": 0.0316, + "learning_rate": 1.8753217980122635e-05, + "loss": 0.1075, "step": 121140 }, { "epoch": 5.65, - "learning_rate": 8.735174159673715e-06, - "loss": 0.0658, + "learning_rate": 1.8752749910286615e-05, + "loss": 0.0997, "step": 121145 }, { "epoch": 5.65, - "learning_rate": 8.734705358398574e-06, - "loss": 0.1111, + "learning_rate": 1.8752281840450595e-05, + "loss": 0.0512, "step": 121150 }, { "epoch": 5.65, - "learning_rate": 8.734236557123436e-06, - "loss": 0.2218, + "learning_rate": 1.8751813770614578e-05, + "loss": 0.2451, "step": 121155 }, { "epoch": 5.65, - "learning_rate": 8.733767755848298e-06, - "loss": 0.0878, + "learning_rate": 1.8751345700778558e-05, + "loss": 0.0955, "step": 121160 }, { "epoch": 5.65, - "learning_rate": 8.733298954573158e-06, - "loss": 0.0094, + "learning_rate": 1.8750877630942537e-05, + "loss": 0.0175, "step": 121165 }, { "epoch": 5.65, - "learning_rate": 8.732830153298018e-06, - "loss": 0.0084, + "learning_rate": 1.8750409561106517e-05, + "loss": 0.0328, "step": 121170 }, { "epoch": 5.65, - "learning_rate": 8.732361352022878e-06, - "loss": 0.0156, + "learning_rate": 1.87499414912705e-05, + "loss": 0.0424, "step": 121175 }, { "epoch": 5.65, - "learning_rate": 8.73189255074774e-06, - "loss": 0.0222, + "learning_rate": 1.874947342143448e-05, + "loss": 0.0126, "step": 121180 }, { "epoch": 5.65, - "learning_rate": 8.731423749472599e-06, - "loss": 0.0394, + "learning_rate": 1.8749005351598457e-05, + "loss": 0.0989, "step": 121185 }, { "epoch": 5.65, - "learning_rate": 8.730954948197459e-06, - "loss": 0.0921, + "learning_rate": 1.8748537281762437e-05, + "loss": 0.0586, "step": 121190 }, { "epoch": 5.66, - "learning_rate": 8.73048614692232e-06, - "loss": 0.0598, + "learning_rate": 1.874806921192642e-05, + "loss": 0.1308, "step": 121195 }, { "epoch": 5.66, - "learning_rate": 8.73001734564718e-06, - "loss": 0.1878, + "learning_rate": 1.87476011420904e-05, + "loss": 0.09, "step": 121200 }, { "epoch": 5.66, - "learning_rate": 8.729548544372042e-06, - "loss": 0.1715, + "learning_rate": 1.874713307225438e-05, + "loss": 0.2123, "step": 121205 }, { "epoch": 5.66, - "learning_rate": 8.729079743096902e-06, - "loss": 0.0855, + "learning_rate": 1.8746665002418363e-05, + "loss": 0.0854, "step": 121210 }, { "epoch": 5.66, - "learning_rate": 8.728610941821762e-06, - "loss": 0.0546, + "learning_rate": 1.8746196932582342e-05, + "loss": 0.005, "step": 121215 }, { "epoch": 5.66, - "learning_rate": 8.728142140546622e-06, - "loss": 0.0424, + "learning_rate": 1.8745728862746322e-05, + "loss": 0.0324, "step": 121220 }, { "epoch": 5.66, - "learning_rate": 8.727673339271484e-06, - "loss": 0.0218, + "learning_rate": 1.8745260792910302e-05, + "loss": 0.0055, "step": 121225 }, { "epoch": 5.66, - "learning_rate": 8.727204537996344e-06, - "loss": 0.0289, + "learning_rate": 1.8744792723074285e-05, + "loss": 0.0295, "step": 121230 }, { "epoch": 5.66, - "learning_rate": 8.726735736721205e-06, - "loss": 0.0941, + "learning_rate": 1.8744324653238265e-05, + "loss": 0.046, "step": 121235 }, { "epoch": 5.66, - "learning_rate": 8.726266935446065e-06, - "loss": 0.0837, + "learning_rate": 1.8743856583402245e-05, + "loss": 0.0975, "step": 121240 }, { "epoch": 5.66, - "learning_rate": 8.725798134170925e-06, - "loss": 0.1324, + "learning_rate": 1.8743388513566225e-05, + "loss": 0.1128, "step": 121245 }, { "epoch": 5.66, - "learning_rate": 8.725329332895787e-06, - "loss": 0.0728, + "learning_rate": 1.8742920443730205e-05, + "loss": 0.0838, "step": 121250 }, { "epoch": 5.66, - "learning_rate": 8.724860531620647e-06, - "loss": 0.3384, + "learning_rate": 1.8742452373894184e-05, + "loss": 0.1896, "step": 121255 }, { "epoch": 5.66, - "learning_rate": 8.724391730345507e-06, - "loss": 0.1231, + "learning_rate": 1.8741984304058164e-05, + "loss": 0.0823, "step": 121260 }, { "epoch": 5.66, - "learning_rate": 8.723922929070368e-06, - "loss": 0.0359, + "learning_rate": 1.8741516234222147e-05, + "loss": 0.0169, "step": 121265 }, { "epoch": 5.66, - "learning_rate": 8.723454127795228e-06, - "loss": 0.0195, + "learning_rate": 1.8741048164386127e-05, + "loss": 0.0227, "step": 121270 }, { "epoch": 5.66, - "learning_rate": 8.72298532652009e-06, - "loss": 0.0173, + "learning_rate": 1.8740580094550107e-05, + "loss": 0.0422, "step": 121275 }, { "epoch": 5.66, - "learning_rate": 8.72251652524495e-06, - "loss": 0.0182, + "learning_rate": 1.8740112024714087e-05, + "loss": 0.0232, "step": 121280 }, { "epoch": 5.66, - "learning_rate": 8.72204772396981e-06, - "loss": 0.0437, + "learning_rate": 1.873964395487807e-05, + "loss": 0.0378, "step": 121285 }, { "epoch": 5.66, - "learning_rate": 8.72157892269467e-06, - "loss": 0.0569, + "learning_rate": 1.873917588504205e-05, + "loss": 0.0568, "step": 121290 }, { "epoch": 5.66, - "learning_rate": 8.721110121419531e-06, - "loss": 0.1449, + "learning_rate": 1.873870781520603e-05, + "loss": 0.0821, "step": 121295 }, { "epoch": 5.66, - "learning_rate": 8.720641320144391e-06, - "loss": 0.1186, + "learning_rate": 1.873823974537001e-05, + "loss": 0.0535, "step": 121300 }, { "epoch": 5.66, - "learning_rate": 8.720172518869253e-06, - "loss": 0.1919, + "learning_rate": 1.8737771675533993e-05, + "loss": 0.1339, "step": 121305 }, { "epoch": 5.66, - "learning_rate": 8.719703717594113e-06, - "loss": 0.1022, + "learning_rate": 1.873730360569797e-05, + "loss": 0.1029, "step": 121310 }, { "epoch": 5.66, - "learning_rate": 8.719234916318973e-06, - "loss": 0.0067, + "learning_rate": 1.873683553586195e-05, + "loss": 0.0358, "step": 121315 }, { "epoch": 5.66, - "learning_rate": 8.718766115043834e-06, - "loss": 0.0549, + "learning_rate": 1.8736367466025932e-05, + "loss": 0.0411, "step": 121320 }, { "epoch": 5.66, - "learning_rate": 8.718297313768694e-06, - "loss": 0.0299, + "learning_rate": 1.8735899396189912e-05, + "loss": 0.0169, "step": 121325 }, { "epoch": 5.66, - "learning_rate": 8.717828512493554e-06, - "loss": 0.0199, + "learning_rate": 1.8735431326353892e-05, + "loss": 0.025, "step": 121330 }, { "epoch": 5.66, - "learning_rate": 8.717359711218416e-06, - "loss": 0.112, + "learning_rate": 1.873496325651787e-05, + "loss": 0.0205, "step": 121335 }, { "epoch": 5.66, - "learning_rate": 8.716890909943276e-06, - "loss": 0.0789, + "learning_rate": 1.8734495186681855e-05, + "loss": 0.0646, "step": 121340 }, { "epoch": 5.66, - "learning_rate": 8.716422108668137e-06, - "loss": 0.1316, + "learning_rate": 1.8734027116845835e-05, + "loss": 0.025, "step": 121345 }, { "epoch": 5.66, - "learning_rate": 8.715953307392997e-06, - "loss": 0.0963, + "learning_rate": 1.8733559047009814e-05, + "loss": 0.069, "step": 121350 }, { "epoch": 5.66, - "learning_rate": 8.715484506117857e-06, - "loss": 0.3448, + "learning_rate": 1.8733090977173794e-05, + "loss": 0.275, "step": 121355 }, { "epoch": 5.66, - "learning_rate": 8.715015704842717e-06, - "loss": 0.1192, + "learning_rate": 1.8732622907337777e-05, + "loss": 0.0707, "step": 121360 }, { "epoch": 5.66, - "learning_rate": 8.714546903567579e-06, - "loss": 0.0082, + "learning_rate": 1.8732154837501757e-05, + "loss": 0.0059, "step": 121365 }, { "epoch": 5.66, - "learning_rate": 8.714078102292439e-06, - "loss": 0.0202, + "learning_rate": 1.8731686767665737e-05, + "loss": 0.0349, "step": 121370 }, { "epoch": 5.66, - "learning_rate": 8.7136093010173e-06, - "loss": 0.0125, + "learning_rate": 1.8731218697829714e-05, + "loss": 0.0019, "step": 121375 }, { "epoch": 5.66, - "learning_rate": 8.71314049974216e-06, - "loss": 0.0271, + "learning_rate": 1.8730750627993697e-05, + "loss": 0.0479, "step": 121380 }, { "epoch": 5.66, - "learning_rate": 8.71267169846702e-06, - "loss": 0.0258, + "learning_rate": 1.8730282558157677e-05, + "loss": 0.0366, "step": 121385 }, { "epoch": 5.66, - "learning_rate": 8.712202897191882e-06, - "loss": 0.1085, + "learning_rate": 1.8729814488321656e-05, + "loss": 0.0477, "step": 121390 }, { "epoch": 5.66, - "learning_rate": 8.711734095916742e-06, - "loss": 0.0546, + "learning_rate": 1.872934641848564e-05, + "loss": 0.043, "step": 121395 }, { "epoch": 5.66, - "learning_rate": 8.711265294641602e-06, - "loss": 0.1266, + "learning_rate": 1.872887834864962e-05, + "loss": 0.1003, "step": 121400 }, { "epoch": 5.66, - "learning_rate": 8.710796493366462e-06, - "loss": 0.2042, + "learning_rate": 1.87284102788136e-05, + "loss": 0.3862, "step": 121405 }, { "epoch": 5.67, - "learning_rate": 8.710327692091323e-06, - "loss": 0.0827, + "learning_rate": 1.872794220897758e-05, + "loss": 0.1099, "step": 121410 }, { "epoch": 5.67, - "learning_rate": 8.709858890816185e-06, - "loss": 0.0069, + "learning_rate": 1.8727474139141562e-05, + "loss": 0.0023, "step": 121415 }, { "epoch": 5.67, - "learning_rate": 8.709390089541045e-06, - "loss": 0.0238, + "learning_rate": 1.8727006069305542e-05, + "loss": 0.0365, "step": 121420 }, { "epoch": 5.67, - "learning_rate": 8.708921288265905e-06, - "loss": 0.02, + "learning_rate": 1.8726537999469522e-05, + "loss": 0.087, "step": 121425 }, { "epoch": 5.67, - "learning_rate": 8.708452486990765e-06, - "loss": 0.1003, + "learning_rate": 1.87260699296335e-05, + "loss": 0.0135, "step": 121430 }, { "epoch": 5.67, - "learning_rate": 8.707983685715626e-06, - "loss": 0.0489, + "learning_rate": 1.872560185979748e-05, + "loss": 0.033, "step": 121435 }, { "epoch": 5.67, - "learning_rate": 8.707514884440486e-06, - "loss": 0.0287, + "learning_rate": 1.872513378996146e-05, + "loss": 0.0635, "step": 121440 }, { "epoch": 5.67, - "learning_rate": 8.707046083165346e-06, - "loss": 0.0392, + "learning_rate": 1.872466572012544e-05, + "loss": 0.0466, "step": 121445 }, { "epoch": 5.67, - "learning_rate": 8.706577281890208e-06, - "loss": 0.1954, + "learning_rate": 1.8724197650289424e-05, + "loss": 0.0917, "step": 121450 }, { "epoch": 5.67, - "learning_rate": 8.706108480615068e-06, - "loss": 0.2737, + "learning_rate": 1.8723729580453404e-05, + "loss": 0.067, "step": 121455 }, { "epoch": 5.67, - "learning_rate": 8.70563967933993e-06, - "loss": 0.0573, + "learning_rate": 1.8723261510617384e-05, + "loss": 0.0947, "step": 121460 }, { "epoch": 5.67, - "learning_rate": 8.70517087806479e-06, - "loss": 0.0106, + "learning_rate": 1.8722793440781364e-05, + "loss": 0.0035, "step": 121465 }, { "epoch": 5.67, - "learning_rate": 8.70470207678965e-06, - "loss": 0.0469, + "learning_rate": 1.8722325370945347e-05, + "loss": 0.0093, "step": 121470 }, { "epoch": 5.67, - "learning_rate": 8.704233275514509e-06, - "loss": 0.0898, + "learning_rate": 1.8721857301109327e-05, + "loss": 0.0337, "step": 121475 }, { "epoch": 5.67, - "learning_rate": 8.70376447423937e-06, - "loss": 0.0407, + "learning_rate": 1.8721389231273307e-05, + "loss": 0.0788, "step": 121480 }, { "epoch": 5.67, - "learning_rate": 8.70329567296423e-06, - "loss": 0.0257, + "learning_rate": 1.8720921161437286e-05, + "loss": 0.0569, "step": 121485 }, { "epoch": 5.67, - "learning_rate": 8.702826871689092e-06, - "loss": 0.0931, + "learning_rate": 1.872045309160127e-05, + "loss": 0.0787, "step": 121490 }, { "epoch": 5.67, - "learning_rate": 8.702358070413952e-06, - "loss": 0.0459, + "learning_rate": 1.871998502176525e-05, + "loss": 0.1495, "step": 121495 }, { "epoch": 5.67, - "learning_rate": 8.701889269138812e-06, - "loss": 0.0732, + "learning_rate": 1.8719516951929226e-05, + "loss": 0.0613, "step": 121500 }, { "epoch": 5.67, - "learning_rate": 8.701420467863674e-06, - "loss": 0.4671, + "learning_rate": 1.871904888209321e-05, + "loss": 0.1209, "step": 121505 }, { "epoch": 5.67, - "learning_rate": 8.700951666588534e-06, - "loss": 0.0845, + "learning_rate": 1.871858081225719e-05, + "loss": 0.0859, "step": 121510 }, { "epoch": 5.67, - "learning_rate": 8.700482865313394e-06, - "loss": 0.0177, + "learning_rate": 1.871811274242117e-05, + "loss": 0.032, "step": 121515 }, { "epoch": 5.67, - "learning_rate": 8.700014064038255e-06, - "loss": 0.0131, + "learning_rate": 1.871764467258515e-05, + "loss": 0.0652, "step": 121520 }, { "epoch": 5.67, - "learning_rate": 8.699545262763115e-06, - "loss": 0.019, + "learning_rate": 1.8717176602749132e-05, + "loss": 0.0146, "step": 121525 }, { "epoch": 5.67, - "learning_rate": 8.699076461487977e-06, - "loss": 0.0584, + "learning_rate": 1.871670853291311e-05, + "loss": 0.0311, "step": 121530 }, { "epoch": 5.67, - "learning_rate": 8.698607660212837e-06, - "loss": 0.0441, + "learning_rate": 1.871624046307709e-05, + "loss": 0.1088, "step": 121535 }, { "epoch": 5.67, - "learning_rate": 8.698138858937697e-06, - "loss": 0.0522, + "learning_rate": 1.871577239324107e-05, + "loss": 0.0362, "step": 121540 }, { "epoch": 5.67, - "learning_rate": 8.697670057662557e-06, - "loss": 0.0858, + "learning_rate": 1.8715304323405054e-05, + "loss": 0.0768, "step": 121545 }, { "epoch": 5.67, - "learning_rate": 8.697201256387418e-06, - "loss": 0.1335, + "learning_rate": 1.8714836253569034e-05, + "loss": 0.158, "step": 121550 }, { "epoch": 5.67, - "learning_rate": 8.696732455112278e-06, - "loss": 0.1201, + "learning_rate": 1.8714368183733014e-05, + "loss": 0.1679, "step": 121555 }, { "epoch": 5.67, - "learning_rate": 8.69626365383714e-06, - "loss": 0.0917, + "learning_rate": 1.8713900113896994e-05, + "loss": 0.1169, "step": 121560 }, { "epoch": 5.67, - "learning_rate": 8.695794852562e-06, - "loss": 0.0097, + "learning_rate": 1.8713432044060974e-05, + "loss": 0.0274, "step": 121565 }, { "epoch": 5.67, - "learning_rate": 8.69532605128686e-06, - "loss": 0.0137, + "learning_rate": 1.8712963974224954e-05, + "loss": 0.0237, "step": 121570 }, { "epoch": 5.67, - "learning_rate": 8.694857250011721e-06, - "loss": 0.0482, + "learning_rate": 1.8712495904388933e-05, + "loss": 0.0348, "step": 121575 }, { "epoch": 5.67, - "learning_rate": 8.694388448736581e-06, - "loss": 0.0272, + "learning_rate": 1.8712027834552917e-05, + "loss": 0.0375, "step": 121580 }, { "epoch": 5.67, - "learning_rate": 8.693919647461441e-06, - "loss": 0.0768, + "learning_rate": 1.8711559764716896e-05, + "loss": 0.0218, "step": 121585 }, { "epoch": 5.67, - "learning_rate": 8.693450846186303e-06, - "loss": 0.0801, + "learning_rate": 1.8711091694880876e-05, + "loss": 0.0942, "step": 121590 }, { "epoch": 5.67, - "learning_rate": 8.692982044911163e-06, - "loss": 0.0882, + "learning_rate": 1.8710623625044856e-05, + "loss": 0.0947, "step": 121595 }, { "epoch": 5.67, - "learning_rate": 8.692513243636024e-06, - "loss": 0.11, + "learning_rate": 1.871015555520884e-05, + "loss": 0.3221, "step": 121600 }, { "epoch": 5.67, - "learning_rate": 8.692044442360884e-06, - "loss": 0.1673, + "learning_rate": 1.870968748537282e-05, + "loss": 0.4223, "step": 121605 }, { "epoch": 5.67, - "learning_rate": 8.691575641085744e-06, - "loss": 0.0453, + "learning_rate": 1.87092194155368e-05, + "loss": 0.119, "step": 121610 }, { "epoch": 5.67, - "learning_rate": 8.691106839810604e-06, - "loss": 0.0225, + "learning_rate": 1.870875134570078e-05, + "loss": 0.0156, "step": 121615 }, { "epoch": 5.67, - "learning_rate": 8.690638038535466e-06, - "loss": 0.0338, + "learning_rate": 1.8708283275864762e-05, + "loss": 0.0656, "step": 121620 }, { "epoch": 5.68, - "learning_rate": 8.690169237260326e-06, - "loss": 0.0197, + "learning_rate": 1.8707815206028738e-05, + "loss": 0.0686, "step": 121625 }, { "epoch": 5.68, - "learning_rate": 8.689700435985187e-06, - "loss": 0.0459, + "learning_rate": 1.8707347136192718e-05, + "loss": 0.0975, "step": 121630 }, { "epoch": 5.68, - "learning_rate": 8.689231634710047e-06, - "loss": 0.0443, + "learning_rate": 1.87068790663567e-05, + "loss": 0.0266, "step": 121635 }, { "epoch": 5.68, - "learning_rate": 8.688762833434907e-06, - "loss": 0.0928, + "learning_rate": 1.870641099652068e-05, + "loss": 0.1522, "step": 121640 }, { "epoch": 5.68, - "learning_rate": 8.688294032159769e-06, - "loss": 0.0791, + "learning_rate": 1.870594292668466e-05, + "loss": 0.1413, "step": 121645 }, { "epoch": 5.68, - "learning_rate": 8.687825230884629e-06, - "loss": 0.1801, + "learning_rate": 1.870547485684864e-05, + "loss": 0.2692, "step": 121650 }, { "epoch": 5.68, - "learning_rate": 8.687356429609489e-06, - "loss": 0.2516, + "learning_rate": 1.8705006787012624e-05, + "loss": 0.2215, "step": 121655 }, { "epoch": 5.68, - "learning_rate": 8.686887628334349e-06, - "loss": 0.0967, + "learning_rate": 1.8704538717176604e-05, + "loss": 0.099, "step": 121660 }, { "epoch": 5.68, - "learning_rate": 8.68641882705921e-06, - "loss": 0.0315, + "learning_rate": 1.8704070647340584e-05, + "loss": 0.0172, "step": 121665 }, { "epoch": 5.68, - "learning_rate": 8.685950025784072e-06, - "loss": 0.019, + "learning_rate": 1.8703602577504563e-05, + "loss": 0.0119, "step": 121670 }, { "epoch": 5.68, - "learning_rate": 8.685481224508932e-06, - "loss": 0.0265, + "learning_rate": 1.8703134507668547e-05, + "loss": 0.0562, "step": 121675 }, { "epoch": 5.68, - "learning_rate": 8.685012423233792e-06, - "loss": 0.0513, + "learning_rate": 1.8702666437832526e-05, + "loss": 0.0438, "step": 121680 }, { "epoch": 5.68, - "learning_rate": 8.684543621958652e-06, - "loss": 0.0754, + "learning_rate": 1.8702198367996506e-05, + "loss": 0.0568, "step": 121685 }, { "epoch": 5.68, - "learning_rate": 8.684074820683513e-06, - "loss": 0.0571, + "learning_rate": 1.8701730298160486e-05, + "loss": 0.0331, "step": 121690 }, { "epoch": 5.68, - "learning_rate": 8.683606019408373e-06, - "loss": 0.1104, + "learning_rate": 1.8701262228324466e-05, + "loss": 0.0296, "step": 121695 }, { "epoch": 5.68, - "learning_rate": 8.683137218133233e-06, - "loss": 0.0587, + "learning_rate": 1.8700794158488446e-05, + "loss": 0.1196, "step": 121700 }, { "epoch": 5.68, - "learning_rate": 8.682668416858095e-06, - "loss": 0.271, + "learning_rate": 1.8700326088652426e-05, + "loss": 0.245, "step": 121705 }, { "epoch": 5.68, - "learning_rate": 8.682199615582955e-06, - "loss": 0.1252, + "learning_rate": 1.869985801881641e-05, + "loss": 0.1076, "step": 121710 }, { "epoch": 5.68, - "learning_rate": 8.681730814307816e-06, - "loss": 0.0048, + "learning_rate": 1.869938994898039e-05, + "loss": 0.0062, "step": 121715 }, { "epoch": 5.68, - "learning_rate": 8.681262013032676e-06, - "loss": 0.0271, + "learning_rate": 1.869892187914437e-05, + "loss": 0.064, "step": 121720 }, { "epoch": 5.68, - "learning_rate": 8.680793211757536e-06, - "loss": 0.0571, + "learning_rate": 1.8698453809308348e-05, + "loss": 0.0152, "step": 121725 }, { "epoch": 5.68, - "learning_rate": 8.680324410482396e-06, - "loss": 0.0384, + "learning_rate": 1.869798573947233e-05, + "loss": 0.0443, "step": 121730 }, { "epoch": 5.68, - "learning_rate": 8.679855609207258e-06, - "loss": 0.079, + "learning_rate": 1.869751766963631e-05, + "loss": 0.0768, "step": 121735 }, { "epoch": 5.68, - "learning_rate": 8.679386807932118e-06, - "loss": 0.0465, + "learning_rate": 1.869704959980029e-05, + "loss": 0.0416, "step": 121740 }, { "epoch": 5.68, - "learning_rate": 8.67891800665698e-06, - "loss": 0.0872, + "learning_rate": 1.869658152996427e-05, + "loss": 0.0747, "step": 121745 }, { "epoch": 5.68, - "learning_rate": 8.67844920538184e-06, - "loss": 0.052, + "learning_rate": 1.8696113460128254e-05, + "loss": 0.1511, "step": 121750 }, { "epoch": 5.68, - "learning_rate": 8.6779804041067e-06, - "loss": 0.2127, + "learning_rate": 1.869564539029223e-05, + "loss": 0.2155, "step": 121755 }, { "epoch": 5.68, - "learning_rate": 8.677511602831561e-06, - "loss": 0.0734, + "learning_rate": 1.869517732045621e-05, + "loss": 0.0584, "step": 121760 }, { "epoch": 5.68, - "learning_rate": 8.677042801556421e-06, - "loss": 0.0139, + "learning_rate": 1.8694709250620194e-05, + "loss": 0.015, "step": 121765 }, { "epoch": 5.68, - "learning_rate": 8.67657400028128e-06, - "loss": 0.0552, + "learning_rate": 1.8694241180784173e-05, + "loss": 0.0469, "step": 121770 }, { "epoch": 5.68, - "learning_rate": 8.676105199006142e-06, - "loss": 0.0854, + "learning_rate": 1.8693773110948153e-05, + "loss": 0.0234, "step": 121775 }, { "epoch": 5.68, - "learning_rate": 8.675636397731002e-06, - "loss": 0.0308, + "learning_rate": 1.8693305041112133e-05, + "loss": 0.0549, "step": 121780 }, { "epoch": 5.68, - "learning_rate": 8.675167596455864e-06, - "loss": 0.0749, + "learning_rate": 1.8692836971276116e-05, + "loss": 0.0719, "step": 121785 }, { "epoch": 5.68, - "learning_rate": 8.674698795180724e-06, - "loss": 0.0589, + "learning_rate": 1.8692368901440096e-05, + "loss": 0.0967, "step": 121790 }, { "epoch": 5.68, - "learning_rate": 8.674229993905584e-06, - "loss": 0.0629, + "learning_rate": 1.8691900831604076e-05, + "loss": 0.0804, "step": 121795 }, { "epoch": 5.68, - "learning_rate": 8.673761192630444e-06, - "loss": 0.1482, + "learning_rate": 1.8691432761768056e-05, + "loss": 0.1919, "step": 121800 }, { "epoch": 5.68, - "learning_rate": 8.673292391355305e-06, - "loss": 0.3663, + "learning_rate": 1.869096469193204e-05, + "loss": 0.2051, "step": 121805 }, { "epoch": 5.68, - "learning_rate": 8.672823590080165e-06, - "loss": 0.0933, + "learning_rate": 1.869049662209602e-05, + "loss": 0.0786, "step": 121810 }, { "epoch": 5.68, - "learning_rate": 8.672354788805027e-06, - "loss": 0.0333, + "learning_rate": 1.8690028552259995e-05, + "loss": 0.0123, "step": 121815 }, { "epoch": 5.68, - "learning_rate": 8.671885987529887e-06, - "loss": 0.0032, + "learning_rate": 1.8689560482423978e-05, + "loss": 0.0837, "step": 121820 }, { "epoch": 5.68, - "learning_rate": 8.671417186254747e-06, - "loss": 0.0206, + "learning_rate": 1.8689092412587958e-05, + "loss": 0.0283, "step": 121825 }, { "epoch": 5.68, - "learning_rate": 8.670948384979608e-06, - "loss": 0.0207, + "learning_rate": 1.8688624342751938e-05, + "loss": 0.0763, "step": 121830 }, { "epoch": 5.68, - "learning_rate": 8.670479583704468e-06, - "loss": 0.099, + "learning_rate": 1.8688156272915918e-05, + "loss": 0.0401, "step": 121835 }, { "epoch": 5.69, - "learning_rate": 8.670010782429328e-06, - "loss": 0.0453, + "learning_rate": 1.86876882030799e-05, + "loss": 0.0598, "step": 121840 }, { "epoch": 5.69, - "learning_rate": 8.66954198115419e-06, - "loss": 0.1099, + "learning_rate": 1.868722013324388e-05, + "loss": 0.0923, "step": 121845 }, { "epoch": 5.69, - "learning_rate": 8.66907317987905e-06, - "loss": 0.1947, + "learning_rate": 1.868675206340786e-05, + "loss": 0.1253, "step": 121850 }, { "epoch": 5.69, - "learning_rate": 8.668604378603912e-06, - "loss": 0.2829, + "learning_rate": 1.868628399357184e-05, + "loss": 0.1636, "step": 121855 }, { "epoch": 5.69, - "learning_rate": 8.668135577328771e-06, - "loss": 0.1454, + "learning_rate": 1.8685815923735824e-05, + "loss": 0.1162, "step": 121860 }, { "epoch": 5.69, - "learning_rate": 8.667666776053631e-06, - "loss": 0.0174, + "learning_rate": 1.8685347853899803e-05, + "loss": 0.0441, "step": 121865 }, { "epoch": 5.69, - "learning_rate": 8.667197974778491e-06, - "loss": 0.035, + "learning_rate": 1.8684879784063783e-05, + "loss": 0.0236, "step": 121870 }, { "epoch": 5.69, - "learning_rate": 8.666729173503353e-06, - "loss": 0.0269, + "learning_rate": 1.8684411714227766e-05, + "loss": 0.0196, "step": 121875 }, { "epoch": 5.69, - "learning_rate": 8.666260372228213e-06, - "loss": 0.0326, + "learning_rate": 1.8683943644391743e-05, + "loss": 0.0764, "step": 121880 }, { "epoch": 5.69, - "learning_rate": 8.665791570953075e-06, - "loss": 0.0696, + "learning_rate": 1.8683475574555723e-05, + "loss": 0.0478, "step": 121885 }, { "epoch": 5.69, - "learning_rate": 8.665322769677934e-06, - "loss": 0.0183, + "learning_rate": 1.8683007504719703e-05, + "loss": 0.0162, "step": 121890 }, { "epoch": 5.69, - "learning_rate": 8.664853968402794e-06, - "loss": 0.1382, + "learning_rate": 1.8682539434883686e-05, + "loss": 0.0447, "step": 121895 }, { "epoch": 5.69, - "learning_rate": 8.664385167127656e-06, - "loss": 0.1084, + "learning_rate": 1.8682071365047666e-05, + "loss": 0.1242, "step": 121900 }, { "epoch": 5.69, - "learning_rate": 8.663916365852516e-06, - "loss": 0.2958, + "learning_rate": 1.8681603295211645e-05, + "loss": 0.2752, "step": 121905 }, { "epoch": 5.69, - "learning_rate": 8.663447564577376e-06, - "loss": 0.0655, + "learning_rate": 1.8681135225375625e-05, + "loss": 0.096, "step": 121910 }, { "epoch": 5.69, - "learning_rate": 8.662978763302236e-06, - "loss": 0.0111, + "learning_rate": 1.868066715553961e-05, + "loss": 0.0158, "step": 121915 }, { "epoch": 5.69, - "learning_rate": 8.662509962027097e-06, - "loss": 0.0775, + "learning_rate": 1.8680199085703588e-05, + "loss": 0.0333, "step": 121920 }, { "epoch": 5.69, - "learning_rate": 8.662041160751959e-06, - "loss": 0.0324, + "learning_rate": 1.8679731015867568e-05, + "loss": 0.0128, "step": 121925 }, { "epoch": 5.69, - "learning_rate": 8.661572359476819e-06, - "loss": 0.023, + "learning_rate": 1.8679262946031548e-05, + "loss": 0.0192, "step": 121930 }, { "epoch": 5.69, - "learning_rate": 8.661103558201679e-06, - "loss": 0.0433, + "learning_rate": 1.867879487619553e-05, + "loss": 0.0351, "step": 121935 }, { "epoch": 5.69, - "learning_rate": 8.660634756926539e-06, - "loss": 0.085, + "learning_rate": 1.867832680635951e-05, + "loss": 0.044, "step": 121940 }, { "epoch": 5.69, - "learning_rate": 8.6601659556514e-06, - "loss": 0.0724, + "learning_rate": 1.8677858736523487e-05, + "loss": 0.1874, "step": 121945 }, { "epoch": 5.69, - "learning_rate": 8.65969715437626e-06, - "loss": 0.1639, + "learning_rate": 1.867739066668747e-05, + "loss": 0.1017, "step": 121950 }, { "epoch": 5.69, - "learning_rate": 8.65922835310112e-06, - "loss": 0.1469, + "learning_rate": 1.867692259685145e-05, + "loss": 0.2542, "step": 121955 }, { "epoch": 5.69, - "learning_rate": 8.658759551825982e-06, - "loss": 0.1149, + "learning_rate": 1.867645452701543e-05, + "loss": 0.1181, "step": 121960 }, { "epoch": 5.69, - "learning_rate": 8.658290750550842e-06, - "loss": 0.0106, + "learning_rate": 1.867598645717941e-05, + "loss": 0.0342, "step": 121965 }, { "epoch": 5.69, - "learning_rate": 8.657821949275704e-06, - "loss": 0.024, + "learning_rate": 1.8675518387343393e-05, + "loss": 0.0336, "step": 121970 }, { "epoch": 5.69, - "learning_rate": 8.657353148000563e-06, - "loss": 0.0347, + "learning_rate": 1.8675050317507373e-05, + "loss": 0.0241, "step": 121975 }, { "epoch": 5.69, - "learning_rate": 8.656884346725423e-06, - "loss": 0.0109, + "learning_rate": 1.8674582247671353e-05, + "loss": 0.0106, "step": 121980 }, { "epoch": 5.69, - "learning_rate": 8.656415545450283e-06, - "loss": 0.0527, + "learning_rate": 1.8674114177835333e-05, + "loss": 0.0544, "step": 121985 }, { "epoch": 5.69, - "learning_rate": 8.655946744175145e-06, - "loss": 0.0848, + "learning_rate": 1.8673646107999316e-05, + "loss": 0.0257, "step": 121990 }, { "epoch": 5.69, - "learning_rate": 8.655477942900007e-06, - "loss": 0.0498, + "learning_rate": 1.8673178038163296e-05, + "loss": 0.0945, "step": 121995 }, { "epoch": 5.69, - "learning_rate": 8.655009141624867e-06, - "loss": 0.0519, + "learning_rate": 1.8672709968327275e-05, + "loss": 0.0897, "step": 122000 }, { "epoch": 5.69, - "learning_rate": 8.654540340349726e-06, - "loss": 0.1598, + "learning_rate": 1.8672241898491255e-05, + "loss": 0.2375, "step": 122005 }, { "epoch": 5.69, - "learning_rate": 8.654071539074586e-06, - "loss": 0.083, + "learning_rate": 1.8671773828655235e-05, + "loss": 0.0894, "step": 122010 }, { "epoch": 5.69, - "learning_rate": 8.653602737799448e-06, - "loss": 0.0208, + "learning_rate": 1.8671305758819215e-05, + "loss": 0.0066, "step": 122015 }, { "epoch": 5.69, - "learning_rate": 8.653133936524308e-06, - "loss": 0.0349, + "learning_rate": 1.8670837688983195e-05, + "loss": 0.0153, "step": 122020 }, { "epoch": 5.69, - "learning_rate": 8.652665135249168e-06, - "loss": 0.0397, + "learning_rate": 1.8670369619147178e-05, + "loss": 0.0158, "step": 122025 }, { "epoch": 5.69, - "learning_rate": 8.65219633397403e-06, - "loss": 0.073, + "learning_rate": 1.8669901549311158e-05, + "loss": 0.0592, "step": 122030 }, { "epoch": 5.69, - "learning_rate": 8.65172753269889e-06, - "loss": 0.0645, + "learning_rate": 1.8669433479475138e-05, + "loss": 0.0564, "step": 122035 }, { "epoch": 5.69, - "learning_rate": 8.651258731423751e-06, - "loss": 0.1669, + "learning_rate": 1.8668965409639117e-05, + "loss": 0.0561, "step": 122040 }, { "epoch": 5.69, - "learning_rate": 8.650789930148611e-06, - "loss": 0.064, + "learning_rate": 1.86684973398031e-05, + "loss": 0.1007, "step": 122045 }, { "epoch": 5.7, - "learning_rate": 8.650321128873471e-06, - "loss": 0.0985, + "learning_rate": 1.866802926996708e-05, + "loss": 0.18, "step": 122050 }, { "epoch": 5.7, - "learning_rate": 8.649852327598331e-06, - "loss": 0.3283, + "learning_rate": 1.866756120013106e-05, + "loss": 0.2254, "step": 122055 }, { "epoch": 5.7, - "learning_rate": 8.649383526323193e-06, - "loss": 0.1026, + "learning_rate": 1.8667093130295043e-05, + "loss": 0.0498, "step": 122060 }, { "epoch": 5.7, - "learning_rate": 8.648914725048052e-06, - "loss": 0.0172, + "learning_rate": 1.8666625060459023e-05, + "loss": 0.0016, "step": 122065 }, { "epoch": 5.7, - "learning_rate": 8.648445923772914e-06, - "loss": 0.0101, + "learning_rate": 1.8666156990623e-05, + "loss": 0.0283, "step": 122070 }, { "epoch": 5.7, - "learning_rate": 8.647977122497774e-06, - "loss": 0.0503, + "learning_rate": 1.866568892078698e-05, + "loss": 0.0225, "step": 122075 }, { "epoch": 5.7, - "learning_rate": 8.647508321222634e-06, - "loss": 0.0135, + "learning_rate": 1.8665220850950963e-05, + "loss": 0.0474, "step": 122080 }, { "epoch": 5.7, - "learning_rate": 8.647039519947496e-06, - "loss": 0.0292, + "learning_rate": 1.8664752781114943e-05, + "loss": 0.0124, "step": 122085 }, { "epoch": 5.7, - "learning_rate": 8.646570718672356e-06, - "loss": 0.0578, + "learning_rate": 1.8664284711278922e-05, + "loss": 0.0195, "step": 122090 }, { "epoch": 5.7, - "learning_rate": 8.646101917397215e-06, - "loss": 0.0912, + "learning_rate": 1.8663816641442902e-05, + "loss": 0.0485, "step": 122095 }, { "epoch": 5.7, - "learning_rate": 8.645633116122077e-06, - "loss": 0.1477, + "learning_rate": 1.8663348571606885e-05, + "loss": 0.0962, "step": 122100 }, { "epoch": 5.7, - "learning_rate": 8.645164314846937e-06, - "loss": 0.2143, + "learning_rate": 1.8662880501770865e-05, + "loss": 0.2183, "step": 122105 }, { "epoch": 5.7, - "learning_rate": 8.644695513571799e-06, - "loss": 0.0742, + "learning_rate": 1.8662412431934845e-05, + "loss": 0.0888, "step": 122110 }, { "epoch": 5.7, - "learning_rate": 8.644226712296659e-06, - "loss": 0.0157, + "learning_rate": 1.8661944362098828e-05, + "loss": 0.0102, "step": 122115 }, { "epoch": 5.7, - "learning_rate": 8.643757911021518e-06, - "loss": 0.0245, + "learning_rate": 1.8661476292262808e-05, + "loss": 0.0371, "step": 122120 }, { "epoch": 5.7, - "learning_rate": 8.643289109746378e-06, - "loss": 0.0443, + "learning_rate": 1.8661008222426788e-05, + "loss": 0.0337, "step": 122125 }, { "epoch": 5.7, - "learning_rate": 8.64282030847124e-06, - "loss": 0.0961, + "learning_rate": 1.8660540152590768e-05, + "loss": 0.0686, "step": 122130 }, { "epoch": 5.7, - "learning_rate": 8.6423515071961e-06, - "loss": 0.0735, + "learning_rate": 1.8660072082754747e-05, + "loss": 0.0828, "step": 122135 }, { "epoch": 5.7, - "learning_rate": 8.641882705920962e-06, - "loss": 0.0551, + "learning_rate": 1.8659604012918727e-05, + "loss": 0.1121, "step": 122140 }, { "epoch": 5.7, - "learning_rate": 8.641413904645822e-06, - "loss": 0.1656, + "learning_rate": 1.8659135943082707e-05, + "loss": 0.0751, "step": 122145 }, { "epoch": 5.7, - "learning_rate": 8.640945103370681e-06, - "loss": 0.1371, + "learning_rate": 1.8658667873246687e-05, + "loss": 0.1486, "step": 122150 }, { "epoch": 5.7, - "learning_rate": 8.640476302095543e-06, - "loss": 0.1847, + "learning_rate": 1.865819980341067e-05, + "loss": 0.1951, "step": 122155 }, { "epoch": 5.7, - "learning_rate": 8.640007500820403e-06, - "loss": 0.1211, + "learning_rate": 1.865773173357465e-05, + "loss": 0.1155, "step": 122160 }, { "epoch": 5.7, - "learning_rate": 8.639538699545263e-06, - "loss": 0.0091, + "learning_rate": 1.865726366373863e-05, + "loss": 0.0106, "step": 122165 }, { "epoch": 5.7, - "learning_rate": 8.639069898270125e-06, - "loss": 0.0149, + "learning_rate": 1.865679559390261e-05, + "loss": 0.0106, "step": 122170 }, { "epoch": 5.7, - "learning_rate": 8.638601096994985e-06, - "loss": 0.0613, + "learning_rate": 1.8656327524066593e-05, + "loss": 0.0183, "step": 122175 }, { "epoch": 5.7, - "learning_rate": 8.638132295719846e-06, - "loss": 0.0176, + "learning_rate": 1.8655859454230573e-05, + "loss": 0.0787, "step": 122180 }, { "epoch": 5.7, - "learning_rate": 8.637663494444706e-06, - "loss": 0.0678, + "learning_rate": 1.8655391384394552e-05, + "loss": 0.0253, "step": 122185 }, { "epoch": 5.7, - "learning_rate": 8.637194693169566e-06, - "loss": 0.0732, + "learning_rate": 1.8654923314558536e-05, + "loss": 0.0328, "step": 122190 }, { "epoch": 5.7, - "learning_rate": 8.636725891894426e-06, - "loss": 0.041, + "learning_rate": 1.8654455244722512e-05, + "loss": 0.0627, "step": 122195 }, { "epoch": 5.7, - "learning_rate": 8.636257090619288e-06, - "loss": 0.1661, + "learning_rate": 1.8653987174886492e-05, + "loss": 0.0711, "step": 122200 }, { "epoch": 5.7, - "learning_rate": 8.635788289344148e-06, - "loss": 0.2258, + "learning_rate": 1.865351910505047e-05, + "loss": 0.2909, "step": 122205 }, { "epoch": 5.7, - "learning_rate": 8.63531948806901e-06, - "loss": 0.0624, + "learning_rate": 1.8653051035214455e-05, + "loss": 0.093, "step": 122210 }, { "epoch": 5.7, - "learning_rate": 8.634850686793869e-06, - "loss": 0.0334, + "learning_rate": 1.8652582965378435e-05, + "loss": 0.014, "step": 122215 }, { "epoch": 5.7, - "learning_rate": 8.634381885518729e-06, - "loss": 0.0162, + "learning_rate": 1.8652114895542415e-05, + "loss": 0.0128, "step": 122220 }, { "epoch": 5.7, - "learning_rate": 8.63391308424359e-06, - "loss": 0.0286, + "learning_rate": 1.8651646825706394e-05, + "loss": 0.0495, "step": 122225 }, { "epoch": 5.7, - "learning_rate": 8.63344428296845e-06, - "loss": 0.0157, + "learning_rate": 1.8651178755870378e-05, + "loss": 0.0248, "step": 122230 }, { "epoch": 5.7, - "learning_rate": 8.63297548169331e-06, - "loss": 0.0524, + "learning_rate": 1.8650710686034357e-05, + "loss": 0.0726, "step": 122235 }, { "epoch": 5.7, - "learning_rate": 8.63250668041817e-06, - "loss": 0.1211, + "learning_rate": 1.8650242616198337e-05, + "loss": 0.0352, "step": 122240 }, { "epoch": 5.7, - "learning_rate": 8.632037879143032e-06, - "loss": 0.061, + "learning_rate": 1.864977454636232e-05, + "loss": 0.0601, "step": 122245 }, { "epoch": 5.7, - "learning_rate": 8.631569077867894e-06, - "loss": 0.1339, + "learning_rate": 1.86493064765263e-05, + "loss": 0.1016, "step": 122250 }, { "epoch": 5.7, - "learning_rate": 8.631100276592754e-06, - "loss": 0.1491, + "learning_rate": 1.864883840669028e-05, + "loss": 0.2121, "step": 122255 }, { "epoch": 5.7, - "learning_rate": 8.630631475317614e-06, - "loss": 0.1301, + "learning_rate": 1.8648370336854256e-05, + "loss": 0.0877, "step": 122260 }, { "epoch": 5.71, - "learning_rate": 8.630162674042474e-06, - "loss": 0.0078, + "learning_rate": 1.864790226701824e-05, + "loss": 0.0108, "step": 122265 }, { "epoch": 5.71, - "learning_rate": 8.629693872767335e-06, - "loss": 0.0254, + "learning_rate": 1.864743419718222e-05, + "loss": 0.0082, "step": 122270 }, { "epoch": 5.71, - "learning_rate": 8.629225071492195e-06, - "loss": 0.0116, + "learning_rate": 1.86469661273462e-05, + "loss": 0.0479, "step": 122275 }, { "epoch": 5.71, - "learning_rate": 8.628756270217055e-06, - "loss": 0.0169, + "learning_rate": 1.864649805751018e-05, + "loss": 0.0207, "step": 122280 }, { "epoch": 5.71, - "learning_rate": 8.628287468941917e-06, - "loss": 0.0431, + "learning_rate": 1.8646029987674162e-05, + "loss": 0.0413, "step": 122285 }, { "epoch": 5.71, - "learning_rate": 8.627818667666777e-06, - "loss": 0.0445, + "learning_rate": 1.8645561917838142e-05, + "loss": 0.0963, "step": 122290 }, { "epoch": 5.71, - "learning_rate": 8.627349866391638e-06, - "loss": 0.0933, + "learning_rate": 1.8645093848002122e-05, + "loss": 0.0777, "step": 122295 }, { "epoch": 5.71, - "learning_rate": 8.626881065116498e-06, - "loss": 0.0908, + "learning_rate": 1.8644625778166105e-05, + "loss": 0.1419, "step": 122300 }, { "epoch": 5.71, - "learning_rate": 8.626412263841358e-06, - "loss": 0.2491, + "learning_rate": 1.8644157708330085e-05, + "loss": 0.2061, "step": 122305 }, { "epoch": 5.71, - "learning_rate": 8.625943462566218e-06, - "loss": 0.0679, + "learning_rate": 1.8643689638494065e-05, + "loss": 0.0785, "step": 122310 }, { "epoch": 5.71, - "learning_rate": 8.62547466129108e-06, - "loss": 0.0014, + "learning_rate": 1.8643221568658045e-05, + "loss": 0.017, "step": 122315 }, { "epoch": 5.71, - "learning_rate": 8.62500586001594e-06, - "loss": 0.0226, + "learning_rate": 1.8642753498822024e-05, + "loss": 0.0397, "step": 122320 }, { "epoch": 5.71, - "learning_rate": 8.624537058740801e-06, - "loss": 0.026, + "learning_rate": 1.8642285428986004e-05, + "loss": 0.0271, "step": 122325 }, { "epoch": 5.71, - "learning_rate": 8.624068257465661e-06, - "loss": 0.0367, + "learning_rate": 1.8641817359149984e-05, + "loss": 0.0229, "step": 122330 }, { "epoch": 5.71, - "learning_rate": 8.623599456190521e-06, - "loss": 0.0352, + "learning_rate": 1.8641349289313964e-05, + "loss": 0.0271, "step": 122335 }, { "epoch": 5.71, - "learning_rate": 8.623130654915383e-06, - "loss": 0.1162, + "learning_rate": 1.8640881219477947e-05, + "loss": 0.0436, "step": 122340 }, { "epoch": 5.71, - "learning_rate": 8.622661853640243e-06, - "loss": 0.1302, + "learning_rate": 1.8640413149641927e-05, + "loss": 0.1168, "step": 122345 }, { "epoch": 5.71, - "learning_rate": 8.622193052365103e-06, - "loss": 0.1775, + "learning_rate": 1.8639945079805907e-05, + "loss": 0.1249, "step": 122350 }, { "epoch": 5.71, - "learning_rate": 8.621724251089964e-06, - "loss": 0.1746, + "learning_rate": 1.8639477009969887e-05, + "loss": 0.1694, "step": 122355 }, { "epoch": 5.71, - "learning_rate": 8.621255449814824e-06, - "loss": 0.0739, + "learning_rate": 1.863900894013387e-05, + "loss": 0.0676, "step": 122360 }, { "epoch": 5.71, - "learning_rate": 8.620786648539686e-06, - "loss": 0.0455, + "learning_rate": 1.863854087029785e-05, + "loss": 0.0134, "step": 122365 }, { "epoch": 5.71, - "learning_rate": 8.620317847264546e-06, - "loss": 0.0324, + "learning_rate": 1.863807280046183e-05, + "loss": 0.0018, "step": 122370 }, { "epoch": 5.71, - "learning_rate": 8.619849045989406e-06, - "loss": 0.0468, + "learning_rate": 1.8637604730625813e-05, + "loss": 0.0373, "step": 122375 }, { "epoch": 5.71, - "learning_rate": 8.619380244714266e-06, - "loss": 0.0334, + "learning_rate": 1.8637136660789792e-05, + "loss": 0.0031, "step": 122380 }, { "epoch": 5.71, - "learning_rate": 8.618911443439127e-06, - "loss": 0.033, + "learning_rate": 1.863666859095377e-05, + "loss": 0.0593, "step": 122385 }, { "epoch": 5.71, - "learning_rate": 8.618442642163987e-06, - "loss": 0.0611, + "learning_rate": 1.863620052111775e-05, + "loss": 0.08, "step": 122390 }, { "epoch": 5.71, - "learning_rate": 8.617973840888849e-06, - "loss": 0.0762, + "learning_rate": 1.8635732451281732e-05, + "loss": 0.1067, "step": 122395 }, { "epoch": 5.71, - "learning_rate": 8.617505039613709e-06, - "loss": 0.2499, + "learning_rate": 1.863526438144571e-05, + "loss": 0.0652, "step": 122400 }, { "epoch": 5.71, - "learning_rate": 8.617036238338569e-06, - "loss": 0.225, + "learning_rate": 1.863479631160969e-05, + "loss": 0.2269, "step": 122405 }, { "epoch": 5.71, - "learning_rate": 8.61656743706343e-06, - "loss": 0.0821, + "learning_rate": 1.863432824177367e-05, + "loss": 0.0875, "step": 122410 }, { "epoch": 5.71, - "learning_rate": 8.61609863578829e-06, - "loss": 0.0253, + "learning_rate": 1.8633860171937655e-05, + "loss": 0.0541, "step": 122415 }, { "epoch": 5.71, - "learning_rate": 8.61562983451315e-06, - "loss": 0.0465, + "learning_rate": 1.8633392102101634e-05, + "loss": 0.0092, "step": 122420 }, { "epoch": 5.71, - "learning_rate": 8.615161033238012e-06, - "loss": 0.0277, + "learning_rate": 1.8632924032265614e-05, + "loss": 0.0772, "step": 122425 }, { "epoch": 5.71, - "learning_rate": 8.614692231962872e-06, - "loss": 0.063, + "learning_rate": 1.8632455962429597e-05, + "loss": 0.1024, "step": 122430 }, { "epoch": 5.71, - "learning_rate": 8.614223430687733e-06, - "loss": 0.0693, + "learning_rate": 1.8631987892593577e-05, + "loss": 0.0128, "step": 122435 }, { "epoch": 5.71, - "learning_rate": 8.613754629412593e-06, - "loss": 0.0873, + "learning_rate": 1.8631519822757557e-05, + "loss": 0.0331, "step": 122440 }, { "epoch": 5.71, - "learning_rate": 8.613285828137453e-06, - "loss": 0.1463, + "learning_rate": 1.8631051752921537e-05, + "loss": 0.1681, "step": 122445 }, { "epoch": 5.71, - "learning_rate": 8.612817026862313e-06, - "loss": 0.092, + "learning_rate": 1.8630583683085517e-05, + "loss": 0.0849, "step": 122450 }, { "epoch": 5.71, - "learning_rate": 8.612348225587173e-06, - "loss": 0.2486, + "learning_rate": 1.8630115613249496e-05, + "loss": 0.1424, "step": 122455 }, { "epoch": 5.71, - "learning_rate": 8.611879424312035e-06, - "loss": 0.1052, + "learning_rate": 1.8629647543413476e-05, + "loss": 0.1053, "step": 122460 }, { "epoch": 5.71, - "learning_rate": 8.611410623036896e-06, - "loss": 0.0384, + "learning_rate": 1.8629179473577456e-05, + "loss": 0.0124, "step": 122465 }, { "epoch": 5.71, - "learning_rate": 8.610941821761756e-06, - "loss": 0.0795, + "learning_rate": 1.862871140374144e-05, + "loss": 0.0192, "step": 122470 }, { "epoch": 5.71, - "learning_rate": 8.610473020486616e-06, - "loss": 0.0762, + "learning_rate": 1.862824333390542e-05, + "loss": 0.0172, "step": 122475 }, { "epoch": 5.72, - "learning_rate": 8.610004219211478e-06, - "loss": 0.0234, + "learning_rate": 1.86277752640694e-05, + "loss": 0.0224, "step": 122480 }, { "epoch": 5.72, - "learning_rate": 8.609535417936338e-06, - "loss": 0.0381, + "learning_rate": 1.8627307194233382e-05, + "loss": 0.0337, "step": 122485 }, { "epoch": 5.72, - "learning_rate": 8.609066616661198e-06, - "loss": 0.0606, + "learning_rate": 1.8626839124397362e-05, + "loss": 0.027, "step": 122490 }, { "epoch": 5.72, - "learning_rate": 8.608597815386058e-06, - "loss": 0.0673, + "learning_rate": 1.8626371054561342e-05, + "loss": 0.1187, "step": 122495 }, { "epoch": 5.72, - "learning_rate": 8.60812901411092e-06, - "loss": 0.1148, + "learning_rate": 1.862590298472532e-05, + "loss": 0.0816, "step": 122500 }, { "epoch": 5.72, - "learning_rate": 8.60766021283578e-06, - "loss": 0.22, + "learning_rate": 1.8625434914889305e-05, + "loss": 0.1702, "step": 122505 }, { "epoch": 5.72, - "learning_rate": 8.60719141156064e-06, - "loss": 0.0974, + "learning_rate": 1.862496684505328e-05, + "loss": 0.0716, "step": 122510 }, { "epoch": 5.72, - "learning_rate": 8.6067226102855e-06, - "loss": 0.0063, + "learning_rate": 1.862449877521726e-05, + "loss": 0.029, "step": 122515 }, { "epoch": 5.72, - "learning_rate": 8.60625380901036e-06, - "loss": 0.029, + "learning_rate": 1.862403070538124e-05, + "loss": 0.051, "step": 122520 }, { "epoch": 5.72, - "learning_rate": 8.605785007735222e-06, - "loss": 0.0223, + "learning_rate": 1.8623562635545224e-05, + "loss": 0.014, "step": 122525 }, { "epoch": 5.72, - "learning_rate": 8.605316206460082e-06, - "loss": 0.0624, + "learning_rate": 1.8623094565709204e-05, + "loss": 0.037, "step": 122530 }, { "epoch": 5.72, - "learning_rate": 8.604847405184942e-06, - "loss": 0.0618, + "learning_rate": 1.8622626495873184e-05, + "loss": 0.0107, "step": 122535 }, { "epoch": 5.72, - "learning_rate": 8.604378603909804e-06, - "loss": 0.1054, + "learning_rate": 1.8622158426037164e-05, + "loss": 0.0262, "step": 122540 }, { "epoch": 5.72, - "learning_rate": 8.603909802634664e-06, - "loss": 0.1121, + "learning_rate": 1.8621690356201147e-05, + "loss": 0.0911, "step": 122545 }, { "epoch": 5.72, - "learning_rate": 8.603441001359525e-06, - "loss": 0.1718, + "learning_rate": 1.8621222286365127e-05, + "loss": 0.0618, "step": 122550 }, { "epoch": 5.72, - "learning_rate": 8.602972200084385e-06, - "loss": 0.2528, + "learning_rate": 1.8620754216529106e-05, + "loss": 0.344, "step": 122555 }, { "epoch": 5.72, - "learning_rate": 8.602503398809245e-06, - "loss": 0.1101, + "learning_rate": 1.862028614669309e-05, + "loss": 0.0676, "step": 122560 }, { "epoch": 5.72, - "learning_rate": 8.602034597534105e-06, - "loss": 0.0189, + "learning_rate": 1.861981807685707e-05, + "loss": 0.0517, "step": 122565 }, { "epoch": 5.72, - "learning_rate": 8.601565796258967e-06, - "loss": 0.0262, + "learning_rate": 1.861935000702105e-05, + "loss": 0.0157, "step": 122570 }, { "epoch": 5.72, - "learning_rate": 8.601096994983827e-06, - "loss": 0.0161, + "learning_rate": 1.8618881937185026e-05, + "loss": 0.0116, "step": 122575 }, { "epoch": 5.72, - "learning_rate": 8.600628193708688e-06, - "loss": 0.027, + "learning_rate": 1.861841386734901e-05, + "loss": 0.0435, "step": 122580 }, { "epoch": 5.72, - "learning_rate": 8.600159392433548e-06, - "loss": 0.0561, + "learning_rate": 1.861794579751299e-05, + "loss": 0.0341, "step": 122585 }, { "epoch": 5.72, - "learning_rate": 8.599690591158408e-06, - "loss": 0.0096, + "learning_rate": 1.861747772767697e-05, + "loss": 0.0485, "step": 122590 }, { "epoch": 5.72, - "learning_rate": 8.59922178988327e-06, - "loss": 0.1241, + "learning_rate": 1.8617009657840948e-05, + "loss": 0.0954, "step": 122595 }, { "epoch": 5.72, - "learning_rate": 8.59875298860813e-06, - "loss": 0.0763, + "learning_rate": 1.861654158800493e-05, + "loss": 0.0836, "step": 122600 }, { "epoch": 5.72, - "learning_rate": 8.59828418733299e-06, - "loss": 0.2329, + "learning_rate": 1.861607351816891e-05, + "loss": 0.2948, "step": 122605 }, { "epoch": 5.72, - "learning_rate": 8.597815386057851e-06, - "loss": 0.1114, + "learning_rate": 1.861560544833289e-05, + "loss": 0.1089, "step": 122610 }, { "epoch": 5.72, - "learning_rate": 8.597346584782711e-06, - "loss": 0.0137, + "learning_rate": 1.8615137378496874e-05, + "loss": 0.0104, "step": 122615 }, { "epoch": 5.72, - "learning_rate": 8.596877783507573e-06, - "loss": 0.0267, + "learning_rate": 1.8614669308660854e-05, + "loss": 0.006, "step": 122620 }, { "epoch": 5.72, - "learning_rate": 8.596408982232433e-06, - "loss": 0.0398, + "learning_rate": 1.8614201238824834e-05, + "loss": 0.0584, "step": 122625 }, { "epoch": 5.72, - "learning_rate": 8.595940180957293e-06, - "loss": 0.0468, + "learning_rate": 1.8613733168988814e-05, + "loss": 0.0028, "step": 122630 }, { "epoch": 5.72, - "learning_rate": 8.595471379682153e-06, - "loss": 0.0784, + "learning_rate": 1.8613265099152794e-05, + "loss": 0.062, "step": 122635 }, { "epoch": 5.72, - "learning_rate": 8.595002578407014e-06, - "loss": 0.0642, + "learning_rate": 1.8612797029316773e-05, + "loss": 0.0564, "step": 122640 }, { "epoch": 5.72, - "learning_rate": 8.594533777131874e-06, - "loss": 0.0435, + "learning_rate": 1.8612328959480753e-05, + "loss": 0.1355, "step": 122645 }, { "epoch": 5.72, - "learning_rate": 8.594064975856736e-06, - "loss": 0.0951, + "learning_rate": 1.8611860889644733e-05, + "loss": 0.2125, "step": 122650 }, { "epoch": 5.72, - "learning_rate": 8.593596174581596e-06, - "loss": 0.1405, + "learning_rate": 1.8611392819808716e-05, + "loss": 0.2661, "step": 122655 }, { "epoch": 5.72, - "learning_rate": 8.593127373306456e-06, - "loss": 0.0483, + "learning_rate": 1.8610924749972696e-05, + "loss": 0.0649, "step": 122660 }, { "epoch": 5.72, - "learning_rate": 8.592658572031317e-06, - "loss": 0.0213, + "learning_rate": 1.8610456680136676e-05, + "loss": 0.0264, "step": 122665 }, { "epoch": 5.72, - "learning_rate": 8.592189770756177e-06, - "loss": 0.0087, + "learning_rate": 1.860998861030066e-05, + "loss": 0.0151, "step": 122670 }, { "epoch": 5.72, - "learning_rate": 8.591720969481037e-06, - "loss": 0.0195, + "learning_rate": 1.860952054046464e-05, + "loss": 0.0105, "step": 122675 }, { "epoch": 5.72, - "learning_rate": 8.591252168205899e-06, - "loss": 0.0447, + "learning_rate": 1.860905247062862e-05, + "loss": 0.0258, "step": 122680 }, { "epoch": 5.72, - "learning_rate": 8.590783366930759e-06, - "loss": 0.0606, + "learning_rate": 1.86085844007926e-05, + "loss": 0.0956, "step": 122685 }, { "epoch": 5.72, - "learning_rate": 8.59031456565562e-06, - "loss": 0.1077, + "learning_rate": 1.8608116330956582e-05, + "loss": 0.1048, "step": 122690 }, { "epoch": 5.73, - "learning_rate": 8.58984576438048e-06, - "loss": 0.089, + "learning_rate": 1.860764826112056e-05, + "loss": 0.0711, "step": 122695 }, { "epoch": 5.73, - "learning_rate": 8.58937696310534e-06, - "loss": 0.1024, + "learning_rate": 1.8607180191284538e-05, + "loss": 0.1471, "step": 122700 }, { "epoch": 5.73, - "learning_rate": 8.5889081618302e-06, - "loss": 0.2049, + "learning_rate": 1.8606712121448518e-05, + "loss": 0.1351, "step": 122705 }, { "epoch": 5.73, - "learning_rate": 8.58843936055506e-06, - "loss": 0.0824, + "learning_rate": 1.86062440516125e-05, + "loss": 0.0991, "step": 122710 }, { "epoch": 5.73, - "learning_rate": 8.587970559279922e-06, - "loss": 0.0186, + "learning_rate": 1.860577598177648e-05, + "loss": 0.0222, "step": 122715 }, { "epoch": 5.73, - "learning_rate": 8.587501758004783e-06, - "loss": 0.0125, + "learning_rate": 1.860530791194046e-05, + "loss": 0.0274, "step": 122720 }, { "epoch": 5.73, - "learning_rate": 8.587032956729643e-06, - "loss": 0.0444, + "learning_rate": 1.8604839842104444e-05, + "loss": 0.0117, "step": 122725 }, { "epoch": 5.73, - "learning_rate": 8.586564155454503e-06, - "loss": 0.0175, + "learning_rate": 1.8604371772268424e-05, + "loss": 0.0338, "step": 122730 }, { "epoch": 5.73, - "learning_rate": 8.586095354179365e-06, - "loss": 0.059, + "learning_rate": 1.8603903702432404e-05, + "loss": 0.0295, "step": 122735 }, { "epoch": 5.73, - "learning_rate": 8.585626552904225e-06, - "loss": 0.0898, + "learning_rate": 1.8603435632596383e-05, + "loss": 0.0704, "step": 122740 }, { "epoch": 5.73, - "learning_rate": 8.585157751629085e-06, - "loss": 0.0441, + "learning_rate": 1.8602967562760367e-05, + "loss": 0.089, "step": 122745 }, { "epoch": 5.73, - "learning_rate": 8.584688950353945e-06, - "loss": 0.1397, + "learning_rate": 1.8602499492924346e-05, + "loss": 0.124, "step": 122750 }, { "epoch": 5.73, - "learning_rate": 8.584220149078806e-06, - "loss": 0.2298, + "learning_rate": 1.8602031423088326e-05, + "loss": 0.2627, "step": 122755 }, { "epoch": 5.73, - "learning_rate": 8.583751347803668e-06, - "loss": 0.0907, + "learning_rate": 1.8601563353252306e-05, + "loss": 0.0916, "step": 122760 }, { "epoch": 5.73, - "learning_rate": 8.583282546528528e-06, - "loss": 0.0069, + "learning_rate": 1.8601095283416286e-05, + "loss": 0.0346, "step": 122765 }, { "epoch": 5.73, - "learning_rate": 8.582813745253388e-06, - "loss": 0.0353, + "learning_rate": 1.8600627213580266e-05, + "loss": 0.0093, "step": 122770 }, { "epoch": 5.73, - "learning_rate": 8.582344943978248e-06, - "loss": 0.0254, + "learning_rate": 1.8600159143744245e-05, + "loss": 0.0177, "step": 122775 }, { "epoch": 5.73, - "learning_rate": 8.581876142703108e-06, - "loss": 0.0174, + "learning_rate": 1.8599691073908225e-05, + "loss": 0.0361, "step": 122780 }, { "epoch": 5.73, - "learning_rate": 8.58140734142797e-06, - "loss": 0.0761, + "learning_rate": 1.859922300407221e-05, + "loss": 0.0851, "step": 122785 }, { "epoch": 5.73, - "learning_rate": 8.58093854015283e-06, - "loss": 0.0615, + "learning_rate": 1.8598754934236188e-05, + "loss": 0.028, "step": 122790 }, { "epoch": 5.73, - "learning_rate": 8.580469738877691e-06, - "loss": 0.0689, + "learning_rate": 1.8598286864400168e-05, + "loss": 0.0395, "step": 122795 }, { "epoch": 5.73, - "learning_rate": 8.58000093760255e-06, - "loss": 0.215, + "learning_rate": 1.859781879456415e-05, + "loss": 0.1902, "step": 122800 }, { "epoch": 5.73, - "learning_rate": 8.579532136327412e-06, - "loss": 0.2263, + "learning_rate": 1.859735072472813e-05, + "loss": 0.2556, "step": 122805 }, { "epoch": 5.73, - "learning_rate": 8.579063335052272e-06, - "loss": 0.0646, + "learning_rate": 1.859688265489211e-05, + "loss": 0.0771, "step": 122810 }, { "epoch": 5.73, - "learning_rate": 8.578594533777132e-06, - "loss": 0.0293, + "learning_rate": 1.859641458505609e-05, + "loss": 0.039, "step": 122815 }, { "epoch": 5.73, - "learning_rate": 8.578125732501992e-06, - "loss": 0.0192, + "learning_rate": 1.8595946515220074e-05, + "loss": 0.0293, "step": 122820 }, { "epoch": 5.73, - "learning_rate": 8.577656931226854e-06, - "loss": 0.0372, + "learning_rate": 1.859547844538405e-05, + "loss": 0.0326, "step": 122825 }, { "epoch": 5.73, - "learning_rate": 8.577188129951714e-06, - "loss": 0.0502, + "learning_rate": 1.859501037554803e-05, + "loss": 0.0524, "step": 122830 }, { "epoch": 5.73, - "learning_rate": 8.576719328676575e-06, - "loss": 0.032, + "learning_rate": 1.859454230571201e-05, + "loss": 0.0277, "step": 122835 }, { "epoch": 5.73, - "learning_rate": 8.576250527401435e-06, - "loss": 0.0414, + "learning_rate": 1.8594074235875993e-05, + "loss": 0.0122, "step": 122840 }, { "epoch": 5.73, - "learning_rate": 8.575781726126295e-06, - "loss": 0.0592, + "learning_rate": 1.8593606166039973e-05, + "loss": 0.1152, "step": 122845 }, { "epoch": 5.73, - "learning_rate": 8.575312924851157e-06, - "loss": 0.1006, + "learning_rate": 1.8593138096203953e-05, + "loss": 0.1772, "step": 122850 }, { "epoch": 5.73, - "learning_rate": 8.574844123576017e-06, - "loss": 0.1129, + "learning_rate": 1.8592670026367936e-05, + "loss": 0.2424, "step": 122855 }, { "epoch": 5.73, - "learning_rate": 8.574375322300877e-06, - "loss": 0.1039, + "learning_rate": 1.8592201956531916e-05, + "loss": 0.0952, "step": 122860 }, { "epoch": 5.73, - "learning_rate": 8.573906521025738e-06, - "loss": 0.0464, + "learning_rate": 1.8591733886695896e-05, + "loss": 0.0038, "step": 122865 }, { "epoch": 5.73, - "learning_rate": 8.573437719750598e-06, - "loss": 0.0134, + "learning_rate": 1.8591265816859876e-05, + "loss": 0.0202, "step": 122870 }, { "epoch": 5.73, - "learning_rate": 8.57296891847546e-06, - "loss": 0.029, + "learning_rate": 1.859079774702386e-05, + "loss": 0.0344, "step": 122875 }, { "epoch": 5.73, - "learning_rate": 8.57250011720032e-06, - "loss": 0.0396, + "learning_rate": 1.859032967718784e-05, + "loss": 0.0125, "step": 122880 }, { "epoch": 5.73, - "learning_rate": 8.57203131592518e-06, - "loss": 0.0969, + "learning_rate": 1.858986160735182e-05, + "loss": 0.0264, "step": 122885 }, { "epoch": 5.73, - "learning_rate": 8.57156251465004e-06, - "loss": 0.0783, + "learning_rate": 1.8589393537515795e-05, + "loss": 0.0899, "step": 122890 }, { "epoch": 5.73, - "learning_rate": 8.571093713374901e-06, - "loss": 0.1194, + "learning_rate": 1.8588925467679778e-05, + "loss": 0.1451, "step": 122895 }, { "epoch": 5.73, - "learning_rate": 8.570624912099761e-06, - "loss": 0.1368, + "learning_rate": 1.8588457397843758e-05, + "loss": 0.1118, "step": 122900 }, { "epoch": 5.73, - "learning_rate": 8.570156110824623e-06, - "loss": 0.4224, + "learning_rate": 1.8587989328007738e-05, + "loss": 0.1964, "step": 122905 }, { "epoch": 5.74, - "learning_rate": 8.569687309549483e-06, - "loss": 0.0886, + "learning_rate": 1.858752125817172e-05, + "loss": 0.0534, "step": 122910 }, { "epoch": 5.74, - "learning_rate": 8.569218508274343e-06, - "loss": 0.0163, + "learning_rate": 1.85870531883357e-05, + "loss": 0.0105, "step": 122915 }, { "epoch": 5.74, - "learning_rate": 8.568749706999204e-06, - "loss": 0.0465, + "learning_rate": 1.858658511849968e-05, + "loss": 0.0618, "step": 122920 }, { "epoch": 5.74, - "learning_rate": 8.568280905724064e-06, - "loss": 0.0449, + "learning_rate": 1.858611704866366e-05, + "loss": 0.1093, "step": 122925 }, { "epoch": 5.74, - "learning_rate": 8.567812104448924e-06, - "loss": 0.0309, + "learning_rate": 1.8585648978827644e-05, + "loss": 0.0116, "step": 122930 }, { "epoch": 5.74, - "learning_rate": 8.567343303173786e-06, - "loss": 0.0197, + "learning_rate": 1.8585180908991623e-05, + "loss": 0.0222, "step": 122935 }, { "epoch": 5.74, - "learning_rate": 8.566874501898646e-06, - "loss": 0.1255, + "learning_rate": 1.8584712839155603e-05, + "loss": 0.0985, "step": 122940 }, { "epoch": 5.74, - "learning_rate": 8.566405700623507e-06, - "loss": 0.0931, + "learning_rate": 1.8584244769319583e-05, + "loss": 0.0675, "step": 122945 }, { "epoch": 5.74, - "learning_rate": 8.565936899348367e-06, - "loss": 0.0667, + "learning_rate": 1.8583776699483566e-05, + "loss": 0.1209, "step": 122950 }, { "epoch": 5.74, - "learning_rate": 8.565468098073227e-06, - "loss": 0.1075, + "learning_rate": 1.8583308629647543e-05, + "loss": 0.1165, "step": 122955 }, { "epoch": 5.74, - "learning_rate": 8.564999296798087e-06, - "loss": 0.0943, + "learning_rate": 1.8582840559811522e-05, + "loss": 0.1205, "step": 122960 }, { "epoch": 5.74, - "learning_rate": 8.564530495522947e-06, - "loss": 0.0091, + "learning_rate": 1.8582372489975502e-05, + "loss": 0.0502, "step": 122965 }, { "epoch": 5.74, - "learning_rate": 8.564061694247809e-06, - "loss": 0.0476, + "learning_rate": 1.8581904420139485e-05, + "loss": 0.0494, "step": 122970 }, { "epoch": 5.74, - "learning_rate": 8.56359289297267e-06, - "loss": 0.1195, + "learning_rate": 1.8581436350303465e-05, + "loss": 0.0263, "step": 122975 }, { "epoch": 5.74, - "learning_rate": 8.56312409169753e-06, - "loss": 0.0272, + "learning_rate": 1.8580968280467445e-05, + "loss": 0.0197, "step": 122980 }, { "epoch": 5.74, - "learning_rate": 8.56265529042239e-06, - "loss": 0.0437, + "learning_rate": 1.8580500210631428e-05, + "loss": 0.0163, "step": 122985 }, { "epoch": 5.74, - "learning_rate": 8.562186489147252e-06, - "loss": 0.0403, + "learning_rate": 1.8580032140795408e-05, + "loss": 0.0769, "step": 122990 }, { "epoch": 5.74, - "learning_rate": 8.561717687872112e-06, - "loss": 0.0782, + "learning_rate": 1.8579564070959388e-05, + "loss": 0.0742, "step": 122995 }, { "epoch": 5.74, - "learning_rate": 8.561248886596972e-06, - "loss": 0.1706, + "learning_rate": 1.8579096001123368e-05, + "loss": 0.0729, "step": 123000 }, { "epoch": 5.74, - "learning_rate": 8.560780085321832e-06, - "loss": 0.2617, + "learning_rate": 1.857862793128735e-05, + "loss": 0.2951, "step": 123005 }, { "epoch": 5.74, - "learning_rate": 8.560311284046693e-06, - "loss": 0.0771, + "learning_rate": 1.857815986145133e-05, + "loss": 0.1033, "step": 123010 }, { "epoch": 5.74, - "learning_rate": 8.559842482771555e-06, - "loss": 0.0058, + "learning_rate": 1.8577691791615307e-05, + "loss": 0.0101, "step": 123015 }, { "epoch": 5.74, - "learning_rate": 8.559373681496415e-06, - "loss": 0.0109, + "learning_rate": 1.8577223721779287e-05, + "loss": 0.0247, "step": 123020 }, { "epoch": 5.74, - "learning_rate": 8.558904880221275e-06, - "loss": 0.0231, + "learning_rate": 1.857675565194327e-05, + "loss": 0.0107, "step": 123025 }, { "epoch": 5.74, - "learning_rate": 8.558436078946135e-06, - "loss": 0.0507, + "learning_rate": 1.857628758210725e-05, + "loss": 0.07, "step": 123030 }, { "epoch": 5.74, - "learning_rate": 8.557967277670995e-06, - "loss": 0.0468, + "learning_rate": 1.857581951227123e-05, + "loss": 0.0546, "step": 123035 }, { "epoch": 5.74, - "learning_rate": 8.557498476395856e-06, - "loss": 0.0441, + "learning_rate": 1.8575351442435213e-05, + "loss": 0.115, "step": 123040 }, { "epoch": 5.74, - "learning_rate": 8.557029675120716e-06, - "loss": 0.1109, + "learning_rate": 1.8574883372599193e-05, + "loss": 0.0961, "step": 123045 }, { "epoch": 5.74, - "learning_rate": 8.556560873845578e-06, - "loss": 0.1574, + "learning_rate": 1.8574415302763173e-05, + "loss": 0.1676, "step": 123050 }, { "epoch": 5.74, - "learning_rate": 8.556092072570438e-06, - "loss": 0.2388, + "learning_rate": 1.8573947232927152e-05, + "loss": 0.275, "step": 123055 }, { "epoch": 5.74, - "learning_rate": 8.5556232712953e-06, - "loss": 0.1186, + "learning_rate": 1.8573479163091136e-05, + "loss": 0.1706, "step": 123060 }, { "epoch": 5.74, - "learning_rate": 8.55515447002016e-06, - "loss": 0.0092, + "learning_rate": 1.8573011093255116e-05, + "loss": 0.0413, "step": 123065 }, { "epoch": 5.74, - "learning_rate": 8.55468566874502e-06, - "loss": 0.0483, + "learning_rate": 1.8572543023419095e-05, + "loss": 0.0423, "step": 123070 }, { "epoch": 5.74, - "learning_rate": 8.55421686746988e-06, - "loss": 0.0232, + "learning_rate": 1.8572074953583075e-05, + "loss": 0.0323, "step": 123075 }, { "epoch": 5.74, - "learning_rate": 8.553748066194741e-06, - "loss": 0.03, + "learning_rate": 1.8571606883747055e-05, + "loss": 0.0633, "step": 123080 }, { "epoch": 5.74, - "learning_rate": 8.553279264919601e-06, - "loss": 0.0489, + "learning_rate": 1.8571138813911035e-05, + "loss": 0.0415, "step": 123085 }, { "epoch": 5.74, - "learning_rate": 8.552810463644463e-06, - "loss": 0.0625, + "learning_rate": 1.8570670744075015e-05, + "loss": 0.0594, "step": 123090 }, { "epoch": 5.74, - "learning_rate": 8.552341662369322e-06, - "loss": 0.1179, + "learning_rate": 1.8570202674238998e-05, + "loss": 0.034, "step": 123095 }, { "epoch": 5.74, - "learning_rate": 8.551872861094182e-06, - "loss": 0.1482, + "learning_rate": 1.8569734604402978e-05, + "loss": 0.1171, "step": 123100 }, { "epoch": 5.74, - "learning_rate": 8.551404059819042e-06, - "loss": 0.3494, + "learning_rate": 1.8569266534566957e-05, + "loss": 0.1657, "step": 123105 }, { "epoch": 5.74, - "learning_rate": 8.550935258543904e-06, - "loss": 0.0715, + "learning_rate": 1.8568798464730937e-05, + "loss": 0.1054, "step": 123110 }, { "epoch": 5.74, - "learning_rate": 8.550466457268764e-06, - "loss": 0.008, + "learning_rate": 1.856833039489492e-05, + "loss": 0.0514, "step": 123115 }, { "epoch": 5.74, - "learning_rate": 8.549997655993625e-06, - "loss": 0.0144, + "learning_rate": 1.85678623250589e-05, + "loss": 0.0219, "step": 123120 }, { "epoch": 5.75, - "learning_rate": 8.549528854718485e-06, - "loss": 0.0676, + "learning_rate": 1.856739425522288e-05, + "loss": 0.0311, "step": 123125 }, { "epoch": 5.75, - "learning_rate": 8.549060053443347e-06, - "loss": 0.0281, + "learning_rate": 1.856692618538686e-05, + "loss": 0.0243, "step": 123130 }, { "epoch": 5.75, - "learning_rate": 8.548591252168207e-06, - "loss": 0.0444, + "learning_rate": 1.8566458115550843e-05, + "loss": 0.0625, "step": 123135 }, { "epoch": 5.75, - "learning_rate": 8.548122450893067e-06, - "loss": 0.083, + "learning_rate": 1.8565990045714823e-05, + "loss": 0.0402, "step": 123140 }, { "epoch": 5.75, - "learning_rate": 8.547653649617927e-06, - "loss": 0.1791, + "learning_rate": 1.85655219758788e-05, + "loss": 0.1011, "step": 123145 }, { "epoch": 5.75, - "learning_rate": 8.547184848342788e-06, - "loss": 0.1306, + "learning_rate": 1.856505390604278e-05, + "loss": 0.0865, "step": 123150 }, { "epoch": 5.75, - "learning_rate": 8.546716047067648e-06, - "loss": 0.175, + "learning_rate": 1.8564585836206762e-05, + "loss": 0.1735, "step": 123155 }, { "epoch": 5.75, - "learning_rate": 8.54624724579251e-06, - "loss": 0.0774, + "learning_rate": 1.8564117766370742e-05, + "loss": 0.1139, "step": 123160 }, { "epoch": 5.75, - "learning_rate": 8.54577844451737e-06, - "loss": 0.0121, + "learning_rate": 1.8563649696534722e-05, + "loss": 0.015, "step": 123165 }, { "epoch": 5.75, - "learning_rate": 8.54530964324223e-06, - "loss": 0.006, + "learning_rate": 1.8563181626698705e-05, + "loss": 0.0291, "step": 123170 }, { "epoch": 5.75, - "learning_rate": 8.544840841967092e-06, - "loss": 0.0271, + "learning_rate": 1.8562713556862685e-05, + "loss": 0.0749, "step": 123175 }, { "epoch": 5.75, - "learning_rate": 8.544372040691951e-06, - "loss": 0.0477, + "learning_rate": 1.8562245487026665e-05, + "loss": 0.0205, "step": 123180 }, { "epoch": 5.75, - "learning_rate": 8.543903239416811e-06, - "loss": 0.0446, + "learning_rate": 1.8561777417190645e-05, + "loss": 0.0934, "step": 123185 }, { "epoch": 5.75, - "learning_rate": 8.543434438141673e-06, - "loss": 0.0738, + "learning_rate": 1.8561309347354628e-05, + "loss": 0.0715, "step": 123190 }, { "epoch": 5.75, - "learning_rate": 8.542965636866533e-06, - "loss": 0.1856, + "learning_rate": 1.8560841277518608e-05, + "loss": 0.042, "step": 123195 }, { "epoch": 5.75, - "learning_rate": 8.542496835591395e-06, - "loss": 0.1861, + "learning_rate": 1.8560373207682588e-05, + "loss": 0.1447, "step": 123200 }, { "epoch": 5.75, - "learning_rate": 8.542028034316255e-06, - "loss": 0.1675, + "learning_rate": 1.8559905137846564e-05, + "loss": 0.3885, "step": 123205 }, { "epoch": 5.75, - "learning_rate": 8.541559233041114e-06, - "loss": 0.1242, + "learning_rate": 1.8559437068010547e-05, + "loss": 0.1045, "step": 123210 }, { "epoch": 5.75, - "learning_rate": 8.541090431765974e-06, - "loss": 0.0356, + "learning_rate": 1.8558968998174527e-05, + "loss": 0.0291, "step": 123215 }, { "epoch": 5.75, - "learning_rate": 8.540621630490834e-06, - "loss": 0.0078, + "learning_rate": 1.8558500928338507e-05, + "loss": 0.0205, "step": 123220 }, { "epoch": 5.75, - "learning_rate": 8.540152829215696e-06, - "loss": 0.063, + "learning_rate": 1.855803285850249e-05, + "loss": 0.0308, "step": 123225 }, { "epoch": 5.75, - "learning_rate": 8.539684027940558e-06, - "loss": 0.0564, + "learning_rate": 1.855756478866647e-05, + "loss": 0.0338, "step": 123230 }, { "epoch": 5.75, - "learning_rate": 8.539215226665418e-06, - "loss": 0.0427, + "learning_rate": 1.855709671883045e-05, + "loss": 0.0455, "step": 123235 }, { "epoch": 5.75, - "learning_rate": 8.538746425390277e-06, - "loss": 0.0754, + "learning_rate": 1.855662864899443e-05, + "loss": 0.0761, "step": 123240 }, { "epoch": 5.75, - "learning_rate": 8.538277624115139e-06, - "loss": 0.1315, + "learning_rate": 1.8556160579158413e-05, + "loss": 0.1368, "step": 123245 }, { "epoch": 5.75, - "learning_rate": 8.537808822839999e-06, - "loss": 0.1349, + "learning_rate": 1.8555692509322392e-05, + "loss": 0.1493, "step": 123250 }, { "epoch": 5.75, - "learning_rate": 8.537340021564859e-06, - "loss": 0.2072, + "learning_rate": 1.8555224439486372e-05, + "loss": 0.2585, "step": 123255 }, { "epoch": 5.75, - "learning_rate": 8.536871220289719e-06, - "loss": 0.0648, + "learning_rate": 1.8554756369650352e-05, + "loss": 0.0468, "step": 123260 }, { "epoch": 5.75, - "learning_rate": 8.53640241901458e-06, - "loss": 0.0195, + "learning_rate": 1.8554288299814335e-05, + "loss": 0.0058, "step": 123265 }, { "epoch": 5.75, - "learning_rate": 8.535933617739442e-06, - "loss": 0.0424, + "learning_rate": 1.8553820229978312e-05, + "loss": 0.0059, "step": 123270 }, { "epoch": 5.75, - "learning_rate": 8.535464816464302e-06, - "loss": 0.0646, + "learning_rate": 1.855335216014229e-05, + "loss": 0.0448, "step": 123275 }, { "epoch": 5.75, - "learning_rate": 8.534996015189162e-06, - "loss": 0.0539, + "learning_rate": 1.8552884090306275e-05, + "loss": 0.0229, "step": 123280 }, { "epoch": 5.75, - "learning_rate": 8.534527213914022e-06, - "loss": 0.0412, + "learning_rate": 1.8552416020470255e-05, + "loss": 0.0542, "step": 123285 }, { "epoch": 5.75, - "learning_rate": 8.534058412638882e-06, - "loss": 0.0675, + "learning_rate": 1.8551947950634234e-05, + "loss": 0.1094, "step": 123290 }, { "epoch": 5.75, - "learning_rate": 8.533589611363744e-06, - "loss": 0.088, + "learning_rate": 1.8551479880798214e-05, + "loss": 0.0673, "step": 123295 }, { "epoch": 5.75, - "learning_rate": 8.533120810088603e-06, - "loss": 0.1028, + "learning_rate": 1.8551011810962197e-05, + "loss": 0.0491, "step": 123300 }, { "epoch": 5.75, - "learning_rate": 8.532652008813465e-06, - "loss": 0.1957, + "learning_rate": 1.8550543741126177e-05, + "loss": 0.2295, "step": 123305 }, { "epoch": 5.75, - "learning_rate": 8.532183207538325e-06, - "loss": 0.1255, + "learning_rate": 1.8550075671290157e-05, + "loss": 0.0994, "step": 123310 }, { "epoch": 5.75, - "learning_rate": 8.531714406263187e-06, - "loss": 0.005, + "learning_rate": 1.8549607601454137e-05, + "loss": 0.0171, "step": 123315 }, { "epoch": 5.75, - "learning_rate": 8.531245604988047e-06, - "loss": 0.0092, + "learning_rate": 1.854913953161812e-05, + "loss": 0.0358, "step": 123320 }, { "epoch": 5.75, - "learning_rate": 8.530776803712906e-06, - "loss": 0.0187, + "learning_rate": 1.85486714617821e-05, + "loss": 0.0599, "step": 123325 }, { "epoch": 5.75, - "learning_rate": 8.530308002437766e-06, - "loss": 0.0434, + "learning_rate": 1.854820339194608e-05, + "loss": 0.0531, "step": 123330 }, { "epoch": 5.75, - "learning_rate": 8.529839201162628e-06, - "loss": 0.0239, + "learning_rate": 1.854773532211006e-05, + "loss": 0.0331, "step": 123335 }, { "epoch": 5.76, - "learning_rate": 8.529370399887488e-06, - "loss": 0.0438, + "learning_rate": 1.854726725227404e-05, + "loss": 0.0398, "step": 123340 }, { "epoch": 5.76, - "learning_rate": 8.52890159861235e-06, - "loss": 0.0563, + "learning_rate": 1.854679918243802e-05, + "loss": 0.1, "step": 123345 }, { "epoch": 5.76, - "learning_rate": 8.52843279733721e-06, - "loss": 0.0921, + "learning_rate": 1.8546331112602e-05, + "loss": 0.1423, "step": 123350 }, { "epoch": 5.76, - "learning_rate": 8.52796399606207e-06, - "loss": 0.1355, + "learning_rate": 1.8545863042765982e-05, + "loss": 0.2406, "step": 123355 }, { "epoch": 5.76, - "learning_rate": 8.52749519478693e-06, - "loss": 0.1312, + "learning_rate": 1.8545394972929962e-05, + "loss": 0.0627, "step": 123360 }, { "epoch": 5.76, - "learning_rate": 8.527026393511791e-06, - "loss": 0.0716, + "learning_rate": 1.8544926903093942e-05, + "loss": 0.017, "step": 123365 }, { "epoch": 5.76, - "learning_rate": 8.526557592236651e-06, - "loss": 0.0241, + "learning_rate": 1.854445883325792e-05, + "loss": 0.032, "step": 123370 }, { "epoch": 5.76, - "learning_rate": 8.526088790961513e-06, - "loss": 0.0567, + "learning_rate": 1.8543990763421905e-05, + "loss": 0.0547, "step": 123375 }, { "epoch": 5.76, - "learning_rate": 8.525619989686373e-06, - "loss": 0.0648, + "learning_rate": 1.8543522693585885e-05, + "loss": 0.0492, "step": 123380 }, { "epoch": 5.76, - "learning_rate": 8.525151188411234e-06, - "loss": 0.0446, + "learning_rate": 1.8543054623749865e-05, + "loss": 0.0444, "step": 123385 }, { "epoch": 5.76, - "learning_rate": 8.524682387136094e-06, - "loss": 0.0243, + "learning_rate": 1.8542586553913844e-05, + "loss": 0.0676, "step": 123390 }, { "epoch": 5.76, - "learning_rate": 8.524213585860954e-06, - "loss": 0.092, + "learning_rate": 1.8542118484077824e-05, + "loss": 0.0366, "step": 123395 }, { "epoch": 5.76, - "learning_rate": 8.523744784585814e-06, - "loss": 0.0793, + "learning_rate": 1.8541650414241804e-05, + "loss": 0.1153, "step": 123400 }, { "epoch": 5.76, - "learning_rate": 8.523275983310676e-06, - "loss": 0.2366, + "learning_rate": 1.8541182344405784e-05, + "loss": 0.1846, "step": 123405 }, { "epoch": 5.76, - "learning_rate": 8.522807182035536e-06, - "loss": 0.0885, + "learning_rate": 1.8540714274569767e-05, + "loss": 0.0667, "step": 123410 }, { "epoch": 5.76, - "learning_rate": 8.522338380760397e-06, - "loss": 0.0044, + "learning_rate": 1.8540246204733747e-05, + "loss": 0.0113, "step": 123415 }, { "epoch": 5.76, - "learning_rate": 8.521869579485257e-06, - "loss": 0.014, + "learning_rate": 1.8539778134897727e-05, + "loss": 0.075, "step": 123420 }, { "epoch": 5.76, - "learning_rate": 8.521400778210117e-06, - "loss": 0.0242, + "learning_rate": 1.8539310065061706e-05, + "loss": 0.0095, "step": 123425 }, { "epoch": 5.76, - "learning_rate": 8.520931976934977e-06, - "loss": 0.037, + "learning_rate": 1.853884199522569e-05, + "loss": 0.016, "step": 123430 }, { "epoch": 5.76, - "learning_rate": 8.520463175659839e-06, - "loss": 0.0644, + "learning_rate": 1.853837392538967e-05, + "loss": 0.0538, "step": 123435 }, { "epoch": 5.76, - "learning_rate": 8.519994374384699e-06, - "loss": 0.0504, + "learning_rate": 1.853790585555365e-05, + "loss": 0.0503, "step": 123440 }, { "epoch": 5.76, - "learning_rate": 8.51952557310956e-06, - "loss": 0.1222, + "learning_rate": 1.853743778571763e-05, + "loss": 0.1097, "step": 123445 }, { "epoch": 5.76, - "learning_rate": 8.51905677183442e-06, - "loss": 0.134, + "learning_rate": 1.8536969715881612e-05, + "loss": 0.074, "step": 123450 }, { "epoch": 5.76, - "learning_rate": 8.518587970559282e-06, - "loss": 0.2783, + "learning_rate": 1.8536501646045592e-05, + "loss": 0.3029, "step": 123455 }, { "epoch": 5.76, - "learning_rate": 8.518119169284142e-06, - "loss": 0.0944, + "learning_rate": 1.853603357620957e-05, + "loss": 0.0782, "step": 123460 }, { "epoch": 5.76, - "learning_rate": 8.517650368009002e-06, - "loss": 0.0173, + "learning_rate": 1.8535565506373552e-05, + "loss": 0.0376, "step": 123465 }, { "epoch": 5.76, - "learning_rate": 8.517181566733862e-06, - "loss": 0.0268, + "learning_rate": 1.853509743653753e-05, + "loss": 0.0067, "step": 123470 }, { "epoch": 5.76, - "learning_rate": 8.516712765458721e-06, - "loss": 0.0104, + "learning_rate": 1.853462936670151e-05, + "loss": 0.0368, "step": 123475 }, { "epoch": 5.76, - "learning_rate": 8.516243964183583e-06, - "loss": 0.0421, + "learning_rate": 1.853416129686549e-05, + "loss": 0.0534, "step": 123480 }, { "epoch": 5.76, - "learning_rate": 8.515775162908445e-06, - "loss": 0.0661, + "learning_rate": 1.8533693227029474e-05, + "loss": 0.0482, "step": 123485 }, { "epoch": 5.76, - "learning_rate": 8.515306361633305e-06, - "loss": 0.0388, + "learning_rate": 1.8533225157193454e-05, + "loss": 0.0951, "step": 123490 }, { "epoch": 5.76, - "learning_rate": 8.514837560358165e-06, - "loss": 0.0706, + "learning_rate": 1.8532757087357434e-05, + "loss": 0.0343, "step": 123495 }, { "epoch": 5.76, - "learning_rate": 8.514368759083025e-06, - "loss": 0.1597, + "learning_rate": 1.8532289017521414e-05, + "loss": 0.0679, "step": 123500 }, { "epoch": 5.76, - "learning_rate": 8.513899957807886e-06, - "loss": 0.2319, + "learning_rate": 1.8531820947685397e-05, + "loss": 0.3081, "step": 123505 }, { "epoch": 5.76, - "learning_rate": 8.513431156532746e-06, - "loss": 0.087, + "learning_rate": 1.8531352877849377e-05, + "loss": 0.0838, "step": 123510 }, { "epoch": 5.76, - "learning_rate": 8.512962355257606e-06, - "loss": 0.0045, + "learning_rate": 1.8530884808013357e-05, + "loss": 0.008, "step": 123515 }, { "epoch": 5.76, - "learning_rate": 8.512493553982468e-06, - "loss": 0.0253, + "learning_rate": 1.8530416738177337e-05, + "loss": 0.038, "step": 123520 }, { "epoch": 5.76, - "learning_rate": 8.51202475270733e-06, - "loss": 0.0074, + "learning_rate": 1.8529948668341316e-05, + "loss": 0.0246, "step": 123525 }, { "epoch": 5.76, - "learning_rate": 8.51155595143219e-06, - "loss": 0.1033, + "learning_rate": 1.8529480598505296e-05, + "loss": 0.0535, "step": 123530 }, { "epoch": 5.76, - "learning_rate": 8.511087150157049e-06, - "loss": 0.0719, + "learning_rate": 1.8529012528669276e-05, + "loss": 0.0578, "step": 123535 }, { "epoch": 5.76, - "learning_rate": 8.510618348881909e-06, - "loss": 0.0555, + "learning_rate": 1.852854445883326e-05, + "loss": 0.0389, "step": 123540 }, { "epoch": 5.76, - "learning_rate": 8.510149547606769e-06, - "loss": 0.0438, + "learning_rate": 1.852807638899724e-05, + "loss": 0.0451, "step": 123545 }, { "epoch": 5.77, - "learning_rate": 8.50968074633163e-06, - "loss": 0.1805, + "learning_rate": 1.852760831916122e-05, + "loss": 0.1423, "step": 123550 }, { "epoch": 5.77, - "learning_rate": 8.509211945056492e-06, - "loss": 0.2351, + "learning_rate": 1.85271402493252e-05, + "loss": 0.2337, "step": 123555 }, { "epoch": 5.77, - "learning_rate": 8.508743143781352e-06, - "loss": 0.1019, + "learning_rate": 1.8526672179489182e-05, + "loss": 0.0902, "step": 123560 }, { "epoch": 5.77, - "learning_rate": 8.508274342506212e-06, - "loss": 0.018, + "learning_rate": 1.852620410965316e-05, + "loss": 0.0318, "step": 123565 }, { "epoch": 5.77, - "learning_rate": 8.507805541231074e-06, - "loss": 0.0684, + "learning_rate": 1.852573603981714e-05, + "loss": 0.0228, "step": 123570 }, { "epoch": 5.77, - "learning_rate": 8.507336739955934e-06, - "loss": 0.0317, + "learning_rate": 1.852526796998112e-05, + "loss": 0.0408, "step": 123575 }, { "epoch": 5.77, - "learning_rate": 8.506867938680794e-06, - "loss": 0.1867, + "learning_rate": 1.8524799900145105e-05, + "loss": 0.0307, "step": 123580 }, { "epoch": 5.77, - "learning_rate": 8.506399137405654e-06, - "loss": 0.0618, + "learning_rate": 1.852433183030908e-05, + "loss": 0.0678, "step": 123585 }, { "epoch": 5.77, - "learning_rate": 8.505930336130515e-06, - "loss": 0.0647, + "learning_rate": 1.852386376047306e-05, + "loss": 0.091, "step": 123590 }, { "epoch": 5.77, - "learning_rate": 8.505461534855377e-06, - "loss": 0.0785, + "learning_rate": 1.8523395690637044e-05, + "loss": 0.0184, "step": 123595 }, { "epoch": 5.77, - "learning_rate": 8.504992733580237e-06, - "loss": 0.154, + "learning_rate": 1.8522927620801024e-05, + "loss": 0.1314, "step": 123600 }, { "epoch": 5.77, - "learning_rate": 8.504523932305097e-06, - "loss": 0.1586, + "learning_rate": 1.8522459550965004e-05, + "loss": 0.171, "step": 123605 }, { "epoch": 5.77, - "learning_rate": 8.504055131029957e-06, - "loss": 0.1016, + "learning_rate": 1.8521991481128983e-05, + "loss": 0.0846, "step": 123610 }, { "epoch": 5.77, - "learning_rate": 8.503586329754817e-06, - "loss": 0.011, + "learning_rate": 1.8521523411292967e-05, + "loss": 0.0391, "step": 123615 }, { "epoch": 5.77, - "learning_rate": 8.503117528479678e-06, - "loss": 0.0231, + "learning_rate": 1.8521055341456946e-05, + "loss": 0.0128, "step": 123620 }, { "epoch": 5.77, - "learning_rate": 8.502648727204538e-06, - "loss": 0.0365, + "learning_rate": 1.8520587271620926e-05, + "loss": 0.0552, "step": 123625 }, { "epoch": 5.77, - "learning_rate": 8.5021799259294e-06, - "loss": 0.0279, + "learning_rate": 1.8520119201784906e-05, + "loss": 0.0984, "step": 123630 }, { "epoch": 5.77, - "learning_rate": 8.50171112465426e-06, - "loss": 0.1056, + "learning_rate": 1.851965113194889e-05, + "loss": 0.0352, "step": 123635 }, { "epoch": 5.77, - "learning_rate": 8.501242323379121e-06, - "loss": 0.0842, + "learning_rate": 1.851918306211287e-05, + "loss": 0.1304, "step": 123640 }, { "epoch": 5.77, - "learning_rate": 8.500773522103981e-06, - "loss": 0.0525, + "learning_rate": 1.851871499227685e-05, + "loss": 0.1784, "step": 123645 }, { "epoch": 5.77, - "learning_rate": 8.500304720828841e-06, - "loss": 0.0584, + "learning_rate": 1.851824692244083e-05, + "loss": 0.1151, "step": 123650 }, { "epoch": 5.77, - "learning_rate": 8.499835919553701e-06, - "loss": 0.1956, + "learning_rate": 1.851777885260481e-05, + "loss": 0.1681, "step": 123655 }, { "epoch": 5.77, - "learning_rate": 8.499367118278563e-06, - "loss": 0.1088, + "learning_rate": 1.851731078276879e-05, + "loss": 0.1166, "step": 123660 }, { "epoch": 5.77, - "learning_rate": 8.498898317003423e-06, - "loss": 0.0204, + "learning_rate": 1.8516842712932768e-05, + "loss": 0.0191, "step": 123665 }, { "epoch": 5.77, - "learning_rate": 8.498429515728284e-06, - "loss": 0.0059, + "learning_rate": 1.851637464309675e-05, + "loss": 0.0058, "step": 123670 }, { "epoch": 5.77, - "learning_rate": 8.497960714453144e-06, - "loss": 0.0477, + "learning_rate": 1.851590657326073e-05, + "loss": 0.0382, "step": 123675 }, { "epoch": 5.77, - "learning_rate": 8.497491913178004e-06, - "loss": 0.0749, + "learning_rate": 1.851543850342471e-05, + "loss": 0.0916, "step": 123680 }, { "epoch": 5.77, - "learning_rate": 8.497023111902864e-06, - "loss": 0.0516, + "learning_rate": 1.851497043358869e-05, + "loss": 0.0803, "step": 123685 }, { "epoch": 5.77, - "learning_rate": 8.496554310627726e-06, - "loss": 0.0811, + "learning_rate": 1.8514502363752674e-05, + "loss": 0.0301, "step": 123690 }, { "epoch": 5.77, - "learning_rate": 8.496085509352586e-06, - "loss": 0.0811, + "learning_rate": 1.8514034293916654e-05, + "loss": 0.0573, "step": 123695 }, { "epoch": 5.77, - "learning_rate": 8.495616708077447e-06, - "loss": 0.0725, + "learning_rate": 1.8513566224080634e-05, + "loss": 0.2064, "step": 123700 }, { "epoch": 5.77, - "learning_rate": 8.495147906802307e-06, - "loss": 0.2563, + "learning_rate": 1.8513098154244617e-05, + "loss": 0.1729, "step": 123705 }, { "epoch": 5.77, - "learning_rate": 8.494679105527169e-06, - "loss": 0.0698, + "learning_rate": 1.8512630084408593e-05, + "loss": 0.0668, "step": 123710 }, { "epoch": 5.77, - "learning_rate": 8.494210304252029e-06, - "loss": 0.0179, + "learning_rate": 1.8512162014572573e-05, + "loss": 0.0344, "step": 123715 }, { "epoch": 5.77, - "learning_rate": 8.493741502976889e-06, - "loss": 0.011, + "learning_rate": 1.8511693944736553e-05, + "loss": 0.0455, "step": 123720 }, { "epoch": 5.77, - "learning_rate": 8.493272701701749e-06, - "loss": 0.0345, + "learning_rate": 1.8511225874900536e-05, + "loss": 0.0349, "step": 123725 }, { "epoch": 5.77, - "learning_rate": 8.49280390042661e-06, - "loss": 0.0408, + "learning_rate": 1.8510757805064516e-05, + "loss": 0.0878, "step": 123730 }, { "epoch": 5.77, - "learning_rate": 8.49233509915147e-06, - "loss": 0.0482, + "learning_rate": 1.8510289735228496e-05, + "loss": 0.0363, "step": 123735 }, { "epoch": 5.77, - "learning_rate": 8.491866297876332e-06, - "loss": 0.0366, + "learning_rate": 1.8509821665392476e-05, + "loss": 0.0433, "step": 123740 }, { "epoch": 5.77, - "learning_rate": 8.491397496601192e-06, - "loss": 0.1102, + "learning_rate": 1.850935359555646e-05, + "loss": 0.0294, "step": 123745 }, { "epoch": 5.77, - "learning_rate": 8.490928695326052e-06, - "loss": 0.1, + "learning_rate": 1.850888552572044e-05, + "loss": 0.0924, "step": 123750 }, { "epoch": 5.77, - "learning_rate": 8.490459894050912e-06, - "loss": 0.074, + "learning_rate": 1.850841745588442e-05, + "loss": 0.2012, "step": 123755 }, { "epoch": 5.77, - "learning_rate": 8.489991092775773e-06, - "loss": 0.0596, + "learning_rate": 1.8507949386048398e-05, + "loss": 0.0995, "step": 123760 }, { "epoch": 5.78, - "learning_rate": 8.489522291500633e-06, - "loss": 0.0123, + "learning_rate": 1.850748131621238e-05, + "loss": 0.0027, "step": 123765 }, { "epoch": 5.78, - "learning_rate": 8.489053490225495e-06, - "loss": 0.0091, + "learning_rate": 1.850701324637636e-05, + "loss": 0.0313, "step": 123770 }, { "epoch": 5.78, - "learning_rate": 8.488584688950355e-06, - "loss": 0.0541, + "learning_rate": 1.8506545176540338e-05, + "loss": 0.0105, "step": 123775 }, { "epoch": 5.78, - "learning_rate": 8.488115887675216e-06, - "loss": 0.017, + "learning_rate": 1.850607710670432e-05, + "loss": 0.0298, "step": 123780 }, { "epoch": 5.78, - "learning_rate": 8.487647086400076e-06, - "loss": 0.0198, + "learning_rate": 1.85056090368683e-05, + "loss": 0.0244, "step": 123785 }, { "epoch": 5.78, - "learning_rate": 8.487178285124936e-06, - "loss": 0.0595, + "learning_rate": 1.850514096703228e-05, + "loss": 0.1045, "step": 123790 }, { "epoch": 5.78, - "learning_rate": 8.486709483849796e-06, - "loss": 0.0827, + "learning_rate": 1.850467289719626e-05, + "loss": 0.0363, "step": 123795 }, { "epoch": 5.78, - "learning_rate": 8.486240682574656e-06, - "loss": 0.2317, + "learning_rate": 1.8504204827360244e-05, + "loss": 0.1007, "step": 123800 }, { "epoch": 5.78, - "learning_rate": 8.485771881299518e-06, - "loss": 0.2097, + "learning_rate": 1.8503736757524223e-05, + "loss": 0.2595, "step": 123805 }, { "epoch": 5.78, - "learning_rate": 8.48530308002438e-06, - "loss": 0.1219, + "learning_rate": 1.8503268687688203e-05, + "loss": 0.0723, "step": 123810 }, { "epoch": 5.78, - "learning_rate": 8.48483427874924e-06, - "loss": 0.0106, + "learning_rate": 1.8502800617852183e-05, + "loss": 0.0194, "step": 123815 }, { "epoch": 5.78, - "learning_rate": 8.4843654774741e-06, - "loss": 0.021, + "learning_rate": 1.8502332548016166e-05, + "loss": 0.0293, "step": 123820 }, { "epoch": 5.78, - "learning_rate": 8.483896676198959e-06, - "loss": 0.0448, + "learning_rate": 1.8501864478180146e-05, + "loss": 0.0406, "step": 123825 }, { "epoch": 5.78, - "learning_rate": 8.48342787492382e-06, - "loss": 0.0432, + "learning_rate": 1.8501396408344126e-05, + "loss": 0.0474, "step": 123830 }, { "epoch": 5.78, - "learning_rate": 8.48295907364868e-06, - "loss": 0.0725, + "learning_rate": 1.8500928338508106e-05, + "loss": 0.0678, "step": 123835 }, { "epoch": 5.78, - "learning_rate": 8.48249027237354e-06, - "loss": 0.0479, + "learning_rate": 1.8500460268672086e-05, + "loss": 0.0932, "step": 123840 }, { "epoch": 5.78, - "learning_rate": 8.482021471098402e-06, - "loss": 0.0999, + "learning_rate": 1.8499992198836065e-05, + "loss": 0.2236, "step": 123845 }, { "epoch": 5.78, - "learning_rate": 8.481552669823264e-06, - "loss": 0.0717, + "learning_rate": 1.8499524129000045e-05, + "loss": 0.1174, "step": 123850 }, { "epoch": 5.78, - "learning_rate": 8.481083868548124e-06, - "loss": 0.3282, + "learning_rate": 1.849905605916403e-05, + "loss": 0.1754, "step": 123855 }, { "epoch": 5.78, - "learning_rate": 8.480615067272984e-06, - "loss": 0.0829, + "learning_rate": 1.8498587989328008e-05, + "loss": 0.0764, "step": 123860 }, { "epoch": 5.78, - "learning_rate": 8.480146265997844e-06, - "loss": 0.0291, + "learning_rate": 1.8498119919491988e-05, + "loss": 0.0114, "step": 123865 }, { "epoch": 5.78, - "learning_rate": 8.479677464722704e-06, - "loss": 0.0654, + "learning_rate": 1.8497651849655968e-05, + "loss": 0.0191, "step": 123870 }, { "epoch": 5.78, - "learning_rate": 8.479208663447565e-06, - "loss": 0.0599, + "learning_rate": 1.849718377981995e-05, + "loss": 0.0412, "step": 123875 }, { "epoch": 5.78, - "learning_rate": 8.478739862172425e-06, - "loss": 0.0674, + "learning_rate": 1.849671570998393e-05, + "loss": 0.0528, "step": 123880 }, { "epoch": 5.78, - "learning_rate": 8.478271060897287e-06, - "loss": 0.064, + "learning_rate": 1.849624764014791e-05, + "loss": 0.0936, "step": 123885 }, { "epoch": 5.78, - "learning_rate": 8.477802259622147e-06, - "loss": 0.0615, + "learning_rate": 1.8495779570311894e-05, + "loss": 0.0271, "step": 123890 }, { "epoch": 5.78, - "learning_rate": 8.477333458347008e-06, - "loss": 0.1214, + "learning_rate": 1.8495311500475874e-05, + "loss": 0.1079, "step": 123895 }, { "epoch": 5.78, - "learning_rate": 8.476864657071868e-06, - "loss": 0.0812, + "learning_rate": 1.849484343063985e-05, + "loss": 0.1662, "step": 123900 }, { "epoch": 5.78, - "learning_rate": 8.476395855796728e-06, - "loss": 0.2649, + "learning_rate": 1.849437536080383e-05, + "loss": 0.2225, "step": 123905 }, { "epoch": 5.78, - "learning_rate": 8.475927054521588e-06, - "loss": 0.1143, + "learning_rate": 1.8493907290967813e-05, + "loss": 0.1033, "step": 123910 }, { "epoch": 5.78, - "learning_rate": 8.47545825324645e-06, - "loss": 0.0236, + "learning_rate": 1.8493439221131793e-05, + "loss": 0.0053, "step": 123915 }, { "epoch": 5.78, - "learning_rate": 8.47498945197131e-06, - "loss": 0.0138, + "learning_rate": 1.8492971151295773e-05, + "loss": 0.0057, "step": 123920 }, { "epoch": 5.78, - "learning_rate": 8.474520650696171e-06, - "loss": 0.0114, + "learning_rate": 1.8492503081459753e-05, + "loss": 0.0286, "step": 123925 }, { "epoch": 5.78, - "learning_rate": 8.474051849421031e-06, - "loss": 0.0514, + "learning_rate": 1.8492035011623736e-05, + "loss": 0.0152, "step": 123930 }, { "epoch": 5.78, - "learning_rate": 8.473583048145891e-06, - "loss": 0.0268, + "learning_rate": 1.8491566941787716e-05, + "loss": 0.0931, "step": 123935 }, { "epoch": 5.78, - "learning_rate": 8.473114246870751e-06, - "loss": 0.0785, + "learning_rate": 1.8491098871951695e-05, + "loss": 0.085, "step": 123940 }, { "epoch": 5.78, - "learning_rate": 8.472645445595613e-06, - "loss": 0.054, + "learning_rate": 1.8490630802115675e-05, + "loss": 0.0808, "step": 123945 }, { "epoch": 5.78, - "learning_rate": 8.472176644320473e-06, - "loss": 0.1064, + "learning_rate": 1.849016273227966e-05, + "loss": 0.1596, "step": 123950 }, { "epoch": 5.78, - "learning_rate": 8.471707843045334e-06, - "loss": 0.2431, + "learning_rate": 1.8489694662443638e-05, + "loss": 0.1738, "step": 123955 }, { "epoch": 5.78, - "learning_rate": 8.471239041770194e-06, - "loss": 0.0626, + "learning_rate": 1.8489226592607618e-05, + "loss": 0.1302, "step": 123960 }, { "epoch": 5.78, - "learning_rate": 8.470770240495056e-06, - "loss": 0.096, + "learning_rate": 1.8488758522771598e-05, + "loss": 0.0056, "step": 123965 }, { "epoch": 5.78, - "learning_rate": 8.470301439219916e-06, - "loss": 0.0356, + "learning_rate": 1.8488290452935578e-05, + "loss": 0.01, "step": 123970 }, { "epoch": 5.78, - "learning_rate": 8.469832637944776e-06, - "loss": 0.0219, + "learning_rate": 1.8487822383099558e-05, + "loss": 0.0613, "step": 123975 }, { "epoch": 5.79, - "learning_rate": 8.469363836669636e-06, - "loss": 0.1034, + "learning_rate": 1.8487354313263537e-05, + "loss": 0.0388, "step": 123980 }, { "epoch": 5.79, - "learning_rate": 8.468895035394497e-06, - "loss": 0.0921, + "learning_rate": 1.848688624342752e-05, + "loss": 0.053, "step": 123985 }, { "epoch": 5.79, - "learning_rate": 8.468426234119357e-06, - "loss": 0.069, + "learning_rate": 1.84864181735915e-05, + "loss": 0.1059, "step": 123990 }, { "epoch": 5.79, - "learning_rate": 8.467957432844219e-06, - "loss": 0.0574, + "learning_rate": 1.848595010375548e-05, + "loss": 0.0968, "step": 123995 }, { "epoch": 5.79, - "learning_rate": 8.467488631569079e-06, - "loss": 0.0851, + "learning_rate": 1.848548203391946e-05, + "loss": 0.0808, "step": 124000 }, { "epoch": 5.79, - "learning_rate": 8.467019830293939e-06, - "loss": 0.3676, + "learning_rate": 1.8485013964083443e-05, + "loss": 0.2192, "step": 124005 }, { "epoch": 5.79, - "learning_rate": 8.466551029018799e-06, - "loss": 0.1108, + "learning_rate": 1.8484545894247423e-05, + "loss": 0.1343, "step": 124010 }, { "epoch": 5.79, - "learning_rate": 8.46608222774366e-06, - "loss": 0.0074, + "learning_rate": 1.8484077824411403e-05, + "loss": 0.008, "step": 124015 }, { "epoch": 5.79, - "learning_rate": 8.46561342646852e-06, - "loss": 0.0327, + "learning_rate": 1.8483609754575386e-05, + "loss": 0.0096, "step": 124020 }, { "epoch": 5.79, - "learning_rate": 8.465144625193382e-06, - "loss": 0.0563, + "learning_rate": 1.8483141684739362e-05, + "loss": 0.0604, "step": 124025 }, { "epoch": 5.79, - "learning_rate": 8.464675823918242e-06, - "loss": 0.0297, + "learning_rate": 1.8482673614903342e-05, + "loss": 0.0669, "step": 124030 }, { "epoch": 5.79, - "learning_rate": 8.464207022643103e-06, - "loss": 0.0207, + "learning_rate": 1.8482205545067322e-05, + "loss": 0.0455, "step": 124035 }, { "epoch": 5.79, - "learning_rate": 8.463738221367963e-06, - "loss": 0.0855, + "learning_rate": 1.8481737475231305e-05, + "loss": 0.0351, "step": 124040 }, { "epoch": 5.79, - "learning_rate": 8.463269420092823e-06, - "loss": 0.0463, + "learning_rate": 1.8481269405395285e-05, + "loss": 0.1712, "step": 124045 }, { "epoch": 5.79, - "learning_rate": 8.462800618817683e-06, - "loss": 0.1052, + "learning_rate": 1.8480801335559265e-05, + "loss": 0.1637, "step": 124050 }, { "epoch": 5.79, - "learning_rate": 8.462331817542543e-06, - "loss": 0.1563, + "learning_rate": 1.8480333265723245e-05, + "loss": 0.2856, "step": 124055 }, { "epoch": 5.79, - "learning_rate": 8.461863016267405e-06, - "loss": 0.0777, + "learning_rate": 1.8479865195887228e-05, + "loss": 0.0855, "step": 124060 }, { "epoch": 5.79, - "learning_rate": 8.461394214992266e-06, - "loss": 0.0772, + "learning_rate": 1.8479397126051208e-05, + "loss": 0.0169, "step": 124065 }, { "epoch": 5.79, - "learning_rate": 8.460925413717126e-06, - "loss": 0.0339, + "learning_rate": 1.8478929056215188e-05, + "loss": 0.018, "step": 124070 }, { "epoch": 5.79, - "learning_rate": 8.460456612441986e-06, - "loss": 0.0493, + "learning_rate": 1.847846098637917e-05, + "loss": 0.0172, "step": 124075 }, { "epoch": 5.79, - "learning_rate": 8.459987811166846e-06, - "loss": 0.0518, + "learning_rate": 1.847799291654315e-05, + "loss": 0.0304, "step": 124080 }, { "epoch": 5.79, - "learning_rate": 8.459519009891708e-06, - "loss": 0.0192, + "learning_rate": 1.847752484670713e-05, + "loss": 0.0306, "step": 124085 }, { "epoch": 5.79, - "learning_rate": 8.459050208616568e-06, - "loss": 0.0732, + "learning_rate": 1.8477056776871107e-05, + "loss": 0.0911, "step": 124090 }, { "epoch": 5.79, - "learning_rate": 8.458581407341428e-06, - "loss": 0.1152, + "learning_rate": 1.847658870703509e-05, + "loss": 0.098, "step": 124095 }, { "epoch": 5.79, - "learning_rate": 8.45811260606629e-06, - "loss": 0.1535, + "learning_rate": 1.847612063719907e-05, + "loss": 0.0948, "step": 124100 }, { "epoch": 5.79, - "learning_rate": 8.457643804791151e-06, - "loss": 0.1257, + "learning_rate": 1.847565256736305e-05, + "loss": 0.1317, "step": 124105 }, { "epoch": 5.79, - "learning_rate": 8.457175003516011e-06, - "loss": 0.0627, + "learning_rate": 1.847518449752703e-05, + "loss": 0.0527, "step": 124110 }, { "epoch": 5.79, - "learning_rate": 8.456706202240871e-06, - "loss": 0.023, + "learning_rate": 1.8474716427691013e-05, + "loss": 0.0433, "step": 124115 }, { "epoch": 5.79, - "learning_rate": 8.45623740096573e-06, - "loss": 0.042, + "learning_rate": 1.8474248357854993e-05, + "loss": 0.0121, "step": 124120 }, { "epoch": 5.79, - "learning_rate": 8.45576859969059e-06, - "loss": 0.0472, + "learning_rate": 1.8473780288018972e-05, + "loss": 0.0463, "step": 124125 }, { "epoch": 5.79, - "learning_rate": 8.455299798415452e-06, - "loss": 0.0329, + "learning_rate": 1.8473312218182956e-05, + "loss": 0.0412, "step": 124130 }, { "epoch": 5.79, - "learning_rate": 8.454830997140312e-06, - "loss": 0.0389, + "learning_rate": 1.8472844148346935e-05, + "loss": 0.0626, "step": 124135 }, { "epoch": 5.79, - "learning_rate": 8.454362195865174e-06, - "loss": 0.0648, + "learning_rate": 1.8472376078510915e-05, + "loss": 0.0723, "step": 124140 }, { "epoch": 5.79, - "learning_rate": 8.453893394590034e-06, - "loss": 0.1288, + "learning_rate": 1.8471908008674895e-05, + "loss": 0.0833, "step": 124145 }, { "epoch": 5.79, - "learning_rate": 8.453424593314894e-06, - "loss": 0.1217, + "learning_rate": 1.8471439938838878e-05, + "loss": 0.1349, "step": 124150 }, { "epoch": 5.79, - "learning_rate": 8.452955792039755e-06, - "loss": 0.2766, + "learning_rate": 1.8470971869002855e-05, + "loss": 0.1329, "step": 124155 }, { "epoch": 5.79, - "learning_rate": 8.452486990764615e-06, - "loss": 0.0981, + "learning_rate": 1.8470503799166835e-05, + "loss": 0.0802, "step": 124160 }, { "epoch": 5.79, - "learning_rate": 8.452018189489475e-06, - "loss": 0.0214, + "learning_rate": 1.8470035729330814e-05, + "loss": 0.0098, "step": 124165 }, { "epoch": 5.79, - "learning_rate": 8.451549388214337e-06, - "loss": 0.0051, + "learning_rate": 1.8469567659494798e-05, + "loss": 0.0539, "step": 124170 }, { "epoch": 5.79, - "learning_rate": 8.451080586939197e-06, - "loss": 0.0643, + "learning_rate": 1.8469099589658777e-05, + "loss": 0.0598, "step": 124175 }, { "epoch": 5.79, - "learning_rate": 8.450611785664058e-06, - "loss": 0.052, + "learning_rate": 1.8468631519822757e-05, + "loss": 0.061, "step": 124180 }, { "epoch": 5.79, - "learning_rate": 8.450142984388918e-06, - "loss": 0.1016, + "learning_rate": 1.8468163449986737e-05, + "loss": 0.0248, "step": 124185 }, { "epoch": 5.79, - "learning_rate": 8.449674183113778e-06, - "loss": 0.0386, + "learning_rate": 1.846769538015072e-05, + "loss": 0.0222, "step": 124190 }, { "epoch": 5.8, - "learning_rate": 8.449205381838638e-06, - "loss": 0.0499, + "learning_rate": 1.84672273103147e-05, + "loss": 0.1067, "step": 124195 }, { "epoch": 5.8, - "learning_rate": 8.4487365805635e-06, - "loss": 0.1406, + "learning_rate": 1.846675924047868e-05, + "loss": 0.1194, "step": 124200 }, { "epoch": 5.8, - "learning_rate": 8.44826777928836e-06, - "loss": 0.1157, + "learning_rate": 1.8466291170642663e-05, + "loss": 0.2125, "step": 124205 }, { "epoch": 5.8, - "learning_rate": 8.447798978013221e-06, - "loss": 0.0816, + "learning_rate": 1.8465823100806643e-05, + "loss": 0.1025, "step": 124210 }, { "epoch": 5.8, - "learning_rate": 8.447330176738081e-06, - "loss": 0.0176, + "learning_rate": 1.846535503097062e-05, + "loss": 0.0081, "step": 124215 }, { "epoch": 5.8, - "learning_rate": 8.446861375462943e-06, - "loss": 0.0165, + "learning_rate": 1.84648869611346e-05, + "loss": 0.0878, "step": 124220 }, { "epoch": 5.8, - "learning_rate": 8.446392574187803e-06, - "loss": 0.0424, + "learning_rate": 1.8464418891298582e-05, + "loss": 0.0446, "step": 124225 }, { "epoch": 5.8, - "learning_rate": 8.445923772912663e-06, - "loss": 0.0603, + "learning_rate": 1.8463950821462562e-05, + "loss": 0.0188, "step": 124230 }, { "epoch": 5.8, - "learning_rate": 8.445454971637523e-06, - "loss": 0.0332, + "learning_rate": 1.8463482751626542e-05, + "loss": 0.0492, "step": 124235 }, { "epoch": 5.8, - "learning_rate": 8.444986170362384e-06, - "loss": 0.0645, + "learning_rate": 1.8463014681790522e-05, + "loss": 0.0694, "step": 124240 }, { "epoch": 5.8, - "learning_rate": 8.444517369087244e-06, - "loss": 0.0886, + "learning_rate": 1.8462546611954505e-05, + "loss": 0.1425, "step": 124245 }, { "epoch": 5.8, - "learning_rate": 8.444048567812106e-06, - "loss": 0.1129, + "learning_rate": 1.8462078542118485e-05, + "loss": 0.1036, "step": 124250 }, { "epoch": 5.8, - "learning_rate": 8.443579766536966e-06, - "loss": 0.1498, + "learning_rate": 1.8461610472282465e-05, + "loss": 0.2845, "step": 124255 }, { "epoch": 5.8, - "learning_rate": 8.443110965261826e-06, - "loss": 0.1134, + "learning_rate": 1.8461142402446448e-05, + "loss": 0.112, "step": 124260 }, { "epoch": 5.8, - "learning_rate": 8.442642163986686e-06, - "loss": 0.0025, + "learning_rate": 1.8460674332610428e-05, + "loss": 0.0232, "step": 124265 }, { "epoch": 5.8, - "learning_rate": 8.442173362711547e-06, - "loss": 0.0267, + "learning_rate": 1.8460206262774407e-05, + "loss": 0.0251, "step": 124270 }, { "epoch": 5.8, - "learning_rate": 8.441704561436407e-06, - "loss": 0.0236, + "learning_rate": 1.8459738192938387e-05, + "loss": 0.0138, "step": 124275 }, { "epoch": 5.8, - "learning_rate": 8.441235760161269e-06, - "loss": 0.0144, + "learning_rate": 1.8459270123102367e-05, + "loss": 0.0409, "step": 124280 }, { "epoch": 5.8, - "learning_rate": 8.440766958886129e-06, - "loss": 0.0935, + "learning_rate": 1.8458802053266347e-05, + "loss": 0.0364, "step": 124285 }, { "epoch": 5.8, - "learning_rate": 8.44029815761099e-06, - "loss": 0.0847, + "learning_rate": 1.8458333983430327e-05, + "loss": 0.0401, "step": 124290 }, { "epoch": 5.8, - "learning_rate": 8.43982935633585e-06, - "loss": 0.1251, + "learning_rate": 1.8457865913594307e-05, + "loss": 0.0962, "step": 124295 }, { "epoch": 5.8, - "learning_rate": 8.43936055506071e-06, - "loss": 0.0507, + "learning_rate": 1.845739784375829e-05, + "loss": 0.0593, "step": 124300 }, { "epoch": 5.8, - "learning_rate": 8.43889175378557e-06, - "loss": 0.1862, + "learning_rate": 1.845692977392227e-05, + "loss": 0.1393, "step": 124305 }, { "epoch": 5.8, - "learning_rate": 8.43842295251043e-06, - "loss": 0.0848, + "learning_rate": 1.845646170408625e-05, + "loss": 0.0496, "step": 124310 }, { "epoch": 5.8, - "learning_rate": 8.437954151235292e-06, - "loss": 0.0634, + "learning_rate": 1.8455993634250233e-05, + "loss": 0.0153, "step": 124315 }, { "epoch": 5.8, - "learning_rate": 8.437485349960154e-06, - "loss": 0.038, + "learning_rate": 1.8455525564414212e-05, + "loss": 0.0233, "step": 124320 }, { "epoch": 5.8, - "learning_rate": 8.437016548685014e-06, - "loss": 0.0217, + "learning_rate": 1.8455057494578192e-05, + "loss": 0.0299, "step": 124325 }, { "epoch": 5.8, - "learning_rate": 8.436547747409873e-06, - "loss": 0.027, + "learning_rate": 1.8454589424742172e-05, + "loss": 0.0429, "step": 124330 }, { "epoch": 5.8, - "learning_rate": 8.436078946134733e-06, - "loss": 0.0634, + "learning_rate": 1.8454121354906155e-05, + "loss": 0.056, "step": 124335 }, { "epoch": 5.8, - "learning_rate": 8.435610144859595e-06, - "loss": 0.0505, + "learning_rate": 1.8453653285070135e-05, + "loss": 0.1052, "step": 124340 }, { "epoch": 5.8, - "learning_rate": 8.435141343584455e-06, - "loss": 0.1095, + "learning_rate": 1.845318521523411e-05, + "loss": 0.0932, "step": 124345 }, { "epoch": 5.8, - "learning_rate": 8.434672542309315e-06, - "loss": 0.1682, + "learning_rate": 1.845271714539809e-05, + "loss": 0.1004, "step": 124350 }, { "epoch": 5.8, - "learning_rate": 8.434203741034176e-06, - "loss": 0.3648, + "learning_rate": 1.8452249075562074e-05, + "loss": 0.2114, "step": 124355 }, { "epoch": 5.8, - "learning_rate": 8.433734939759038e-06, - "loss": 0.0698, + "learning_rate": 1.8451781005726054e-05, + "loss": 0.0975, "step": 124360 }, { "epoch": 5.8, - "learning_rate": 8.433266138483898e-06, - "loss": 0.0297, + "learning_rate": 1.8451312935890034e-05, + "loss": 0.0321, "step": 124365 }, { "epoch": 5.8, - "learning_rate": 8.432797337208758e-06, - "loss": 0.0245, + "learning_rate": 1.8450844866054014e-05, + "loss": 0.0213, "step": 124370 }, { "epoch": 5.8, - "learning_rate": 8.432328535933618e-06, - "loss": 0.036, + "learning_rate": 1.8450376796217997e-05, + "loss": 0.0408, "step": 124375 }, { "epoch": 5.8, - "learning_rate": 8.431859734658478e-06, - "loss": 0.0795, + "learning_rate": 1.8449908726381977e-05, + "loss": 0.0454, "step": 124380 }, { "epoch": 5.8, - "learning_rate": 8.43139093338334e-06, - "loss": 0.0274, + "learning_rate": 1.8449440656545957e-05, + "loss": 0.0726, "step": 124385 }, { "epoch": 5.8, - "learning_rate": 8.4309221321082e-06, - "loss": 0.0571, + "learning_rate": 1.844897258670994e-05, + "loss": 0.1056, "step": 124390 }, { "epoch": 5.8, - "learning_rate": 8.430453330833061e-06, - "loss": 0.083, + "learning_rate": 1.844850451687392e-05, + "loss": 0.0728, "step": 124395 }, { "epoch": 5.8, - "learning_rate": 8.429984529557921e-06, - "loss": 0.1271, + "learning_rate": 1.84480364470379e-05, + "loss": 0.0761, "step": 124400 }, { "epoch": 5.8, - "learning_rate": 8.429515728282781e-06, - "loss": 0.2163, + "learning_rate": 1.8447568377201876e-05, + "loss": 0.2671, "step": 124405 }, { "epoch": 5.81, - "learning_rate": 8.429046927007643e-06, - "loss": 0.0793, + "learning_rate": 1.844710030736586e-05, + "loss": 0.0713, "step": 124410 }, { "epoch": 5.81, - "learning_rate": 8.428578125732502e-06, - "loss": 0.0062, + "learning_rate": 1.844663223752984e-05, + "loss": 0.0325, "step": 124415 }, { "epoch": 5.81, - "learning_rate": 8.428109324457362e-06, - "loss": 0.014, + "learning_rate": 1.844616416769382e-05, + "loss": 0.005, "step": 124420 }, { "epoch": 5.81, - "learning_rate": 8.427640523182224e-06, - "loss": 0.0194, + "learning_rate": 1.84456960978578e-05, + "loss": 0.0084, "step": 124425 }, { "epoch": 5.81, - "learning_rate": 8.427171721907084e-06, - "loss": 0.0742, + "learning_rate": 1.8445228028021782e-05, + "loss": 0.0152, "step": 124430 }, { "epoch": 5.81, - "learning_rate": 8.426702920631946e-06, - "loss": 0.0492, + "learning_rate": 1.8444759958185762e-05, + "loss": 0.0697, "step": 124435 }, { "epoch": 5.81, - "learning_rate": 8.426234119356806e-06, - "loss": 0.146, + "learning_rate": 1.844429188834974e-05, + "loss": 0.0717, "step": 124440 }, { "epoch": 5.81, - "learning_rate": 8.425765318081665e-06, - "loss": 0.0917, + "learning_rate": 1.8443823818513725e-05, + "loss": 0.0884, "step": 124445 }, { "epoch": 5.81, - "learning_rate": 8.425296516806525e-06, - "loss": 0.1578, + "learning_rate": 1.8443355748677705e-05, + "loss": 0.0604, "step": 124450 }, { "epoch": 5.81, - "learning_rate": 8.424827715531387e-06, - "loss": 0.1991, + "learning_rate": 1.8442887678841684e-05, + "loss": 0.3357, "step": 124455 }, { "epoch": 5.81, - "learning_rate": 8.424358914256247e-06, - "loss": 0.1021, + "learning_rate": 1.8442419609005664e-05, + "loss": 0.0921, "step": 124460 }, { "epoch": 5.81, - "learning_rate": 8.423890112981109e-06, - "loss": 0.0134, + "learning_rate": 1.8441951539169647e-05, + "loss": 0.006, "step": 124465 }, { "epoch": 5.81, - "learning_rate": 8.423421311705969e-06, - "loss": 0.0252, + "learning_rate": 1.8441483469333624e-05, + "loss": 0.0328, "step": 124470 }, { "epoch": 5.81, - "learning_rate": 8.422952510430828e-06, - "loss": 0.0111, + "learning_rate": 1.8441015399497604e-05, + "loss": 0.017, "step": 124475 }, { "epoch": 5.81, - "learning_rate": 8.42248370915569e-06, - "loss": 0.0084, + "learning_rate": 1.8440547329661583e-05, + "loss": 0.0485, "step": 124480 }, { "epoch": 5.81, - "learning_rate": 8.42201490788055e-06, - "loss": 0.0674, + "learning_rate": 1.8440079259825567e-05, + "loss": 0.0658, "step": 124485 }, { "epoch": 5.81, - "learning_rate": 8.42154610660541e-06, - "loss": 0.065, + "learning_rate": 1.8439611189989547e-05, + "loss": 0.0889, "step": 124490 }, { "epoch": 5.81, - "learning_rate": 8.421077305330272e-06, - "loss": 0.088, + "learning_rate": 1.8439143120153526e-05, + "loss": 0.1171, "step": 124495 }, { "epoch": 5.81, - "learning_rate": 8.420608504055132e-06, - "loss": 0.0941, + "learning_rate": 1.843867505031751e-05, + "loss": 0.1769, "step": 124500 }, { "epoch": 5.81, - "learning_rate": 8.420139702779993e-06, - "loss": 0.3606, + "learning_rate": 1.843820698048149e-05, + "loss": 0.2646, "step": 124505 }, { "epoch": 5.81, - "learning_rate": 8.419670901504853e-06, - "loss": 0.0741, + "learning_rate": 1.843773891064547e-05, + "loss": 0.1103, "step": 124510 }, { "epoch": 5.81, - "learning_rate": 8.419202100229713e-06, - "loss": 0.0059, + "learning_rate": 1.843727084080945e-05, + "loss": 0.0288, "step": 124515 }, { "epoch": 5.81, - "learning_rate": 8.418733298954573e-06, - "loss": 0.0283, + "learning_rate": 1.8436802770973432e-05, + "loss": 0.0011, "step": 124520 }, { "epoch": 5.81, - "learning_rate": 8.418264497679435e-06, - "loss": 0.0174, + "learning_rate": 1.8436334701137412e-05, + "loss": 0.0481, "step": 124525 }, { "epoch": 5.81, - "learning_rate": 8.417795696404294e-06, - "loss": 0.0772, + "learning_rate": 1.8435866631301392e-05, + "loss": 0.0341, "step": 124530 }, { "epoch": 5.81, - "learning_rate": 8.417326895129156e-06, - "loss": 0.0901, + "learning_rate": 1.8435398561465368e-05, + "loss": 0.056, "step": 124535 }, { "epoch": 5.81, - "learning_rate": 8.416858093854016e-06, - "loss": 0.0897, + "learning_rate": 1.843493049162935e-05, + "loss": 0.0733, "step": 124540 }, { "epoch": 5.81, - "learning_rate": 8.416389292578878e-06, - "loss": 0.1356, + "learning_rate": 1.843446242179333e-05, + "loss": 0.0938, "step": 124545 }, { "epoch": 5.81, - "learning_rate": 8.415920491303738e-06, - "loss": 0.2184, + "learning_rate": 1.843399435195731e-05, + "loss": 0.1263, "step": 124550 }, { "epoch": 5.81, - "learning_rate": 8.415451690028598e-06, - "loss": 0.2069, + "learning_rate": 1.843352628212129e-05, + "loss": 0.2487, "step": 124555 }, { "epoch": 5.81, - "learning_rate": 8.414982888753457e-06, - "loss": 0.0919, + "learning_rate": 1.8433058212285274e-05, + "loss": 0.0991, "step": 124560 }, { "epoch": 5.81, - "learning_rate": 8.414514087478317e-06, - "loss": 0.0127, + "learning_rate": 1.8432590142449254e-05, + "loss": 0.0139, "step": 124565 }, { "epoch": 5.81, - "learning_rate": 8.414045286203179e-06, - "loss": 0.0195, + "learning_rate": 1.8432122072613234e-05, + "loss": 0.0296, "step": 124570 }, { "epoch": 5.81, - "learning_rate": 8.41357648492804e-06, - "loss": 0.0732, + "learning_rate": 1.8431654002777217e-05, + "loss": 0.0266, "step": 124575 }, { "epoch": 5.81, - "learning_rate": 8.4131076836529e-06, - "loss": 0.0759, + "learning_rate": 1.8431185932941197e-05, + "loss": 0.0451, "step": 124580 }, { "epoch": 5.81, - "learning_rate": 8.41263888237776e-06, - "loss": 0.0384, + "learning_rate": 1.8430717863105177e-05, + "loss": 0.08, "step": 124585 }, { "epoch": 5.81, - "learning_rate": 8.41217008110262e-06, - "loss": 0.0402, + "learning_rate": 1.8430249793269156e-05, + "loss": 0.0643, "step": 124590 }, { "epoch": 5.81, - "learning_rate": 8.411701279827482e-06, - "loss": 0.037, + "learning_rate": 1.8429781723433136e-05, + "loss": 0.0543, "step": 124595 }, { "epoch": 5.81, - "learning_rate": 8.411232478552342e-06, - "loss": 0.0956, + "learning_rate": 1.8429313653597116e-05, + "loss": 0.1214, "step": 124600 }, { "epoch": 5.81, - "learning_rate": 8.410763677277202e-06, - "loss": 0.3478, + "learning_rate": 1.8428845583761096e-05, + "loss": 0.2273, "step": 124605 }, { "epoch": 5.81, - "learning_rate": 8.410294876002064e-06, - "loss": 0.0923, + "learning_rate": 1.8428377513925076e-05, + "loss": 0.0808, "step": 124610 }, { "epoch": 5.81, - "learning_rate": 8.409826074726925e-06, - "loss": 0.0404, + "learning_rate": 1.842790944408906e-05, + "loss": 0.0238, "step": 124615 }, { "epoch": 5.81, - "learning_rate": 8.409357273451785e-06, - "loss": 0.0235, + "learning_rate": 1.842744137425304e-05, + "loss": 0.0224, "step": 124620 }, { "epoch": 5.82, - "learning_rate": 8.408888472176645e-06, - "loss": 0.0379, + "learning_rate": 1.842697330441702e-05, + "loss": 0.0435, "step": 124625 }, { "epoch": 5.82, - "learning_rate": 8.408419670901505e-06, - "loss": 0.0537, + "learning_rate": 1.8426505234581002e-05, + "loss": 0.036, "step": 124630 }, { "epoch": 5.82, - "learning_rate": 8.407950869626365e-06, - "loss": 0.0366, + "learning_rate": 1.842603716474498e-05, + "loss": 0.055, "step": 124635 }, { "epoch": 5.82, - "learning_rate": 8.407482068351227e-06, - "loss": 0.0861, + "learning_rate": 1.842556909490896e-05, + "loss": 0.0533, "step": 124640 }, { "epoch": 5.82, - "learning_rate": 8.407013267076087e-06, - "loss": 0.0353, + "learning_rate": 1.842510102507294e-05, + "loss": 0.0691, "step": 124645 }, { "epoch": 5.82, - "learning_rate": 8.406544465800948e-06, - "loss": 0.1132, + "learning_rate": 1.8424632955236924e-05, + "loss": 0.1214, "step": 124650 }, { "epoch": 5.82, - "learning_rate": 8.406075664525808e-06, - "loss": 0.156, + "learning_rate": 1.8424164885400904e-05, + "loss": 0.1974, "step": 124655 }, { "epoch": 5.82, - "learning_rate": 8.405606863250668e-06, - "loss": 0.0548, + "learning_rate": 1.842369681556488e-05, + "loss": 0.0815, "step": 124660 }, { "epoch": 5.82, - "learning_rate": 8.40513806197553e-06, - "loss": 0.0267, + "learning_rate": 1.842322874572886e-05, + "loss": 0.016, "step": 124665 }, { "epoch": 5.82, - "learning_rate": 8.40466926070039e-06, - "loss": 0.0375, + "learning_rate": 1.8422760675892844e-05, + "loss": 0.013, "step": 124670 }, { "epoch": 5.82, - "learning_rate": 8.40420045942525e-06, - "loss": 0.0241, + "learning_rate": 1.8422292606056823e-05, + "loss": 0.0555, "step": 124675 }, { "epoch": 5.82, - "learning_rate": 8.403731658150111e-06, - "loss": 0.0508, + "learning_rate": 1.8421824536220803e-05, + "loss": 0.0605, "step": 124680 }, { "epoch": 5.82, - "learning_rate": 8.403262856874971e-06, - "loss": 0.0608, + "learning_rate": 1.8421356466384787e-05, + "loss": 0.0549, "step": 124685 }, { "epoch": 5.82, - "learning_rate": 8.402794055599833e-06, - "loss": 0.0596, + "learning_rate": 1.8420888396548766e-05, + "loss": 0.0829, "step": 124690 }, { "epoch": 5.82, - "learning_rate": 8.402325254324693e-06, - "loss": 0.1108, + "learning_rate": 1.8420420326712746e-05, + "loss": 0.0962, "step": 124695 }, { "epoch": 5.82, - "learning_rate": 8.401856453049553e-06, - "loss": 0.0804, + "learning_rate": 1.8419952256876726e-05, + "loss": 0.0668, "step": 124700 }, { "epoch": 5.82, - "learning_rate": 8.401387651774413e-06, - "loss": 0.2896, + "learning_rate": 1.841948418704071e-05, + "loss": 0.4603, "step": 124705 }, { "epoch": 5.82, - "learning_rate": 8.400918850499274e-06, - "loss": 0.0738, + "learning_rate": 1.841901611720469e-05, + "loss": 0.0686, "step": 124710 }, { "epoch": 5.82, - "learning_rate": 8.400450049224134e-06, - "loss": 0.0218, + "learning_rate": 1.841854804736867e-05, + "loss": 0.0179, "step": 124715 }, { "epoch": 5.82, - "learning_rate": 8.399981247948996e-06, - "loss": 0.0206, + "learning_rate": 1.841807997753265e-05, + "loss": 0.0199, "step": 124720 }, { "epoch": 5.82, - "learning_rate": 8.399512446673856e-06, - "loss": 0.0441, + "learning_rate": 1.841761190769663e-05, + "loss": 0.0097, "step": 124725 }, { "epoch": 5.82, - "learning_rate": 8.399043645398716e-06, - "loss": 0.0554, + "learning_rate": 1.8417143837860608e-05, + "loss": 0.0219, "step": 124730 }, { "epoch": 5.82, - "learning_rate": 8.398574844123577e-06, - "loss": 0.0429, + "learning_rate": 1.8416675768024588e-05, + "loss": 0.0887, "step": 124735 }, { "epoch": 5.82, - "learning_rate": 8.398106042848437e-06, - "loss": 0.0379, + "learning_rate": 1.841620769818857e-05, + "loss": 0.0979, "step": 124740 }, { "epoch": 5.82, - "learning_rate": 8.397637241573297e-06, - "loss": 0.0836, + "learning_rate": 1.841573962835255e-05, + "loss": 0.0893, "step": 124745 }, { "epoch": 5.82, - "learning_rate": 8.397168440298159e-06, - "loss": 0.0578, + "learning_rate": 1.841527155851653e-05, + "loss": 0.1399, "step": 124750 }, { "epoch": 5.82, - "learning_rate": 8.396699639023019e-06, - "loss": 0.1804, + "learning_rate": 1.841480348868051e-05, + "loss": 0.3298, "step": 124755 }, { "epoch": 5.82, - "learning_rate": 8.39623083774788e-06, - "loss": 0.0706, + "learning_rate": 1.8414335418844494e-05, + "loss": 0.1086, "step": 124760 }, { "epoch": 5.82, - "learning_rate": 8.39576203647274e-06, - "loss": 0.018, + "learning_rate": 1.8413867349008474e-05, + "loss": 0.0065, "step": 124765 }, { "epoch": 5.82, - "learning_rate": 8.3952932351976e-06, - "loss": 0.0182, + "learning_rate": 1.8413399279172454e-05, + "loss": 0.0211, "step": 124770 }, { "epoch": 5.82, - "learning_rate": 8.39482443392246e-06, - "loss": 0.0346, + "learning_rate": 1.8412931209336433e-05, + "loss": 0.0333, "step": 124775 }, { "epoch": 5.82, - "learning_rate": 8.394355632647322e-06, - "loss": 0.0307, + "learning_rate": 1.8412463139500417e-05, + "loss": 0.093, "step": 124780 }, { "epoch": 5.82, - "learning_rate": 8.393886831372182e-06, - "loss": 0.0991, + "learning_rate": 1.8411995069664393e-05, + "loss": 0.0533, "step": 124785 }, { "epoch": 5.82, - "learning_rate": 8.393418030097043e-06, - "loss": 0.0941, + "learning_rate": 1.8411526999828373e-05, + "loss": 0.1267, "step": 124790 }, { "epoch": 5.82, - "learning_rate": 8.392949228821903e-06, - "loss": 0.0955, + "learning_rate": 1.8411058929992353e-05, + "loss": 0.107, "step": 124795 }, { "epoch": 5.82, - "learning_rate": 8.392480427546763e-06, - "loss": 0.1468, + "learning_rate": 1.8410590860156336e-05, + "loss": 0.1252, "step": 124800 }, { "epoch": 5.82, - "learning_rate": 8.392011626271625e-06, - "loss": 0.2821, + "learning_rate": 1.8410122790320316e-05, + "loss": 0.3175, "step": 124805 }, { "epoch": 5.82, - "learning_rate": 8.391542824996485e-06, - "loss": 0.0758, + "learning_rate": 1.8409654720484296e-05, + "loss": 0.1115, "step": 124810 }, { "epoch": 5.82, - "learning_rate": 8.391074023721345e-06, - "loss": 0.0032, + "learning_rate": 1.840918665064828e-05, + "loss": 0.0203, "step": 124815 }, { "epoch": 5.82, - "learning_rate": 8.390605222446205e-06, - "loss": 0.0368, + "learning_rate": 1.840871858081226e-05, + "loss": 0.0057, "step": 124820 }, { "epoch": 5.82, - "learning_rate": 8.390136421171066e-06, - "loss": 0.0431, + "learning_rate": 1.840825051097624e-05, + "loss": 0.0236, "step": 124825 }, { "epoch": 5.82, - "learning_rate": 8.389667619895928e-06, - "loss": 0.0148, + "learning_rate": 1.8407782441140218e-05, + "loss": 0.0419, "step": 124830 }, { "epoch": 5.82, - "learning_rate": 8.389198818620788e-06, - "loss": 0.0803, + "learning_rate": 1.84073143713042e-05, + "loss": 0.0669, "step": 124835 }, { "epoch": 5.83, - "learning_rate": 8.388730017345648e-06, - "loss": 0.0619, + "learning_rate": 1.840684630146818e-05, + "loss": 0.1155, "step": 124840 }, { "epoch": 5.83, - "learning_rate": 8.388261216070508e-06, - "loss": 0.0704, + "learning_rate": 1.840637823163216e-05, + "loss": 0.0593, "step": 124845 }, { "epoch": 5.83, - "learning_rate": 8.38779241479537e-06, - "loss": 0.1401, + "learning_rate": 1.8405910161796137e-05, + "loss": 0.0668, "step": 124850 }, { "epoch": 5.83, - "learning_rate": 8.387323613520229e-06, - "loss": 0.2304, + "learning_rate": 1.840544209196012e-05, + "loss": 0.1443, "step": 124855 }, { "epoch": 5.83, - "learning_rate": 8.386854812245089e-06, - "loss": 0.0874, + "learning_rate": 1.84049740221241e-05, + "loss": 0.0716, "step": 124860 }, { "epoch": 5.83, - "learning_rate": 8.38638601096995e-06, - "loss": 0.0243, + "learning_rate": 1.840450595228808e-05, + "loss": 0.0307, "step": 124865 }, { "epoch": 5.83, - "learning_rate": 8.385917209694812e-06, - "loss": 0.0159, + "learning_rate": 1.8404037882452063e-05, + "loss": 0.0016, "step": 124870 }, { "epoch": 5.83, - "learning_rate": 8.385448408419672e-06, - "loss": 0.0326, + "learning_rate": 1.8403569812616043e-05, + "loss": 0.015, "step": 124875 }, { "epoch": 5.83, - "learning_rate": 8.384979607144532e-06, - "loss": 0.0983, + "learning_rate": 1.8403101742780023e-05, + "loss": 0.0455, "step": 124880 }, { "epoch": 5.83, - "learning_rate": 8.384510805869392e-06, - "loss": 0.176, + "learning_rate": 1.8402633672944003e-05, + "loss": 0.0312, "step": 124885 }, { "epoch": 5.83, - "learning_rate": 8.384042004594252e-06, - "loss": 0.0241, + "learning_rate": 1.8402165603107986e-05, + "loss": 0.0873, "step": 124890 }, { "epoch": 5.83, - "learning_rate": 8.383573203319114e-06, - "loss": 0.0659, + "learning_rate": 1.8401697533271966e-05, + "loss": 0.0442, "step": 124895 }, { "epoch": 5.83, - "learning_rate": 8.383104402043974e-06, - "loss": 0.1382, + "learning_rate": 1.8401229463435946e-05, + "loss": 0.136, "step": 124900 }, { "epoch": 5.83, - "learning_rate": 8.382635600768835e-06, - "loss": 0.1442, + "learning_rate": 1.8400761393599926e-05, + "loss": 0.3035, "step": 124905 }, { "epoch": 5.83, - "learning_rate": 8.382166799493695e-06, - "loss": 0.1213, + "learning_rate": 1.8400293323763905e-05, + "loss": 0.0804, "step": 124910 }, { "epoch": 5.83, - "learning_rate": 8.381697998218555e-06, - "loss": 0.0096, + "learning_rate": 1.8399825253927885e-05, + "loss": 0.0041, "step": 124915 }, { "epoch": 5.83, - "learning_rate": 8.381229196943417e-06, - "loss": 0.0234, + "learning_rate": 1.8399357184091865e-05, + "loss": 0.0427, "step": 124920 }, { "epoch": 5.83, - "learning_rate": 8.380760395668277e-06, - "loss": 0.106, + "learning_rate": 1.8398889114255848e-05, + "loss": 0.0125, "step": 124925 }, { "epoch": 5.83, - "learning_rate": 8.380291594393137e-06, - "loss": 0.0352, + "learning_rate": 1.8398421044419828e-05, + "loss": 0.0304, "step": 124930 }, { "epoch": 5.83, - "learning_rate": 8.379822793117998e-06, - "loss": 0.0604, + "learning_rate": 1.8397952974583808e-05, + "loss": 0.0313, "step": 124935 }, { "epoch": 5.83, - "learning_rate": 8.379353991842858e-06, - "loss": 0.1046, + "learning_rate": 1.8397484904747788e-05, + "loss": 0.0444, "step": 124940 }, { "epoch": 5.83, - "learning_rate": 8.37888519056772e-06, - "loss": 0.0642, + "learning_rate": 1.839701683491177e-05, + "loss": 0.0714, "step": 124945 }, { "epoch": 5.83, - "learning_rate": 8.37841638929258e-06, - "loss": 0.0893, + "learning_rate": 1.839654876507575e-05, + "loss": 0.0733, "step": 124950 }, { "epoch": 5.83, - "learning_rate": 8.37794758801744e-06, - "loss": 0.2964, + "learning_rate": 1.839608069523973e-05, + "loss": 0.2709, "step": 124955 }, { "epoch": 5.83, - "learning_rate": 8.3774787867423e-06, - "loss": 0.059, + "learning_rate": 1.839561262540371e-05, + "loss": 0.0647, "step": 124960 }, { "epoch": 5.83, - "learning_rate": 8.377009985467161e-06, - "loss": 0.0254, + "learning_rate": 1.8395144555567694e-05, + "loss": 0.0094, "step": 124965 }, { "epoch": 5.83, - "learning_rate": 8.376541184192021e-06, - "loss": 0.048, + "learning_rate": 1.8394676485731673e-05, + "loss": 0.021, "step": 124970 }, { "epoch": 5.83, - "learning_rate": 8.376072382916883e-06, - "loss": 0.0589, + "learning_rate": 1.839420841589565e-05, + "loss": 0.0334, "step": 124975 }, { "epoch": 5.83, - "learning_rate": 8.375603581641743e-06, - "loss": 0.072, + "learning_rate": 1.839374034605963e-05, + "loss": 0.0202, "step": 124980 }, { "epoch": 5.83, - "learning_rate": 8.375134780366603e-06, - "loss": 0.053, + "learning_rate": 1.8393272276223613e-05, + "loss": 0.0278, "step": 124985 }, { "epoch": 5.83, - "learning_rate": 8.374665979091464e-06, - "loss": 0.0655, + "learning_rate": 1.8392804206387593e-05, + "loss": 0.0418, "step": 124990 }, { "epoch": 5.83, - "learning_rate": 8.374197177816324e-06, - "loss": 0.064, + "learning_rate": 1.8392336136551572e-05, + "loss": 0.0699, "step": 124995 }, { "epoch": 5.83, - "learning_rate": 8.373728376541184e-06, - "loss": 0.054, + "learning_rate": 1.8391868066715556e-05, + "loss": 0.0992, "step": 125000 }, { "epoch": 5.83, - "learning_rate": 8.373259575266046e-06, - "loss": 0.1381, + "learning_rate": 1.8391399996879535e-05, + "loss": 0.3247, "step": 125005 }, { "epoch": 5.83, - "learning_rate": 8.372790773990906e-06, - "loss": 0.0847, + "learning_rate": 1.8390931927043515e-05, + "loss": 0.085, "step": 125010 }, { "epoch": 5.83, - "learning_rate": 8.372321972715767e-06, - "loss": 0.0108, + "learning_rate": 1.8390463857207495e-05, + "loss": 0.0056, "step": 125015 }, { "epoch": 5.83, - "learning_rate": 8.371853171440627e-06, - "loss": 0.0107, + "learning_rate": 1.838999578737148e-05, + "loss": 0.0194, "step": 125020 }, { "epoch": 5.83, - "learning_rate": 8.371384370165487e-06, - "loss": 0.0154, + "learning_rate": 1.8389527717535458e-05, + "loss": 0.0271, "step": 125025 }, { "epoch": 5.83, - "learning_rate": 8.370915568890347e-06, - "loss": 0.0644, + "learning_rate": 1.8389059647699438e-05, + "loss": 0.0556, "step": 125030 }, { "epoch": 5.83, - "learning_rate": 8.370446767615209e-06, - "loss": 0.0356, + "learning_rate": 1.8388591577863418e-05, + "loss": 0.0642, "step": 125035 }, { "epoch": 5.83, - "learning_rate": 8.369977966340069e-06, - "loss": 0.1617, + "learning_rate": 1.8388123508027398e-05, + "loss": 0.0769, "step": 125040 }, { "epoch": 5.83, - "learning_rate": 8.36950916506493e-06, - "loss": 0.0929, + "learning_rate": 1.8387655438191377e-05, + "loss": 0.0535, "step": 125045 }, { "epoch": 5.84, - "learning_rate": 8.36904036378979e-06, - "loss": 0.1354, + "learning_rate": 1.8387187368355357e-05, + "loss": 0.08, "step": 125050 }, { "epoch": 5.84, - "learning_rate": 8.36857156251465e-06, - "loss": 0.2536, + "learning_rate": 1.838671929851934e-05, + "loss": 0.1811, "step": 125055 }, { "epoch": 5.84, - "learning_rate": 8.368102761239512e-06, - "loss": 0.0626, + "learning_rate": 1.838625122868332e-05, + "loss": 0.0982, "step": 125060 }, { "epoch": 5.84, - "learning_rate": 8.367633959964372e-06, - "loss": 0.0318, + "learning_rate": 1.83857831588473e-05, + "loss": 0.0227, "step": 125065 }, { "epoch": 5.84, - "learning_rate": 8.367165158689232e-06, - "loss": 0.0325, + "learning_rate": 1.838531508901128e-05, + "loss": 0.0033, "step": 125070 }, { "epoch": 5.84, - "learning_rate": 8.366696357414092e-06, - "loss": 0.015, + "learning_rate": 1.8384847019175263e-05, + "loss": 0.0468, "step": 125075 }, { "epoch": 5.84, - "learning_rate": 8.366227556138953e-06, - "loss": 0.0711, + "learning_rate": 1.8384378949339243e-05, + "loss": 0.0439, "step": 125080 }, { "epoch": 5.84, - "learning_rate": 8.365758754863815e-06, - "loss": 0.0317, + "learning_rate": 1.8383910879503223e-05, + "loss": 0.0436, "step": 125085 }, { "epoch": 5.84, - "learning_rate": 8.365289953588675e-06, - "loss": 0.0188, + "learning_rate": 1.8383442809667203e-05, + "loss": 0.0533, "step": 125090 }, { "epoch": 5.84, - "learning_rate": 8.364821152313535e-06, - "loss": 0.0202, + "learning_rate": 1.8382974739831186e-05, + "loss": 0.0715, "step": 125095 }, { "epoch": 5.84, - "learning_rate": 8.364352351038395e-06, - "loss": 0.1398, + "learning_rate": 1.8382506669995162e-05, + "loss": 0.0929, "step": 125100 }, { "epoch": 5.84, - "learning_rate": 8.363883549763256e-06, - "loss": 0.3105, + "learning_rate": 1.8382038600159142e-05, + "loss": 0.2658, "step": 125105 }, { "epoch": 5.84, - "learning_rate": 8.363414748488116e-06, - "loss": 0.0808, + "learning_rate": 1.8381570530323125e-05, + "loss": 0.0727, "step": 125110 }, { "epoch": 5.84, - "learning_rate": 8.362945947212976e-06, - "loss": 0.0144, + "learning_rate": 1.8381102460487105e-05, + "loss": 0.0121, "step": 125115 }, { "epoch": 5.84, - "learning_rate": 8.362477145937838e-06, - "loss": 0.006, + "learning_rate": 1.8380634390651085e-05, + "loss": 0.0012, "step": 125120 }, { "epoch": 5.84, - "learning_rate": 8.362008344662698e-06, - "loss": 0.0807, + "learning_rate": 1.8380166320815065e-05, + "loss": 0.0191, "step": 125125 }, { "epoch": 5.84, - "learning_rate": 8.36153954338756e-06, - "loss": 0.0287, + "learning_rate": 1.8379698250979048e-05, + "loss": 0.0897, "step": 125130 }, { "epoch": 5.84, - "learning_rate": 8.36107074211242e-06, - "loss": 0.0621, + "learning_rate": 1.8379230181143028e-05, + "loss": 0.0363, "step": 125135 }, { "epoch": 5.84, - "learning_rate": 8.36060194083728e-06, - "loss": 0.096, + "learning_rate": 1.8378762111307008e-05, + "loss": 0.0551, "step": 125140 }, { "epoch": 5.84, - "learning_rate": 8.36013313956214e-06, - "loss": 0.1964, + "learning_rate": 1.8378294041470987e-05, + "loss": 0.0869, "step": 125145 }, { "epoch": 5.84, - "learning_rate": 8.359664338287e-06, - "loss": 0.1402, + "learning_rate": 1.837782597163497e-05, + "loss": 0.0741, "step": 125150 }, { "epoch": 5.84, - "learning_rate": 8.359195537011862e-06, - "loss": 0.2733, + "learning_rate": 1.837735790179895e-05, + "loss": 0.3547, "step": 125155 }, { "epoch": 5.84, - "learning_rate": 8.358726735736722e-06, - "loss": 0.0454, + "learning_rate": 1.837688983196293e-05, + "loss": 0.0914, "step": 125160 }, { "epoch": 5.84, - "learning_rate": 8.358257934461582e-06, - "loss": 0.0079, + "learning_rate": 1.8376421762126907e-05, + "loss": 0.0192, "step": 125165 }, { "epoch": 5.84, - "learning_rate": 8.357789133186442e-06, - "loss": 0.0213, + "learning_rate": 1.837595369229089e-05, + "loss": 0.0273, "step": 125170 }, { "epoch": 5.84, - "learning_rate": 8.357320331911304e-06, - "loss": 0.0123, + "learning_rate": 1.837548562245487e-05, + "loss": 0.0124, "step": 125175 }, { "epoch": 5.84, - "learning_rate": 8.356851530636164e-06, - "loss": 0.046, + "learning_rate": 1.837501755261885e-05, + "loss": 0.055, "step": 125180 }, { "epoch": 5.84, - "learning_rate": 8.356382729361024e-06, - "loss": 0.0085, + "learning_rate": 1.8374549482782833e-05, + "loss": 0.0535, "step": 125185 }, { "epoch": 5.84, - "learning_rate": 8.355913928085885e-06, - "loss": 0.0497, + "learning_rate": 1.8374081412946812e-05, + "loss": 0.0593, "step": 125190 }, { "epoch": 5.84, - "learning_rate": 8.355445126810747e-06, - "loss": 0.0659, + "learning_rate": 1.8373613343110792e-05, + "loss": 0.0898, "step": 125195 }, { "epoch": 5.84, - "learning_rate": 8.354976325535607e-06, - "loss": 0.108, + "learning_rate": 1.8373145273274772e-05, + "loss": 0.0714, "step": 125200 }, { "epoch": 5.84, - "learning_rate": 8.354507524260467e-06, - "loss": 0.3717, + "learning_rate": 1.8372677203438755e-05, + "loss": 0.3846, "step": 125205 }, { "epoch": 5.84, - "learning_rate": 8.354038722985327e-06, - "loss": 0.0924, + "learning_rate": 1.8372209133602735e-05, + "loss": 0.1476, "step": 125210 }, { "epoch": 5.84, - "learning_rate": 8.353569921710187e-06, - "loss": 0.0066, + "learning_rate": 1.8371741063766715e-05, + "loss": 0.0352, "step": 125215 }, { "epoch": 5.84, - "learning_rate": 8.353101120435048e-06, - "loss": 0.033, + "learning_rate": 1.8371272993930695e-05, + "loss": 0.0208, "step": 125220 }, { "epoch": 5.84, - "learning_rate": 8.352632319159908e-06, - "loss": 0.0214, + "learning_rate": 1.8370804924094675e-05, + "loss": 0.0495, "step": 125225 }, { "epoch": 5.84, - "learning_rate": 8.35216351788477e-06, - "loss": 0.0569, + "learning_rate": 1.8370336854258654e-05, + "loss": 0.0201, "step": 125230 }, { "epoch": 5.84, - "learning_rate": 8.35169471660963e-06, - "loss": 0.0561, + "learning_rate": 1.8369868784422634e-05, + "loss": 0.0602, "step": 125235 }, { "epoch": 5.84, - "learning_rate": 8.35122591533449e-06, - "loss": 0.0828, + "learning_rate": 1.8369400714586617e-05, + "loss": 0.0782, "step": 125240 }, { "epoch": 5.84, - "learning_rate": 8.350757114059351e-06, - "loss": 0.0082, + "learning_rate": 1.8368932644750597e-05, + "loss": 0.1118, "step": 125245 }, { "epoch": 5.84, - "learning_rate": 8.350288312784211e-06, - "loss": 0.1639, + "learning_rate": 1.8368464574914577e-05, + "loss": 0.1863, "step": 125250 }, { "epoch": 5.84, - "learning_rate": 8.349819511509071e-06, - "loss": 0.1669, + "learning_rate": 1.8367996505078557e-05, + "loss": 0.1833, "step": 125255 }, { "epoch": 5.84, - "learning_rate": 8.349350710233933e-06, - "loss": 0.0689, + "learning_rate": 1.836752843524254e-05, + "loss": 0.1034, "step": 125260 }, { "epoch": 5.85, - "learning_rate": 8.348881908958793e-06, - "loss": 0.0201, + "learning_rate": 1.836706036540652e-05, + "loss": 0.007, "step": 125265 }, { "epoch": 5.85, - "learning_rate": 8.348413107683654e-06, - "loss": 0.0158, + "learning_rate": 1.83665922955705e-05, + "loss": 0.0418, "step": 125270 }, { "epoch": 5.85, - "learning_rate": 8.347944306408514e-06, - "loss": 0.017, + "learning_rate": 1.836612422573448e-05, + "loss": 0.0157, "step": 125275 }, { "epoch": 5.85, - "learning_rate": 8.347475505133374e-06, - "loss": 0.0423, + "learning_rate": 1.8365656155898463e-05, + "loss": 0.0237, "step": 125280 }, { "epoch": 5.85, - "learning_rate": 8.347006703858234e-06, - "loss": 0.071, + "learning_rate": 1.8365188086062443e-05, + "loss": 0.0664, "step": 125285 }, { "epoch": 5.85, - "learning_rate": 8.346537902583096e-06, - "loss": 0.132, + "learning_rate": 1.836472001622642e-05, + "loss": 0.1084, "step": 125290 }, { "epoch": 5.85, - "learning_rate": 8.346069101307956e-06, - "loss": 0.0051, + "learning_rate": 1.8364251946390402e-05, + "loss": 0.0971, "step": 125295 }, { "epoch": 5.85, - "learning_rate": 8.345600300032817e-06, - "loss": 0.1746, + "learning_rate": 1.8363783876554382e-05, + "loss": 0.1219, "step": 125300 }, { "epoch": 5.85, - "learning_rate": 8.345131498757677e-06, - "loss": 0.1345, + "learning_rate": 1.8363315806718362e-05, + "loss": 0.3143, "step": 125305 }, { "epoch": 5.85, - "learning_rate": 8.344662697482537e-06, - "loss": 0.067, + "learning_rate": 1.836284773688234e-05, + "loss": 0.0758, "step": 125310 }, { "epoch": 5.85, - "learning_rate": 8.344193896207399e-06, - "loss": 0.0017, + "learning_rate": 1.8362379667046325e-05, + "loss": 0.0165, "step": 125315 }, { "epoch": 5.85, - "learning_rate": 8.343725094932259e-06, - "loss": 0.0153, + "learning_rate": 1.8361911597210305e-05, + "loss": 0.0095, "step": 125320 }, { "epoch": 5.85, - "learning_rate": 8.343256293657119e-06, - "loss": 0.0157, + "learning_rate": 1.8361443527374284e-05, + "loss": 0.0232, "step": 125325 }, { "epoch": 5.85, - "learning_rate": 8.34278749238198e-06, - "loss": 0.0559, + "learning_rate": 1.8360975457538264e-05, + "loss": 0.0956, "step": 125330 }, { "epoch": 5.85, - "learning_rate": 8.34231869110684e-06, - "loss": 0.0447, + "learning_rate": 1.8360507387702248e-05, + "loss": 0.0953, "step": 125335 }, { "epoch": 5.85, - "learning_rate": 8.341849889831702e-06, - "loss": 0.0714, + "learning_rate": 1.8360039317866227e-05, + "loss": 0.0447, "step": 125340 }, { "epoch": 5.85, - "learning_rate": 8.341381088556562e-06, - "loss": 0.1395, + "learning_rate": 1.8359571248030207e-05, + "loss": 0.0629, "step": 125345 }, { "epoch": 5.85, - "learning_rate": 8.340912287281422e-06, - "loss": 0.0804, + "learning_rate": 1.8359103178194187e-05, + "loss": 0.1187, "step": 125350 }, { "epoch": 5.85, - "learning_rate": 8.340443486006282e-06, - "loss": 0.2188, + "learning_rate": 1.8358635108358167e-05, + "loss": 0.2897, "step": 125355 }, { "epoch": 5.85, - "learning_rate": 8.339974684731143e-06, - "loss": 0.1405, + "learning_rate": 1.8358167038522147e-05, + "loss": 0.0876, "step": 125360 }, { "epoch": 5.85, - "learning_rate": 8.339505883456003e-06, - "loss": 0.0335, + "learning_rate": 1.8357698968686126e-05, + "loss": 0.0286, "step": 125365 }, { "epoch": 5.85, - "learning_rate": 8.339037082180865e-06, - "loss": 0.0182, + "learning_rate": 1.835723089885011e-05, + "loss": 0.0109, "step": 125370 }, { "epoch": 5.85, - "learning_rate": 8.338568280905725e-06, - "loss": 0.0604, + "learning_rate": 1.835676282901409e-05, + "loss": 0.0481, "step": 125375 }, { "epoch": 5.85, - "learning_rate": 8.338099479630585e-06, - "loss": 0.1064, + "learning_rate": 1.835629475917807e-05, + "loss": 0.0683, "step": 125380 }, { "epoch": 5.85, - "learning_rate": 8.337630678355446e-06, - "loss": 0.065, + "learning_rate": 1.835582668934205e-05, + "loss": 0.0116, "step": 125385 }, { "epoch": 5.85, - "learning_rate": 8.337161877080306e-06, - "loss": 0.0339, + "learning_rate": 1.8355358619506032e-05, + "loss": 0.0867, "step": 125390 }, { "epoch": 5.85, - "learning_rate": 8.336693075805166e-06, - "loss": 0.0855, + "learning_rate": 1.8354890549670012e-05, + "loss": 0.1045, "step": 125395 }, { "epoch": 5.85, - "learning_rate": 8.336224274530026e-06, - "loss": 0.1302, + "learning_rate": 1.8354422479833992e-05, + "loss": 0.1239, "step": 125400 }, { "epoch": 5.85, - "learning_rate": 8.335755473254888e-06, - "loss": 0.1708, + "learning_rate": 1.8353954409997972e-05, + "loss": 0.1804, "step": 125405 }, { "epoch": 5.85, - "learning_rate": 8.33528667197975e-06, - "loss": 0.1158, + "learning_rate": 1.8353486340161955e-05, + "loss": 0.0664, "step": 125410 }, { "epoch": 5.85, - "learning_rate": 8.33481787070461e-06, - "loss": 0.0045, + "learning_rate": 1.835301827032593e-05, + "loss": 0.0387, "step": 125415 }, { "epoch": 5.85, - "learning_rate": 8.33434906942947e-06, - "loss": 0.0418, + "learning_rate": 1.835255020048991e-05, + "loss": 0.068, "step": 125420 }, { "epoch": 5.85, - "learning_rate": 8.33388026815433e-06, - "loss": 0.0132, + "learning_rate": 1.8352082130653894e-05, + "loss": 0.0436, "step": 125425 }, { "epoch": 5.85, - "learning_rate": 8.333411466879191e-06, - "loss": 0.0768, + "learning_rate": 1.8351614060817874e-05, + "loss": 0.0994, "step": 125430 }, { "epoch": 5.85, - "learning_rate": 8.332942665604051e-06, - "loss": 0.0581, + "learning_rate": 1.8351145990981854e-05, + "loss": 0.0257, "step": 125435 }, { "epoch": 5.85, - "learning_rate": 8.33247386432891e-06, - "loss": 0.0326, + "learning_rate": 1.8350677921145834e-05, + "loss": 0.0681, "step": 125440 }, { "epoch": 5.85, - "learning_rate": 8.332005063053772e-06, - "loss": 0.0725, + "learning_rate": 1.8350209851309817e-05, + "loss": 0.0274, "step": 125445 }, { "epoch": 5.85, - "learning_rate": 8.331536261778632e-06, - "loss": 0.1337, + "learning_rate": 1.8349741781473797e-05, + "loss": 0.1345, "step": 125450 }, { "epoch": 5.85, - "learning_rate": 8.331067460503494e-06, - "loss": 0.2296, + "learning_rate": 1.8349273711637777e-05, + "loss": 0.3067, "step": 125455 }, { "epoch": 5.85, - "learning_rate": 8.330598659228354e-06, - "loss": 0.0899, + "learning_rate": 1.8348805641801757e-05, + "loss": 0.057, "step": 125460 }, { "epoch": 5.85, - "learning_rate": 8.330129857953214e-06, - "loss": 0.0072, + "learning_rate": 1.834833757196574e-05, + "loss": 0.0266, "step": 125465 }, { "epoch": 5.85, - "learning_rate": 8.329661056678074e-06, - "loss": 0.0152, + "learning_rate": 1.834786950212972e-05, + "loss": 0.064, "step": 125470 }, { "epoch": 5.85, - "learning_rate": 8.329192255402935e-06, - "loss": 0.0281, + "learning_rate": 1.83474014322937e-05, + "loss": 0.0126, "step": 125475 }, { "epoch": 5.86, - "learning_rate": 8.328723454127795e-06, - "loss": 0.014, + "learning_rate": 1.834693336245768e-05, + "loss": 0.0276, "step": 125480 }, { "epoch": 5.86, - "learning_rate": 8.328254652852657e-06, - "loss": 0.0709, + "learning_rate": 1.834646529262166e-05, + "loss": 0.0507, "step": 125485 }, { "epoch": 5.86, - "learning_rate": 8.327785851577517e-06, - "loss": 0.0698, + "learning_rate": 1.834599722278564e-05, + "loss": 0.0351, "step": 125490 }, { "epoch": 5.86, - "learning_rate": 8.327317050302377e-06, - "loss": 0.0879, + "learning_rate": 1.834552915294962e-05, + "loss": 0.0707, "step": 125495 }, { "epoch": 5.86, - "learning_rate": 8.326848249027239e-06, - "loss": 0.1236, + "learning_rate": 1.8345061083113602e-05, + "loss": 0.1756, "step": 125500 }, { "epoch": 5.86, - "learning_rate": 8.326379447752098e-06, - "loss": 0.2745, + "learning_rate": 1.834459301327758e-05, + "loss": 0.3433, "step": 125505 }, { "epoch": 5.86, - "learning_rate": 8.325910646476958e-06, - "loss": 0.0585, + "learning_rate": 1.834412494344156e-05, + "loss": 0.1333, "step": 125510 }, { "epoch": 5.86, - "learning_rate": 8.32544184520182e-06, - "loss": 0.0069, + "learning_rate": 1.834365687360554e-05, + "loss": 0.0028, "step": 125515 }, { "epoch": 5.86, - "learning_rate": 8.32497304392668e-06, - "loss": 0.0314, + "learning_rate": 1.8343188803769524e-05, + "loss": 0.0306, "step": 125520 }, { "epoch": 5.86, - "learning_rate": 8.324504242651542e-06, - "loss": 0.0645, + "learning_rate": 1.8342720733933504e-05, + "loss": 0.0296, "step": 125525 }, { "epoch": 5.86, - "learning_rate": 8.324035441376402e-06, - "loss": 0.0232, + "learning_rate": 1.8342252664097484e-05, + "loss": 0.0129, "step": 125530 }, { "epoch": 5.86, - "learning_rate": 8.323566640101261e-06, - "loss": 0.0233, + "learning_rate": 1.8341784594261467e-05, + "loss": 0.0568, "step": 125535 }, { "epoch": 5.86, - "learning_rate": 8.323097838826121e-06, - "loss": 0.0473, + "learning_rate": 1.8341316524425447e-05, + "loss": 0.084, "step": 125540 }, { "epoch": 5.86, - "learning_rate": 8.322629037550983e-06, - "loss": 0.141, + "learning_rate": 1.8340848454589424e-05, + "loss": 0.0455, "step": 125545 }, { "epoch": 5.86, - "learning_rate": 8.322160236275843e-06, - "loss": 0.0591, + "learning_rate": 1.8340380384753403e-05, + "loss": 0.1512, "step": 125550 }, { "epoch": 5.86, - "learning_rate": 8.321691435000705e-06, - "loss": 0.2629, + "learning_rate": 1.8339912314917387e-05, + "loss": 0.3203, "step": 125555 }, { "epoch": 5.86, - "learning_rate": 8.321222633725564e-06, - "loss": 0.0808, + "learning_rate": 1.8339444245081366e-05, + "loss": 0.0925, "step": 125560 }, { "epoch": 5.86, - "learning_rate": 8.320753832450424e-06, - "loss": 0.0328, + "learning_rate": 1.8338976175245346e-05, + "loss": 0.0049, "step": 125565 }, { "epoch": 5.86, - "learning_rate": 8.320285031175286e-06, - "loss": 0.0203, + "learning_rate": 1.8338508105409326e-05, + "loss": 0.0015, "step": 125570 }, { "epoch": 5.86, - "learning_rate": 8.319816229900146e-06, - "loss": 0.0173, + "learning_rate": 1.833804003557331e-05, + "loss": 0.0406, "step": 125575 }, { "epoch": 5.86, - "learning_rate": 8.319347428625006e-06, - "loss": 0.0324, + "learning_rate": 1.833757196573729e-05, + "loss": 0.0236, "step": 125580 }, { "epoch": 5.86, - "learning_rate": 8.318878627349868e-06, - "loss": 0.0499, + "learning_rate": 1.833710389590127e-05, + "loss": 0.0429, "step": 125585 }, { "epoch": 5.86, - "learning_rate": 8.318409826074727e-06, - "loss": 0.0251, + "learning_rate": 1.833663582606525e-05, + "loss": 0.1533, "step": 125590 }, { "epoch": 5.86, - "learning_rate": 8.317941024799589e-06, - "loss": 0.1066, + "learning_rate": 1.8336167756229232e-05, + "loss": 0.1212, "step": 125595 }, { "epoch": 5.86, - "learning_rate": 8.317472223524449e-06, - "loss": 0.1193, + "learning_rate": 1.8335699686393212e-05, + "loss": 0.0854, "step": 125600 }, { "epoch": 5.86, - "learning_rate": 8.317003422249309e-06, - "loss": 0.3475, + "learning_rate": 1.8335231616557188e-05, + "loss": 0.2453, "step": 125605 }, { "epoch": 5.86, - "learning_rate": 8.316534620974169e-06, - "loss": 0.0641, + "learning_rate": 1.833476354672117e-05, + "loss": 0.0861, "step": 125610 }, { "epoch": 5.86, - "learning_rate": 8.31606581969903e-06, - "loss": 0.0174, + "learning_rate": 1.833429547688515e-05, + "loss": 0.0048, "step": 125615 }, { "epoch": 5.86, - "learning_rate": 8.31559701842389e-06, - "loss": 0.0178, + "learning_rate": 1.833382740704913e-05, + "loss": 0.0483, "step": 125620 }, { "epoch": 5.86, - "learning_rate": 8.315128217148752e-06, - "loss": 0.0629, + "learning_rate": 1.833335933721311e-05, + "loss": 0.0727, "step": 125625 }, { "epoch": 5.86, - "learning_rate": 8.314659415873612e-06, - "loss": 0.0265, + "learning_rate": 1.8332891267377094e-05, + "loss": 0.0788, "step": 125630 }, { "epoch": 5.86, - "learning_rate": 8.314190614598472e-06, - "loss": 0.0579, + "learning_rate": 1.8332423197541074e-05, + "loss": 0.048, "step": 125635 }, { "epoch": 5.86, - "learning_rate": 8.313721813323334e-06, - "loss": 0.0642, + "learning_rate": 1.8331955127705054e-05, + "loss": 0.0351, "step": 125640 }, { "epoch": 5.86, - "learning_rate": 8.313253012048194e-06, - "loss": 0.0881, + "learning_rate": 1.8331487057869033e-05, + "loss": 0.0515, "step": 125645 }, { "epoch": 5.86, - "learning_rate": 8.312784210773053e-06, - "loss": 0.1306, + "learning_rate": 1.8331018988033017e-05, + "loss": 0.0945, "step": 125650 }, { "epoch": 5.86, - "learning_rate": 8.312315409497913e-06, - "loss": 0.3782, + "learning_rate": 1.8330550918196996e-05, + "loss": 0.2801, "step": 125655 }, { "epoch": 5.86, - "learning_rate": 8.311846608222775e-06, - "loss": 0.0625, + "learning_rate": 1.8330082848360976e-05, + "loss": 0.0526, "step": 125660 }, { "epoch": 5.86, - "learning_rate": 8.311377806947637e-06, - "loss": 0.0495, + "learning_rate": 1.832961477852496e-05, + "loss": 0.0697, "step": 125665 }, { "epoch": 5.86, - "learning_rate": 8.310909005672497e-06, - "loss": 0.0327, + "learning_rate": 1.8329146708688936e-05, + "loss": 0.0483, "step": 125670 }, { "epoch": 5.86, - "learning_rate": 8.310440204397357e-06, - "loss": 0.0268, + "learning_rate": 1.8328678638852916e-05, + "loss": 0.0167, "step": 125675 }, { "epoch": 5.86, - "learning_rate": 8.309971403122216e-06, - "loss": 0.039, + "learning_rate": 1.8328210569016896e-05, + "loss": 0.0455, "step": 125680 }, { "epoch": 5.86, - "learning_rate": 8.309502601847078e-06, - "loss": 0.0572, + "learning_rate": 1.832774249918088e-05, + "loss": 0.0375, "step": 125685 }, { "epoch": 5.86, - "learning_rate": 8.309033800571938e-06, - "loss": 0.0824, + "learning_rate": 1.832727442934486e-05, + "loss": 0.0484, "step": 125690 }, { "epoch": 5.87, - "learning_rate": 8.308564999296798e-06, - "loss": 0.089, + "learning_rate": 1.832680635950884e-05, + "loss": 0.0341, "step": 125695 }, { "epoch": 5.87, - "learning_rate": 8.30809619802166e-06, - "loss": 0.1149, + "learning_rate": 1.8326338289672818e-05, + "loss": 0.1166, "step": 125700 }, { "epoch": 5.87, - "learning_rate": 8.30762739674652e-06, - "loss": 0.2245, + "learning_rate": 1.83258702198368e-05, + "loss": 0.1652, "step": 125705 }, { "epoch": 5.87, - "learning_rate": 8.307158595471381e-06, - "loss": 0.0818, + "learning_rate": 1.832540215000078e-05, + "loss": 0.063, "step": 125710 }, { "epoch": 5.87, - "learning_rate": 8.306689794196241e-06, - "loss": 0.0566, + "learning_rate": 1.832493408016476e-05, + "loss": 0.0344, "step": 125715 }, { "epoch": 5.87, - "learning_rate": 8.306220992921101e-06, - "loss": 0.0045, + "learning_rate": 1.8324466010328744e-05, + "loss": 0.0085, "step": 125720 }, { "epoch": 5.87, - "learning_rate": 8.305752191645961e-06, - "loss": 0.0267, + "learning_rate": 1.8323997940492724e-05, + "loss": 0.0085, "step": 125725 }, { "epoch": 5.87, - "learning_rate": 8.305283390370823e-06, - "loss": 0.1037, + "learning_rate": 1.8323529870656704e-05, + "loss": 0.0642, "step": 125730 }, { "epoch": 5.87, - "learning_rate": 8.304814589095683e-06, - "loss": 0.0798, + "learning_rate": 1.832306180082068e-05, + "loss": 0.0311, "step": 125735 }, { "epoch": 5.87, - "learning_rate": 8.304345787820544e-06, - "loss": 0.1238, + "learning_rate": 1.8322593730984664e-05, + "loss": 0.0474, "step": 125740 }, { "epoch": 5.87, - "learning_rate": 8.303876986545404e-06, - "loss": 0.0714, + "learning_rate": 1.8322125661148643e-05, + "loss": 0.1035, "step": 125745 }, { "epoch": 5.87, - "learning_rate": 8.303408185270264e-06, - "loss": 0.1074, + "learning_rate": 1.8321657591312623e-05, + "loss": 0.149, "step": 125750 }, { "epoch": 5.87, - "learning_rate": 8.302939383995126e-06, - "loss": 0.2401, + "learning_rate": 1.8321189521476603e-05, + "loss": 0.2016, "step": 125755 }, { "epoch": 5.87, - "learning_rate": 8.302470582719986e-06, - "loss": 0.0992, + "learning_rate": 1.8320721451640586e-05, + "loss": 0.0995, "step": 125760 }, { "epoch": 5.87, - "learning_rate": 8.302001781444845e-06, - "loss": 0.0107, + "learning_rate": 1.8320253381804566e-05, + "loss": 0.012, "step": 125765 }, { "epoch": 5.87, - "learning_rate": 8.301532980169707e-06, - "loss": 0.0111, + "learning_rate": 1.8319785311968546e-05, + "loss": 0.0176, "step": 125770 }, { "epoch": 5.87, - "learning_rate": 8.301064178894567e-06, - "loss": 0.0727, + "learning_rate": 1.8319317242132526e-05, + "loss": 0.0257, "step": 125775 }, { "epoch": 5.87, - "learning_rate": 8.300595377619429e-06, - "loss": 0.0164, + "learning_rate": 1.831884917229651e-05, + "loss": 0.0378, "step": 125780 }, { "epoch": 5.87, - "learning_rate": 8.300126576344289e-06, - "loss": 0.0616, + "learning_rate": 1.831838110246049e-05, + "loss": 0.0612, "step": 125785 }, { "epoch": 5.87, - "learning_rate": 8.299657775069149e-06, - "loss": 0.1238, + "learning_rate": 1.831791303262447e-05, + "loss": 0.0427, "step": 125790 }, { "epoch": 5.87, - "learning_rate": 8.299188973794008e-06, - "loss": 0.1486, + "learning_rate": 1.831744496278845e-05, + "loss": 0.0514, "step": 125795 }, { "epoch": 5.87, - "learning_rate": 8.29872017251887e-06, - "loss": 0.1224, + "learning_rate": 1.8316976892952428e-05, + "loss": 0.1276, "step": 125800 }, { "epoch": 5.87, - "learning_rate": 8.29825137124373e-06, - "loss": 0.2494, + "learning_rate": 1.8316508823116408e-05, + "loss": 0.1637, "step": 125805 }, { "epoch": 5.87, - "learning_rate": 8.297782569968592e-06, - "loss": 0.0803, + "learning_rate": 1.8316040753280388e-05, + "loss": 0.113, "step": 125810 }, { "epoch": 5.87, - "learning_rate": 8.297313768693452e-06, - "loss": 0.0166, + "learning_rate": 1.831557268344437e-05, + "loss": 0.0261, "step": 125815 }, { "epoch": 5.87, - "learning_rate": 8.296844967418312e-06, - "loss": 0.041, + "learning_rate": 1.831510461360835e-05, + "loss": 0.0056, "step": 125820 }, { "epoch": 5.87, - "learning_rate": 8.296376166143173e-06, - "loss": 0.0377, + "learning_rate": 1.831463654377233e-05, + "loss": 0.0659, "step": 125825 }, { "epoch": 5.87, - "learning_rate": 8.295907364868033e-06, - "loss": 0.0832, + "learning_rate": 1.831416847393631e-05, + "loss": 0.0698, "step": 125830 }, { "epoch": 5.87, - "learning_rate": 8.295438563592893e-06, - "loss": 0.0447, + "learning_rate": 1.8313700404100294e-05, + "loss": 0.0631, "step": 125835 }, { "epoch": 5.87, - "learning_rate": 8.294969762317755e-06, - "loss": 0.0652, + "learning_rate": 1.8313232334264273e-05, + "loss": 0.0358, "step": 125840 }, { "epoch": 5.87, - "learning_rate": 8.294500961042615e-06, - "loss": 0.107, + "learning_rate": 1.8312764264428253e-05, + "loss": 0.1313, "step": 125845 }, { "epoch": 5.87, - "learning_rate": 8.294032159767476e-06, - "loss": 0.1587, + "learning_rate": 1.8312296194592236e-05, + "loss": 0.1282, "step": 125850 }, { "epoch": 5.87, - "learning_rate": 8.293563358492336e-06, - "loss": 0.2635, + "learning_rate": 1.8311828124756216e-05, + "loss": 0.2528, "step": 125855 }, { "epoch": 5.87, - "learning_rate": 8.293094557217196e-06, - "loss": 0.0903, + "learning_rate": 1.8311360054920193e-05, + "loss": 0.0685, "step": 125860 }, { "epoch": 5.87, - "learning_rate": 8.292625755942056e-06, - "loss": 0.0352, + "learning_rate": 1.8310891985084173e-05, + "loss": 0.016, "step": 125865 }, { "epoch": 5.87, - "learning_rate": 8.292156954666918e-06, - "loss": 0.0429, + "learning_rate": 1.8310423915248156e-05, + "loss": 0.0216, "step": 125870 }, { "epoch": 5.87, - "learning_rate": 8.291688153391778e-06, - "loss": 0.0102, + "learning_rate": 1.8309955845412136e-05, + "loss": 0.0277, "step": 125875 }, { "epoch": 5.87, - "learning_rate": 8.29121935211664e-06, - "loss": 0.0611, + "learning_rate": 1.8309487775576115e-05, + "loss": 0.0093, "step": 125880 }, { "epoch": 5.87, - "learning_rate": 8.290750550841499e-06, - "loss": 0.0469, + "learning_rate": 1.8309019705740095e-05, + "loss": 0.077, "step": 125885 }, { "epoch": 5.87, - "learning_rate": 8.290281749566359e-06, - "loss": 0.0748, + "learning_rate": 1.830855163590408e-05, + "loss": 0.0207, "step": 125890 }, { "epoch": 5.87, - "learning_rate": 8.28981294829122e-06, - "loss": 0.0685, + "learning_rate": 1.8308083566068058e-05, + "loss": 0.0768, "step": 125895 }, { "epoch": 5.87, - "learning_rate": 8.28934414701608e-06, - "loss": 0.046, + "learning_rate": 1.8307615496232038e-05, + "loss": 0.1449, "step": 125900 }, { "epoch": 5.87, - "learning_rate": 8.28887534574094e-06, - "loss": 0.2291, + "learning_rate": 1.830714742639602e-05, + "loss": 0.3453, "step": 125905 }, { "epoch": 5.88, - "learning_rate": 8.2884065444658e-06, - "loss": 0.0756, + "learning_rate": 1.830667935656e-05, + "loss": 0.0844, "step": 125910 }, { "epoch": 5.88, - "learning_rate": 8.287937743190662e-06, - "loss": 0.0108, + "learning_rate": 1.830621128672398e-05, + "loss": 0.0112, "step": 125915 }, { "epoch": 5.88, - "learning_rate": 8.287468941915524e-06, - "loss": 0.0304, + "learning_rate": 1.830574321688796e-05, + "loss": 0.0101, "step": 125920 }, { "epoch": 5.88, - "learning_rate": 8.287000140640384e-06, - "loss": 0.0211, + "learning_rate": 1.830527514705194e-05, + "loss": 0.0289, "step": 125925 }, { "epoch": 5.88, - "learning_rate": 8.286531339365244e-06, - "loss": 0.0351, + "learning_rate": 1.830480707721592e-05, + "loss": 0.0261, "step": 125930 }, { "epoch": 5.88, - "learning_rate": 8.286062538090104e-06, - "loss": 0.0593, + "learning_rate": 1.83043390073799e-05, + "loss": 0.0547, "step": 125935 }, { "epoch": 5.88, - "learning_rate": 8.285593736814965e-06, - "loss": 0.0699, + "learning_rate": 1.830387093754388e-05, + "loss": 0.0468, "step": 125940 }, { "epoch": 5.88, - "learning_rate": 8.285124935539825e-06, - "loss": 0.0502, + "learning_rate": 1.8303402867707863e-05, + "loss": 0.0623, "step": 125945 }, { "epoch": 5.88, - "learning_rate": 8.284656134264685e-06, - "loss": 0.1619, + "learning_rate": 1.8302934797871843e-05, + "loss": 0.089, "step": 125950 }, { "epoch": 5.88, - "learning_rate": 8.284187332989547e-06, - "loss": 0.296, + "learning_rate": 1.8302466728035823e-05, + "loss": 0.1954, "step": 125955 }, { "epoch": 5.88, - "learning_rate": 8.283718531714407e-06, - "loss": 0.0735, + "learning_rate": 1.8301998658199803e-05, + "loss": 0.0463, "step": 125960 }, { "epoch": 5.88, - "learning_rate": 8.283249730439268e-06, - "loss": 0.0215, + "learning_rate": 1.8301530588363786e-05, + "loss": 0.0091, "step": 125965 }, { "epoch": 5.88, - "learning_rate": 8.282780929164128e-06, - "loss": 0.0073, + "learning_rate": 1.8301062518527766e-05, + "loss": 0.0562, "step": 125970 }, { "epoch": 5.88, - "learning_rate": 8.282312127888988e-06, - "loss": 0.0825, + "learning_rate": 1.8300594448691745e-05, + "loss": 0.0718, "step": 125975 }, { "epoch": 5.88, - "learning_rate": 8.281843326613848e-06, - "loss": 0.0574, + "learning_rate": 1.830012637885573e-05, + "loss": 0.0774, "step": 125980 }, { "epoch": 5.88, - "learning_rate": 8.28137452533871e-06, - "loss": 0.0484, + "learning_rate": 1.8299658309019705e-05, + "loss": 0.0621, "step": 125985 }, { "epoch": 5.88, - "learning_rate": 8.28090572406357e-06, - "loss": 0.0502, + "learning_rate": 1.8299190239183685e-05, + "loss": 0.0556, "step": 125990 }, { "epoch": 5.88, - "learning_rate": 8.280436922788431e-06, - "loss": 0.0676, + "learning_rate": 1.8298722169347665e-05, + "loss": 0.1349, "step": 125995 }, { "epoch": 5.88, - "learning_rate": 8.279968121513291e-06, - "loss": 0.1, + "learning_rate": 1.8298254099511648e-05, + "loss": 0.0805, "step": 126000 }, { "epoch": 5.88, - "learning_rate": 8.279499320238151e-06, - "loss": 0.2971, + "learning_rate": 1.8297786029675628e-05, + "loss": 0.2855, "step": 126005 }, { "epoch": 5.88, - "learning_rate": 8.279030518963013e-06, - "loss": 0.092, + "learning_rate": 1.8297317959839608e-05, + "loss": 0.0651, "step": 126010 }, { "epoch": 5.88, - "learning_rate": 8.278561717687873e-06, - "loss": 0.0018, + "learning_rate": 1.8296849890003587e-05, + "loss": 0.0016, "step": 126015 }, { "epoch": 5.88, - "learning_rate": 8.278092916412733e-06, - "loss": 0.0037, + "learning_rate": 1.829638182016757e-05, + "loss": 0.0164, "step": 126020 }, { "epoch": 5.88, - "learning_rate": 8.277624115137594e-06, - "loss": 0.0043, + "learning_rate": 1.829591375033155e-05, + "loss": 0.0273, "step": 126025 }, { "epoch": 5.88, - "learning_rate": 8.277155313862454e-06, - "loss": 0.0252, + "learning_rate": 1.829544568049553e-05, + "loss": 0.0519, "step": 126030 }, { "epoch": 5.88, - "learning_rate": 8.276686512587316e-06, - "loss": 0.0462, + "learning_rate": 1.8294977610659513e-05, + "loss": 0.0236, "step": 126035 }, { "epoch": 5.88, - "learning_rate": 8.276217711312176e-06, - "loss": 0.0533, + "learning_rate": 1.8294509540823493e-05, + "loss": 0.0692, "step": 126040 }, { "epoch": 5.88, - "learning_rate": 8.275748910037036e-06, - "loss": 0.0795, + "learning_rate": 1.8294041470987473e-05, + "loss": 0.0937, "step": 126045 }, { "epoch": 5.88, - "learning_rate": 8.275280108761896e-06, - "loss": 0.1829, + "learning_rate": 1.829357340115145e-05, + "loss": 0.0652, "step": 126050 }, { "epoch": 5.88, - "learning_rate": 8.274811307486757e-06, - "loss": 0.3019, + "learning_rate": 1.8293105331315433e-05, + "loss": 0.1773, "step": 126055 }, { "epoch": 5.88, - "learning_rate": 8.274342506211617e-06, - "loss": 0.1069, + "learning_rate": 1.8292637261479413e-05, + "loss": 0.0758, "step": 126060 }, { "epoch": 5.88, - "learning_rate": 8.273873704936479e-06, - "loss": 0.0119, + "learning_rate": 1.8292169191643392e-05, + "loss": 0.0019, "step": 126065 }, { "epoch": 5.88, - "learning_rate": 8.273404903661339e-06, - "loss": 0.0111, + "learning_rate": 1.8291701121807372e-05, + "loss": 0.035, "step": 126070 }, { "epoch": 5.88, - "learning_rate": 8.272936102386199e-06, - "loss": 0.0443, + "learning_rate": 1.8291233051971355e-05, + "loss": 0.061, "step": 126075 }, { "epoch": 5.88, - "learning_rate": 8.27246730111106e-06, - "loss": 0.0791, + "learning_rate": 1.8290764982135335e-05, + "loss": 0.0505, "step": 126080 }, { "epoch": 5.88, - "learning_rate": 8.27199849983592e-06, - "loss": 0.0861, + "learning_rate": 1.8290296912299315e-05, + "loss": 0.0429, "step": 126085 }, { "epoch": 5.88, - "learning_rate": 8.27152969856078e-06, - "loss": 0.1219, + "learning_rate": 1.8289828842463298e-05, + "loss": 0.0524, "step": 126090 }, { "epoch": 5.88, - "learning_rate": 8.271060897285642e-06, - "loss": 0.1698, + "learning_rate": 1.8289360772627278e-05, + "loss": 0.0759, "step": 126095 }, { "epoch": 5.88, - "learning_rate": 8.270592096010502e-06, - "loss": 0.1225, + "learning_rate": 1.8288892702791258e-05, + "loss": 0.1411, "step": 126100 }, { "epoch": 5.88, - "learning_rate": 8.270123294735363e-06, - "loss": 0.2296, + "learning_rate": 1.8288424632955238e-05, + "loss": 0.3293, "step": 126105 }, { "epoch": 5.88, - "learning_rate": 8.269654493460223e-06, - "loss": 0.097, + "learning_rate": 1.8287956563119218e-05, + "loss": 0.0827, "step": 126110 }, { "epoch": 5.88, - "learning_rate": 8.269185692185083e-06, - "loss": 0.0134, + "learning_rate": 1.8287488493283197e-05, + "loss": 0.0075, "step": 126115 }, { "epoch": 5.88, - "learning_rate": 8.268716890909943e-06, - "loss": 0.014, + "learning_rate": 1.8287020423447177e-05, + "loss": 0.0267, "step": 126120 }, { "epoch": 5.89, - "learning_rate": 8.268248089634805e-06, - "loss": 0.0367, + "learning_rate": 1.8286552353611157e-05, + "loss": 0.0624, "step": 126125 }, { "epoch": 5.89, - "learning_rate": 8.267779288359665e-06, - "loss": 0.0182, + "learning_rate": 1.828608428377514e-05, + "loss": 0.0638, "step": 126130 }, { "epoch": 5.89, - "learning_rate": 8.267310487084526e-06, - "loss": 0.0345, + "learning_rate": 1.828561621393912e-05, + "loss": 0.0411, "step": 126135 }, { "epoch": 5.89, - "learning_rate": 8.266841685809386e-06, - "loss": 0.0893, + "learning_rate": 1.82851481441031e-05, + "loss": 0.0987, "step": 126140 }, { "epoch": 5.89, - "learning_rate": 8.266372884534246e-06, - "loss": 0.043, + "learning_rate": 1.8284680074267083e-05, + "loss": 0.042, "step": 126145 }, { "epoch": 5.89, - "learning_rate": 8.265904083259108e-06, - "loss": 0.1174, + "learning_rate": 1.8284212004431063e-05, + "loss": 0.0928, "step": 126150 }, { "epoch": 5.89, - "learning_rate": 8.265435281983968e-06, - "loss": 0.115, + "learning_rate": 1.8283743934595043e-05, + "loss": 0.2339, "step": 126155 }, { "epoch": 5.89, - "learning_rate": 8.264966480708828e-06, - "loss": 0.1354, + "learning_rate": 1.8283275864759022e-05, + "loss": 0.0948, "step": 126160 }, { "epoch": 5.89, - "learning_rate": 8.264497679433688e-06, - "loss": 0.0158, + "learning_rate": 1.8282807794923006e-05, + "loss": 0.0131, "step": 126165 }, { "epoch": 5.89, - "learning_rate": 8.26402887815855e-06, - "loss": 0.011, + "learning_rate": 1.8282339725086985e-05, + "loss": 0.036, "step": 126170 }, { "epoch": 5.89, - "learning_rate": 8.263560076883411e-06, - "loss": 0.0258, + "learning_rate": 1.8281871655250962e-05, + "loss": 0.0102, "step": 126175 }, { "epoch": 5.89, - "learning_rate": 8.26309127560827e-06, - "loss": 0.0292, + "learning_rate": 1.8281403585414942e-05, + "loss": 0.0284, "step": 126180 }, { "epoch": 5.89, - "learning_rate": 8.26262247433313e-06, - "loss": 0.0158, + "learning_rate": 1.8280935515578925e-05, + "loss": 0.0454, "step": 126185 }, { "epoch": 5.89, - "learning_rate": 8.26215367305799e-06, - "loss": 0.0186, + "learning_rate": 1.8280467445742905e-05, + "loss": 0.0315, "step": 126190 }, { "epoch": 5.89, - "learning_rate": 8.261684871782852e-06, - "loss": 0.0747, + "learning_rate": 1.8279999375906885e-05, + "loss": 0.0601, "step": 126195 }, { "epoch": 5.89, - "learning_rate": 8.261216070507712e-06, - "loss": 0.167, + "learning_rate": 1.8279531306070864e-05, + "loss": 0.0819, "step": 126200 }, { "epoch": 5.89, - "learning_rate": 8.260747269232572e-06, - "loss": 0.1763, + "learning_rate": 1.8279063236234848e-05, + "loss": 0.1227, "step": 126205 }, { "epoch": 5.89, - "learning_rate": 8.260278467957434e-06, - "loss": 0.1172, + "learning_rate": 1.8278595166398827e-05, + "loss": 0.0838, "step": 126210 }, { "epoch": 5.89, - "learning_rate": 8.259809666682294e-06, - "loss": 0.0004, + "learning_rate": 1.8278127096562807e-05, + "loss": 0.0141, "step": 126215 }, { "epoch": 5.89, - "learning_rate": 8.259340865407155e-06, - "loss": 0.0097, + "learning_rate": 1.827765902672679e-05, + "loss": 0.0152, "step": 126220 }, { "epoch": 5.89, - "learning_rate": 8.258872064132015e-06, - "loss": 0.0191, + "learning_rate": 1.827719095689077e-05, + "loss": 0.0825, "step": 126225 }, { "epoch": 5.89, - "learning_rate": 8.258403262856875e-06, - "loss": 0.0684, + "learning_rate": 1.827672288705475e-05, + "loss": 0.0335, "step": 126230 }, { "epoch": 5.89, - "learning_rate": 8.257934461581735e-06, - "loss": 0.0207, + "learning_rate": 1.827625481721873e-05, + "loss": 0.0516, "step": 126235 }, { "epoch": 5.89, - "learning_rate": 8.257465660306597e-06, - "loss": 0.0801, + "learning_rate": 1.827578674738271e-05, + "loss": 0.0451, "step": 126240 }, { "epoch": 5.89, - "learning_rate": 8.256996859031457e-06, - "loss": 0.022, + "learning_rate": 1.827531867754669e-05, + "loss": 0.0641, "step": 126245 }, { "epoch": 5.89, - "learning_rate": 8.256528057756318e-06, - "loss": 0.088, + "learning_rate": 1.827485060771067e-05, + "loss": 0.1343, "step": 126250 }, { "epoch": 5.89, - "learning_rate": 8.256059256481178e-06, - "loss": 0.2428, + "learning_rate": 1.827438253787465e-05, + "loss": 0.1681, "step": 126255 }, { "epoch": 5.89, - "learning_rate": 8.255590455206038e-06, - "loss": 0.077, + "learning_rate": 1.8273914468038632e-05, + "loss": 0.11, "step": 126260 }, { "epoch": 5.89, - "learning_rate": 8.2551216539309e-06, - "loss": 0.0071, + "learning_rate": 1.8273446398202612e-05, + "loss": 0.0012, "step": 126265 }, { "epoch": 5.89, - "learning_rate": 8.25465285265576e-06, - "loss": 0.0325, + "learning_rate": 1.8272978328366592e-05, + "loss": 0.0054, "step": 126270 }, { "epoch": 5.89, - "learning_rate": 8.25418405138062e-06, - "loss": 0.0105, + "learning_rate": 1.8272510258530575e-05, + "loss": 0.0252, "step": 126275 }, { "epoch": 5.89, - "learning_rate": 8.253715250105481e-06, - "loss": 0.0614, + "learning_rate": 1.8272042188694555e-05, + "loss": 0.0354, "step": 126280 }, { "epoch": 5.89, - "learning_rate": 8.253246448830341e-06, - "loss": 0.0478, + "learning_rate": 1.8271574118858535e-05, + "loss": 0.0075, "step": 126285 }, { "epoch": 5.89, - "learning_rate": 8.252777647555203e-06, - "loss": 0.0842, + "learning_rate": 1.8271106049022515e-05, + "loss": 0.1031, "step": 126290 }, { "epoch": 5.89, - "learning_rate": 8.252308846280063e-06, - "loss": 0.0564, + "learning_rate": 1.8270637979186498e-05, + "loss": 0.0995, "step": 126295 }, { "epoch": 5.89, - "learning_rate": 8.251840045004923e-06, - "loss": 0.2656, + "learning_rate": 1.8270169909350474e-05, + "loss": 0.134, "step": 126300 }, { "epoch": 5.89, - "learning_rate": 8.251371243729783e-06, - "loss": 0.2648, + "learning_rate": 1.8269701839514454e-05, + "loss": 0.198, "step": 126305 }, { "epoch": 5.89, - "learning_rate": 8.250902442454644e-06, - "loss": 0.1007, + "learning_rate": 1.8269233769678434e-05, + "loss": 0.0741, "step": 126310 }, { "epoch": 5.89, - "learning_rate": 8.250433641179504e-06, - "loss": 0.05, + "learning_rate": 1.8268765699842417e-05, + "loss": 0.0101, "step": 126315 }, { "epoch": 5.89, - "learning_rate": 8.249964839904366e-06, - "loss": 0.0279, + "learning_rate": 1.8268297630006397e-05, + "loss": 0.0078, "step": 126320 }, { "epoch": 5.89, - "learning_rate": 8.249496038629226e-06, - "loss": 0.0109, + "learning_rate": 1.8267829560170377e-05, + "loss": 0.0183, "step": 126325 }, { "epoch": 5.89, - "learning_rate": 8.249027237354086e-06, - "loss": 0.0288, + "learning_rate": 1.826736149033436e-05, + "loss": 0.0702, "step": 126330 }, { "epoch": 5.89, - "learning_rate": 8.248558436078947e-06, - "loss": 0.1083, + "learning_rate": 1.826689342049834e-05, + "loss": 0.0595, "step": 126335 }, { "epoch": 5.9, - "learning_rate": 8.248089634803807e-06, - "loss": 0.0253, + "learning_rate": 1.826642535066232e-05, + "loss": 0.007, "step": 126340 }, { "epoch": 5.9, - "learning_rate": 8.247620833528667e-06, - "loss": 0.0969, + "learning_rate": 1.82659572808263e-05, + "loss": 0.1189, "step": 126345 }, { "epoch": 5.9, - "learning_rate": 8.247152032253529e-06, - "loss": 0.1111, + "learning_rate": 1.8265489210990283e-05, + "loss": 0.1181, "step": 126350 }, { "epoch": 5.9, - "learning_rate": 8.246683230978389e-06, - "loss": 0.2801, + "learning_rate": 1.8265021141154262e-05, + "loss": 0.0847, "step": 126355 }, { "epoch": 5.9, - "learning_rate": 8.24621442970325e-06, - "loss": 0.1491, + "learning_rate": 1.8264553071318242e-05, + "loss": 0.0855, "step": 126360 }, { "epoch": 5.9, - "learning_rate": 8.24574562842811e-06, - "loss": 0.0976, + "learning_rate": 1.826408500148222e-05, + "loss": 0.0254, "step": 126365 }, { "epoch": 5.9, - "learning_rate": 8.24527682715297e-06, - "loss": 0.0041, + "learning_rate": 1.8263616931646202e-05, + "loss": 0.044, "step": 126370 }, { "epoch": 5.9, - "learning_rate": 8.24480802587783e-06, - "loss": 0.0491, + "learning_rate": 1.8263148861810182e-05, + "loss": 0.065, "step": 126375 }, { "epoch": 5.9, - "learning_rate": 8.244339224602692e-06, - "loss": 0.024, + "learning_rate": 1.826268079197416e-05, + "loss": 0.0192, "step": 126380 }, { "epoch": 5.9, - "learning_rate": 8.243870423327552e-06, - "loss": 0.0212, + "learning_rate": 1.826221272213814e-05, + "loss": 0.0275, "step": 126385 }, { "epoch": 5.9, - "learning_rate": 8.243401622052413e-06, - "loss": 0.0829, + "learning_rate": 1.8261744652302125e-05, + "loss": 0.0293, "step": 126390 }, { "epoch": 5.9, - "learning_rate": 8.242932820777273e-06, - "loss": 0.0421, + "learning_rate": 1.8261276582466104e-05, + "loss": 0.1545, "step": 126395 }, { "epoch": 5.9, - "learning_rate": 8.242464019502133e-06, - "loss": 0.1214, + "learning_rate": 1.8260808512630084e-05, + "loss": 0.1966, "step": 126400 }, { "epoch": 5.9, - "learning_rate": 8.241995218226995e-06, - "loss": 0.2608, + "learning_rate": 1.8260340442794067e-05, + "loss": 0.2549, "step": 126405 }, { "epoch": 5.9, - "learning_rate": 8.241526416951855e-06, - "loss": 0.0816, + "learning_rate": 1.8259872372958047e-05, + "loss": 0.0759, "step": 126410 }, { "epoch": 5.9, - "learning_rate": 8.241057615676715e-06, - "loss": 0.0073, + "learning_rate": 1.8259404303122027e-05, + "loss": 0.0556, "step": 126415 }, { "epoch": 5.9, - "learning_rate": 8.240588814401575e-06, - "loss": 0.0159, + "learning_rate": 1.8258936233286007e-05, + "loss": 0.0337, "step": 126420 }, { "epoch": 5.9, - "learning_rate": 8.240120013126436e-06, - "loss": 0.038, + "learning_rate": 1.825846816344999e-05, + "loss": 0.061, "step": 126425 }, { "epoch": 5.9, - "learning_rate": 8.239651211851298e-06, - "loss": 0.0551, + "learning_rate": 1.8258000093613966e-05, + "loss": 0.0411, "step": 126430 }, { "epoch": 5.9, - "learning_rate": 8.239182410576158e-06, - "loss": 0.0611, + "learning_rate": 1.8257532023777946e-05, + "loss": 0.0807, "step": 126435 }, { "epoch": 5.9, - "learning_rate": 8.238713609301018e-06, - "loss": 0.0486, + "learning_rate": 1.8257063953941926e-05, + "loss": 0.0583, "step": 126440 }, { "epoch": 5.9, - "learning_rate": 8.238244808025878e-06, - "loss": 0.0796, + "learning_rate": 1.825659588410591e-05, + "loss": 0.0975, "step": 126445 }, { "epoch": 5.9, - "learning_rate": 8.23777600675074e-06, - "loss": 0.0777, + "learning_rate": 1.825612781426989e-05, + "loss": 0.1626, "step": 126450 }, { "epoch": 5.9, - "learning_rate": 8.2373072054756e-06, - "loss": 0.2164, + "learning_rate": 1.825565974443387e-05, + "loss": 0.2224, "step": 126455 }, { "epoch": 5.9, - "learning_rate": 8.23683840420046e-06, - "loss": 0.0776, + "learning_rate": 1.8255191674597852e-05, + "loss": 0.083, "step": 126460 }, { "epoch": 5.9, - "learning_rate": 8.236369602925321e-06, - "loss": 0.02, + "learning_rate": 1.8254723604761832e-05, + "loss": 0.0252, "step": 126465 }, { "epoch": 5.9, - "learning_rate": 8.23590080165018e-06, - "loss": 0.0151, + "learning_rate": 1.8254255534925812e-05, + "loss": 0.0134, "step": 126470 }, { "epoch": 5.9, - "learning_rate": 8.235432000375042e-06, - "loss": 0.0776, + "learning_rate": 1.825378746508979e-05, + "loss": 0.0379, "step": 126475 }, { "epoch": 5.9, - "learning_rate": 8.234963199099902e-06, - "loss": 0.0382, + "learning_rate": 1.8253319395253775e-05, + "loss": 0.0611, "step": 126480 }, { "epoch": 5.9, - "learning_rate": 8.234494397824762e-06, - "loss": 0.0407, + "learning_rate": 1.8252851325417755e-05, + "loss": 0.0576, "step": 126485 }, { "epoch": 5.9, - "learning_rate": 8.234025596549622e-06, - "loss": 0.1025, + "learning_rate": 1.825238325558173e-05, + "loss": 0.1406, "step": 126490 }, { "epoch": 5.9, - "learning_rate": 8.233556795274484e-06, - "loss": 0.0626, + "learning_rate": 1.825191518574571e-05, + "loss": 0.0457, "step": 126495 }, { "epoch": 5.9, - "learning_rate": 8.233087993999344e-06, - "loss": 0.079, + "learning_rate": 1.8251447115909694e-05, + "loss": 0.1044, "step": 126500 }, { "epoch": 5.9, - "learning_rate": 8.232619192724205e-06, - "loss": 0.1957, + "learning_rate": 1.8250979046073674e-05, + "loss": 0.2577, "step": 126505 }, { "epoch": 5.9, - "learning_rate": 8.232150391449065e-06, - "loss": 0.0846, + "learning_rate": 1.8250510976237654e-05, + "loss": 0.0646, "step": 126510 }, { "epoch": 5.9, - "learning_rate": 8.231681590173925e-06, - "loss": 0.0094, + "learning_rate": 1.8250042906401637e-05, + "loss": 0.0199, "step": 126515 }, { "epoch": 5.9, - "learning_rate": 8.231212788898787e-06, - "loss": 0.0103, + "learning_rate": 1.8249574836565617e-05, + "loss": 0.006, "step": 126520 }, { "epoch": 5.9, - "learning_rate": 8.230743987623647e-06, - "loss": 0.0176, + "learning_rate": 1.8249106766729597e-05, + "loss": 0.0523, "step": 126525 }, { "epoch": 5.9, - "learning_rate": 8.230275186348507e-06, - "loss": 0.008, + "learning_rate": 1.8248638696893576e-05, + "loss": 0.0516, "step": 126530 }, { "epoch": 5.9, - "learning_rate": 8.229806385073368e-06, - "loss": 0.0479, + "learning_rate": 1.824817062705756e-05, + "loss": 0.0411, "step": 126535 }, { "epoch": 5.9, - "learning_rate": 8.229337583798228e-06, - "loss": 0.0636, + "learning_rate": 1.824770255722154e-05, + "loss": 0.0719, "step": 126540 }, { "epoch": 5.9, - "learning_rate": 8.22886878252309e-06, - "loss": 0.1319, + "learning_rate": 1.824723448738552e-05, + "loss": 0.0736, "step": 126545 }, { "epoch": 5.9, - "learning_rate": 8.22839998124795e-06, - "loss": 0.1853, + "learning_rate": 1.82467664175495e-05, + "loss": 0.1359, "step": 126550 }, { "epoch": 5.91, - "learning_rate": 8.22793117997281e-06, - "loss": 0.3079, + "learning_rate": 1.824629834771348e-05, + "loss": 0.2559, "step": 126555 }, { "epoch": 5.91, - "learning_rate": 8.22746237869767e-06, - "loss": 0.0828, + "learning_rate": 1.824583027787746e-05, + "loss": 0.1118, "step": 126560 }, { "epoch": 5.91, - "learning_rate": 8.226993577422531e-06, - "loss": 0.0314, + "learning_rate": 1.824536220804144e-05, + "loss": 0.016, "step": 126565 }, { "epoch": 5.91, - "learning_rate": 8.226524776147391e-06, - "loss": 0.0283, + "learning_rate": 1.824489413820542e-05, + "loss": 0.0338, "step": 126570 }, { "epoch": 5.91, - "learning_rate": 8.226055974872253e-06, - "loss": 0.0086, + "learning_rate": 1.82444260683694e-05, + "loss": 0.0322, "step": 126575 }, { "epoch": 5.91, - "learning_rate": 8.225587173597113e-06, - "loss": 0.0454, + "learning_rate": 1.824395799853338e-05, + "loss": 0.0394, "step": 126580 }, { "epoch": 5.91, - "learning_rate": 8.225118372321973e-06, - "loss": 0.0372, + "learning_rate": 1.824348992869736e-05, + "loss": 0.063, "step": 126585 }, { "epoch": 5.91, - "learning_rate": 8.224649571046834e-06, - "loss": 0.078, + "learning_rate": 1.8243021858861344e-05, + "loss": 0.0385, "step": 126590 }, { "epoch": 5.91, - "learning_rate": 8.224180769771694e-06, - "loss": 0.0745, + "learning_rate": 1.8242553789025324e-05, + "loss": 0.0533, "step": 126595 }, { "epoch": 5.91, - "learning_rate": 8.223711968496554e-06, - "loss": 0.1971, + "learning_rate": 1.8242085719189304e-05, + "loss": 0.1209, "step": 126600 }, { "epoch": 5.91, - "learning_rate": 8.223243167221416e-06, - "loss": 0.2313, + "learning_rate": 1.8241617649353284e-05, + "loss": 0.2642, "step": 126605 }, { "epoch": 5.91, - "learning_rate": 8.222774365946276e-06, - "loss": 0.1155, + "learning_rate": 1.8241149579517267e-05, + "loss": 0.065, "step": 126610 }, { "epoch": 5.91, - "learning_rate": 8.222305564671138e-06, - "loss": 0.0185, + "learning_rate": 1.8240681509681243e-05, + "loss": 0.0163, "step": 126615 }, { "epoch": 5.91, - "learning_rate": 8.221836763395997e-06, - "loss": 0.0064, + "learning_rate": 1.8240213439845223e-05, + "loss": 0.0065, "step": 126620 }, { "epoch": 5.91, - "learning_rate": 8.221367962120857e-06, - "loss": 0.0194, + "learning_rate": 1.8239745370009203e-05, + "loss": 0.0223, "step": 126625 }, { "epoch": 5.91, - "learning_rate": 8.220899160845717e-06, - "loss": 0.0374, + "learning_rate": 1.8239277300173186e-05, + "loss": 0.0288, "step": 126630 }, { "epoch": 5.91, - "learning_rate": 8.220430359570579e-06, - "loss": 0.0368, + "learning_rate": 1.8238809230337166e-05, + "loss": 0.059, "step": 126635 }, { "epoch": 5.91, - "learning_rate": 8.219961558295439e-06, - "loss": 0.0835, + "learning_rate": 1.8238341160501146e-05, + "loss": 0.0546, "step": 126640 }, { "epoch": 5.91, - "learning_rate": 8.2194927570203e-06, - "loss": 0.1025, + "learning_rate": 1.823787309066513e-05, + "loss": 0.1168, "step": 126645 }, { "epoch": 5.91, - "learning_rate": 8.21902395574516e-06, - "loss": 0.1341, + "learning_rate": 1.823740502082911e-05, + "loss": 0.1333, "step": 126650 }, { "epoch": 5.91, - "learning_rate": 8.21855515447002e-06, - "loss": 0.2223, + "learning_rate": 1.823693695099309e-05, + "loss": 0.1144, "step": 126655 }, { "epoch": 5.91, - "learning_rate": 8.218086353194882e-06, - "loss": 0.1193, + "learning_rate": 1.823646888115707e-05, + "loss": 0.0652, "step": 126660 }, { "epoch": 5.91, - "learning_rate": 8.217617551919742e-06, - "loss": 0.0165, + "learning_rate": 1.8236000811321052e-05, + "loss": 0.0395, "step": 126665 }, { "epoch": 5.91, - "learning_rate": 8.217148750644602e-06, - "loss": 0.0058, + "learning_rate": 1.823553274148503e-05, + "loss": 0.05, "step": 126670 }, { "epoch": 5.91, - "learning_rate": 8.216679949369462e-06, - "loss": 0.0287, + "learning_rate": 1.823506467164901e-05, + "loss": 0.0585, "step": 126675 }, { "epoch": 5.91, - "learning_rate": 8.216211148094323e-06, - "loss": 0.0269, + "learning_rate": 1.8234596601812988e-05, + "loss": 0.0129, "step": 126680 }, { "epoch": 5.91, - "learning_rate": 8.215742346819185e-06, - "loss": 0.0308, + "learning_rate": 1.823412853197697e-05, + "loss": 0.096, "step": 126685 }, { "epoch": 5.91, - "learning_rate": 8.215273545544045e-06, - "loss": 0.0824, + "learning_rate": 1.823366046214095e-05, + "loss": 0.0621, "step": 126690 }, { "epoch": 5.91, - "learning_rate": 8.214804744268905e-06, - "loss": 0.0878, + "learning_rate": 1.823319239230493e-05, + "loss": 0.1071, "step": 126695 }, { "epoch": 5.91, - "learning_rate": 8.214335942993765e-06, - "loss": 0.1158, + "learning_rate": 1.8232724322468914e-05, + "loss": 0.2036, "step": 126700 }, { "epoch": 5.91, - "learning_rate": 8.213867141718627e-06, - "loss": 0.1553, + "learning_rate": 1.8232256252632894e-05, + "loss": 0.2206, "step": 126705 }, { "epoch": 5.91, - "learning_rate": 8.213398340443486e-06, - "loss": 0.0894, + "learning_rate": 1.8231788182796874e-05, + "loss": 0.065, "step": 126710 }, { "epoch": 5.91, - "learning_rate": 8.212929539168346e-06, - "loss": 0.027, + "learning_rate": 1.8231320112960853e-05, + "loss": 0.0579, "step": 126715 }, { "epoch": 5.91, - "learning_rate": 8.212460737893208e-06, - "loss": 0.0494, + "learning_rate": 1.8230852043124837e-05, + "loss": 0.0164, "step": 126720 }, { "epoch": 5.91, - "learning_rate": 8.211991936618068e-06, - "loss": 0.0326, + "learning_rate": 1.8230383973288816e-05, + "loss": 0.0288, "step": 126725 }, { "epoch": 5.91, - "learning_rate": 8.21152313534293e-06, - "loss": 0.034, + "learning_rate": 1.8229915903452796e-05, + "loss": 0.0126, "step": 126730 }, { "epoch": 5.91, - "learning_rate": 8.21105433406779e-06, - "loss": 0.0634, + "learning_rate": 1.8229447833616776e-05, + "loss": 0.0421, "step": 126735 }, { "epoch": 5.91, - "learning_rate": 8.21058553279265e-06, - "loss": 0.1048, + "learning_rate": 1.822897976378076e-05, + "loss": 0.0642, "step": 126740 }, { "epoch": 5.91, - "learning_rate": 8.21011673151751e-06, - "loss": 0.0879, + "learning_rate": 1.8228511693944736e-05, + "loss": 0.0343, "step": 126745 }, { "epoch": 5.91, - "learning_rate": 8.209647930242371e-06, - "loss": 0.0503, + "learning_rate": 1.8228043624108715e-05, + "loss": 0.0929, "step": 126750 }, { "epoch": 5.91, - "learning_rate": 8.209179128967233e-06, - "loss": 0.2936, + "learning_rate": 1.8227575554272695e-05, + "loss": 0.2324, "step": 126755 }, { "epoch": 5.91, - "learning_rate": 8.208710327692093e-06, - "loss": 0.1077, + "learning_rate": 1.822710748443668e-05, + "loss": 0.0939, "step": 126760 }, { "epoch": 5.92, - "learning_rate": 8.208241526416953e-06, - "loss": 0.009, + "learning_rate": 1.822663941460066e-05, + "loss": 0.0181, "step": 126765 }, { "epoch": 5.92, - "learning_rate": 8.207772725141812e-06, - "loss": 0.0408, + "learning_rate": 1.8226171344764638e-05, + "loss": 0.0105, "step": 126770 }, { "epoch": 5.92, - "learning_rate": 8.207303923866674e-06, - "loss": 0.0221, + "learning_rate": 1.822570327492862e-05, + "loss": 0.0499, "step": 126775 }, { "epoch": 5.92, - "learning_rate": 8.206835122591534e-06, - "loss": 0.0295, + "learning_rate": 1.82252352050926e-05, + "loss": 0.0728, "step": 126780 }, { "epoch": 5.92, - "learning_rate": 8.206366321316394e-06, - "loss": 0.0658, + "learning_rate": 1.822476713525658e-05, + "loss": 0.0813, "step": 126785 }, { "epoch": 5.92, - "learning_rate": 8.205897520041256e-06, - "loss": 0.0582, + "learning_rate": 1.822429906542056e-05, + "loss": 0.0725, "step": 126790 }, { "epoch": 5.92, - "learning_rate": 8.205428718766115e-06, - "loss": 0.0607, + "learning_rate": 1.8223830995584544e-05, + "loss": 0.1359, "step": 126795 }, { "epoch": 5.92, - "learning_rate": 8.204959917490977e-06, - "loss": 0.2158, + "learning_rate": 1.8223362925748524e-05, + "loss": 0.0611, "step": 126800 }, { "epoch": 5.92, - "learning_rate": 8.204491116215837e-06, - "loss": 0.3302, + "learning_rate": 1.82228948559125e-05, + "loss": 0.1961, "step": 126805 }, { "epoch": 5.92, - "learning_rate": 8.204022314940697e-06, - "loss": 0.1286, + "learning_rate": 1.822242678607648e-05, + "loss": 0.0709, "step": 126810 }, { "epoch": 5.92, - "learning_rate": 8.203553513665557e-06, - "loss": 0.0236, + "learning_rate": 1.8221958716240463e-05, + "loss": 0.0266, "step": 126815 }, { "epoch": 5.92, - "learning_rate": 8.203084712390419e-06, - "loss": 0.019, + "learning_rate": 1.8221490646404443e-05, + "loss": 0.0402, "step": 126820 }, { "epoch": 5.92, - "learning_rate": 8.202615911115278e-06, - "loss": 0.0877, + "learning_rate": 1.8221022576568423e-05, + "loss": 0.0879, "step": 126825 }, { "epoch": 5.92, - "learning_rate": 8.20214710984014e-06, - "loss": 0.0377, + "learning_rate": 1.8220554506732406e-05, + "loss": 0.0305, "step": 126830 }, { "epoch": 5.92, - "learning_rate": 8.201678308565e-06, - "loss": 0.087, + "learning_rate": 1.8220086436896386e-05, + "loss": 0.0553, "step": 126835 }, { "epoch": 5.92, - "learning_rate": 8.20120950728986e-06, - "loss": 0.1002, + "learning_rate": 1.8219618367060366e-05, + "loss": 0.0481, "step": 126840 }, { "epoch": 5.92, - "learning_rate": 8.200740706014722e-06, - "loss": 0.1038, + "learning_rate": 1.8219150297224346e-05, + "loss": 0.0664, "step": 126845 }, { "epoch": 5.92, - "learning_rate": 8.200271904739582e-06, - "loss": 0.1313, + "learning_rate": 1.821868222738833e-05, + "loss": 0.2, "step": 126850 }, { "epoch": 5.92, - "learning_rate": 8.199803103464441e-06, - "loss": 0.2263, + "learning_rate": 1.821821415755231e-05, + "loss": 0.3242, "step": 126855 }, { "epoch": 5.92, - "learning_rate": 8.199334302189303e-06, - "loss": 0.0951, + "learning_rate": 1.821774608771629e-05, + "loss": 0.079, "step": 126860 }, { "epoch": 5.92, - "learning_rate": 8.198865500914163e-06, - "loss": 0.0087, + "learning_rate": 1.8217278017880268e-05, + "loss": 0.0873, "step": 126865 }, { "epoch": 5.92, - "learning_rate": 8.198396699639025e-06, - "loss": 0.0206, + "learning_rate": 1.8216809948044248e-05, + "loss": 0.0337, "step": 126870 }, { "epoch": 5.92, - "learning_rate": 8.197927898363885e-06, - "loss": 0.0855, + "learning_rate": 1.8216341878208228e-05, + "loss": 0.026, "step": 126875 }, { "epoch": 5.92, - "learning_rate": 8.197459097088745e-06, - "loss": 0.0216, + "learning_rate": 1.8215873808372208e-05, + "loss": 0.0366, "step": 126880 }, { "epoch": 5.92, - "learning_rate": 8.196990295813604e-06, - "loss": 0.0294, + "learning_rate": 1.821540573853619e-05, + "loss": 0.0551, "step": 126885 }, { "epoch": 5.92, - "learning_rate": 8.196521494538466e-06, - "loss": 0.0617, + "learning_rate": 1.821493766870017e-05, + "loss": 0.0863, "step": 126890 }, { "epoch": 5.92, - "learning_rate": 8.196052693263326e-06, - "loss": 0.1395, + "learning_rate": 1.821446959886415e-05, + "loss": 0.0994, "step": 126895 }, { "epoch": 5.92, - "learning_rate": 8.195583891988188e-06, - "loss": 0.2131, + "learning_rate": 1.821400152902813e-05, + "loss": 0.0881, "step": 126900 }, { "epoch": 5.92, - "learning_rate": 8.195115090713048e-06, - "loss": 0.1527, + "learning_rate": 1.8213533459192114e-05, + "loss": 0.176, "step": 126905 }, { "epoch": 5.92, - "learning_rate": 8.194646289437908e-06, - "loss": 0.0787, + "learning_rate": 1.8213065389356093e-05, + "loss": 0.1041, "step": 126910 }, { "epoch": 5.92, - "learning_rate": 8.194177488162769e-06, - "loss": 0.0109, + "learning_rate": 1.8212597319520073e-05, + "loss": 0.0169, "step": 126915 }, { "epoch": 5.92, - "learning_rate": 8.193708686887629e-06, - "loss": 0.0233, + "learning_rate": 1.8212129249684053e-05, + "loss": 0.0374, "step": 126920 }, { "epoch": 5.92, - "learning_rate": 8.193239885612489e-06, - "loss": 0.0374, + "learning_rate": 1.8211661179848036e-05, + "loss": 0.0431, "step": 126925 }, { "epoch": 5.92, - "learning_rate": 8.19277108433735e-06, - "loss": 0.0447, + "learning_rate": 1.8211193110012016e-05, + "loss": 0.021, "step": 126930 }, { "epoch": 5.92, - "learning_rate": 8.19230228306221e-06, - "loss": 0.0489, + "learning_rate": 1.8210725040175992e-05, + "loss": 0.0274, "step": 126935 }, { "epoch": 5.92, - "learning_rate": 8.191833481787072e-06, - "loss": 0.0539, + "learning_rate": 1.8210256970339976e-05, + "loss": 0.0832, "step": 126940 }, { "epoch": 5.92, - "learning_rate": 8.191364680511932e-06, - "loss": 0.1328, + "learning_rate": 1.8209788900503955e-05, + "loss": 0.0638, "step": 126945 }, { "epoch": 5.92, - "learning_rate": 8.190895879236792e-06, - "loss": 0.1058, + "learning_rate": 1.8209320830667935e-05, + "loss": 0.1258, "step": 126950 }, { "epoch": 5.92, - "learning_rate": 8.190427077961652e-06, - "loss": 0.2445, + "learning_rate": 1.8208852760831915e-05, + "loss": 0.2309, "step": 126955 }, { "epoch": 5.92, - "learning_rate": 8.189958276686514e-06, - "loss": 0.0936, + "learning_rate": 1.82083846909959e-05, + "loss": 0.1331, "step": 126960 }, { "epoch": 5.92, - "learning_rate": 8.189489475411374e-06, - "loss": 0.0055, + "learning_rate": 1.8207916621159878e-05, + "loss": 0.0348, "step": 126965 }, { "epoch": 5.92, - "learning_rate": 8.189020674136235e-06, - "loss": 0.0239, + "learning_rate": 1.8207448551323858e-05, + "loss": 0.0094, "step": 126970 }, { "epoch": 5.92, - "learning_rate": 8.188551872861095e-06, - "loss": 0.0454, + "learning_rate": 1.8206980481487838e-05, + "loss": 0.0371, "step": 126975 }, { "epoch": 5.93, - "learning_rate": 8.188083071585955e-06, - "loss": 0.0455, + "learning_rate": 1.820651241165182e-05, + "loss": 0.0249, "step": 126980 }, { "epoch": 5.93, - "learning_rate": 8.187614270310817e-06, - "loss": 0.0707, + "learning_rate": 1.82060443418158e-05, + "loss": 0.0467, "step": 126985 }, { "epoch": 5.93, - "learning_rate": 8.187145469035677e-06, - "loss": 0.0657, + "learning_rate": 1.820557627197978e-05, + "loss": 0.0622, "step": 126990 }, { "epoch": 5.93, - "learning_rate": 8.186676667760537e-06, - "loss": 0.0664, + "learning_rate": 1.8205108202143757e-05, + "loss": 0.1077, "step": 126995 }, { "epoch": 5.93, - "learning_rate": 8.186207866485396e-06, - "loss": 0.1272, + "learning_rate": 1.820464013230774e-05, + "loss": 0.1912, "step": 127000 }, { "epoch": 5.93, - "learning_rate": 8.185739065210258e-06, - "loss": 0.3563, + "learning_rate": 1.820417206247172e-05, + "loss": 0.2269, "step": 127005 }, { "epoch": 5.93, - "learning_rate": 8.18527026393512e-06, - "loss": 0.1038, + "learning_rate": 1.82037039926357e-05, + "loss": 0.0505, "step": 127010 }, { "epoch": 5.93, - "learning_rate": 8.18480146265998e-06, - "loss": 0.0063, + "learning_rate": 1.8203235922799683e-05, + "loss": 0.0082, "step": 127015 }, { "epoch": 5.93, - "learning_rate": 8.18433266138484e-06, - "loss": 0.0237, + "learning_rate": 1.8202767852963663e-05, + "loss": 0.0144, "step": 127020 }, { "epoch": 5.93, - "learning_rate": 8.1838638601097e-06, - "loss": 0.0103, + "learning_rate": 1.8202299783127643e-05, + "loss": 0.0298, "step": 127025 }, { "epoch": 5.93, - "learning_rate": 8.183395058834561e-06, - "loss": 0.0338, + "learning_rate": 1.8201831713291623e-05, + "loss": 0.0156, "step": 127030 }, { "epoch": 5.93, - "learning_rate": 8.182926257559421e-06, - "loss": 0.0344, + "learning_rate": 1.8201363643455606e-05, + "loss": 0.0788, "step": 127035 }, { "epoch": 5.93, - "learning_rate": 8.182457456284281e-06, - "loss": 0.0576, + "learning_rate": 1.8200895573619586e-05, + "loss": 0.0373, "step": 127040 }, { "epoch": 5.93, - "learning_rate": 8.181988655009143e-06, - "loss": 0.0801, + "learning_rate": 1.8200427503783565e-05, + "loss": 0.1012, "step": 127045 }, { "epoch": 5.93, - "learning_rate": 8.181519853734003e-06, - "loss": 0.1303, + "learning_rate": 1.8199959433947545e-05, + "loss": 0.1856, "step": 127050 }, { "epoch": 5.93, - "learning_rate": 8.181051052458864e-06, - "loss": 0.2755, + "learning_rate": 1.819949136411153e-05, + "loss": 0.3848, "step": 127055 }, { "epoch": 5.93, - "learning_rate": 8.180582251183724e-06, - "loss": 0.1133, + "learning_rate": 1.8199023294275505e-05, + "loss": 0.0661, "step": 127060 }, { "epoch": 5.93, - "learning_rate": 8.180113449908584e-06, - "loss": 0.0358, + "learning_rate": 1.8198555224439485e-05, + "loss": 0.0151, "step": 127065 }, { "epoch": 5.93, - "learning_rate": 8.179644648633444e-06, - "loss": 0.0298, + "learning_rate": 1.8198087154603468e-05, + "loss": 0.0235, "step": 127070 }, { "epoch": 5.93, - "learning_rate": 8.179175847358306e-06, - "loss": 0.051, + "learning_rate": 1.8197619084767448e-05, + "loss": 0.0163, "step": 127075 }, { "epoch": 5.93, - "learning_rate": 8.178707046083166e-06, - "loss": 0.0129, + "learning_rate": 1.8197151014931427e-05, + "loss": 0.0148, "step": 127080 }, { "epoch": 5.93, - "learning_rate": 8.178238244808027e-06, - "loss": 0.0407, + "learning_rate": 1.8196682945095407e-05, + "loss": 0.0404, "step": 127085 }, { "epoch": 5.93, - "learning_rate": 8.177769443532887e-06, - "loss": 0.0684, + "learning_rate": 1.819621487525939e-05, + "loss": 0.1461, "step": 127090 }, { "epoch": 5.93, - "learning_rate": 8.177300642257747e-06, - "loss": 0.1773, + "learning_rate": 1.819574680542337e-05, + "loss": 0.0682, "step": 127095 }, { "epoch": 5.93, - "learning_rate": 8.176831840982609e-06, - "loss": 0.1413, + "learning_rate": 1.819527873558735e-05, + "loss": 0.0873, "step": 127100 }, { "epoch": 5.93, - "learning_rate": 8.176363039707469e-06, - "loss": 0.2806, + "learning_rate": 1.819481066575133e-05, + "loss": 0.1687, "step": 127105 }, { "epoch": 5.93, - "learning_rate": 8.175894238432329e-06, - "loss": 0.1134, + "learning_rate": 1.8194342595915313e-05, + "loss": 0.1107, "step": 127110 }, { "epoch": 5.93, - "learning_rate": 8.17542543715719e-06, - "loss": 0.0009, + "learning_rate": 1.8193874526079293e-05, + "loss": 0.0107, "step": 127115 }, { "epoch": 5.93, - "learning_rate": 8.17495663588205e-06, - "loss": 0.0377, + "learning_rate": 1.8193406456243273e-05, + "loss": 0.0098, "step": 127120 }, { "epoch": 5.93, - "learning_rate": 8.174487834606912e-06, - "loss": 0.013, + "learning_rate": 1.8192938386407253e-05, + "loss": 0.0152, "step": 127125 }, { "epoch": 5.93, - "learning_rate": 8.174019033331772e-06, - "loss": 0.0195, + "learning_rate": 1.8192470316571232e-05, + "loss": 0.0118, "step": 127130 }, { "epoch": 5.93, - "learning_rate": 8.173550232056632e-06, - "loss": 0.0201, + "learning_rate": 1.8192002246735212e-05, + "loss": 0.0141, "step": 127135 }, { "epoch": 5.93, - "learning_rate": 8.173081430781492e-06, - "loss": 0.0994, + "learning_rate": 1.8191534176899192e-05, + "loss": 0.0642, "step": 127140 }, { "epoch": 5.93, - "learning_rate": 8.172612629506353e-06, - "loss": 0.0742, + "learning_rate": 1.8191066107063175e-05, + "loss": 0.081, "step": 127145 }, { "epoch": 5.93, - "learning_rate": 8.172143828231213e-06, - "loss": 0.1336, + "learning_rate": 1.8190598037227155e-05, + "loss": 0.1147, "step": 127150 }, { "epoch": 5.93, - "learning_rate": 8.171675026956075e-06, - "loss": 0.2115, + "learning_rate": 1.8190129967391135e-05, + "loss": 0.3041, "step": 127155 }, { "epoch": 5.93, - "learning_rate": 8.171206225680935e-06, - "loss": 0.0646, + "learning_rate": 1.8189661897555115e-05, + "loss": 0.0453, "step": 127160 }, { "epoch": 5.93, - "learning_rate": 8.170737424405795e-06, - "loss": 0.013, + "learning_rate": 1.8189193827719098e-05, + "loss": 0.0064, "step": 127165 }, { "epoch": 5.93, - "learning_rate": 8.170268623130656e-06, - "loss": 0.0105, + "learning_rate": 1.8188725757883078e-05, + "loss": 0.0245, "step": 127170 }, { "epoch": 5.93, - "learning_rate": 8.169799821855516e-06, - "loss": 0.0403, + "learning_rate": 1.8188257688047058e-05, + "loss": 0.0018, "step": 127175 }, { "epoch": 5.93, - "learning_rate": 8.169331020580376e-06, - "loss": 0.0273, + "learning_rate": 1.8187789618211037e-05, + "loss": 0.021, "step": 127180 }, { "epoch": 5.93, - "learning_rate": 8.168862219305238e-06, - "loss": 0.0414, + "learning_rate": 1.8187321548375017e-05, + "loss": 0.0451, "step": 127185 }, { "epoch": 5.93, - "learning_rate": 8.168393418030098e-06, - "loss": 0.07, + "learning_rate": 1.8186853478538997e-05, + "loss": 0.0174, "step": 127190 }, { "epoch": 5.94, - "learning_rate": 8.16792461675496e-06, - "loss": 0.0918, + "learning_rate": 1.8186385408702977e-05, + "loss": 0.0851, "step": 127195 }, { "epoch": 5.94, - "learning_rate": 8.16745581547982e-06, - "loss": 0.1295, + "learning_rate": 1.818591733886696e-05, + "loss": 0.0859, "step": 127200 }, { "epoch": 5.94, - "learning_rate": 8.16698701420468e-06, - "loss": 0.2633, + "learning_rate": 1.818544926903094e-05, + "loss": 0.397, "step": 127205 }, { "epoch": 5.94, - "learning_rate": 8.166518212929539e-06, - "loss": 0.3052, + "learning_rate": 1.818498119919492e-05, + "loss": 0.1196, "step": 127210 }, { "epoch": 5.94, - "learning_rate": 8.1660494116544e-06, - "loss": 0.0141, + "learning_rate": 1.81845131293589e-05, + "loss": 0.0285, "step": 127215 }, { "epoch": 5.94, - "learning_rate": 8.16558061037926e-06, - "loss": 0.0016, + "learning_rate": 1.8184045059522883e-05, + "loss": 0.042, "step": 127220 }, { "epoch": 5.94, - "learning_rate": 8.165111809104122e-06, - "loss": 0.0151, + "learning_rate": 1.8183576989686863e-05, + "loss": 0.0499, "step": 127225 }, { "epoch": 5.94, - "learning_rate": 8.164643007828982e-06, - "loss": 0.0509, + "learning_rate": 1.8183108919850842e-05, + "loss": 0.0252, "step": 127230 }, { "epoch": 5.94, - "learning_rate": 8.164174206553842e-06, - "loss": 0.0477, + "learning_rate": 1.8182640850014822e-05, + "loss": 0.0425, "step": 127235 }, { "epoch": 5.94, - "learning_rate": 8.163705405278704e-06, - "loss": 0.0687, + "learning_rate": 1.8182172780178805e-05, + "loss": 0.1505, "step": 127240 }, { "epoch": 5.94, - "learning_rate": 8.163236604003564e-06, - "loss": 0.1652, + "learning_rate": 1.8181704710342785e-05, + "loss": 0.1594, "step": 127245 }, { "epoch": 5.94, - "learning_rate": 8.162767802728424e-06, - "loss": 0.0843, + "learning_rate": 1.818123664050676e-05, + "loss": 0.1309, "step": 127250 }, { "epoch": 5.94, - "learning_rate": 8.162299001453284e-06, - "loss": 0.2756, + "learning_rate": 1.8180768570670745e-05, + "loss": 0.1417, "step": 127255 }, { "epoch": 5.94, - "learning_rate": 8.161830200178145e-06, - "loss": 0.0499, + "learning_rate": 1.8180300500834725e-05, + "loss": 0.076, "step": 127260 }, { "epoch": 5.94, - "learning_rate": 8.161361398903007e-06, - "loss": 0.013, + "learning_rate": 1.8179832430998704e-05, + "loss": 0.0193, "step": 127265 }, { "epoch": 5.94, - "learning_rate": 8.160892597627867e-06, - "loss": 0.0104, + "learning_rate": 1.8179364361162684e-05, + "loss": 0.006, "step": 127270 }, { "epoch": 5.94, - "learning_rate": 8.160423796352727e-06, - "loss": 0.0084, + "learning_rate": 1.8178896291326667e-05, + "loss": 0.0374, "step": 127275 }, { "epoch": 5.94, - "learning_rate": 8.159954995077587e-06, - "loss": 0.0335, + "learning_rate": 1.8178428221490647e-05, + "loss": 0.0611, "step": 127280 }, { "epoch": 5.94, - "learning_rate": 8.159486193802448e-06, - "loss": 0.0949, + "learning_rate": 1.8177960151654627e-05, + "loss": 0.0361, "step": 127285 }, { "epoch": 5.94, - "learning_rate": 8.159017392527308e-06, - "loss": 0.0693, + "learning_rate": 1.8177492081818607e-05, + "loss": 0.0512, "step": 127290 }, { "epoch": 5.94, - "learning_rate": 8.158548591252168e-06, - "loss": 0.1222, + "learning_rate": 1.817702401198259e-05, + "loss": 0.0771, "step": 127295 }, { "epoch": 5.94, - "learning_rate": 8.15807978997703e-06, - "loss": 0.1071, + "learning_rate": 1.817655594214657e-05, + "loss": 0.0547, "step": 127300 }, { "epoch": 5.94, - "learning_rate": 8.15761098870189e-06, - "loss": 0.2074, + "learning_rate": 1.817608787231055e-05, + "loss": 0.2538, "step": 127305 }, { "epoch": 5.94, - "learning_rate": 8.157142187426751e-06, - "loss": 0.0569, + "learning_rate": 1.817561980247453e-05, + "loss": 0.0872, "step": 127310 }, { "epoch": 5.94, - "learning_rate": 8.156673386151611e-06, - "loss": 0.0474, + "learning_rate": 1.817515173263851e-05, + "loss": 0.0099, "step": 127315 }, { "epoch": 5.94, - "learning_rate": 8.156204584876471e-06, - "loss": 0.0358, + "learning_rate": 1.817468366280249e-05, + "loss": 0.0596, "step": 127320 }, { "epoch": 5.94, - "learning_rate": 8.155735783601331e-06, - "loss": 0.0158, + "learning_rate": 1.817421559296647e-05, + "loss": 0.0093, "step": 127325 }, { "epoch": 5.94, - "learning_rate": 8.155266982326193e-06, - "loss": 0.0315, + "learning_rate": 1.8173747523130452e-05, + "loss": 0.0629, "step": 127330 }, { "epoch": 5.94, - "learning_rate": 8.154798181051053e-06, - "loss": 0.0639, + "learning_rate": 1.8173279453294432e-05, + "loss": 0.0528, "step": 127335 }, { "epoch": 5.94, - "learning_rate": 8.154329379775914e-06, - "loss": 0.0705, + "learning_rate": 1.8172811383458412e-05, + "loss": 0.0752, "step": 127340 }, { "epoch": 5.94, - "learning_rate": 8.153860578500774e-06, - "loss": 0.101, + "learning_rate": 1.8172343313622392e-05, + "loss": 0.0784, "step": 127345 }, { "epoch": 5.94, - "learning_rate": 8.153391777225634e-06, - "loss": 0.1148, + "learning_rate": 1.8171875243786375e-05, + "loss": 0.0574, "step": 127350 }, { "epoch": 5.94, - "learning_rate": 8.152922975950496e-06, - "loss": 0.2567, + "learning_rate": 1.8171407173950355e-05, + "loss": 0.1666, "step": 127355 }, { "epoch": 5.94, - "learning_rate": 8.152454174675356e-06, - "loss": 0.088, + "learning_rate": 1.8170939104114335e-05, + "loss": 0.0644, "step": 127360 }, { "epoch": 5.94, - "learning_rate": 8.151985373400216e-06, - "loss": 0.0504, + "learning_rate": 1.8170471034278314e-05, + "loss": 0.007, "step": 127365 }, { "epoch": 5.94, - "learning_rate": 8.151516572125077e-06, - "loss": 0.0112, + "learning_rate": 1.8170002964442298e-05, + "loss": 0.0057, "step": 127370 }, { "epoch": 5.94, - "learning_rate": 8.151047770849937e-06, - "loss": 0.0514, + "learning_rate": 1.8169534894606274e-05, + "loss": 0.0267, "step": 127375 }, { "epoch": 5.94, - "learning_rate": 8.150578969574799e-06, - "loss": 0.0379, + "learning_rate": 1.8169066824770254e-05, + "loss": 0.0351, "step": 127380 }, { "epoch": 5.94, - "learning_rate": 8.150110168299659e-06, - "loss": 0.0202, + "learning_rate": 1.8168598754934237e-05, + "loss": 0.0393, "step": 127385 }, { "epoch": 5.94, - "learning_rate": 8.149641367024519e-06, - "loss": 0.0963, + "learning_rate": 1.8168130685098217e-05, + "loss": 0.061, "step": 127390 }, { "epoch": 5.94, - "learning_rate": 8.149172565749379e-06, - "loss": 0.047, + "learning_rate": 1.8167662615262197e-05, + "loss": 0.0963, "step": 127395 }, { "epoch": 5.94, - "learning_rate": 8.14870376447424e-06, - "loss": 0.1008, + "learning_rate": 1.8167194545426176e-05, + "loss": 0.085, "step": 127400 }, { "epoch": 5.94, - "learning_rate": 8.1482349631991e-06, - "loss": 0.2617, + "learning_rate": 1.816672647559016e-05, + "loss": 0.1694, "step": 127405 }, { "epoch": 5.95, - "learning_rate": 8.147766161923962e-06, - "loss": 0.0987, + "learning_rate": 1.816625840575414e-05, + "loss": 0.0973, "step": 127410 }, { "epoch": 5.95, - "learning_rate": 8.147297360648822e-06, - "loss": 0.0046, + "learning_rate": 1.816579033591812e-05, + "loss": 0.0199, "step": 127415 }, { "epoch": 5.95, - "learning_rate": 8.146828559373682e-06, - "loss": 0.0082, + "learning_rate": 1.81653222660821e-05, + "loss": 0.0045, "step": 127420 }, { "epoch": 5.95, - "learning_rate": 8.146359758098543e-06, - "loss": 0.0419, + "learning_rate": 1.8164854196246082e-05, + "loss": 0.0116, "step": 127425 }, { "epoch": 5.95, - "learning_rate": 8.145890956823403e-06, - "loss": 0.0411, + "learning_rate": 1.8164386126410062e-05, + "loss": 0.0384, "step": 127430 }, { "epoch": 5.95, - "learning_rate": 8.145422155548263e-06, - "loss": 0.0345, + "learning_rate": 1.8163918056574042e-05, + "loss": 0.0064, "step": 127435 }, { "epoch": 5.95, - "learning_rate": 8.144953354273125e-06, - "loss": 0.1133, + "learning_rate": 1.8163449986738022e-05, + "loss": 0.0525, "step": 127440 }, { "epoch": 5.95, - "learning_rate": 8.144484552997985e-06, - "loss": 0.0627, + "learning_rate": 1.8162981916902e-05, + "loss": 0.1016, "step": 127445 }, { "epoch": 5.95, - "learning_rate": 8.144015751722846e-06, - "loss": 0.1253, + "learning_rate": 1.816251384706598e-05, + "loss": 0.0897, "step": 127450 }, { "epoch": 5.95, - "learning_rate": 8.143546950447706e-06, - "loss": 0.3977, + "learning_rate": 1.816204577722996e-05, + "loss": 0.2984, "step": 127455 }, { "epoch": 5.95, - "learning_rate": 8.143078149172566e-06, - "loss": 0.0689, + "learning_rate": 1.8161577707393944e-05, + "loss": 0.0865, "step": 127460 }, { "epoch": 5.95, - "learning_rate": 8.142609347897426e-06, - "loss": 0.0159, + "learning_rate": 1.8161109637557924e-05, + "loss": 0.0424, "step": 127465 }, { "epoch": 5.95, - "learning_rate": 8.142140546622288e-06, - "loss": 0.0418, + "learning_rate": 1.8160641567721904e-05, + "loss": 0.009, "step": 127470 }, { "epoch": 5.95, - "learning_rate": 8.141671745347148e-06, - "loss": 0.0362, + "learning_rate": 1.8160173497885884e-05, + "loss": 0.0409, "step": 127475 }, { "epoch": 5.95, - "learning_rate": 8.14120294407201e-06, - "loss": 0.0873, + "learning_rate": 1.8159705428049867e-05, + "loss": 0.0458, "step": 127480 }, { "epoch": 5.95, - "learning_rate": 8.14073414279687e-06, - "loss": 0.0607, + "learning_rate": 1.8159237358213847e-05, + "loss": 0.0198, "step": 127485 }, { "epoch": 5.95, - "learning_rate": 8.14026534152173e-06, - "loss": 0.0747, + "learning_rate": 1.8158769288377827e-05, + "loss": 0.6064, "step": 127490 }, { "epoch": 5.95, - "learning_rate": 8.139796540246591e-06, - "loss": 0.0857, + "learning_rate": 1.815830121854181e-05, + "loss": 0.0936, "step": 127495 }, { "epoch": 5.95, - "learning_rate": 8.13932773897145e-06, - "loss": 0.086, + "learning_rate": 1.8157833148705786e-05, + "loss": 0.0617, "step": 127500 }, { "epoch": 5.95, - "learning_rate": 8.13885893769631e-06, - "loss": 0.2696, + "learning_rate": 1.8157365078869766e-05, + "loss": 0.1977, "step": 127505 }, { "epoch": 5.95, - "learning_rate": 8.13839013642117e-06, - "loss": 0.0704, + "learning_rate": 1.8156897009033746e-05, + "loss": 0.1053, "step": 127510 }, { "epoch": 5.95, - "learning_rate": 8.137921335146032e-06, - "loss": 0.0324, + "learning_rate": 1.815642893919773e-05, + "loss": 0.0339, "step": 127515 }, { "epoch": 5.95, - "learning_rate": 8.137452533870894e-06, - "loss": 0.0202, + "learning_rate": 1.815596086936171e-05, + "loss": 0.0312, "step": 127520 }, { "epoch": 5.95, - "learning_rate": 8.136983732595754e-06, - "loss": 0.0324, + "learning_rate": 1.815549279952569e-05, + "loss": 0.0194, "step": 127525 }, { "epoch": 5.95, - "learning_rate": 8.136514931320614e-06, - "loss": 0.0219, + "learning_rate": 1.815502472968967e-05, + "loss": 0.0495, "step": 127530 }, { "epoch": 5.95, - "learning_rate": 8.136046130045474e-06, - "loss": 0.0706, + "learning_rate": 1.8154556659853652e-05, + "loss": 0.0917, "step": 127535 }, { "epoch": 5.95, - "learning_rate": 8.135577328770335e-06, - "loss": 0.0685, + "learning_rate": 1.8154088590017632e-05, + "loss": 0.0787, "step": 127540 }, { "epoch": 5.95, - "learning_rate": 8.135108527495195e-06, - "loss": 0.0317, + "learning_rate": 1.815362052018161e-05, + "loss": 0.1208, "step": 127545 }, { "epoch": 5.95, - "learning_rate": 8.134639726220055e-06, - "loss": 0.1228, + "learning_rate": 1.8153152450345595e-05, + "loss": 0.1245, "step": 127550 }, { "epoch": 5.95, - "learning_rate": 8.134170924944917e-06, - "loss": 0.3446, + "learning_rate": 1.8152684380509575e-05, + "loss": 0.198, "step": 127555 }, { "epoch": 5.95, - "learning_rate": 8.133702123669777e-06, - "loss": 0.0891, + "learning_rate": 1.8152216310673554e-05, + "loss": 0.0969, "step": 127560 }, { "epoch": 5.95, - "learning_rate": 8.133233322394638e-06, - "loss": 0.066, + "learning_rate": 1.815174824083753e-05, + "loss": 0.0051, "step": 127565 }, { "epoch": 5.95, - "learning_rate": 8.132764521119498e-06, - "loss": 0.0479, + "learning_rate": 1.8151280171001514e-05, + "loss": 0.0091, "step": 127570 }, { "epoch": 5.95, - "learning_rate": 8.132295719844358e-06, - "loss": 0.0566, + "learning_rate": 1.8150812101165494e-05, + "loss": 0.035, "step": 127575 }, { "epoch": 5.95, - "learning_rate": 8.131826918569218e-06, - "loss": 0.0534, + "learning_rate": 1.8150344031329474e-05, + "loss": 0.0289, "step": 127580 }, { "epoch": 5.95, - "learning_rate": 8.13135811729408e-06, - "loss": 0.0358, + "learning_rate": 1.8149875961493453e-05, + "loss": 0.0578, "step": 127585 }, { "epoch": 5.95, - "learning_rate": 8.13088931601894e-06, - "loss": 0.0398, + "learning_rate": 1.8149407891657437e-05, + "loss": 0.0285, "step": 127590 }, { "epoch": 5.95, - "learning_rate": 8.130420514743801e-06, - "loss": 0.0767, + "learning_rate": 1.8148939821821416e-05, + "loss": 0.1692, "step": 127595 }, { "epoch": 5.95, - "learning_rate": 8.129951713468661e-06, - "loss": 0.1638, + "learning_rate": 1.8148471751985396e-05, + "loss": 0.1022, "step": 127600 }, { "epoch": 5.95, - "learning_rate": 8.129482912193521e-06, - "loss": 0.327, + "learning_rate": 1.8148003682149376e-05, + "loss": 0.2377, "step": 127605 }, { "epoch": 5.95, - "learning_rate": 8.129014110918383e-06, - "loss": 0.1043, + "learning_rate": 1.814753561231336e-05, + "loss": 0.1406, "step": 127610 }, { "epoch": 5.95, - "learning_rate": 8.128545309643243e-06, - "loss": 0.0106, + "learning_rate": 1.814706754247734e-05, + "loss": 0.0075, "step": 127615 }, { "epoch": 5.95, - "learning_rate": 8.128076508368103e-06, - "loss": 0.0061, + "learning_rate": 1.814659947264132e-05, + "loss": 0.0276, "step": 127620 }, { "epoch": 5.96, - "learning_rate": 8.127607707092964e-06, - "loss": 0.0169, + "learning_rate": 1.8146131402805302e-05, + "loss": 0.031, "step": 127625 }, { "epoch": 5.96, - "learning_rate": 8.127138905817824e-06, - "loss": 0.0849, + "learning_rate": 1.814566333296928e-05, + "loss": 0.0473, "step": 127630 }, { "epoch": 5.96, - "learning_rate": 8.126670104542686e-06, - "loss": 0.0527, + "learning_rate": 1.814519526313326e-05, + "loss": 0.0342, "step": 127635 }, { "epoch": 5.96, - "learning_rate": 8.126201303267546e-06, - "loss": 0.0424, + "learning_rate": 1.8144727193297238e-05, + "loss": 0.1008, "step": 127640 }, { "epoch": 5.96, - "learning_rate": 8.125732501992406e-06, - "loss": 0.1071, + "learning_rate": 1.814425912346122e-05, + "loss": 0.0998, "step": 127645 }, { "epoch": 5.96, - "learning_rate": 8.125263700717266e-06, - "loss": 0.0936, + "learning_rate": 1.81437910536252e-05, + "loss": 0.2179, "step": 127650 }, { "epoch": 5.96, - "learning_rate": 8.124794899442127e-06, - "loss": 0.2252, + "learning_rate": 1.814332298378918e-05, + "loss": 0.2752, "step": 127655 }, { "epoch": 5.96, - "learning_rate": 8.124326098166987e-06, - "loss": 0.0955, + "learning_rate": 1.814285491395316e-05, + "loss": 0.1129, "step": 127660 }, { "epoch": 5.96, - "learning_rate": 8.123857296891849e-06, - "loss": 0.0124, + "learning_rate": 1.8142386844117144e-05, + "loss": 0.013, "step": 127665 }, { "epoch": 5.96, - "learning_rate": 8.123388495616709e-06, - "loss": 0.0091, + "learning_rate": 1.8141918774281124e-05, + "loss": 0.0292, "step": 127670 }, { "epoch": 5.96, - "learning_rate": 8.122919694341569e-06, - "loss": 0.013, + "learning_rate": 1.8141450704445104e-05, + "loss": 0.0563, "step": 127675 }, { "epoch": 5.96, - "learning_rate": 8.12245089306643e-06, - "loss": 0.041, + "learning_rate": 1.8140982634609087e-05, + "loss": 0.0378, "step": 127680 }, { "epoch": 5.96, - "learning_rate": 8.12198209179129e-06, - "loss": 0.0287, + "learning_rate": 1.8140514564773067e-05, + "loss": 0.0542, "step": 127685 }, { "epoch": 5.96, - "learning_rate": 8.12151329051615e-06, - "loss": 0.0307, + "learning_rate": 1.8140046494937043e-05, + "loss": 0.0732, "step": 127690 }, { "epoch": 5.96, - "learning_rate": 8.121044489241012e-06, - "loss": 0.0879, + "learning_rate": 1.8139578425101023e-05, + "loss": 0.0735, "step": 127695 }, { "epoch": 5.96, - "learning_rate": 8.120575687965872e-06, - "loss": 0.0853, + "learning_rate": 1.8139110355265006e-05, + "loss": 0.1285, "step": 127700 }, { "epoch": 5.96, - "learning_rate": 8.120106886690734e-06, - "loss": 0.234, + "learning_rate": 1.8138642285428986e-05, + "loss": 0.2806, "step": 127705 }, { "epoch": 5.96, - "learning_rate": 8.119638085415593e-06, - "loss": 0.0594, + "learning_rate": 1.8138174215592966e-05, + "loss": 0.1213, "step": 127710 }, { "epoch": 5.96, - "learning_rate": 8.119169284140453e-06, - "loss": 0.0066, + "learning_rate": 1.8137706145756946e-05, + "loss": 0.0334, "step": 127715 }, { "epoch": 5.96, - "learning_rate": 8.118700482865313e-06, - "loss": 0.036, + "learning_rate": 1.813723807592093e-05, + "loss": 0.0139, "step": 127720 }, { "epoch": 5.96, - "learning_rate": 8.118231681590173e-06, - "loss": 0.0281, + "learning_rate": 1.813677000608491e-05, + "loss": 0.0292, "step": 127725 }, { "epoch": 5.96, - "learning_rate": 8.117762880315035e-06, - "loss": 0.0565, + "learning_rate": 1.813630193624889e-05, + "loss": 0.0237, "step": 127730 }, { "epoch": 5.96, - "learning_rate": 8.117294079039897e-06, - "loss": 0.1203, + "learning_rate": 1.8135833866412872e-05, + "loss": 0.0688, "step": 127735 }, { "epoch": 5.96, - "learning_rate": 8.116825277764756e-06, - "loss": 0.0391, + "learning_rate": 1.813536579657685e-05, + "loss": 0.0201, "step": 127740 }, { "epoch": 5.96, - "learning_rate": 8.116356476489616e-06, - "loss": 0.0832, + "learning_rate": 1.813489772674083e-05, + "loss": 0.0911, "step": 127745 }, { "epoch": 5.96, - "learning_rate": 8.115887675214478e-06, - "loss": 0.1628, + "learning_rate": 1.813442965690481e-05, + "loss": 0.1161, "step": 127750 }, { "epoch": 5.96, - "learning_rate": 8.115418873939338e-06, - "loss": 0.3057, + "learning_rate": 1.813396158706879e-05, + "loss": 0.2769, "step": 127755 }, { "epoch": 5.96, - "learning_rate": 8.114950072664198e-06, - "loss": 0.0874, + "learning_rate": 1.813349351723277e-05, + "loss": 0.0894, "step": 127760 }, { "epoch": 5.96, - "learning_rate": 8.114481271389058e-06, - "loss": 0.0153, + "learning_rate": 1.813302544739675e-05, + "loss": 0.0069, "step": 127765 }, { "epoch": 5.96, - "learning_rate": 8.11401247011392e-06, - "loss": 0.0051, + "learning_rate": 1.813255737756073e-05, + "loss": 0.0301, "step": 127770 }, { "epoch": 5.96, - "learning_rate": 8.113543668838781e-06, - "loss": 0.0437, + "learning_rate": 1.8132089307724714e-05, + "loss": 0.0578, "step": 127775 }, { "epoch": 5.96, - "learning_rate": 8.113074867563641e-06, - "loss": 0.0634, + "learning_rate": 1.8131621237888693e-05, + "loss": 0.0151, "step": 127780 }, { "epoch": 5.96, - "learning_rate": 8.112606066288501e-06, - "loss": 0.0685, + "learning_rate": 1.8131153168052673e-05, + "loss": 0.0321, "step": 127785 }, { "epoch": 5.96, - "learning_rate": 8.112137265013361e-06, - "loss": 0.0283, + "learning_rate": 1.8130685098216653e-05, + "loss": 0.0688, "step": 127790 }, { "epoch": 5.96, - "learning_rate": 8.11166846373822e-06, - "loss": 0.1444, + "learning_rate": 1.8130217028380636e-05, + "loss": 0.0684, "step": 127795 }, { "epoch": 5.96, - "learning_rate": 8.111199662463082e-06, - "loss": 0.2454, + "learning_rate": 1.8129748958544616e-05, + "loss": 0.1271, "step": 127800 }, { "epoch": 5.96, - "learning_rate": 8.110730861187942e-06, - "loss": 0.1598, + "learning_rate": 1.8129280888708596e-05, + "loss": 0.2523, "step": 127805 }, { "epoch": 5.96, - "learning_rate": 8.110262059912804e-06, - "loss": 0.0545, + "learning_rate": 1.812881281887258e-05, + "loss": 0.1105, "step": 127810 }, { "epoch": 5.96, - "learning_rate": 8.109793258637664e-06, - "loss": 0.0135, + "learning_rate": 1.812834474903656e-05, + "loss": 0.0192, "step": 127815 }, { "epoch": 5.96, - "learning_rate": 8.109324457362526e-06, - "loss": 0.0299, + "learning_rate": 1.8127876679200535e-05, + "loss": 0.0044, "step": 127820 }, { "epoch": 5.96, - "learning_rate": 8.108855656087385e-06, - "loss": 0.0596, + "learning_rate": 1.8127408609364515e-05, + "loss": 0.0286, "step": 127825 }, { "epoch": 5.96, - "learning_rate": 8.108386854812245e-06, - "loss": 0.0149, + "learning_rate": 1.81269405395285e-05, + "loss": 0.0619, "step": 127830 }, { "epoch": 5.96, - "learning_rate": 8.107918053537105e-06, - "loss": 0.0755, + "learning_rate": 1.8126472469692478e-05, + "loss": 0.0253, "step": 127835 }, { "epoch": 5.97, - "learning_rate": 8.107449252261967e-06, - "loss": 0.0403, + "learning_rate": 1.8126004399856458e-05, + "loss": 0.0153, "step": 127840 }, { "epoch": 5.97, - "learning_rate": 8.106980450986827e-06, - "loss": 0.0658, + "learning_rate": 1.8125536330020438e-05, + "loss": 0.0475, "step": 127845 }, { "epoch": 5.97, - "learning_rate": 8.106511649711689e-06, - "loss": 0.0706, + "learning_rate": 1.812506826018442e-05, + "loss": 0.1493, "step": 127850 }, { "epoch": 5.97, - "learning_rate": 8.106042848436548e-06, - "loss": 0.1601, + "learning_rate": 1.81246001903484e-05, + "loss": 0.462, "step": 127855 }, { "epoch": 5.97, - "learning_rate": 8.105574047161408e-06, - "loss": 0.0842, + "learning_rate": 1.812413212051238e-05, + "loss": 0.1125, "step": 127860 }, { "epoch": 5.97, - "learning_rate": 8.10510524588627e-06, - "loss": 0.0304, + "learning_rate": 1.8123664050676364e-05, + "loss": 0.0271, "step": 127865 }, { "epoch": 5.97, - "learning_rate": 8.10463644461113e-06, - "loss": 0.0184, + "learning_rate": 1.8123195980840344e-05, + "loss": 0.0351, "step": 127870 }, { "epoch": 5.97, - "learning_rate": 8.10416764333599e-06, - "loss": 0.0195, + "learning_rate": 1.8122727911004324e-05, + "loss": 0.0088, "step": 127875 }, { "epoch": 5.97, - "learning_rate": 8.103698842060852e-06, - "loss": 0.0639, + "learning_rate": 1.81222598411683e-05, + "loss": 0.0265, "step": 127880 }, { "epoch": 5.97, - "learning_rate": 8.103230040785711e-06, - "loss": 0.0229, + "learning_rate": 1.8121791771332283e-05, + "loss": 0.0273, "step": 127885 }, { "epoch": 5.97, - "learning_rate": 8.102761239510573e-06, - "loss": 0.046, + "learning_rate": 1.8121323701496263e-05, + "loss": 0.0168, "step": 127890 }, { "epoch": 5.97, - "learning_rate": 8.102292438235433e-06, - "loss": 0.1069, + "learning_rate": 1.8120855631660243e-05, + "loss": 0.0833, "step": 127895 }, { "epoch": 5.97, - "learning_rate": 8.101823636960293e-06, - "loss": 0.1163, + "learning_rate": 1.8120387561824223e-05, + "loss": 0.0804, "step": 127900 }, { "epoch": 5.97, - "learning_rate": 8.101354835685153e-06, - "loss": 0.2515, + "learning_rate": 1.8119919491988206e-05, + "loss": 0.2286, "step": 127905 }, { "epoch": 5.97, - "learning_rate": 8.100886034410015e-06, - "loss": 0.0849, + "learning_rate": 1.8119451422152186e-05, + "loss": 0.068, "step": 127910 }, { "epoch": 5.97, - "learning_rate": 8.100417233134874e-06, - "loss": 0.0108, + "learning_rate": 1.8118983352316165e-05, + "loss": 0.0079, "step": 127915 }, { "epoch": 5.97, - "learning_rate": 8.099948431859736e-06, - "loss": 0.0378, + "learning_rate": 1.811851528248015e-05, + "loss": 0.0261, "step": 127920 }, { "epoch": 5.97, - "learning_rate": 8.099479630584596e-06, - "loss": 0.0226, + "learning_rate": 1.811804721264413e-05, + "loss": 0.03, "step": 127925 }, { "epoch": 5.97, - "learning_rate": 8.099010829309456e-06, - "loss": 0.0491, + "learning_rate": 1.8117579142808108e-05, + "loss": 0.0446, "step": 127930 }, { "epoch": 5.97, - "learning_rate": 8.098542028034318e-06, - "loss": 0.0573, + "learning_rate": 1.8117111072972088e-05, + "loss": 0.0455, "step": 127935 }, { "epoch": 5.97, - "learning_rate": 8.098073226759178e-06, - "loss": 0.1236, + "learning_rate": 1.811664300313607e-05, + "loss": 0.0227, "step": 127940 }, { "epoch": 5.97, - "learning_rate": 8.097604425484037e-06, - "loss": 0.0921, + "learning_rate": 1.8116174933300048e-05, + "loss": 0.0506, "step": 127945 }, { "epoch": 5.97, - "learning_rate": 8.097135624208899e-06, - "loss": 0.1399, + "learning_rate": 1.8115706863464028e-05, + "loss": 0.0854, "step": 127950 }, { "epoch": 5.97, - "learning_rate": 8.096666822933759e-06, - "loss": 0.1773, + "learning_rate": 1.8115238793628007e-05, + "loss": 0.2898, "step": 127955 }, { "epoch": 5.97, - "learning_rate": 8.09619802165862e-06, - "loss": 0.0792, + "learning_rate": 1.811477072379199e-05, + "loss": 0.09, "step": 127960 }, { "epoch": 5.97, - "learning_rate": 8.09572922038348e-06, - "loss": 0.0428, + "learning_rate": 1.811430265395597e-05, + "loss": 0.0172, "step": 127965 }, { "epoch": 5.97, - "learning_rate": 8.09526041910834e-06, - "loss": 0.0051, + "learning_rate": 1.811383458411995e-05, + "loss": 0.0098, "step": 127970 }, { "epoch": 5.97, - "learning_rate": 8.0947916178332e-06, - "loss": 0.0418, + "learning_rate": 1.811336651428393e-05, + "loss": 0.0216, "step": 127975 }, { "epoch": 5.97, - "learning_rate": 8.09432281655806e-06, - "loss": 0.0795, + "learning_rate": 1.8112898444447913e-05, + "loss": 0.0385, "step": 127980 }, { "epoch": 5.97, - "learning_rate": 8.093854015282922e-06, - "loss": 0.1256, + "learning_rate": 1.8112430374611893e-05, + "loss": 0.0906, "step": 127985 }, { "epoch": 5.97, - "learning_rate": 8.093385214007784e-06, - "loss": 0.0789, + "learning_rate": 1.8111962304775873e-05, + "loss": 0.1322, "step": 127990 }, { "epoch": 5.97, - "learning_rate": 8.092916412732644e-06, - "loss": 0.0393, + "learning_rate": 1.8111494234939856e-05, + "loss": 0.0705, "step": 127995 }, { "epoch": 5.97, - "learning_rate": 8.092447611457503e-06, - "loss": 0.1205, + "learning_rate": 1.8111026165103836e-05, + "loss": 0.1722, "step": 128000 }, { "epoch": 5.97, - "learning_rate": 8.091978810182365e-06, - "loss": 0.25, + "learning_rate": 1.8110558095267812e-05, + "loss": 0.1514, "step": 128005 }, { "epoch": 5.97, - "learning_rate": 8.091510008907225e-06, - "loss": 0.0784, + "learning_rate": 1.8110090025431792e-05, + "loss": 0.058, "step": 128010 }, { "epoch": 5.97, - "learning_rate": 8.091041207632085e-06, - "loss": 0.0154, + "learning_rate": 1.8109621955595775e-05, + "loss": 0.0456, "step": 128015 }, { "epoch": 5.97, - "learning_rate": 8.090572406356945e-06, - "loss": 0.048, + "learning_rate": 1.8109153885759755e-05, + "loss": 0.0474, "step": 128020 }, { "epoch": 5.97, - "learning_rate": 8.090103605081807e-06, - "loss": 0.0499, + "learning_rate": 1.8108685815923735e-05, + "loss": 0.0329, "step": 128025 }, { "epoch": 5.97, - "learning_rate": 8.089634803806668e-06, - "loss": 0.0282, + "learning_rate": 1.8108217746087715e-05, + "loss": 0.0228, "step": 128030 }, { "epoch": 5.97, - "learning_rate": 8.089166002531528e-06, - "loss": 0.0546, + "learning_rate": 1.8107749676251698e-05, + "loss": 0.0145, "step": 128035 }, { "epoch": 5.97, - "learning_rate": 8.088697201256388e-06, - "loss": 0.1218, + "learning_rate": 1.8107281606415678e-05, + "loss": 0.0579, "step": 128040 }, { "epoch": 5.97, - "learning_rate": 8.088228399981248e-06, - "loss": 0.1456, + "learning_rate": 1.8106813536579658e-05, + "loss": 0.0582, "step": 128045 }, { "epoch": 5.97, - "learning_rate": 8.087759598706108e-06, - "loss": 0.1712, + "learning_rate": 1.810634546674364e-05, + "loss": 0.0499, "step": 128050 }, { "epoch": 5.98, - "learning_rate": 8.08729079743097e-06, - "loss": 0.4388, + "learning_rate": 1.810587739690762e-05, + "loss": 0.2952, "step": 128055 }, { "epoch": 5.98, - "learning_rate": 8.08682199615583e-06, - "loss": 0.0848, + "learning_rate": 1.81054093270716e-05, + "loss": 0.0885, "step": 128060 }, { "epoch": 5.98, - "learning_rate": 8.086353194880691e-06, - "loss": 0.019, + "learning_rate": 1.810494125723558e-05, + "loss": 0.0084, "step": 128065 }, { "epoch": 5.98, - "learning_rate": 8.085884393605551e-06, - "loss": 0.0185, + "learning_rate": 1.810447318739956e-05, + "loss": 0.0248, "step": 128070 }, { "epoch": 5.98, - "learning_rate": 8.085415592330413e-06, - "loss": 0.0175, + "learning_rate": 1.810400511756354e-05, + "loss": 0.0594, "step": 128075 }, { "epoch": 5.98, - "learning_rate": 8.084946791055273e-06, - "loss": 0.0537, + "learning_rate": 1.810353704772752e-05, + "loss": 0.0392, "step": 128080 }, { "epoch": 5.98, - "learning_rate": 8.084477989780133e-06, - "loss": 0.0535, + "learning_rate": 1.81030689778915e-05, + "loss": 0.0487, "step": 128085 }, { "epoch": 5.98, - "learning_rate": 8.084009188504992e-06, - "loss": 0.0371, + "learning_rate": 1.8102600908055483e-05, + "loss": 0.0469, "step": 128090 }, { "epoch": 5.98, - "learning_rate": 8.083540387229854e-06, - "loss": 0.0497, + "learning_rate": 1.8102132838219463e-05, + "loss": 0.067, "step": 128095 }, { "epoch": 5.98, - "learning_rate": 8.083071585954714e-06, - "loss": 0.174, + "learning_rate": 1.8101664768383442e-05, + "loss": 0.1239, "step": 128100 }, { "epoch": 5.98, - "learning_rate": 8.082602784679576e-06, - "loss": 0.2198, + "learning_rate": 1.8101196698547426e-05, + "loss": 0.1722, "step": 128105 }, { "epoch": 5.98, - "learning_rate": 8.082133983404436e-06, - "loss": 0.0743, + "learning_rate": 1.8100728628711405e-05, + "loss": 0.0588, "step": 128110 }, { "epoch": 5.98, - "learning_rate": 8.081665182129296e-06, - "loss": 0.0623, + "learning_rate": 1.8100260558875385e-05, + "loss": 0.0157, "step": 128115 }, { "epoch": 5.98, - "learning_rate": 8.081196380854155e-06, - "loss": 0.018, + "learning_rate": 1.8099792489039365e-05, + "loss": 0.0094, "step": 128120 }, { "epoch": 5.98, - "learning_rate": 8.080727579579017e-06, - "loss": 0.0453, + "learning_rate": 1.8099324419203348e-05, + "loss": 0.0329, "step": 128125 }, { "epoch": 5.98, - "learning_rate": 8.080258778303877e-06, - "loss": 0.0459, + "learning_rate": 1.8098856349367328e-05, + "loss": 0.0336, "step": 128130 }, { "epoch": 5.98, - "learning_rate": 8.079789977028739e-06, - "loss": 0.0762, + "learning_rate": 1.8098388279531305e-05, + "loss": 0.0316, "step": 128135 }, { "epoch": 5.98, - "learning_rate": 8.079321175753599e-06, - "loss": 0.1122, + "learning_rate": 1.8097920209695284e-05, + "loss": 0.1093, "step": 128140 }, { "epoch": 5.98, - "learning_rate": 8.07885237447846e-06, - "loss": 0.0817, + "learning_rate": 1.8097452139859268e-05, + "loss": 0.0854, "step": 128145 }, { "epoch": 5.98, - "learning_rate": 8.07838357320332e-06, - "loss": 0.2215, + "learning_rate": 1.8096984070023247e-05, + "loss": 0.3929, "step": 128150 }, { "epoch": 5.98, - "learning_rate": 8.07791477192818e-06, - "loss": 0.226, + "learning_rate": 1.8096516000187227e-05, + "loss": 0.2083, "step": 128155 }, { "epoch": 5.98, - "learning_rate": 8.07744597065304e-06, - "loss": 0.1186, + "learning_rate": 1.809604793035121e-05, + "loss": 0.0644, "step": 128160 }, { "epoch": 5.98, - "learning_rate": 8.076977169377902e-06, - "loss": 0.0149, + "learning_rate": 1.809557986051519e-05, + "loss": 0.028, "step": 128165 }, { "epoch": 5.98, - "learning_rate": 8.076508368102762e-06, - "loss": 0.0175, + "learning_rate": 1.809511179067917e-05, + "loss": 0.0353, "step": 128170 }, { "epoch": 5.98, - "learning_rate": 8.076039566827623e-06, - "loss": 0.0329, + "learning_rate": 1.809464372084315e-05, + "loss": 0.0826, "step": 128175 }, { "epoch": 5.98, - "learning_rate": 8.075570765552483e-06, - "loss": 0.0525, + "learning_rate": 1.8094175651007133e-05, + "loss": 0.0614, "step": 128180 }, { "epoch": 5.98, - "learning_rate": 8.075101964277343e-06, - "loss": 0.0326, + "learning_rate": 1.8093707581171113e-05, + "loss": 0.0379, "step": 128185 }, { "epoch": 5.98, - "learning_rate": 8.074633163002205e-06, - "loss": 0.0306, + "learning_rate": 1.8093239511335093e-05, + "loss": 0.0356, "step": 128190 }, { "epoch": 5.98, - "learning_rate": 8.074164361727065e-06, - "loss": 0.1085, + "learning_rate": 1.809277144149907e-05, + "loss": 0.1068, "step": 128195 }, { "epoch": 5.98, - "learning_rate": 8.073695560451925e-06, - "loss": 0.1159, + "learning_rate": 1.8092303371663052e-05, + "loss": 0.0537, "step": 128200 }, { "epoch": 5.98, - "learning_rate": 8.073226759176786e-06, - "loss": 0.279, + "learning_rate": 1.8091835301827032e-05, + "loss": 0.2323, "step": 128205 }, { "epoch": 5.98, - "learning_rate": 8.072757957901646e-06, - "loss": 0.0447, + "learning_rate": 1.8091367231991012e-05, + "loss": 0.0894, "step": 128210 }, { "epoch": 5.98, - "learning_rate": 8.072289156626508e-06, - "loss": 0.0141, + "learning_rate": 1.8090899162154992e-05, + "loss": 0.0143, "step": 128215 }, { "epoch": 5.98, - "learning_rate": 8.071820355351368e-06, - "loss": 0.0024, + "learning_rate": 1.8090431092318975e-05, + "loss": 0.0129, "step": 128220 }, { "epoch": 5.98, - "learning_rate": 8.071351554076228e-06, - "loss": 0.0288, + "learning_rate": 1.8089963022482955e-05, + "loss": 0.0384, "step": 128225 }, { "epoch": 5.98, - "learning_rate": 8.070882752801088e-06, - "loss": 0.0239, + "learning_rate": 1.8089494952646935e-05, + "loss": 0.0695, "step": 128230 }, { "epoch": 5.98, - "learning_rate": 8.070413951525947e-06, - "loss": 0.0335, + "learning_rate": 1.8089026882810918e-05, + "loss": 0.0793, "step": 128235 }, { "epoch": 5.98, - "learning_rate": 8.069945150250809e-06, - "loss": 0.0314, + "learning_rate": 1.8088558812974898e-05, + "loss": 0.0568, "step": 128240 }, { "epoch": 5.98, - "learning_rate": 8.06947634897567e-06, - "loss": 0.1453, + "learning_rate": 1.8088090743138877e-05, + "loss": 0.075, "step": 128245 }, { "epoch": 5.98, - "learning_rate": 8.06900754770053e-06, - "loss": 0.1481, + "learning_rate": 1.8087622673302857e-05, + "loss": 0.0629, "step": 128250 }, { "epoch": 5.98, - "learning_rate": 8.06853874642539e-06, - "loss": 0.334, + "learning_rate": 1.808715460346684e-05, + "loss": 0.2107, "step": 128255 }, { "epoch": 5.98, - "learning_rate": 8.068069945150252e-06, - "loss": 0.1361, + "learning_rate": 1.8086686533630817e-05, + "loss": 0.0521, "step": 128260 }, { "epoch": 5.99, - "learning_rate": 8.067601143875112e-06, + "learning_rate": 1.8086218463794797e-05, "loss": 0.0275, "step": 128265 }, { "epoch": 5.99, - "learning_rate": 8.067132342599972e-06, - "loss": 0.0157, + "learning_rate": 1.8085750393958777e-05, + "loss": 0.0063, "step": 128270 }, { "epoch": 5.99, - "learning_rate": 8.066663541324832e-06, - "loss": 0.0012, + "learning_rate": 1.808528232412276e-05, + "loss": 0.0131, "step": 128275 }, { "epoch": 5.99, - "learning_rate": 8.066194740049694e-06, - "loss": 0.0366, + "learning_rate": 1.808481425428674e-05, + "loss": 0.0239, "step": 128280 }, { "epoch": 5.99, - "learning_rate": 8.065725938774555e-06, - "loss": 0.045, + "learning_rate": 1.808434618445072e-05, + "loss": 0.0541, "step": 128285 }, { "epoch": 5.99, - "learning_rate": 8.065257137499415e-06, - "loss": 0.0449, + "learning_rate": 1.8083878114614703e-05, + "loss": 0.0902, "step": 128290 }, { "epoch": 5.99, - "learning_rate": 8.064788336224275e-06, - "loss": 0.0488, + "learning_rate": 1.8083410044778682e-05, + "loss": 0.1375, "step": 128295 }, { "epoch": 5.99, - "learning_rate": 8.064319534949135e-06, - "loss": 0.1088, + "learning_rate": 1.8082941974942662e-05, + "loss": 0.12, "step": 128300 }, { "epoch": 5.99, - "learning_rate": 8.063850733673995e-06, - "loss": 0.1773, + "learning_rate": 1.8082473905106642e-05, + "loss": 0.3977, "step": 128305 }, { "epoch": 5.99, - "learning_rate": 8.063381932398857e-06, - "loss": 0.1179, + "learning_rate": 1.8082005835270625e-05, + "loss": 0.1056, "step": 128310 }, { "epoch": 5.99, - "learning_rate": 8.062913131123717e-06, - "loss": 0.0218, + "learning_rate": 1.8081537765434605e-05, + "loss": 0.0046, "step": 128315 }, { "epoch": 5.99, - "learning_rate": 8.062444329848578e-06, - "loss": 0.0098, + "learning_rate": 1.8081069695598585e-05, + "loss": 0.0683, "step": 128320 }, { "epoch": 5.99, - "learning_rate": 8.061975528573438e-06, - "loss": 0.0574, + "learning_rate": 1.808060162576256e-05, + "loss": 0.0133, "step": 128325 }, { "epoch": 5.99, - "learning_rate": 8.0615067272983e-06, - "loss": 0.0517, + "learning_rate": 1.8080133555926545e-05, + "loss": 0.0325, "step": 128330 }, { "epoch": 5.99, - "learning_rate": 8.06103792602316e-06, - "loss": 0.0699, + "learning_rate": 1.8079665486090524e-05, + "loss": 0.0565, "step": 128335 }, { "epoch": 5.99, - "learning_rate": 8.06056912474802e-06, - "loss": 0.0567, + "learning_rate": 1.8079197416254504e-05, + "loss": 0.012, "step": 128340 }, { "epoch": 5.99, - "learning_rate": 8.06010032347288e-06, - "loss": 0.0727, + "learning_rate": 1.8078729346418487e-05, + "loss": 0.1687, "step": 128345 }, { "epoch": 5.99, - "learning_rate": 8.059631522197741e-06, - "loss": 0.0568, + "learning_rate": 1.8078261276582467e-05, + "loss": 0.0978, "step": 128350 }, { "epoch": 5.99, - "learning_rate": 8.059162720922603e-06, - "loss": 0.2089, + "learning_rate": 1.8077793206746447e-05, + "loss": 0.2578, "step": 128355 }, { "epoch": 5.99, - "learning_rate": 8.058693919647463e-06, - "loss": 0.077, + "learning_rate": 1.8077325136910427e-05, + "loss": 0.0574, "step": 128360 }, { "epoch": 5.99, - "learning_rate": 8.058225118372323e-06, - "loss": 0.0006, + "learning_rate": 1.807685706707441e-05, + "loss": 0.0336, "step": 128365 }, { "epoch": 5.99, - "learning_rate": 8.057756317097183e-06, - "loss": 0.0098, + "learning_rate": 1.807638899723839e-05, + "loss": 0.0186, "step": 128370 }, { "epoch": 5.99, - "learning_rate": 8.057287515822043e-06, - "loss": 0.0762, + "learning_rate": 1.807592092740237e-05, + "loss": 0.0044, "step": 128375 }, { "epoch": 5.99, - "learning_rate": 8.056818714546904e-06, - "loss": 0.0528, + "learning_rate": 1.807545285756635e-05, + "loss": 0.0453, "step": 128380 }, { "epoch": 5.99, - "learning_rate": 8.056349913271764e-06, - "loss": 0.0317, + "learning_rate": 1.807498478773033e-05, + "loss": 0.032, "step": 128385 }, { "epoch": 5.99, - "learning_rate": 8.055881111996626e-06, - "loss": 0.0408, + "learning_rate": 1.807451671789431e-05, + "loss": 0.0894, "step": 128390 }, { "epoch": 5.99, - "learning_rate": 8.055412310721486e-06, - "loss": 0.0657, + "learning_rate": 1.807404864805829e-05, + "loss": 0.0637, "step": 128395 }, { "epoch": 5.99, - "learning_rate": 8.054943509446347e-06, - "loss": 0.196, + "learning_rate": 1.807358057822227e-05, + "loss": 0.1355, "step": 128400 }, { "epoch": 5.99, - "learning_rate": 8.054474708171207e-06, - "loss": 0.4327, + "learning_rate": 1.8073112508386252e-05, + "loss": 0.2426, "step": 128405 }, { "epoch": 5.99, - "learning_rate": 8.054005906896067e-06, - "loss": 0.0927, + "learning_rate": 1.8072644438550232e-05, + "loss": 0.0953, "step": 128410 }, { "epoch": 5.99, - "learning_rate": 8.053537105620927e-06, - "loss": 0.0463, + "learning_rate": 1.807217636871421e-05, + "loss": 0.0095, "step": 128415 }, { "epoch": 5.99, - "learning_rate": 8.053068304345789e-06, - "loss": 0.0254, + "learning_rate": 1.8071708298878195e-05, + "loss": 0.0743, "step": 128420 }, { "epoch": 5.99, - "learning_rate": 8.052599503070649e-06, - "loss": 0.029, + "learning_rate": 1.8071240229042175e-05, + "loss": 0.0222, "step": 128425 }, { "epoch": 5.99, - "learning_rate": 8.05213070179551e-06, - "loss": 0.0274, + "learning_rate": 1.8070772159206154e-05, + "loss": 0.0584, "step": 128430 }, { "epoch": 5.99, - "learning_rate": 8.05166190052037e-06, - "loss": 0.0085, + "learning_rate": 1.8070304089370134e-05, + "loss": 0.0433, "step": 128435 }, { "epoch": 5.99, - "learning_rate": 8.05119309924523e-06, - "loss": 0.0523, + "learning_rate": 1.8069836019534117e-05, + "loss": 0.0789, "step": 128440 }, { "epoch": 5.99, - "learning_rate": 8.05072429797009e-06, - "loss": 0.0672, + "learning_rate": 1.8069367949698097e-05, + "loss": 0.1072, "step": 128445 }, { "epoch": 5.99, - "learning_rate": 8.050255496694952e-06, - "loss": 0.0632, + "learning_rate": 1.8068899879862074e-05, + "loss": 0.2022, "step": 128450 }, { "epoch": 5.99, - "learning_rate": 8.049786695419812e-06, - "loss": 0.2312, + "learning_rate": 1.8068431810026054e-05, + "loss": 0.2357, "step": 128455 }, { "epoch": 5.99, - "learning_rate": 8.049317894144673e-06, - "loss": 0.1077, + "learning_rate": 1.8067963740190037e-05, + "loss": 0.0672, "step": 128460 }, { "epoch": 5.99, - "learning_rate": 8.048849092869533e-06, - "loss": 0.0089, + "learning_rate": 1.8067495670354017e-05, + "loss": 0.0114, "step": 128465 }, { "epoch": 5.99, - "learning_rate": 8.048380291594395e-06, - "loss": 0.0173, + "learning_rate": 1.8067027600517996e-05, + "loss": 0.0061, "step": 128470 }, { "epoch": 5.99, - "learning_rate": 8.047911490319255e-06, - "loss": 0.0631, + "learning_rate": 1.806655953068198e-05, + "loss": 0.0053, "step": 128475 }, { "epoch": 6.0, - "learning_rate": 8.047442689044115e-06, - "loss": 0.0497, + "learning_rate": 1.806609146084596e-05, + "loss": 0.0392, "step": 128480 }, { "epoch": 6.0, - "learning_rate": 8.046973887768975e-06, - "loss": 0.0657, + "learning_rate": 1.806562339100994e-05, + "loss": 0.0895, "step": 128485 }, { "epoch": 6.0, - "learning_rate": 8.046505086493836e-06, - "loss": 0.0377, + "learning_rate": 1.806515532117392e-05, + "loss": 0.0427, "step": 128490 }, { "epoch": 6.0, - "learning_rate": 8.046036285218696e-06, - "loss": 0.0736, + "learning_rate": 1.8064687251337902e-05, + "loss": 0.079, "step": 128495 }, { "epoch": 6.0, - "learning_rate": 8.045567483943558e-06, - "loss": 0.0999, + "learning_rate": 1.8064219181501882e-05, + "loss": 0.164, "step": 128500 }, { "epoch": 6.0, - "learning_rate": 8.045098682668418e-06, - "loss": 0.2102, + "learning_rate": 1.8063751111665862e-05, + "loss": 0.2092, "step": 128505 }, { "epoch": 6.0, - "learning_rate": 8.044629881393278e-06, - "loss": 0.0568, + "learning_rate": 1.806328304182984e-05, + "loss": 0.0671, "step": 128510 }, { "epoch": 6.0, - "learning_rate": 8.04416108011814e-06, - "loss": 0.0633, + "learning_rate": 1.806281497199382e-05, + "loss": 0.0321, "step": 128515 }, { "epoch": 6.0, - "learning_rate": 8.043692278843e-06, - "loss": 0.0429, + "learning_rate": 1.80623469021578e-05, + "loss": 0.0275, "step": 128520 }, { "epoch": 6.0, - "learning_rate": 8.04322347756786e-06, - "loss": 0.0398, + "learning_rate": 1.806187883232178e-05, + "loss": 0.0184, "step": 128525 }, { "epoch": 6.0, - "learning_rate": 8.04275467629272e-06, - "loss": 0.0589, + "learning_rate": 1.8061410762485764e-05, + "loss": 0.0167, "step": 128530 }, { "epoch": 6.0, - "learning_rate": 8.04228587501758e-06, - "loss": 0.0667, + "learning_rate": 1.8060942692649744e-05, + "loss": 0.0442, "step": 128535 }, { "epoch": 6.0, - "learning_rate": 8.041817073742442e-06, - "loss": 0.0837, + "learning_rate": 1.8060474622813724e-05, + "loss": 0.0558, "step": 128540 }, { "epoch": 6.0, - "learning_rate": 8.041348272467302e-06, - "loss": 0.0505, + "learning_rate": 1.8060006552977704e-05, + "loss": 0.1573, "step": 128545 }, { "epoch": 6.0, - "learning_rate": 8.040879471192162e-06, - "loss": 0.091, + "learning_rate": 1.8059538483141687e-05, + "loss": 0.0807, "step": 128550 }, { "epoch": 6.0, - "learning_rate": 8.040410669917022e-06, - "loss": 0.2976, + "learning_rate": 1.8059070413305667e-05, + "loss": 0.1992, "step": 128555 }, { "epoch": 6.0, - "learning_rate": 8.039941868641882e-06, - "loss": 0.0479, + "learning_rate": 1.8058602343469647e-05, + "loss": 0.0713, "step": 128560 }, { "epoch": 6.0, - "learning_rate": 8.039473067366744e-06, - "loss": 0.0308, + "learning_rate": 1.8058134273633626e-05, + "loss": 0.0168, "step": 128565 }, { "epoch": 6.0, - "learning_rate": 8.039004266091605e-06, - "loss": 0.0312, + "learning_rate": 1.805766620379761e-05, + "loss": 0.1386, "step": 128570 }, { "epoch": 6.0, - "learning_rate": 8.038535464816465e-06, - "loss": 0.06, + "learning_rate": 1.8057198133961586e-05, + "loss": 0.093, "step": 128575 }, { "epoch": 6.0, - "learning_rate": 8.038066663541325e-06, - "loss": 0.0709, + "learning_rate": 1.8056730064125566e-05, + "loss": 0.0473, "step": 128580 }, { "epoch": 6.0, - "learning_rate": 8.037597862266187e-06, - "loss": 0.1764, + "learning_rate": 1.8056261994289546e-05, + "loss": 0.1569, "step": 128585 }, { "epoch": 6.0, - "eval_cer": 0.010655025826210175, - "eval_loss": 0.04709955304861069, - "eval_runtime": 380.0432, - "eval_samples_per_second": 50.126, - "eval_steps_per_second": 12.533, - "eval_wer": 0.08086986068637445, + "eval_cer": 0.008301097558718466, + "eval_loss": 0.01730768382549286, + "eval_runtime": 385.0769, + "eval_samples_per_second": 49.471, + "eval_steps_per_second": 12.369, + "eval_wer": 0.06868972906780958, "step": 128586 }, { "epoch": 6.0, - "learning_rate": 8.037129060991047e-06, - "loss": 0.1634, + "learning_rate": 1.805579392445353e-05, + "loss": 0.1596, "step": 128590 }, { "epoch": 6.0, - "learning_rate": 8.036660259715907e-06, - "loss": 0.0101, + "learning_rate": 1.805532585461751e-05, + "loss": 0.0041, "step": 128595 }, { "epoch": 6.0, - "learning_rate": 8.036191458440767e-06, - "loss": 0.0144, + "learning_rate": 1.805485778478149e-05, + "loss": 0.048, "step": 128600 }, { "epoch": 6.0, - "learning_rate": 8.035722657165628e-06, - "loss": 0.0115, + "learning_rate": 1.8054389714945472e-05, + "loss": 0.0135, "step": 128605 }, { "epoch": 6.0, - "learning_rate": 8.03525385589049e-06, - "loss": 0.1023, + "learning_rate": 1.805392164510945e-05, + "loss": 0.0528, "step": 128610 }, { "epoch": 6.0, - "learning_rate": 8.03478505461535e-06, - "loss": 0.03, + "learning_rate": 1.805345357527343e-05, + "loss": 0.0165, "step": 128615 }, { "epoch": 6.0, - "learning_rate": 8.03431625334021e-06, - "loss": 0.081, + "learning_rate": 1.805298550543741e-05, + "loss": 0.0646, "step": 128620 }, { "epoch": 6.0, - "learning_rate": 8.03384745206507e-06, - "loss": 0.0292, + "learning_rate": 1.8052517435601394e-05, + "loss": 0.049, "step": 128625 }, { "epoch": 6.0, - "learning_rate": 8.03337865078993e-06, - "loss": 0.1229, + "learning_rate": 1.8052049365765374e-05, + "loss": 0.1, "step": 128630 }, { "epoch": 6.0, - "learning_rate": 8.032909849514791e-06, - "loss": 0.1276, + "learning_rate": 1.8051581295929354e-05, + "loss": 0.1015, "step": 128635 }, { "epoch": 6.0, - "learning_rate": 8.032441048239651e-06, - "loss": 0.1674, + "learning_rate": 1.805111322609333e-05, + "loss": 0.1444, "step": 128640 }, { "epoch": 6.0, - "learning_rate": 8.031972246964513e-06, - "loss": 0.0206, + "learning_rate": 1.8050645156257314e-05, + "loss": 0.0269, "step": 128645 }, { "epoch": 6.0, - "learning_rate": 8.031503445689373e-06, - "loss": 0.0169, + "learning_rate": 1.8050177086421294e-05, + "loss": 0.0051, "step": 128650 }, { "epoch": 6.0, - "learning_rate": 8.031034644414234e-06, - "loss": 0.1166, + "learning_rate": 1.8049709016585273e-05, + "loss": 0.0138, "step": 128655 }, { "epoch": 6.0, - "learning_rate": 8.030565843139094e-06, - "loss": 0.0278, + "learning_rate": 1.8049240946749257e-05, + "loss": 0.0376, "step": 128660 }, { "epoch": 6.0, - "learning_rate": 8.030097041863954e-06, - "loss": 0.0476, + "learning_rate": 1.8048772876913236e-05, + "loss": 0.0806, "step": 128665 }, { "epoch": 6.0, - "learning_rate": 8.029628240588814e-06, - "loss": 0.0776, + "learning_rate": 1.8048304807077216e-05, + "loss": 0.0351, "step": 128670 }, { "epoch": 6.0, - "learning_rate": 8.029159439313676e-06, - "loss": 0.068, + "learning_rate": 1.8047836737241196e-05, + "loss": 0.1037, "step": 128675 }, { "epoch": 6.0, - "learning_rate": 8.028690638038536e-06, - "loss": 0.0316, + "learning_rate": 1.804736866740518e-05, + "loss": 0.0957, "step": 128680 }, { "epoch": 6.0, - "learning_rate": 8.028221836763397e-06, - "loss": 0.2314, + "learning_rate": 1.804690059756916e-05, + "loss": 0.2352, "step": 128685 }, { "epoch": 6.0, - "learning_rate": 8.027753035488257e-06, - "loss": 0.1683, + "learning_rate": 1.804643252773314e-05, + "loss": 0.1349, "step": 128690 }, { "epoch": 6.01, - "learning_rate": 8.027284234213117e-06, - "loss": 0.0685, + "learning_rate": 1.804596445789712e-05, + "loss": 0.0063, "step": 128695 }, { "epoch": 6.01, - "learning_rate": 8.026815432937977e-06, - "loss": 0.004, + "learning_rate": 1.80454963880611e-05, + "loss": 0.0157, "step": 128700 }, { "epoch": 6.01, - "learning_rate": 8.026346631662839e-06, - "loss": 0.0351, + "learning_rate": 1.8045028318225078e-05, + "loss": 0.0362, "step": 128705 }, { "epoch": 6.01, - "learning_rate": 8.025877830387699e-06, - "loss": 0.053, + "learning_rate": 1.8044560248389058e-05, + "loss": 0.0204, "step": 128710 }, { "epoch": 6.01, - "learning_rate": 8.02540902911256e-06, - "loss": 0.0487, + "learning_rate": 1.804409217855304e-05, + "loss": 0.0577, "step": 128715 }, { "epoch": 6.01, - "learning_rate": 8.02494022783742e-06, - "loss": 0.0489, + "learning_rate": 1.804362410871702e-05, + "loss": 0.06, "step": 128720 }, { "epoch": 6.01, - "learning_rate": 8.024471426562282e-06, - "loss": 0.0851, + "learning_rate": 1.8043156038881e-05, + "loss": 0.1184, "step": 128725 }, { "epoch": 6.01, - "learning_rate": 8.024002625287142e-06, - "loss": 0.1381, + "learning_rate": 1.804268796904498e-05, + "loss": 0.1691, "step": 128730 }, { "epoch": 6.01, - "learning_rate": 8.023533824012002e-06, - "loss": 0.2643, + "learning_rate": 1.8042219899208964e-05, + "loss": 0.1694, "step": 128735 }, { "epoch": 6.01, - "learning_rate": 8.023065022736862e-06, - "loss": 0.1696, + "learning_rate": 1.8041751829372944e-05, + "loss": 0.1672, "step": 128740 }, { "epoch": 6.01, - "learning_rate": 8.022596221461723e-06, - "loss": 0.0143, + "learning_rate": 1.8041283759536924e-05, + "loss": 0.0195, "step": 128745 }, { "epoch": 6.01, - "learning_rate": 8.022127420186583e-06, - "loss": 0.0087, + "learning_rate": 1.8040815689700903e-05, + "loss": 0.0268, "step": 128750 }, { "epoch": 6.01, - "learning_rate": 8.021658618911445e-06, - "loss": 0.0051, + "learning_rate": 1.8040347619864887e-05, + "loss": 0.0529, "step": 128755 }, { "epoch": 6.01, - "learning_rate": 8.021189817636305e-06, - "loss": 0.037, + "learning_rate": 1.8039879550028866e-05, + "loss": 0.0605, "step": 128760 }, { "epoch": 6.01, - "learning_rate": 8.020721016361165e-06, - "loss": 0.0165, + "learning_rate": 1.8039411480192843e-05, + "loss": 0.034, "step": 128765 }, { "epoch": 6.01, - "learning_rate": 8.020252215086025e-06, - "loss": 0.0789, + "learning_rate": 1.8038943410356823e-05, + "loss": 0.021, "step": 128770 }, { "epoch": 6.01, - "learning_rate": 8.019783413810886e-06, - "loss": 0.0788, + "learning_rate": 1.8038475340520806e-05, + "loss": 0.2022, "step": 128775 }, { "epoch": 6.01, - "learning_rate": 8.019314612535746e-06, - "loss": 0.1981, + "learning_rate": 1.8038007270684786e-05, + "loss": 0.1893, "step": 128780 }, { "epoch": 6.01, - "learning_rate": 8.018845811260608e-06, - "loss": 0.1887, + "learning_rate": 1.8037539200848766e-05, + "loss": 0.1697, "step": 128785 }, { "epoch": 6.01, - "learning_rate": 8.018377009985468e-06, - "loss": 0.1191, + "learning_rate": 1.803707113101275e-05, + "loss": 0.187, "step": 128790 }, { "epoch": 6.01, - "learning_rate": 8.01790820871033e-06, - "loss": 0.0392, + "learning_rate": 1.803660306117673e-05, + "loss": 0.0127, "step": 128795 }, { "epoch": 6.01, - "learning_rate": 8.01743940743519e-06, - "loss": 0.0196, + "learning_rate": 1.803613499134071e-05, + "loss": 0.0261, "step": 128800 }, { "epoch": 6.01, - "learning_rate": 8.01697060616005e-06, - "loss": 0.0271, + "learning_rate": 1.8035666921504688e-05, + "loss": 0.0248, "step": 128805 }, { "epoch": 6.01, - "learning_rate": 8.01650180488491e-06, - "loss": 0.0225, + "learning_rate": 1.803519885166867e-05, + "loss": 0.0151, "step": 128810 }, { "epoch": 6.01, - "learning_rate": 8.01603300360977e-06, - "loss": 0.041, + "learning_rate": 1.803473078183265e-05, + "loss": 0.0664, "step": 128815 }, { "epoch": 6.01, - "learning_rate": 8.015564202334631e-06, - "loss": 0.0427, + "learning_rate": 1.803426271199663e-05, + "loss": 0.0642, "step": 128820 }, { "epoch": 6.01, - "learning_rate": 8.015095401059492e-06, - "loss": 0.0626, + "learning_rate": 1.803379464216061e-05, + "loss": 0.1503, "step": 128825 }, { "epoch": 6.01, - "learning_rate": 8.014626599784352e-06, - "loss": 0.155, + "learning_rate": 1.803332657232459e-05, + "loss": 0.1445, "step": 128830 }, { "epoch": 6.01, - "learning_rate": 8.014157798509212e-06, - "loss": 0.1439, + "learning_rate": 1.803285850248857e-05, + "loss": 0.2348, "step": 128835 }, { "epoch": 6.01, - "learning_rate": 8.013688997234074e-06, - "loss": 0.1215, + "learning_rate": 1.803239043265255e-05, + "loss": 0.1014, "step": 128840 }, { "epoch": 6.01, - "learning_rate": 8.013220195958934e-06, - "loss": 0.009, + "learning_rate": 1.8031922362816534e-05, + "loss": 0.0366, "step": 128845 }, { "epoch": 6.01, - "learning_rate": 8.012751394683794e-06, - "loss": 0.044, + "learning_rate": 1.8031454292980513e-05, + "loss": 0.0178, "step": 128850 }, { "epoch": 6.01, - "learning_rate": 8.012282593408654e-06, - "loss": 0.0274, + "learning_rate": 1.8030986223144493e-05, + "loss": 0.0226, "step": 128855 }, { "epoch": 6.01, - "learning_rate": 8.011813792133515e-06, - "loss": 0.0411, + "learning_rate": 1.8030518153308473e-05, + "loss": 0.0397, "step": 128860 }, { "epoch": 6.01, - "learning_rate": 8.011344990858377e-06, - "loss": 0.0539, + "learning_rate": 1.8030050083472456e-05, + "loss": 0.0479, "step": 128865 }, { "epoch": 6.01, - "learning_rate": 8.010876189583237e-06, - "loss": 0.0666, + "learning_rate": 1.8029582013636436e-05, + "loss": 0.0153, "step": 128870 }, { "epoch": 6.01, - "learning_rate": 8.010407388308097e-06, - "loss": 0.0719, + "learning_rate": 1.8029113943800416e-05, + "loss": 0.1143, "step": 128875 }, { "epoch": 6.01, - "learning_rate": 8.009938587032957e-06, - "loss": 0.0892, + "learning_rate": 1.8028645873964396e-05, + "loss": 0.0465, "step": 128880 }, { "epoch": 6.01, - "learning_rate": 8.009469785757817e-06, - "loss": 0.1199, + "learning_rate": 1.802817780412838e-05, + "loss": 0.0901, "step": 128885 }, { "epoch": 6.01, - "learning_rate": 8.009000984482678e-06, - "loss": 0.2039, + "learning_rate": 1.8027709734292355e-05, + "loss": 0.0844, "step": 128890 }, { "epoch": 6.01, - "learning_rate": 8.008532183207538e-06, - "loss": 0.0048, + "learning_rate": 1.8027241664456335e-05, + "loss": 0.008, "step": 128895 }, { "epoch": 6.01, - "learning_rate": 8.0080633819324e-06, - "loss": 0.0084, + "learning_rate": 1.8026773594620318e-05, + "loss": 0.0178, "step": 128900 }, { "epoch": 6.01, - "learning_rate": 8.00759458065726e-06, - "loss": 0.0281, + "learning_rate": 1.8026305524784298e-05, + "loss": 0.0242, "step": 128905 }, { "epoch": 6.02, - "learning_rate": 8.007125779382122e-06, - "loss": 0.0277, + "learning_rate": 1.8025837454948278e-05, + "loss": 0.0896, "step": 128910 }, { "epoch": 6.02, - "learning_rate": 8.006656978106981e-06, - "loss": 0.1043, + "learning_rate": 1.8025369385112258e-05, + "loss": 0.0237, "step": 128915 }, { "epoch": 6.02, - "learning_rate": 8.006188176831841e-06, - "loss": 0.0541, + "learning_rate": 1.802490131527624e-05, + "loss": 0.0435, "step": 128920 }, { "epoch": 6.02, - "learning_rate": 8.005719375556701e-06, - "loss": 0.15, + "learning_rate": 1.802443324544022e-05, + "loss": 0.0514, "step": 128925 }, { "epoch": 6.02, - "learning_rate": 8.005250574281563e-06, - "loss": 0.1266, + "learning_rate": 1.80239651756042e-05, + "loss": 0.0993, "step": 128930 }, { "epoch": 6.02, - "learning_rate": 8.004781773006423e-06, - "loss": 0.2535, + "learning_rate": 1.802349710576818e-05, + "loss": 0.1212, "step": 128935 }, { "epoch": 6.02, - "learning_rate": 8.004312971731285e-06, - "loss": 0.2048, + "learning_rate": 1.8023029035932164e-05, + "loss": 0.1291, "step": 128940 }, { "epoch": 6.02, - "learning_rate": 8.003844170456144e-06, - "loss": 0.0282, + "learning_rate": 1.8022560966096143e-05, + "loss": 0.0057, "step": 128945 }, { "epoch": 6.02, - "learning_rate": 8.003375369181004e-06, - "loss": 0.0099, + "learning_rate": 1.8022092896260123e-05, + "loss": 0.0041, "step": 128950 }, { "epoch": 6.02, - "learning_rate": 8.002906567905864e-06, - "loss": 0.0261, + "learning_rate": 1.8021624826424103e-05, + "loss": 0.0649, "step": 128955 }, { "epoch": 6.02, - "learning_rate": 8.002437766630726e-06, - "loss": 0.037, + "learning_rate": 1.8021156756588083e-05, + "loss": 0.0183, "step": 128960 }, { "epoch": 6.02, - "learning_rate": 8.001968965355586e-06, - "loss": 0.0494, + "learning_rate": 1.8020688686752063e-05, + "loss": 0.0706, "step": 128965 }, { "epoch": 6.02, - "learning_rate": 8.001500164080448e-06, - "loss": 0.0163, + "learning_rate": 1.8020220616916043e-05, + "loss": 0.0833, "step": 128970 }, { "epoch": 6.02, - "learning_rate": 8.001031362805307e-06, - "loss": 0.0727, + "learning_rate": 1.8019752547080026e-05, + "loss": 0.0227, "step": 128975 }, { "epoch": 6.02, - "learning_rate": 8.000562561530169e-06, - "loss": 0.1992, + "learning_rate": 1.8019284477244006e-05, + "loss": 0.0883, "step": 128980 }, { "epoch": 6.02, - "learning_rate": 8.000093760255029e-06, - "loss": 0.2, + "learning_rate": 1.8018816407407985e-05, + "loss": 0.2236, "step": 128985 }, { "epoch": 6.02, - "learning_rate": 7.999624958979889e-06, - "loss": 0.1744, + "learning_rate": 1.8018348337571965e-05, + "loss": 0.1925, "step": 128990 }, { "epoch": 6.02, - "learning_rate": 7.999156157704749e-06, - "loss": 0.0204, + "learning_rate": 1.801788026773595e-05, + "loss": 0.0253, "step": 128995 }, { "epoch": 6.02, - "learning_rate": 7.99868735642961e-06, - "loss": 0.0013, + "learning_rate": 1.8017412197899928e-05, + "loss": 0.0588, "step": 129000 }, { "epoch": 6.02, - "learning_rate": 7.99821855515447e-06, - "loss": 0.025, + "learning_rate": 1.8016944128063908e-05, + "loss": 0.0124, "step": 129005 }, { "epoch": 6.02, - "learning_rate": 7.997749753879332e-06, - "loss": 0.0235, + "learning_rate": 1.8016476058227888e-05, + "loss": 0.0311, "step": 129010 }, { "epoch": 6.02, - "learning_rate": 7.997280952604192e-06, - "loss": 0.0491, + "learning_rate": 1.801600798839187e-05, + "loss": 0.054, "step": 129015 }, { "epoch": 6.02, - "learning_rate": 7.996812151329052e-06, - "loss": 0.0477, + "learning_rate": 1.8015539918555847e-05, + "loss": 0.0631, "step": 129020 }, { "epoch": 6.02, - "learning_rate": 7.996343350053912e-06, - "loss": 0.0422, + "learning_rate": 1.8015071848719827e-05, + "loss": 0.0829, "step": 129025 }, { "epoch": 6.02, - "learning_rate": 7.995874548778773e-06, - "loss": 0.1119, + "learning_rate": 1.801460377888381e-05, + "loss": 0.1535, "step": 129030 }, { "epoch": 6.02, - "learning_rate": 7.995405747503633e-06, - "loss": 0.2329, + "learning_rate": 1.801413570904779e-05, + "loss": 0.205, "step": 129035 }, { "epoch": 6.02, - "learning_rate": 7.994936946228495e-06, - "loss": 0.1244, + "learning_rate": 1.801366763921177e-05, + "loss": 0.163, "step": 129040 }, { "epoch": 6.02, - "learning_rate": 7.994468144953355e-06, - "loss": 0.0425, + "learning_rate": 1.801319956937575e-05, + "loss": 0.033, "step": 129045 }, { "epoch": 6.02, - "learning_rate": 7.993999343678217e-06, - "loss": 0.0494, + "learning_rate": 1.8012731499539733e-05, + "loss": 0.0495, "step": 129050 }, { "epoch": 6.02, - "learning_rate": 7.993530542403077e-06, - "loss": 0.0137, + "learning_rate": 1.8012263429703713e-05, + "loss": 0.051, "step": 129055 }, { "epoch": 6.02, - "learning_rate": 7.993061741127936e-06, - "loss": 0.0303, + "learning_rate": 1.8011795359867693e-05, + "loss": 0.0679, "step": 129060 }, { "epoch": 6.02, - "learning_rate": 7.992592939852796e-06, - "loss": 0.047, + "learning_rate": 1.8011327290031673e-05, + "loss": 0.0319, "step": 129065 }, { "epoch": 6.02, - "learning_rate": 7.992124138577656e-06, - "loss": 0.091, + "learning_rate": 1.8010859220195656e-05, + "loss": 0.0242, "step": 129070 }, { "epoch": 6.02, - "learning_rate": 7.991655337302518e-06, - "loss": 0.0455, + "learning_rate": 1.8010391150359636e-05, + "loss": 0.0703, "step": 129075 }, { "epoch": 6.02, - "learning_rate": 7.99118653602738e-06, - "loss": 0.0833, + "learning_rate": 1.8009923080523612e-05, + "loss": 0.0494, "step": 129080 }, { "epoch": 6.02, - "learning_rate": 7.99071773475224e-06, - "loss": 0.1335, + "learning_rate": 1.8009455010687595e-05, + "loss": 0.3454, "step": 129085 }, { "epoch": 6.02, - "learning_rate": 7.9902489334771e-06, - "loss": 0.1895, + "learning_rate": 1.8008986940851575e-05, + "loss": 0.0938, "step": 129090 }, { "epoch": 6.02, - "learning_rate": 7.98978013220196e-06, - "loss": 0.0107, + "learning_rate": 1.8008518871015555e-05, + "loss": 0.0117, "step": 129095 }, { "epoch": 6.02, - "learning_rate": 7.989311330926821e-06, - "loss": 0.0383, + "learning_rate": 1.8008050801179535e-05, + "loss": 0.0179, "step": 129100 }, { "epoch": 6.02, - "learning_rate": 7.988842529651681e-06, - "loss": 0.0442, + "learning_rate": 1.8007582731343518e-05, + "loss": 0.0297, "step": 129105 }, { "epoch": 6.02, - "learning_rate": 7.988373728376541e-06, - "loss": 0.0287, + "learning_rate": 1.8007114661507498e-05, + "loss": 0.0267, "step": 129110 }, { "epoch": 6.02, - "learning_rate": 7.987904927101403e-06, - "loss": 0.0302, + "learning_rate": 1.8006646591671478e-05, + "loss": 0.0162, "step": 129115 }, { "epoch": 6.02, - "learning_rate": 7.987436125826264e-06, - "loss": 0.1306, + "learning_rate": 1.8006178521835457e-05, + "loss": 0.0807, "step": 129120 }, { "epoch": 6.03, - "learning_rate": 7.986967324551124e-06, - "loss": 0.0899, + "learning_rate": 1.800571045199944e-05, + "loss": 0.0427, "step": 129125 }, { "epoch": 6.03, - "learning_rate": 7.986498523275984e-06, - "loss": 0.1369, + "learning_rate": 1.800524238216342e-05, + "loss": 0.1348, "step": 129130 }, { "epoch": 6.03, - "learning_rate": 7.986029722000844e-06, - "loss": 0.2139, + "learning_rate": 1.80047743123274e-05, + "loss": 0.1114, "step": 129135 }, { "epoch": 6.03, - "learning_rate": 7.985560920725704e-06, - "loss": 0.2214, + "learning_rate": 1.8004306242491383e-05, + "loss": 0.1266, "step": 129140 }, { "epoch": 6.03, - "learning_rate": 7.985092119450566e-06, - "loss": 0.0065, + "learning_rate": 1.800383817265536e-05, + "loss": 0.0053, "step": 129145 }, { "epoch": 6.03, - "learning_rate": 7.984623318175425e-06, - "loss": 0.0072, + "learning_rate": 1.800337010281934e-05, + "loss": 0.0063, "step": 129150 }, { "epoch": 6.03, - "learning_rate": 7.984154516900287e-06, - "loss": 0.0085, + "learning_rate": 1.800290203298332e-05, + "loss": 0.0102, "step": 129155 }, { "epoch": 6.03, - "learning_rate": 7.983685715625147e-06, - "loss": 0.0512, + "learning_rate": 1.8002433963147303e-05, + "loss": 0.1095, "step": 129160 }, { "epoch": 6.03, - "learning_rate": 7.983216914350009e-06, - "loss": 0.0236, + "learning_rate": 1.8001965893311283e-05, + "loss": 0.0531, "step": 129165 }, { "epoch": 6.03, - "learning_rate": 7.982748113074869e-06, - "loss": 0.05, + "learning_rate": 1.8001497823475262e-05, + "loss": 0.0722, "step": 129170 }, { "epoch": 6.03, - "learning_rate": 7.982279311799729e-06, - "loss": 0.13, + "learning_rate": 1.8001029753639242e-05, + "loss": 0.0759, "step": 129175 }, { "epoch": 6.03, - "learning_rate": 7.981810510524588e-06, - "loss": 0.1249, + "learning_rate": 1.8000561683803225e-05, + "loss": 0.1367, "step": 129180 }, { "epoch": 6.03, - "learning_rate": 7.98134170924945e-06, - "loss": 0.09, + "learning_rate": 1.8000093613967205e-05, + "loss": 0.1493, "step": 129185 }, { "epoch": 6.03, - "learning_rate": 7.98087290797431e-06, - "loss": 0.2502, + "learning_rate": 1.7999625544131185e-05, + "loss": 0.1079, "step": 129190 }, { "epoch": 6.03, - "learning_rate": 7.980404106699172e-06, - "loss": 0.0135, + "learning_rate": 1.7999157474295165e-05, + "loss": 0.0202, "step": 129195 }, { "epoch": 6.03, - "learning_rate": 7.979935305424032e-06, - "loss": 0.0178, + "learning_rate": 1.7998689404459148e-05, + "loss": 0.0333, "step": 129200 }, { "epoch": 6.03, - "learning_rate": 7.979466504148891e-06, - "loss": 0.0291, + "learning_rate": 1.7998221334623124e-05, + "loss": 0.0286, "step": 129205 }, { "epoch": 6.03, - "learning_rate": 7.978997702873751e-06, - "loss": 0.0525, + "learning_rate": 1.7997753264787104e-05, + "loss": 0.0563, "step": 129210 }, { "epoch": 6.03, - "learning_rate": 7.978528901598613e-06, - "loss": 0.0778, + "learning_rate": 1.7997285194951087e-05, + "loss": 0.0285, "step": 129215 }, { "epoch": 6.03, - "learning_rate": 7.978060100323473e-06, - "loss": 0.0498, + "learning_rate": 1.7996817125115067e-05, + "loss": 0.0474, "step": 129220 }, { "epoch": 6.03, - "learning_rate": 7.977591299048335e-06, - "loss": 0.0315, + "learning_rate": 1.7996349055279047e-05, + "loss": 0.1144, "step": 129225 }, { "epoch": 6.03, - "learning_rate": 7.977122497773195e-06, - "loss": 0.0587, + "learning_rate": 1.7995880985443027e-05, + "loss": 0.0917, "step": 129230 }, { "epoch": 6.03, - "learning_rate": 7.976653696498056e-06, - "loss": 0.1092, + "learning_rate": 1.799541291560701e-05, + "loss": 0.1854, "step": 129235 }, { "epoch": 6.03, - "learning_rate": 7.976184895222916e-06, - "loss": 0.2081, + "learning_rate": 1.799494484577099e-05, + "loss": 0.1143, "step": 129240 }, { "epoch": 6.03, - "learning_rate": 7.975716093947776e-06, - "loss": 0.0049, + "learning_rate": 1.799447677593497e-05, + "loss": 0.016, "step": 129245 }, { "epoch": 6.03, - "learning_rate": 7.975247292672636e-06, - "loss": 0.0427, + "learning_rate": 1.799400870609895e-05, + "loss": 0.0129, "step": 129250 }, { "epoch": 6.03, - "learning_rate": 7.974778491397498e-06, - "loss": 0.0157, + "learning_rate": 1.7993540636262933e-05, + "loss": 0.0697, "step": 129255 }, { "epoch": 6.03, - "learning_rate": 7.974309690122358e-06, - "loss": 0.016, + "learning_rate": 1.7993072566426913e-05, + "loss": 0.0336, "step": 129260 }, { "epoch": 6.03, - "learning_rate": 7.97384088884722e-06, + "learning_rate": 1.7992604496590892e-05, "loss": 0.0424, "step": 129265 }, { "epoch": 6.03, - "learning_rate": 7.973372087572079e-06, - "loss": 0.0729, + "learning_rate": 1.7992136426754872e-05, + "loss": 0.0762, "step": 129270 }, { "epoch": 6.03, - "learning_rate": 7.972903286296939e-06, - "loss": 0.041, + "learning_rate": 1.7991668356918852e-05, + "loss": 0.0387, "step": 129275 }, { "epoch": 6.03, - "learning_rate": 7.972434485021799e-06, - "loss": 0.1456, + "learning_rate": 1.7991200287082832e-05, + "loss": 0.1543, "step": 129280 }, { "epoch": 6.03, - "learning_rate": 7.97196568374666e-06, - "loss": 0.221, + "learning_rate": 1.799073221724681e-05, + "loss": 0.1864, "step": 129285 }, { "epoch": 6.03, - "learning_rate": 7.97149688247152e-06, - "loss": 0.1495, + "learning_rate": 1.7990264147410795e-05, + "loss": 0.1152, "step": 129290 }, { "epoch": 6.03, - "learning_rate": 7.971028081196382e-06, - "loss": 0.0526, + "learning_rate": 1.7989796077574775e-05, + "loss": 0.005, "step": 129295 }, { "epoch": 6.03, - "learning_rate": 7.970559279921242e-06, - "loss": 0.0055, + "learning_rate": 1.7989328007738755e-05, + "loss": 0.0181, "step": 129300 }, { "epoch": 6.03, - "learning_rate": 7.970090478646104e-06, - "loss": 0.0239, + "learning_rate": 1.7988859937902734e-05, + "loss": 0.0238, "step": 129305 }, { "epoch": 6.03, - "learning_rate": 7.969621677370964e-06, - "loss": 0.026, + "learning_rate": 1.7988391868066718e-05, + "loss": 0.0439, "step": 129310 }, { "epoch": 6.03, - "learning_rate": 7.969152876095824e-06, - "loss": 0.0801, + "learning_rate": 1.7987923798230697e-05, + "loss": 0.0633, "step": 129315 }, { "epoch": 6.03, - "learning_rate": 7.968684074820684e-06, - "loss": 0.0279, + "learning_rate": 1.7987455728394677e-05, + "loss": 0.0303, "step": 129320 }, { "epoch": 6.03, - "learning_rate": 7.968215273545543e-06, - "loss": 0.0562, + "learning_rate": 1.798698765855866e-05, + "loss": 0.1148, "step": 129325 }, { "epoch": 6.03, - "learning_rate": 7.967746472270405e-06, - "loss": 0.1147, + "learning_rate": 1.798651958872264e-05, + "loss": 0.1335, "step": 129330 }, { "epoch": 6.03, - "learning_rate": 7.967277670995267e-06, - "loss": 0.1499, + "learning_rate": 1.7986051518886617e-05, + "loss": 0.2007, "step": 129335 }, { "epoch": 6.04, - "learning_rate": 7.966808869720127e-06, - "loss": 0.1172, + "learning_rate": 1.7985583449050596e-05, + "loss": 0.1536, "step": 129340 }, { "epoch": 6.04, - "learning_rate": 7.966340068444987e-06, - "loss": 0.0369, + "learning_rate": 1.798511537921458e-05, + "loss": 0.0139, "step": 129345 }, { "epoch": 6.04, - "learning_rate": 7.965871267169847e-06, - "loss": 0.0458, + "learning_rate": 1.798464730937856e-05, + "loss": 0.0064, "step": 129350 }, { "epoch": 6.04, - "learning_rate": 7.965402465894708e-06, - "loss": 0.0157, + "learning_rate": 1.798417923954254e-05, + "loss": 0.0352, "step": 129355 }, { "epoch": 6.04, - "learning_rate": 7.964933664619568e-06, - "loss": 0.0064, + "learning_rate": 1.798371116970652e-05, + "loss": 0.0131, "step": 129360 }, { "epoch": 6.04, - "learning_rate": 7.964464863344428e-06, - "loss": 0.0665, + "learning_rate": 1.7983243099870502e-05, + "loss": 0.0425, "step": 129365 }, { "epoch": 6.04, - "learning_rate": 7.96399606206929e-06, - "loss": 0.0621, + "learning_rate": 1.7982775030034482e-05, + "loss": 0.0677, "step": 129370 }, { "epoch": 6.04, - "learning_rate": 7.963527260794151e-06, - "loss": 0.0754, + "learning_rate": 1.7982306960198462e-05, + "loss": 0.1069, "step": 129375 }, { "epoch": 6.04, - "learning_rate": 7.963058459519011e-06, - "loss": 0.2058, + "learning_rate": 1.7981838890362442e-05, + "loss": 0.0368, "step": 129380 }, { "epoch": 6.04, - "learning_rate": 7.962589658243871e-06, - "loss": 0.1286, + "learning_rate": 1.7981370820526425e-05, + "loss": 0.2131, "step": 129385 }, { "epoch": 6.04, - "learning_rate": 7.962120856968731e-06, - "loss": 0.2206, + "learning_rate": 1.7980902750690405e-05, + "loss": 0.1454, "step": 129390 }, { "epoch": 6.04, - "learning_rate": 7.961652055693591e-06, - "loss": 0.0266, + "learning_rate": 1.798043468085438e-05, + "loss": 0.0022, "step": 129395 }, { "epoch": 6.04, - "learning_rate": 7.961183254418453e-06, - "loss": 0.022, + "learning_rate": 1.7979966611018364e-05, + "loss": 0.0197, "step": 129400 }, { "epoch": 6.04, - "learning_rate": 7.960714453143313e-06, - "loss": 0.0176, + "learning_rate": 1.7979498541182344e-05, + "loss": 0.0456, "step": 129405 }, { "epoch": 6.04, - "learning_rate": 7.960245651868174e-06, - "loss": 0.0602, + "learning_rate": 1.7979030471346324e-05, + "loss": 0.0253, "step": 129410 }, { "epoch": 6.04, - "learning_rate": 7.959776850593034e-06, - "loss": 0.076, + "learning_rate": 1.7978562401510304e-05, + "loss": 0.0285, "step": 129415 }, { "epoch": 6.04, - "learning_rate": 7.959308049317894e-06, - "loss": 0.0426, + "learning_rate": 1.7978094331674287e-05, + "loss": 0.0211, "step": 129420 }, { "epoch": 6.04, - "learning_rate": 7.958839248042756e-06, - "loss": 0.0647, + "learning_rate": 1.7977626261838267e-05, + "loss": 0.0801, "step": 129425 }, { "epoch": 6.04, - "learning_rate": 7.958370446767616e-06, - "loss": 0.0795, + "learning_rate": 1.7977158192002247e-05, + "loss": 0.1394, "step": 129430 }, { "epoch": 6.04, - "learning_rate": 7.957901645492476e-06, - "loss": 0.2174, + "learning_rate": 1.7976690122166227e-05, + "loss": 0.1869, "step": 129435 }, { "epoch": 6.04, - "learning_rate": 7.957432844217337e-06, - "loss": 0.1523, + "learning_rate": 1.797622205233021e-05, + "loss": 0.1419, "step": 129440 }, { "epoch": 6.04, - "learning_rate": 7.956964042942197e-06, - "loss": 0.0026, + "learning_rate": 1.797575398249419e-05, + "loss": 0.001, "step": 129445 }, { "epoch": 6.04, - "learning_rate": 7.956495241667059e-06, - "loss": 0.0374, + "learning_rate": 1.797528591265817e-05, + "loss": 0.07, "step": 129450 }, { "epoch": 6.04, - "learning_rate": 7.956026440391919e-06, - "loss": 0.0311, + "learning_rate": 1.7974817842822153e-05, + "loss": 0.0635, "step": 129455 }, { "epoch": 6.04, - "learning_rate": 7.955557639116779e-06, - "loss": 0.0376, + "learning_rate": 1.797434977298613e-05, + "loss": 0.0008, "step": 129460 }, { "epoch": 6.04, - "learning_rate": 7.955088837841639e-06, - "loss": 0.0409, + "learning_rate": 1.797388170315011e-05, + "loss": 0.0092, "step": 129465 }, { "epoch": 6.04, - "learning_rate": 7.9546200365665e-06, - "loss": 0.052, + "learning_rate": 1.797341363331409e-05, + "loss": 0.0305, "step": 129470 }, { "epoch": 6.04, - "learning_rate": 7.95415123529136e-06, - "loss": 0.0689, + "learning_rate": 1.7972945563478072e-05, + "loss": 0.0541, "step": 129475 }, { "epoch": 6.04, - "learning_rate": 7.953682434016222e-06, - "loss": 0.158, + "learning_rate": 1.797247749364205e-05, + "loss": 0.1436, "step": 129480 }, { "epoch": 6.04, - "learning_rate": 7.953213632741082e-06, - "loss": 0.1795, + "learning_rate": 1.797200942380603e-05, + "loss": 0.1991, "step": 129485 }, { "epoch": 6.04, - "learning_rate": 7.952744831465943e-06, - "loss": 0.1121, + "learning_rate": 1.797154135397001e-05, + "loss": 0.1834, "step": 129490 }, { "epoch": 6.04, - "learning_rate": 7.952276030190803e-06, - "loss": 0.0178, + "learning_rate": 1.7971073284133995e-05, + "loss": 0.0334, "step": 129495 }, { "epoch": 6.04, - "learning_rate": 7.951807228915663e-06, - "loss": 0.0114, + "learning_rate": 1.7970605214297974e-05, + "loss": 0.0471, "step": 129500 }, { "epoch": 6.04, - "learning_rate": 7.951338427640523e-06, - "loss": 0.0738, + "learning_rate": 1.7970137144461954e-05, + "loss": 0.0452, "step": 129505 }, { "epoch": 6.04, - "learning_rate": 7.950869626365385e-06, - "loss": 0.024, + "learning_rate": 1.7969669074625937e-05, + "loss": 0.0356, "step": 129510 }, { "epoch": 6.04, - "learning_rate": 7.950400825090245e-06, - "loss": 0.0443, + "learning_rate": 1.7969201004789917e-05, + "loss": 0.0387, "step": 129515 }, { "epoch": 6.04, - "learning_rate": 7.949932023815106e-06, - "loss": 0.0499, + "learning_rate": 1.7968732934953897e-05, + "loss": 0.0542, "step": 129520 }, { "epoch": 6.04, - "learning_rate": 7.949463222539966e-06, - "loss": 0.038, + "learning_rate": 1.7968264865117873e-05, + "loss": 0.0924, "step": 129525 }, { "epoch": 6.04, - "learning_rate": 7.948994421264826e-06, - "loss": 0.0743, + "learning_rate": 1.7967796795281857e-05, + "loss": 0.1555, "step": 129530 }, { "epoch": 6.04, - "learning_rate": 7.948525619989686e-06, - "loss": 0.1268, + "learning_rate": 1.7967328725445836e-05, + "loss": 0.1589, "step": 129535 }, { "epoch": 6.04, - "learning_rate": 7.948056818714548e-06, - "loss": 0.1967, + "learning_rate": 1.7966860655609816e-05, + "loss": 0.2115, "step": 129540 }, { "epoch": 6.04, - "learning_rate": 7.947588017439408e-06, - "loss": 0.0113, + "learning_rate": 1.7966392585773796e-05, + "loss": 0.0155, "step": 129545 }, { "epoch": 6.04, - "learning_rate": 7.94711921616427e-06, - "loss": 0.0094, + "learning_rate": 1.796592451593778e-05, + "loss": 0.0722, "step": 129550 }, { "epoch": 6.05, - "learning_rate": 7.94665041488913e-06, - "loss": 0.043, + "learning_rate": 1.796545644610176e-05, + "loss": 0.0093, "step": 129555 }, { "epoch": 6.05, - "learning_rate": 7.94618161361399e-06, - "loss": 0.0945, + "learning_rate": 1.796498837626574e-05, + "loss": 0.0157, "step": 129560 }, { "epoch": 6.05, - "learning_rate": 7.94571281233885e-06, - "loss": 0.0442, + "learning_rate": 1.7964520306429722e-05, + "loss": 0.0666, "step": 129565 }, { "epoch": 6.05, - "learning_rate": 7.94524401106371e-06, - "loss": 0.0351, + "learning_rate": 1.7964052236593702e-05, + "loss": 0.0287, "step": 129570 }, { "epoch": 6.05, - "learning_rate": 7.94477520978857e-06, - "loss": 0.0589, + "learning_rate": 1.7963584166757682e-05, + "loss": 0.0644, "step": 129575 }, { "epoch": 6.05, - "learning_rate": 7.94430640851343e-06, - "loss": 0.1044, + "learning_rate": 1.796311609692166e-05, + "loss": 0.1255, "step": 129580 }, { "epoch": 6.05, - "learning_rate": 7.943837607238292e-06, - "loss": 0.1991, + "learning_rate": 1.796264802708564e-05, + "loss": 0.1139, "step": 129585 }, { "epoch": 6.05, - "learning_rate": 7.943368805963154e-06, - "loss": 0.1935, + "learning_rate": 1.796217995724962e-05, + "loss": 0.1435, "step": 129590 }, { "epoch": 6.05, - "learning_rate": 7.942900004688014e-06, - "loss": 0.0344, + "learning_rate": 1.79617118874136e-05, + "loss": 0.0211, "step": 129595 }, { "epoch": 6.05, - "learning_rate": 7.942431203412874e-06, - "loss": 0.0055, + "learning_rate": 1.796124381757758e-05, + "loss": 0.0148, "step": 129600 }, { "epoch": 6.05, - "learning_rate": 7.941962402137734e-06, - "loss": 0.0447, + "learning_rate": 1.7960775747741564e-05, + "loss": 0.0233, "step": 129605 }, { "epoch": 6.05, - "learning_rate": 7.941493600862595e-06, - "loss": 0.0511, + "learning_rate": 1.7960307677905544e-05, + "loss": 0.0698, "step": 129610 }, { "epoch": 6.05, - "learning_rate": 7.941024799587455e-06, - "loss": 0.1057, + "learning_rate": 1.7959839608069524e-05, + "loss": 0.0498, "step": 129615 }, { "epoch": 6.05, - "learning_rate": 7.940555998312315e-06, - "loss": 0.1248, + "learning_rate": 1.7959371538233504e-05, + "loss": 0.1057, "step": 129620 }, { "epoch": 6.05, - "learning_rate": 7.940087197037177e-06, - "loss": 0.0972, + "learning_rate": 1.7958903468397487e-05, + "loss": 0.1524, "step": 129625 }, { "epoch": 6.05, - "learning_rate": 7.939618395762038e-06, - "loss": 0.1352, + "learning_rate": 1.7958435398561467e-05, + "loss": 0.1995, "step": 129630 }, { "epoch": 6.05, - "learning_rate": 7.939149594486898e-06, - "loss": 0.2982, + "learning_rate": 1.7957967328725446e-05, + "loss": 0.1526, "step": 129635 }, { "epoch": 6.05, - "learning_rate": 7.938680793211758e-06, - "loss": 0.2204, + "learning_rate": 1.795749925888943e-05, + "loss": 0.1224, "step": 129640 }, { "epoch": 6.05, - "learning_rate": 7.938211991936618e-06, - "loss": 0.0339, + "learning_rate": 1.795703118905341e-05, + "loss": 0.0125, "step": 129645 }, { "epoch": 6.05, - "learning_rate": 7.937743190661478e-06, - "loss": 0.0134, + "learning_rate": 1.7956563119217386e-05, + "loss": 0.0047, "step": 129650 }, { "epoch": 6.05, - "learning_rate": 7.93727438938634e-06, - "loss": 0.0459, + "learning_rate": 1.7956095049381366e-05, + "loss": 0.0115, "step": 129655 }, { "epoch": 6.05, - "learning_rate": 7.9368055881112e-06, - "loss": 0.04, + "learning_rate": 1.795562697954535e-05, + "loss": 0.0419, "step": 129660 }, { "epoch": 6.05, - "learning_rate": 7.936336786836061e-06, - "loss": 0.0682, + "learning_rate": 1.795515890970933e-05, + "loss": 0.0988, "step": 129665 }, { "epoch": 6.05, - "learning_rate": 7.935867985560921e-06, - "loss": 0.0557, + "learning_rate": 1.795469083987331e-05, + "loss": 0.1254, "step": 129670 }, { "epoch": 6.05, - "learning_rate": 7.935399184285781e-06, - "loss": 0.1058, + "learning_rate": 1.7954222770037288e-05, + "loss": 0.0498, "step": 129675 }, { "epoch": 6.05, - "learning_rate": 7.934930383010643e-06, - "loss": 0.084, + "learning_rate": 1.795375470020127e-05, + "loss": 0.1431, "step": 129680 }, { "epoch": 6.05, - "learning_rate": 7.934461581735503e-06, - "loss": 0.1593, + "learning_rate": 1.795328663036525e-05, + "loss": 0.1376, "step": 129685 }, { "epoch": 6.05, - "learning_rate": 7.933992780460363e-06, - "loss": 0.2361, + "learning_rate": 1.795281856052923e-05, + "loss": 0.1311, "step": 129690 }, { "epoch": 6.05, - "learning_rate": 7.933523979185224e-06, - "loss": 0.0171, + "learning_rate": 1.7952350490693214e-05, + "loss": 0.0207, "step": 129695 }, { "epoch": 6.05, - "learning_rate": 7.933055177910084e-06, - "loss": 0.004, + "learning_rate": 1.7951882420857194e-05, + "loss": 0.0178, "step": 129700 }, { "epoch": 6.05, - "learning_rate": 7.932586376634946e-06, - "loss": 0.0261, + "learning_rate": 1.7951414351021174e-05, + "loss": 0.0075, "step": 129705 }, { "epoch": 6.05, - "learning_rate": 7.932117575359806e-06, - "loss": 0.0264, + "learning_rate": 1.7950946281185154e-05, + "loss": 0.0408, "step": 129710 }, { "epoch": 6.05, - "learning_rate": 7.931648774084666e-06, - "loss": 0.0626, + "learning_rate": 1.7950478211349134e-05, + "loss": 0.0292, "step": 129715 }, { "epoch": 6.05, - "learning_rate": 7.931179972809526e-06, - "loss": 0.046, + "learning_rate": 1.7950010141513113e-05, + "loss": 0.0904, "step": 129720 }, { "epoch": 6.05, - "learning_rate": 7.930711171534387e-06, - "loss": 0.1059, + "learning_rate": 1.7949542071677093e-05, + "loss": 0.1432, "step": 129725 }, { "epoch": 6.05, - "learning_rate": 7.930242370259247e-06, - "loss": 0.2145, + "learning_rate": 1.7949074001841073e-05, + "loss": 0.0855, "step": 129730 }, { "epoch": 6.05, - "learning_rate": 7.929773568984109e-06, - "loss": 0.1099, + "learning_rate": 1.7948605932005056e-05, + "loss": 0.1302, "step": 129735 }, { "epoch": 6.05, - "learning_rate": 7.929304767708969e-06, - "loss": 0.1706, + "learning_rate": 1.7948137862169036e-05, + "loss": 0.1939, "step": 129740 }, { "epoch": 6.05, - "learning_rate": 7.928835966433829e-06, - "loss": 0.0238, + "learning_rate": 1.7947669792333016e-05, + "loss": 0.0092, "step": 129745 }, { "epoch": 6.05, - "learning_rate": 7.92836716515869e-06, - "loss": 0.0074, + "learning_rate": 1.7947201722497e-05, + "loss": 0.0024, "step": 129750 }, { "epoch": 6.05, - "learning_rate": 7.92789836388355e-06, - "loss": 0.0527, + "learning_rate": 1.794673365266098e-05, + "loss": 0.014, "step": 129755 }, { "epoch": 6.05, - "learning_rate": 7.92742956260841e-06, - "loss": 0.032, + "learning_rate": 1.794626558282496e-05, + "loss": 0.0178, "step": 129760 }, { "epoch": 6.06, - "learning_rate": 7.926960761333272e-06, - "loss": 0.1146, + "learning_rate": 1.794579751298894e-05, + "loss": 0.0517, "step": 129765 }, { "epoch": 6.06, - "learning_rate": 7.926491960058132e-06, - "loss": 0.1492, + "learning_rate": 1.7945329443152922e-05, + "loss": 0.0572, "step": 129770 }, { "epoch": 6.06, - "learning_rate": 7.926023158782993e-06, - "loss": 0.0907, + "learning_rate": 1.7944861373316898e-05, + "loss": 0.0897, "step": 129775 }, { "epoch": 6.06, - "learning_rate": 7.925554357507853e-06, - "loss": 0.1148, + "learning_rate": 1.7944393303480878e-05, + "loss": 0.067, "step": 129780 }, { "epoch": 6.06, - "learning_rate": 7.925085556232713e-06, - "loss": 0.2597, + "learning_rate": 1.7943925233644858e-05, + "loss": 0.1388, "step": 129785 }, { "epoch": 6.06, - "learning_rate": 7.924616754957573e-06, - "loss": 0.1417, + "learning_rate": 1.794345716380884e-05, + "loss": 0.1441, "step": 129790 }, { "epoch": 6.06, - "learning_rate": 7.924147953682435e-06, - "loss": 0.0121, + "learning_rate": 1.794298909397282e-05, + "loss": 0.004, "step": 129795 }, { "epoch": 6.06, - "learning_rate": 7.923679152407295e-06, - "loss": 0.0177, + "learning_rate": 1.79425210241368e-05, + "loss": 0.0175, "step": 129800 }, { "epoch": 6.06, - "learning_rate": 7.923210351132156e-06, - "loss": 0.0352, + "learning_rate": 1.794205295430078e-05, + "loss": 0.0832, "step": 129805 }, { "epoch": 6.06, - "learning_rate": 7.922741549857016e-06, - "loss": 0.0499, + "learning_rate": 1.7941584884464764e-05, + "loss": 0.0118, "step": 129810 }, { "epoch": 6.06, - "learning_rate": 7.922272748581878e-06, - "loss": 0.0264, + "learning_rate": 1.7941116814628744e-05, + "loss": 0.0286, "step": 129815 }, { "epoch": 6.06, - "learning_rate": 7.921803947306738e-06, - "loss": 0.0857, + "learning_rate": 1.7940648744792723e-05, + "loss": 0.0221, "step": 129820 }, { "epoch": 6.06, - "learning_rate": 7.921335146031598e-06, - "loss": 0.0452, + "learning_rate": 1.7940180674956707e-05, + "loss": 0.0823, "step": 129825 }, { "epoch": 6.06, - "learning_rate": 7.920866344756458e-06, - "loss": 0.1659, + "learning_rate": 1.7939712605120686e-05, + "loss": 0.096, "step": 129830 }, { "epoch": 6.06, - "learning_rate": 7.920397543481318e-06, - "loss": 0.15, + "learning_rate": 1.7939244535284666e-05, + "loss": 0.1386, "step": 129835 }, { "epoch": 6.06, - "learning_rate": 7.91992874220618e-06, - "loss": 0.1716, + "learning_rate": 1.7938776465448643e-05, + "loss": 0.2341, "step": 129840 }, { "epoch": 6.06, - "learning_rate": 7.919459940931041e-06, - "loss": 0.0035, + "learning_rate": 1.7938308395612626e-05, + "loss": 0.0026, "step": 129845 }, { "epoch": 6.06, - "learning_rate": 7.918991139655901e-06, - "loss": 0.0269, + "learning_rate": 1.7937840325776606e-05, + "loss": 0.0215, "step": 129850 }, { "epoch": 6.06, - "learning_rate": 7.91852233838076e-06, - "loss": 0.034, + "learning_rate": 1.7937372255940585e-05, + "loss": 0.026, "step": 129855 }, { "epoch": 6.06, - "learning_rate": 7.91805353710562e-06, - "loss": 0.0195, + "learning_rate": 1.7936904186104565e-05, + "loss": 0.0298, "step": 129860 }, { "epoch": 6.06, - "learning_rate": 7.917584735830482e-06, - "loss": 0.0519, + "learning_rate": 1.793643611626855e-05, + "loss": 0.0641, "step": 129865 }, { "epoch": 6.06, - "learning_rate": 7.917115934555342e-06, - "loss": 0.0859, + "learning_rate": 1.7935968046432528e-05, + "loss": 0.1126, "step": 129870 }, { "epoch": 6.06, - "learning_rate": 7.916647133280202e-06, - "loss": 0.0351, + "learning_rate": 1.7935499976596508e-05, + "loss": 0.0763, "step": 129875 }, { "epoch": 6.06, - "learning_rate": 7.916178332005064e-06, - "loss": 0.1241, + "learning_rate": 1.793503190676049e-05, + "loss": 0.1017, "step": 129880 }, { "epoch": 6.06, - "learning_rate": 7.915709530729925e-06, - "loss": 0.118, + "learning_rate": 1.793456383692447e-05, + "loss": 0.2429, "step": 129885 }, { "epoch": 6.06, - "learning_rate": 7.915240729454785e-06, - "loss": 0.1384, + "learning_rate": 1.793409576708845e-05, + "loss": 0.141, "step": 129890 }, { "epoch": 6.06, - "learning_rate": 7.914771928179645e-06, - "loss": 0.0036, + "learning_rate": 1.793362769725243e-05, + "loss": 0.0147, "step": 129895 }, { "epoch": 6.06, - "learning_rate": 7.914303126904505e-06, - "loss": 0.0818, + "learning_rate": 1.793315962741641e-05, + "loss": 0.0468, "step": 129900 }, { "epoch": 6.06, - "learning_rate": 7.913834325629365e-06, - "loss": 0.0344, + "learning_rate": 1.793269155758039e-05, + "loss": 0.0187, "step": 129905 }, { "epoch": 6.06, - "learning_rate": 7.913365524354227e-06, - "loss": 0.0131, + "learning_rate": 1.793222348774437e-05, + "loss": 0.0211, "step": 129910 }, { "epoch": 6.06, - "learning_rate": 7.912896723079088e-06, - "loss": 0.0233, + "learning_rate": 1.793175541790835e-05, + "loss": 0.0562, "step": 129915 }, { "epoch": 6.06, - "learning_rate": 7.912427921803948e-06, - "loss": 0.026, + "learning_rate": 1.7931287348072333e-05, + "loss": 0.0612, "step": 129920 }, { "epoch": 6.06, - "learning_rate": 7.911959120528808e-06, - "loss": 0.0307, + "learning_rate": 1.7930819278236313e-05, + "loss": 0.1078, "step": 129925 }, { "epoch": 6.06, - "learning_rate": 7.911490319253668e-06, - "loss": 0.1728, + "learning_rate": 1.7930351208400293e-05, + "loss": 0.0678, "step": 129930 }, { "epoch": 6.06, - "learning_rate": 7.91102151797853e-06, - "loss": 0.1581, + "learning_rate": 1.7929883138564276e-05, + "loss": 0.1512, "step": 129935 }, { "epoch": 6.06, - "learning_rate": 7.91055271670339e-06, - "loss": 0.1504, + "learning_rate": 1.7929415068728256e-05, + "loss": 0.1265, "step": 129940 }, { "epoch": 6.06, - "learning_rate": 7.91008391542825e-06, - "loss": 0.0112, + "learning_rate": 1.7928946998892236e-05, + "loss": 0.0163, "step": 129945 }, { "epoch": 6.06, - "learning_rate": 7.909615114153111e-06, - "loss": 0.014, + "learning_rate": 1.7928478929056216e-05, + "loss": 0.0045, "step": 129950 }, { "epoch": 6.06, - "learning_rate": 7.909146312877973e-06, - "loss": 0.0162, + "learning_rate": 1.79280108592202e-05, + "loss": 0.0179, "step": 129955 }, { "epoch": 6.06, - "learning_rate": 7.908677511602833e-06, - "loss": 0.0546, + "learning_rate": 1.792754278938418e-05, + "loss": 0.0342, "step": 129960 }, { "epoch": 6.06, - "learning_rate": 7.908208710327693e-06, - "loss": 0.043, + "learning_rate": 1.7927074719548155e-05, + "loss": 0.0445, "step": 129965 }, { "epoch": 6.06, - "learning_rate": 7.907739909052553e-06, - "loss": 0.1091, + "learning_rate": 1.7926606649712135e-05, + "loss": 0.0739, "step": 129970 }, { "epoch": 6.06, - "learning_rate": 7.907271107777413e-06, - "loss": 0.1563, + "learning_rate": 1.7926138579876118e-05, + "loss": 0.0788, "step": 129975 }, { "epoch": 6.07, - "learning_rate": 7.906802306502274e-06, - "loss": 0.0868, + "learning_rate": 1.7925670510040098e-05, + "loss": 0.2126, "step": 129980 }, { "epoch": 6.07, - "learning_rate": 7.906333505227134e-06, - "loss": 0.1862, + "learning_rate": 1.7925202440204078e-05, + "loss": 0.1177, "step": 129985 }, { "epoch": 6.07, - "learning_rate": 7.905864703951996e-06, - "loss": 0.1693, + "learning_rate": 1.7924734370368057e-05, + "loss": 0.049, "step": 129990 }, { "epoch": 6.07, - "learning_rate": 7.905395902676856e-06, - "loss": 0.0251, + "learning_rate": 1.792426630053204e-05, + "loss": 0.0338, "step": 129995 }, { "epoch": 6.07, - "learning_rate": 7.904927101401716e-06, - "loss": 0.0209, + "learning_rate": 1.792379823069602e-05, + "loss": 0.0013, "step": 130000 }, { "epoch": 6.07, - "learning_rate": 7.904458300126577e-06, - "loss": 0.0611, + "learning_rate": 1.792333016086e-05, + "loss": 0.03, "step": 130005 }, { "epoch": 6.07, - "learning_rate": 7.903989498851437e-06, - "loss": 0.0183, + "learning_rate": 1.7922862091023984e-05, + "loss": 0.0231, "step": 130010 }, { "epoch": 6.07, - "learning_rate": 7.903520697576297e-06, - "loss": 0.0835, + "learning_rate": 1.7922394021187963e-05, + "loss": 0.0219, "step": 130015 }, { "epoch": 6.07, - "learning_rate": 7.903051896301159e-06, - "loss": 0.0552, + "learning_rate": 1.7921925951351943e-05, + "loss": 0.051, "step": 130020 }, { "epoch": 6.07, - "learning_rate": 7.902583095026019e-06, - "loss": 0.1326, + "learning_rate": 1.7921457881515923e-05, + "loss": 0.0396, "step": 130025 }, { "epoch": 6.07, - "learning_rate": 7.90211429375088e-06, - "loss": 0.1231, + "learning_rate": 1.7920989811679903e-05, + "loss": 0.2241, "step": 130030 }, { "epoch": 6.07, - "learning_rate": 7.90164549247574e-06, - "loss": 0.1818, + "learning_rate": 1.7920521741843883e-05, + "loss": 0.2308, "step": 130035 }, { "epoch": 6.07, - "learning_rate": 7.9011766912006e-06, - "loss": 0.2505, + "learning_rate": 1.7920053672007862e-05, + "loss": 0.161, "step": 130040 }, { "epoch": 6.07, - "learning_rate": 7.90070788992546e-06, - "loss": 0.0121, + "learning_rate": 1.7919585602171842e-05, + "loss": 0.0051, "step": 130045 }, { "epoch": 6.07, - "learning_rate": 7.900239088650322e-06, - "loss": 0.0139, + "learning_rate": 1.7919117532335825e-05, + "loss": 0.0185, "step": 130050 }, { "epoch": 6.07, - "learning_rate": 7.899770287375182e-06, - "loss": 0.0146, + "learning_rate": 1.7918649462499805e-05, + "loss": 0.0377, "step": 130055 }, { "epoch": 6.07, - "learning_rate": 7.899301486100043e-06, - "loss": 0.058, + "learning_rate": 1.7918181392663785e-05, + "loss": 0.0543, "step": 130060 }, { "epoch": 6.07, - "learning_rate": 7.898832684824903e-06, - "loss": 0.0625, + "learning_rate": 1.7917713322827768e-05, + "loss": 0.0395, "step": 130065 }, { "epoch": 6.07, - "learning_rate": 7.898363883549763e-06, - "loss": 0.064, + "learning_rate": 1.7917245252991748e-05, + "loss": 0.1189, "step": 130070 }, { "epoch": 6.07, - "learning_rate": 7.897895082274625e-06, - "loss": 0.0448, + "learning_rate": 1.7916777183155728e-05, + "loss": 0.0574, "step": 130075 }, { "epoch": 6.07, - "learning_rate": 7.897426280999485e-06, - "loss": 0.0697, + "learning_rate": 1.7916309113319708e-05, + "loss": 0.0974, "step": 130080 }, { "epoch": 6.07, - "learning_rate": 7.896957479724345e-06, - "loss": 0.2441, + "learning_rate": 1.791584104348369e-05, + "loss": 0.055, "step": 130085 }, { "epoch": 6.07, - "learning_rate": 7.896488678449206e-06, - "loss": 0.2566, + "learning_rate": 1.7915372973647667e-05, + "loss": 0.1135, "step": 130090 }, { "epoch": 6.07, - "learning_rate": 7.896019877174066e-06, - "loss": 0.0163, + "learning_rate": 1.7914904903811647e-05, + "loss": 0.0311, "step": 130095 }, { "epoch": 6.07, - "learning_rate": 7.895551075898928e-06, - "loss": 0.0171, + "learning_rate": 1.7914436833975627e-05, + "loss": 0.0256, "step": 130100 }, { "epoch": 6.07, - "learning_rate": 7.895082274623788e-06, - "loss": 0.0416, + "learning_rate": 1.791396876413961e-05, + "loss": 0.0453, "step": 130105 }, { "epoch": 6.07, - "learning_rate": 7.894613473348648e-06, - "loss": 0.0388, + "learning_rate": 1.791350069430359e-05, + "loss": 0.0551, "step": 130110 }, { "epoch": 6.07, - "learning_rate": 7.894144672073508e-06, - "loss": 0.0173, + "learning_rate": 1.791303262446757e-05, + "loss": 0.0545, "step": 130115 }, { "epoch": 6.07, - "learning_rate": 7.89367587079837e-06, - "loss": 0.0357, + "learning_rate": 1.7912564554631553e-05, + "loss": 0.0499, "step": 130120 }, { "epoch": 6.07, - "learning_rate": 7.89320706952323e-06, - "loss": 0.0638, + "learning_rate": 1.7912096484795533e-05, + "loss": 0.0496, "step": 130125 }, { "epoch": 6.07, - "learning_rate": 7.892738268248091e-06, - "loss": 0.0736, + "learning_rate": 1.7911628414959513e-05, + "loss": 0.1019, "step": 130130 }, { "epoch": 6.07, - "learning_rate": 7.892269466972951e-06, - "loss": 0.1627, + "learning_rate": 1.7911160345123493e-05, + "loss": 0.2029, "step": 130135 }, { "epoch": 6.07, - "learning_rate": 7.891800665697813e-06, - "loss": 0.1127, + "learning_rate": 1.7910692275287476e-05, + "loss": 0.0775, "step": 130140 }, { "epoch": 6.07, - "learning_rate": 7.891331864422673e-06, - "loss": 0.0175, + "learning_rate": 1.7910224205451456e-05, + "loss": 0.0318, "step": 130145 }, { "epoch": 6.07, - "learning_rate": 7.890863063147532e-06, - "loss": 0.0187, + "learning_rate": 1.7909756135615435e-05, + "loss": 0.0228, "step": 130150 }, { "epoch": 6.07, - "learning_rate": 7.890394261872392e-06, - "loss": 0.0177, + "learning_rate": 1.7909288065779412e-05, + "loss": 0.0076, "step": 130155 }, { "epoch": 6.07, - "learning_rate": 7.889925460597252e-06, - "loss": 0.0782, + "learning_rate": 1.7908819995943395e-05, + "loss": 0.0142, "step": 130160 }, { "epoch": 6.07, - "learning_rate": 7.889456659322114e-06, - "loss": 0.0533, + "learning_rate": 1.7908351926107375e-05, + "loss": 0.0311, "step": 130165 }, { "epoch": 6.07, - "learning_rate": 7.888987858046976e-06, - "loss": 0.0573, + "learning_rate": 1.7907883856271355e-05, + "loss": 0.089, "step": 130170 }, { "epoch": 6.07, - "learning_rate": 7.888519056771836e-06, - "loss": 0.087, + "learning_rate": 1.7907415786435334e-05, + "loss": 0.0932, "step": 130175 }, { "epoch": 6.07, - "learning_rate": 7.888050255496695e-06, - "loss": 0.1778, + "learning_rate": 1.7906947716599318e-05, + "loss": 0.0781, "step": 130180 }, { "epoch": 6.07, - "learning_rate": 7.887581454221555e-06, - "loss": 0.1205, + "learning_rate": 1.7906479646763297e-05, + "loss": 0.2254, "step": 130185 }, { "epoch": 6.07, - "learning_rate": 7.887112652946417e-06, - "loss": 0.1012, + "learning_rate": 1.7906011576927277e-05, + "loss": 0.1685, "step": 130190 }, { "epoch": 6.08, - "learning_rate": 7.886643851671277e-06, - "loss": 0.0055, + "learning_rate": 1.790554350709126e-05, + "loss": 0.0312, "step": 130195 }, { "epoch": 6.08, - "learning_rate": 7.886175050396137e-06, - "loss": 0.0225, + "learning_rate": 1.790507543725524e-05, + "loss": 0.0708, "step": 130200 }, { "epoch": 6.08, - "learning_rate": 7.885706249120999e-06, - "loss": 0.0344, + "learning_rate": 1.790460736741922e-05, + "loss": 0.0181, "step": 130205 }, { "epoch": 6.08, - "learning_rate": 7.88523744784586e-06, - "loss": 0.0443, + "learning_rate": 1.79041392975832e-05, + "loss": 0.0204, "step": 130210 }, { "epoch": 6.08, - "learning_rate": 7.88476864657072e-06, - "loss": 0.0091, + "learning_rate": 1.7903671227747183e-05, + "loss": 0.0639, "step": 130215 }, { "epoch": 6.08, - "learning_rate": 7.88429984529558e-06, - "loss": 0.0766, + "learning_rate": 1.790320315791116e-05, + "loss": 0.0534, "step": 130220 }, { "epoch": 6.08, - "learning_rate": 7.88383104402044e-06, - "loss": 0.0417, + "learning_rate": 1.790273508807514e-05, + "loss": 0.0889, "step": 130225 }, { "epoch": 6.08, - "learning_rate": 7.8833622427453e-06, - "loss": 0.0571, + "learning_rate": 1.790226701823912e-05, + "loss": 0.1193, "step": 130230 }, { "epoch": 6.08, - "learning_rate": 7.882893441470161e-06, - "loss": 0.2783, + "learning_rate": 1.7901798948403102e-05, + "loss": 0.1831, "step": 130235 }, { "epoch": 6.08, - "learning_rate": 7.882424640195021e-06, - "loss": 0.2484, + "learning_rate": 1.7901330878567082e-05, + "loss": 0.1997, "step": 130240 }, { "epoch": 6.08, - "learning_rate": 7.881955838919883e-06, - "loss": 0.0134, + "learning_rate": 1.7900862808731062e-05, + "loss": 0.0054, "step": 130245 }, { "epoch": 6.08, - "learning_rate": 7.881487037644743e-06, - "loss": 0.0191, + "learning_rate": 1.7900394738895045e-05, + "loss": 0.0076, "step": 130250 }, { "epoch": 6.08, - "learning_rate": 7.881018236369603e-06, - "loss": 0.0232, + "learning_rate": 1.7899926669059025e-05, + "loss": 0.0391, "step": 130255 }, { "epoch": 6.08, - "learning_rate": 7.880549435094465e-06, - "loss": 0.0147, + "learning_rate": 1.7899458599223005e-05, + "loss": 0.0296, "step": 130260 }, { "epoch": 6.08, - "learning_rate": 7.880080633819324e-06, - "loss": 0.051, + "learning_rate": 1.7898990529386985e-05, + "loss": 0.0535, "step": 130265 }, { "epoch": 6.08, - "learning_rate": 7.879611832544184e-06, - "loss": 0.0661, + "learning_rate": 1.7898522459550968e-05, + "loss": 0.0797, "step": 130270 }, { "epoch": 6.08, - "learning_rate": 7.879143031269046e-06, - "loss": 0.0505, + "learning_rate": 1.7898054389714948e-05, + "loss": 0.0474, "step": 130275 }, { "epoch": 6.08, - "learning_rate": 7.878674229993906e-06, - "loss": 0.1024, + "learning_rate": 1.7897586319878924e-05, + "loss": 0.0443, "step": 130280 }, { "epoch": 6.08, - "learning_rate": 7.878205428718768e-06, - "loss": 0.1903, + "learning_rate": 1.7897118250042904e-05, + "loss": 0.1312, "step": 130285 }, { "epoch": 6.08, - "learning_rate": 7.877736627443628e-06, - "loss": 0.1907, + "learning_rate": 1.7896650180206887e-05, + "loss": 0.2158, "step": 130290 }, { "epoch": 6.08, - "learning_rate": 7.877267826168487e-06, - "loss": 0.013, + "learning_rate": 1.7896182110370867e-05, + "loss": 0.005, "step": 130295 }, { "epoch": 6.08, - "learning_rate": 7.876799024893347e-06, - "loss": 0.0286, + "learning_rate": 1.7895714040534847e-05, + "loss": 0.0101, "step": 130300 }, { "epoch": 6.08, - "learning_rate": 7.876330223618209e-06, - "loss": 0.0694, + "learning_rate": 1.789524597069883e-05, + "loss": 0.023, "step": 130305 }, { "epoch": 6.08, - "learning_rate": 7.875861422343069e-06, - "loss": 0.1532, + "learning_rate": 1.789477790086281e-05, + "loss": 0.1068, "step": 130310 }, { "epoch": 6.08, - "learning_rate": 7.87539262106793e-06, - "loss": 0.0338, + "learning_rate": 1.789430983102679e-05, + "loss": 0.0409, "step": 130315 }, { "epoch": 6.08, - "learning_rate": 7.87492381979279e-06, - "loss": 0.0382, + "learning_rate": 1.789384176119077e-05, + "loss": 0.0463, "step": 130320 }, { "epoch": 6.08, - "learning_rate": 7.87445501851765e-06, - "loss": 0.0971, + "learning_rate": 1.7893373691354753e-05, + "loss": 0.1046, "step": 130325 }, { "epoch": 6.08, - "learning_rate": 7.873986217242512e-06, - "loss": 0.0839, + "learning_rate": 1.7892905621518732e-05, + "loss": 0.0434, "step": 130330 }, { "epoch": 6.08, - "learning_rate": 7.873517415967372e-06, - "loss": 0.1794, + "learning_rate": 1.7892437551682712e-05, + "loss": 0.0826, "step": 130335 }, { "epoch": 6.08, - "learning_rate": 7.873048614692232e-06, - "loss": 0.1955, + "learning_rate": 1.7891969481846692e-05, + "loss": 0.1658, "step": 130340 }, { "epoch": 6.08, - "learning_rate": 7.872579813417094e-06, - "loss": 0.0177, + "learning_rate": 1.7891501412010672e-05, + "loss": 0.0038, "step": 130345 }, { "epoch": 6.08, - "learning_rate": 7.872111012141954e-06, - "loss": 0.0228, + "learning_rate": 1.7891033342174652e-05, + "loss": 0.0351, "step": 130350 }, { "epoch": 6.08, - "learning_rate": 7.871642210866815e-06, - "loss": 0.0315, + "learning_rate": 1.789056527233863e-05, + "loss": 0.0289, "step": 130355 }, { "epoch": 6.08, - "learning_rate": 7.871173409591675e-06, - "loss": 0.0141, + "learning_rate": 1.7890097202502615e-05, + "loss": 0.0046, "step": 130360 }, { "epoch": 6.08, - "learning_rate": 7.870704608316535e-06, - "loss": 0.0302, + "learning_rate": 1.7889629132666595e-05, + "loss": 0.024, "step": 130365 }, { "epoch": 6.08, - "learning_rate": 7.870235807041395e-06, - "loss": 0.0396, + "learning_rate": 1.7889161062830574e-05, + "loss": 0.0345, "step": 130370 }, { "epoch": 6.08, - "learning_rate": 7.869767005766257e-06, - "loss": 0.0683, + "learning_rate": 1.7888692992994554e-05, + "loss": 0.0749, "step": 130375 }, { "epoch": 6.08, - "learning_rate": 7.869298204491117e-06, - "loss": 0.1042, + "learning_rate": 1.7888224923158537e-05, + "loss": 0.1502, "step": 130380 }, { "epoch": 6.08, - "learning_rate": 7.868829403215978e-06, - "loss": 0.0813, + "learning_rate": 1.7887756853322517e-05, + "loss": 0.1051, "step": 130385 }, { "epoch": 6.08, - "learning_rate": 7.868360601940838e-06, - "loss": 0.1698, + "learning_rate": 1.7887288783486497e-05, + "loss": 0.1634, "step": 130390 }, { "epoch": 6.08, - "learning_rate": 7.867891800665698e-06, - "loss": 0.005, + "learning_rate": 1.7886820713650477e-05, + "loss": 0.0148, "step": 130395 }, { "epoch": 6.08, - "learning_rate": 7.86742299939056e-06, - "loss": 0.0232, + "learning_rate": 1.788635264381446e-05, + "loss": 0.0662, "step": 130400 }, { "epoch": 6.08, - "learning_rate": 7.86695419811542e-06, - "loss": 0.0343, + "learning_rate": 1.788588457397844e-05, + "loss": 0.0142, "step": 130405 }, { "epoch": 6.09, - "learning_rate": 7.86648539684028e-06, - "loss": 0.0453, + "learning_rate": 1.7885416504142416e-05, + "loss": 0.0338, "step": 130410 }, { "epoch": 6.09, - "learning_rate": 7.86601659556514e-06, - "loss": 0.0319, + "learning_rate": 1.7884948434306396e-05, + "loss": 0.0269, "step": 130415 }, { "epoch": 6.09, - "learning_rate": 7.865547794290001e-06, - "loss": 0.0171, + "learning_rate": 1.788448036447038e-05, + "loss": 0.1453, "step": 130420 }, { "epoch": 6.09, - "learning_rate": 7.865078993014863e-06, - "loss": 0.08, + "learning_rate": 1.788401229463436e-05, + "loss": 0.0425, "step": 130425 }, { "epoch": 6.09, - "learning_rate": 7.864610191739723e-06, - "loss": 0.1007, + "learning_rate": 1.788354422479834e-05, + "loss": 0.0572, "step": 130430 }, { "epoch": 6.09, - "learning_rate": 7.864141390464583e-06, - "loss": 0.117, + "learning_rate": 1.7883076154962322e-05, + "loss": 0.2889, "step": 130435 }, { "epoch": 6.09, - "learning_rate": 7.863672589189442e-06, - "loss": 0.2394, + "learning_rate": 1.7882608085126302e-05, + "loss": 0.1416, "step": 130440 }, { "epoch": 6.09, - "learning_rate": 7.863203787914304e-06, - "loss": 0.0199, + "learning_rate": 1.7882140015290282e-05, + "loss": 0.0097, "step": 130445 }, { "epoch": 6.09, - "learning_rate": 7.862734986639164e-06, - "loss": 0.0387, + "learning_rate": 1.788167194545426e-05, + "loss": 0.0229, "step": 130450 }, { "epoch": 6.09, - "learning_rate": 7.862266185364024e-06, - "loss": 0.0288, + "learning_rate": 1.7881203875618245e-05, + "loss": 0.0051, "step": 130455 }, { "epoch": 6.09, - "learning_rate": 7.861797384088886e-06, - "loss": 0.0279, + "learning_rate": 1.7880735805782225e-05, + "loss": 0.2747, "step": 130460 }, { "epoch": 6.09, - "learning_rate": 7.861328582813747e-06, - "loss": 0.0662, + "learning_rate": 1.7880267735946205e-05, + "loss": 0.0296, "step": 130465 }, { "epoch": 6.09, - "learning_rate": 7.860859781538607e-06, - "loss": 0.0723, + "learning_rate": 1.787979966611018e-05, + "loss": 0.0352, "step": 130470 }, { "epoch": 6.09, - "learning_rate": 7.860390980263467e-06, - "loss": 0.2133, + "learning_rate": 1.7879331596274164e-05, + "loss": 0.0723, "step": 130475 }, { "epoch": 6.09, - "learning_rate": 7.859922178988327e-06, - "loss": 0.0848, + "learning_rate": 1.7878863526438144e-05, + "loss": 0.1051, "step": 130480 }, { "epoch": 6.09, - "learning_rate": 7.859453377713187e-06, - "loss": 0.1824, + "learning_rate": 1.7878395456602124e-05, + "loss": 0.2528, "step": 130485 }, { "epoch": 6.09, - "learning_rate": 7.858984576438049e-06, - "loss": 0.2606, + "learning_rate": 1.7877927386766107e-05, + "loss": 0.1682, "step": 130490 }, { "epoch": 6.09, - "learning_rate": 7.858515775162909e-06, - "loss": 0.0153, + "learning_rate": 1.7877459316930087e-05, + "loss": 0.0229, "step": 130495 }, { "epoch": 6.09, - "learning_rate": 7.85804697388777e-06, - "loss": 0.0553, + "learning_rate": 1.7876991247094067e-05, + "loss": 0.0349, "step": 130500 }, { "epoch": 6.09, - "learning_rate": 7.85757817261263e-06, - "loss": 0.0238, + "learning_rate": 1.7876523177258046e-05, + "loss": 0.028, "step": 130505 }, { "epoch": 6.09, - "learning_rate": 7.85710937133749e-06, - "loss": 0.0495, + "learning_rate": 1.787605510742203e-05, + "loss": 0.0207, "step": 130510 }, { "epoch": 6.09, - "learning_rate": 7.856640570062352e-06, - "loss": 0.0302, + "learning_rate": 1.787558703758601e-05, + "loss": 0.0444, "step": 130515 }, { "epoch": 6.09, - "learning_rate": 7.856171768787212e-06, - "loss": 0.0647, + "learning_rate": 1.787511896774999e-05, + "loss": 0.035, "step": 130520 }, { "epoch": 6.09, - "learning_rate": 7.855702967512072e-06, - "loss": 0.0533, + "learning_rate": 1.787465089791397e-05, + "loss": 0.0628, "step": 130525 }, { "epoch": 6.09, - "learning_rate": 7.855234166236933e-06, - "loss": 0.136, + "learning_rate": 1.7874182828077952e-05, + "loss": 0.0998, "step": 130530 }, { "epoch": 6.09, - "learning_rate": 7.854765364961793e-06, - "loss": 0.2043, + "learning_rate": 1.787371475824193e-05, + "loss": 0.0974, "step": 130535 }, { "epoch": 6.09, - "learning_rate": 7.854296563686655e-06, - "loss": 0.1064, + "learning_rate": 1.787324668840591e-05, + "loss": 0.2458, "step": 130540 }, { "epoch": 6.09, - "learning_rate": 7.853827762411515e-06, - "loss": 0.0031, + "learning_rate": 1.7872778618569892e-05, + "loss": 0.0101, "step": 130545 }, { "epoch": 6.09, - "learning_rate": 7.853358961136375e-06, - "loss": 0.0386, + "learning_rate": 1.787231054873387e-05, + "loss": 0.0097, "step": 130550 }, { "epoch": 6.09, - "learning_rate": 7.852890159861235e-06, - "loss": 0.0273, + "learning_rate": 1.787184247889785e-05, + "loss": 0.0199, "step": 130555 }, { "epoch": 6.09, - "learning_rate": 7.852421358586096e-06, - "loss": 0.0353, + "learning_rate": 1.787137440906183e-05, + "loss": 0.0142, "step": 130560 }, { "epoch": 6.09, - "learning_rate": 7.851952557310956e-06, - "loss": 0.0657, + "learning_rate": 1.7870906339225814e-05, + "loss": 0.0546, "step": 130565 }, { "epoch": 6.09, - "learning_rate": 7.851483756035818e-06, - "loss": 0.0818, + "learning_rate": 1.7870438269389794e-05, + "loss": 0.1908, "step": 130570 }, { "epoch": 6.09, - "learning_rate": 7.851014954760678e-06, - "loss": 0.0618, + "learning_rate": 1.7869970199553774e-05, + "loss": 0.082, "step": 130575 }, { "epoch": 6.09, - "learning_rate": 7.850546153485538e-06, - "loss": 0.1553, + "learning_rate": 1.7869502129717754e-05, + "loss": 0.1165, "step": 130580 }, { "epoch": 6.09, - "learning_rate": 7.8500773522104e-06, - "loss": 0.1604, + "learning_rate": 1.7869034059881737e-05, + "loss": 0.1565, "step": 130585 }, { "epoch": 6.09, - "learning_rate": 7.849608550935259e-06, - "loss": 0.1471, + "learning_rate": 1.7868565990045717e-05, + "loss": 0.1513, "step": 130590 }, { "epoch": 6.09, - "learning_rate": 7.849139749660119e-06, - "loss": 0.0124, + "learning_rate": 1.7868097920209693e-05, + "loss": 0.0138, "step": 130595 }, { "epoch": 6.09, - "learning_rate": 7.84867094838498e-06, - "loss": 0.0137, + "learning_rate": 1.7867629850373673e-05, + "loss": 0.0465, "step": 130600 }, { "epoch": 6.09, - "learning_rate": 7.84820214710984e-06, - "loss": 0.0051, + "learning_rate": 1.7867161780537656e-05, + "loss": 0.0744, "step": 130605 }, { "epoch": 6.09, - "learning_rate": 7.847733345834702e-06, - "loss": 0.0072, + "learning_rate": 1.7866693710701636e-05, + "loss": 0.0151, "step": 130610 }, { "epoch": 6.09, - "learning_rate": 7.847264544559562e-06, - "loss": 0.0187, + "learning_rate": 1.7866225640865616e-05, + "loss": 0.0301, "step": 130615 }, { "epoch": 6.09, - "learning_rate": 7.846795743284422e-06, - "loss": 0.0352, + "learning_rate": 1.78657575710296e-05, + "loss": 0.1386, "step": 130620 }, { "epoch": 6.1, - "learning_rate": 7.846326942009282e-06, - "loss": 0.043, + "learning_rate": 1.786528950119358e-05, + "loss": 0.042, "step": 130625 }, { "epoch": 6.1, - "learning_rate": 7.845858140734144e-06, - "loss": 0.123, + "learning_rate": 1.786482143135756e-05, + "loss": 0.1218, "step": 130630 }, { "epoch": 6.1, - "learning_rate": 7.845389339459004e-06, - "loss": 0.1989, + "learning_rate": 1.786435336152154e-05, + "loss": 0.1661, "step": 130635 }, { "epoch": 6.1, - "learning_rate": 7.844920538183865e-06, - "loss": 0.1214, + "learning_rate": 1.7863885291685522e-05, + "loss": 0.1562, "step": 130640 }, { "epoch": 6.1, - "learning_rate": 7.844451736908725e-06, - "loss": 0.0103, + "learning_rate": 1.78634172218495e-05, + "loss": 0.0134, "step": 130645 }, { "epoch": 6.1, - "learning_rate": 7.843982935633585e-06, - "loss": 0.015, + "learning_rate": 1.786294915201348e-05, + "loss": 0.0013, "step": 130650 }, { "epoch": 6.1, - "learning_rate": 7.843514134358447e-06, - "loss": 0.0414, + "learning_rate": 1.786248108217746e-05, + "loss": 0.0178, "step": 130655 }, { "epoch": 6.1, - "learning_rate": 7.843045333083307e-06, - "loss": 0.0216, + "learning_rate": 1.786201301234144e-05, + "loss": 0.0111, "step": 130660 }, { "epoch": 6.1, - "learning_rate": 7.842576531808167e-06, - "loss": 0.0611, + "learning_rate": 1.786154494250542e-05, + "loss": 0.0985, "step": 130665 }, { "epoch": 6.1, - "learning_rate": 7.842107730533027e-06, - "loss": 0.0308, + "learning_rate": 1.78610768726694e-05, + "loss": 0.0255, "step": 130670 }, { "epoch": 6.1, - "learning_rate": 7.841638929257888e-06, - "loss": 0.0147, + "learning_rate": 1.7860608802833384e-05, + "loss": 0.0342, "step": 130675 }, { "epoch": 6.1, - "learning_rate": 7.84117012798275e-06, - "loss": 0.129, + "learning_rate": 1.7860140732997364e-05, + "loss": 0.1092, "step": 130680 }, { "epoch": 6.1, - "learning_rate": 7.84070132670761e-06, - "loss": 0.1699, + "learning_rate": 1.7859672663161344e-05, + "loss": 0.1145, "step": 130685 }, { "epoch": 6.1, - "learning_rate": 7.84023252543247e-06, - "loss": 0.186, + "learning_rate": 1.7859204593325323e-05, + "loss": 0.2716, "step": 130690 }, { "epoch": 6.1, - "learning_rate": 7.83976372415733e-06, - "loss": 0.0079, + "learning_rate": 1.7858736523489307e-05, + "loss": 0.0183, "step": 130695 }, { "epoch": 6.1, - "learning_rate": 7.839294922882191e-06, - "loss": 0.035, + "learning_rate": 1.7858268453653286e-05, + "loss": 0.0301, "step": 130700 }, { "epoch": 6.1, - "learning_rate": 7.838826121607051e-06, - "loss": 0.0181, + "learning_rate": 1.7857800383817266e-05, + "loss": 0.0084, "step": 130705 }, { "epoch": 6.1, - "learning_rate": 7.838357320331911e-06, - "loss": 0.0665, + "learning_rate": 1.7857332313981246e-05, + "loss": 0.0266, "step": 130710 }, { "epoch": 6.1, - "learning_rate": 7.837888519056773e-06, - "loss": 0.0387, + "learning_rate": 1.785686424414523e-05, + "loss": 0.092, "step": 130715 }, { "epoch": 6.1, - "learning_rate": 7.837419717781633e-06, - "loss": 0.0807, + "learning_rate": 1.785639617430921e-05, + "loss": 0.0951, "step": 130720 }, { "epoch": 6.1, - "learning_rate": 7.836950916506494e-06, - "loss": 0.1023, + "learning_rate": 1.7855928104473186e-05, + "loss": 0.1011, "step": 130725 }, { "epoch": 6.1, - "learning_rate": 7.836482115231354e-06, - "loss": 0.1498, + "learning_rate": 1.785546003463717e-05, + "loss": 0.2002, "step": 130730 }, { "epoch": 6.1, - "learning_rate": 7.836013313956214e-06, - "loss": 0.2131, + "learning_rate": 1.785499196480115e-05, + "loss": 0.276, "step": 130735 }, { "epoch": 6.1, - "learning_rate": 7.835544512681074e-06, - "loss": 0.178, + "learning_rate": 1.785452389496513e-05, + "loss": 0.0772, "step": 130740 }, { "epoch": 6.1, - "learning_rate": 7.835075711405936e-06, - "loss": 0.0104, + "learning_rate": 1.7854055825129108e-05, + "loss": 0.0267, "step": 130745 }, { "epoch": 6.1, - "learning_rate": 7.834606910130796e-06, - "loss": 0.0173, + "learning_rate": 1.785358775529309e-05, + "loss": 0.0332, "step": 130750 }, { "epoch": 6.1, - "learning_rate": 7.834138108855657e-06, - "loss": 0.0128, + "learning_rate": 1.785311968545707e-05, + "loss": 0.047, "step": 130755 }, { "epoch": 6.1, - "learning_rate": 7.833669307580517e-06, - "loss": 0.0482, + "learning_rate": 1.785265161562105e-05, + "loss": 0.0335, "step": 130760 }, { "epoch": 6.1, - "learning_rate": 7.833200506305377e-06, - "loss": 0.0447, + "learning_rate": 1.785218354578503e-05, + "loss": 0.0852, "step": 130765 }, { "epoch": 6.1, - "learning_rate": 7.832731705030239e-06, - "loss": 0.1123, + "learning_rate": 1.7851715475949014e-05, + "loss": 0.0574, "step": 130770 }, { "epoch": 6.1, - "learning_rate": 7.832262903755099e-06, - "loss": 0.0855, + "learning_rate": 1.7851247406112994e-05, + "loss": 0.0745, "step": 130775 }, { "epoch": 6.1, - "learning_rate": 7.831794102479959e-06, - "loss": 0.0895, + "learning_rate": 1.7850779336276974e-05, + "loss": 0.0912, "step": 130780 }, { "epoch": 6.1, - "learning_rate": 7.83132530120482e-06, - "loss": 0.2156, + "learning_rate": 1.785031126644095e-05, + "loss": 0.1503, "step": 130785 }, { "epoch": 6.1, - "learning_rate": 7.83085649992968e-06, - "loss": 0.0735, + "learning_rate": 1.7849843196604933e-05, + "loss": 0.2279, "step": 130790 }, { "epoch": 6.1, - "learning_rate": 7.830387698654542e-06, - "loss": 0.0041, + "learning_rate": 1.7849375126768913e-05, + "loss": 0.0519, "step": 130795 }, { "epoch": 6.1, - "learning_rate": 7.829918897379402e-06, - "loss": 0.0161, + "learning_rate": 1.7848907056932893e-05, + "loss": 0.0175, "step": 130800 }, { "epoch": 6.1, - "learning_rate": 7.829450096104262e-06, - "loss": 0.0417, + "learning_rate": 1.7848438987096876e-05, + "loss": 0.0131, "step": 130805 }, { "epoch": 6.1, - "learning_rate": 7.828981294829122e-06, - "loss": 0.0161, + "learning_rate": 1.7847970917260856e-05, + "loss": 0.0256, "step": 130810 }, { "epoch": 6.1, - "learning_rate": 7.828512493553983e-06, - "loss": 0.0399, + "learning_rate": 1.7847502847424836e-05, + "loss": 0.0413, "step": 130815 }, { "epoch": 6.1, - "learning_rate": 7.828043692278843e-06, - "loss": 0.0527, + "learning_rate": 1.7847034777588816e-05, + "loss": 0.0739, "step": 130820 }, { "epoch": 6.1, - "learning_rate": 7.827574891003705e-06, - "loss": 0.0955, + "learning_rate": 1.78465667077528e-05, + "loss": 0.0556, "step": 130825 }, { "epoch": 6.1, - "learning_rate": 7.827106089728565e-06, - "loss": 0.1352, + "learning_rate": 1.784609863791678e-05, + "loss": 0.0619, "step": 130830 }, { "epoch": 6.1, - "learning_rate": 7.826637288453425e-06, - "loss": 0.1426, + "learning_rate": 1.784563056808076e-05, + "loss": 0.1223, "step": 130835 }, { "epoch": 6.11, - "learning_rate": 7.826168487178286e-06, - "loss": 0.1787, + "learning_rate": 1.7845162498244738e-05, + "loss": 0.1608, "step": 130840 }, { "epoch": 6.11, - "learning_rate": 7.825699685903146e-06, - "loss": 0.0193, + "learning_rate": 1.784469442840872e-05, + "loss": 0.0121, "step": 130845 }, { "epoch": 6.11, - "learning_rate": 7.825230884628006e-06, - "loss": 0.0222, + "learning_rate": 1.7844226358572698e-05, + "loss": 0.0265, "step": 130850 }, { "epoch": 6.11, - "learning_rate": 7.824762083352868e-06, - "loss": 0.03, + "learning_rate": 1.7843758288736678e-05, + "loss": 0.019, "step": 130855 }, { "epoch": 6.11, - "learning_rate": 7.824293282077728e-06, - "loss": 0.0178, + "learning_rate": 1.784329021890066e-05, + "loss": 0.0284, "step": 130860 }, { "epoch": 6.11, - "learning_rate": 7.82382448080259e-06, - "loss": 0.0222, + "learning_rate": 1.784282214906464e-05, + "loss": 0.0518, "step": 130865 }, { "epoch": 6.11, - "learning_rate": 7.82335567952745e-06, - "loss": 0.0433, + "learning_rate": 1.784235407922862e-05, + "loss": 0.0703, "step": 130870 }, { "epoch": 6.11, - "learning_rate": 7.82288687825231e-06, - "loss": 0.0509, + "learning_rate": 1.78418860093926e-05, + "loss": 0.0569, "step": 130875 }, { "epoch": 6.11, - "learning_rate": 7.82241807697717e-06, - "loss": 0.1234, + "learning_rate": 1.7841417939556584e-05, + "loss": 0.0695, "step": 130880 }, { "epoch": 6.11, - "learning_rate": 7.82194927570203e-06, - "loss": 0.1337, + "learning_rate": 1.7840949869720563e-05, + "loss": 0.3225, "step": 130885 }, { "epoch": 6.11, - "learning_rate": 7.82148047442689e-06, - "loss": 0.2381, + "learning_rate": 1.7840481799884543e-05, + "loss": 0.1917, "step": 130890 }, { "epoch": 6.11, - "learning_rate": 7.821011673151752e-06, - "loss": 0.007, + "learning_rate": 1.7840013730048523e-05, + "loss": 0.0253, "step": 130895 }, { "epoch": 6.11, - "learning_rate": 7.820542871876612e-06, - "loss": 0.0143, + "learning_rate": 1.7839545660212506e-05, + "loss": 0.0734, "step": 130900 }, { "epoch": 6.11, - "learning_rate": 7.820074070601472e-06, - "loss": 0.0284, + "learning_rate": 1.7839077590376486e-05, + "loss": 0.017, "step": 130905 }, { "epoch": 6.11, - "learning_rate": 7.819605269326334e-06, - "loss": 0.0413, + "learning_rate": 1.7838609520540466e-05, + "loss": 0.023, "step": 130910 }, { "epoch": 6.11, - "learning_rate": 7.819136468051194e-06, - "loss": 0.0812, + "learning_rate": 1.7838141450704446e-05, + "loss": 0.0537, "step": 130915 }, { "epoch": 6.11, - "learning_rate": 7.818667666776054e-06, - "loss": 0.1378, + "learning_rate": 1.7837673380868426e-05, + "loss": 0.0606, "step": 130920 }, { "epoch": 6.11, - "learning_rate": 7.818198865500914e-06, - "loss": 0.1419, + "learning_rate": 1.7837205311032405e-05, + "loss": 0.088, "step": 130925 }, { "epoch": 6.11, - "learning_rate": 7.817730064225775e-06, - "loss": 0.1387, + "learning_rate": 1.7836737241196385e-05, + "loss": 0.0536, "step": 130930 }, { "epoch": 6.11, - "learning_rate": 7.817261262950637e-06, - "loss": 0.1091, + "learning_rate": 1.783626917136037e-05, + "loss": 0.1312, "step": 130935 }, { "epoch": 6.11, - "learning_rate": 7.816792461675497e-06, - "loss": 0.2323, + "learning_rate": 1.7835801101524348e-05, + "loss": 0.1683, "step": 130940 }, { "epoch": 6.11, - "learning_rate": 7.816323660400357e-06, - "loss": 0.0113, + "learning_rate": 1.7835333031688328e-05, + "loss": 0.036, "step": 130945 }, { "epoch": 6.11, - "learning_rate": 7.815854859125217e-06, - "loss": 0.0292, + "learning_rate": 1.7834864961852308e-05, + "loss": 0.026, "step": 130950 }, { "epoch": 6.11, - "learning_rate": 7.815386057850078e-06, - "loss": 0.0098, + "learning_rate": 1.783439689201629e-05, + "loss": 0.0131, "step": 130955 }, { "epoch": 6.11, - "learning_rate": 7.814917256574938e-06, - "loss": 0.0118, + "learning_rate": 1.783392882218027e-05, + "loss": 0.0347, "step": 130960 }, { "epoch": 6.11, - "learning_rate": 7.814448455299798e-06, - "loss": 0.0175, + "learning_rate": 1.783346075234425e-05, + "loss": 0.0366, "step": 130965 }, { "epoch": 6.11, - "learning_rate": 7.81397965402466e-06, - "loss": 0.0477, + "learning_rate": 1.7832992682508234e-05, + "loss": 0.056, "step": 130970 }, { "epoch": 6.11, - "learning_rate": 7.81351085274952e-06, - "loss": 0.0887, + "learning_rate": 1.783252461267221e-05, + "loss": 0.0547, "step": 130975 }, { "epoch": 6.11, - "learning_rate": 7.813042051474381e-06, - "loss": 0.1707, + "learning_rate": 1.783205654283619e-05, + "loss": 0.0848, "step": 130980 }, { "epoch": 6.11, - "learning_rate": 7.812573250199241e-06, - "loss": 0.1811, + "learning_rate": 1.783158847300017e-05, + "loss": 0.3112, "step": 130985 }, { "epoch": 6.11, - "learning_rate": 7.812104448924101e-06, - "loss": 0.0866, + "learning_rate": 1.7831120403164153e-05, + "loss": 0.1115, "step": 130990 }, { "epoch": 6.11, - "learning_rate": 7.811635647648961e-06, - "loss": 0.0137, + "learning_rate": 1.7830652333328133e-05, + "loss": 0.0247, "step": 130995 }, { "epoch": 6.11, - "learning_rate": 7.811166846373823e-06, - "loss": 0.061, + "learning_rate": 1.7830184263492113e-05, + "loss": 0.0175, "step": 131000 }, { "epoch": 6.11, - "learning_rate": 7.810698045098683e-06, - "loss": 0.0456, + "learning_rate": 1.7829716193656093e-05, + "loss": 0.0286, "step": 131005 }, { "epoch": 6.11, - "learning_rate": 7.810229243823544e-06, - "loss": 0.0377, + "learning_rate": 1.7829248123820076e-05, + "loss": 0.037, "step": 131010 }, { "epoch": 6.11, - "learning_rate": 7.809760442548404e-06, - "loss": 0.0412, + "learning_rate": 1.7828780053984056e-05, + "loss": 0.0354, "step": 131015 }, { "epoch": 6.11, - "learning_rate": 7.809291641273264e-06, - "loss": 0.063, + "learning_rate": 1.7828311984148035e-05, + "loss": 0.0846, "step": 131020 }, { "epoch": 6.11, - "learning_rate": 7.808822839998126e-06, - "loss": 0.0597, + "learning_rate": 1.7827843914312015e-05, + "loss": 0.0852, "step": 131025 }, { "epoch": 6.11, - "learning_rate": 7.808354038722986e-06, - "loss": 0.1267, + "learning_rate": 1.7827375844476e-05, + "loss": 0.117, "step": 131030 }, { "epoch": 6.11, - "learning_rate": 7.807885237447846e-06, - "loss": 0.1777, + "learning_rate": 1.7826907774639978e-05, + "loss": 0.2067, "step": 131035 }, { "epoch": 6.11, - "learning_rate": 7.807416436172707e-06, - "loss": 0.1356, + "learning_rate": 1.7826439704803955e-05, + "loss": 0.0967, "step": 131040 }, { "epoch": 6.11, - "learning_rate": 7.806947634897567e-06, - "loss": 0.0034, + "learning_rate": 1.7825971634967938e-05, + "loss": 0.0011, "step": 131045 }, { "epoch": 6.11, - "learning_rate": 7.806478833622429e-06, - "loss": 0.0546, + "learning_rate": 1.7825503565131918e-05, + "loss": 0.1063, "step": 131050 }, { "epoch": 6.12, - "learning_rate": 7.806010032347289e-06, - "loss": 0.0265, + "learning_rate": 1.7825035495295898e-05, + "loss": 0.0311, "step": 131055 }, { "epoch": 6.12, - "learning_rate": 7.805541231072149e-06, - "loss": 0.0543, + "learning_rate": 1.7824567425459877e-05, + "loss": 0.0295, "step": 131060 }, { "epoch": 6.12, - "learning_rate": 7.805072429797009e-06, - "loss": 0.0605, + "learning_rate": 1.782409935562386e-05, + "loss": 0.0643, "step": 131065 }, { "epoch": 6.12, - "learning_rate": 7.80460362852187e-06, - "loss": 0.017, + "learning_rate": 1.782363128578784e-05, + "loss": 0.0528, "step": 131070 }, { "epoch": 6.12, - "learning_rate": 7.80413482724673e-06, - "loss": 0.0469, + "learning_rate": 1.782316321595182e-05, + "loss": 0.101, "step": 131075 }, { "epoch": 6.12, - "learning_rate": 7.803666025971592e-06, - "loss": 0.1492, + "learning_rate": 1.78226951461158e-05, + "loss": 0.1255, "step": 131080 }, { "epoch": 6.12, - "learning_rate": 7.803197224696452e-06, - "loss": 0.2571, + "learning_rate": 1.7822227076279783e-05, + "loss": 0.1132, "step": 131085 }, { "epoch": 6.12, - "learning_rate": 7.802728423421312e-06, - "loss": 0.2145, + "learning_rate": 1.7821759006443763e-05, + "loss": 0.1225, "step": 131090 }, { "epoch": 6.12, - "learning_rate": 7.802259622146173e-06, - "loss": 0.0136, + "learning_rate": 1.7821290936607743e-05, + "loss": 0.0063, "step": 131095 }, { "epoch": 6.12, - "learning_rate": 7.801790820871033e-06, - "loss": 0.0107, + "learning_rate": 1.7820822866771723e-05, + "loss": 0.0592, "step": 131100 }, { "epoch": 6.12, - "learning_rate": 7.801322019595893e-06, - "loss": 0.0288, + "learning_rate": 1.7820354796935702e-05, + "loss": 0.0263, "step": 131105 }, { "epoch": 6.12, - "learning_rate": 7.800853218320755e-06, - "loss": 0.0155, + "learning_rate": 1.7819886727099682e-05, + "loss": 0.0611, "step": 131110 }, { "epoch": 6.12, - "learning_rate": 7.800384417045615e-06, - "loss": 0.0333, + "learning_rate": 1.7819418657263662e-05, + "loss": 0.0454, "step": 131115 }, { "epoch": 6.12, - "learning_rate": 7.799915615770476e-06, - "loss": 0.1195, + "learning_rate": 1.7818950587427645e-05, + "loss": 0.0714, "step": 131120 }, { "epoch": 6.12, - "learning_rate": 7.799446814495336e-06, - "loss": 0.0778, + "learning_rate": 1.7818482517591625e-05, + "loss": 0.0416, "step": 131125 }, { "epoch": 6.12, - "learning_rate": 7.798978013220196e-06, - "loss": 0.1246, + "learning_rate": 1.7818014447755605e-05, + "loss": 0.122, "step": 131130 }, { "epoch": 6.12, - "learning_rate": 7.798509211945056e-06, - "loss": 0.2281, + "learning_rate": 1.7817546377919585e-05, + "loss": 0.1441, "step": 131135 }, { "epoch": 6.12, - "learning_rate": 7.798040410669918e-06, - "loss": 0.165, + "learning_rate": 1.7817078308083568e-05, + "loss": 0.1378, "step": 131140 }, { "epoch": 6.12, - "learning_rate": 7.797571609394778e-06, - "loss": 0.0037, + "learning_rate": 1.7816610238247548e-05, + "loss": 0.061, "step": 131145 }, { "epoch": 6.12, - "learning_rate": 7.79710280811964e-06, - "loss": 0.0134, + "learning_rate": 1.7816142168411528e-05, + "loss": 0.0482, "step": 131150 }, { "epoch": 6.12, - "learning_rate": 7.7966340068445e-06, - "loss": 0.022, + "learning_rate": 1.781567409857551e-05, + "loss": 0.0058, "step": 131155 }, { "epoch": 6.12, - "learning_rate": 7.79616520556936e-06, - "loss": 0.0125, + "learning_rate": 1.781520602873949e-05, + "loss": 0.0171, "step": 131160 }, { "epoch": 6.12, - "learning_rate": 7.795696404294221e-06, - "loss": 0.0289, + "learning_rate": 1.7814737958903467e-05, + "loss": 0.0426, "step": 131165 }, { "epoch": 6.12, - "learning_rate": 7.795227603019081e-06, - "loss": 0.053, + "learning_rate": 1.7814269889067447e-05, + "loss": 0.0488, "step": 131170 }, { "epoch": 6.12, - "learning_rate": 7.79475880174394e-06, - "loss": 0.0353, + "learning_rate": 1.781380181923143e-05, + "loss": 0.0373, "step": 131175 }, { "epoch": 6.12, - "learning_rate": 7.7942900004688e-06, - "loss": 0.0567, + "learning_rate": 1.781333374939541e-05, + "loss": 0.1057, "step": 131180 }, { "epoch": 6.12, - "learning_rate": 7.793821199193662e-06, - "loss": 0.12, + "learning_rate": 1.781286567955939e-05, + "loss": 0.1759, "step": 131185 }, { "epoch": 6.12, - "learning_rate": 7.793352397918524e-06, - "loss": 0.1534, + "learning_rate": 1.781239760972337e-05, + "loss": 0.1208, "step": 131190 }, { "epoch": 6.12, - "learning_rate": 7.792883596643384e-06, - "loss": 0.0183, + "learning_rate": 1.7811929539887353e-05, + "loss": 0.0297, "step": 131195 }, { "epoch": 6.12, - "learning_rate": 7.792414795368244e-06, - "loss": 0.0364, + "learning_rate": 1.7811461470051333e-05, + "loss": 0.0065, "step": 131200 }, { "epoch": 6.12, - "learning_rate": 7.791945994093104e-06, - "loss": 0.0419, + "learning_rate": 1.7810993400215312e-05, + "loss": 0.0704, "step": 131205 }, { "epoch": 6.12, - "learning_rate": 7.791477192817965e-06, - "loss": 0.0089, + "learning_rate": 1.7810525330379292e-05, + "loss": 0.0339, "step": 131210 }, { "epoch": 6.12, - "learning_rate": 7.791008391542825e-06, - "loss": 0.0311, + "learning_rate": 1.7810057260543275e-05, + "loss": 0.0302, "step": 131215 }, { "epoch": 6.12, - "learning_rate": 7.790539590267685e-06, - "loss": 0.0983, + "learning_rate": 1.7809589190707255e-05, + "loss": 0.0728, "step": 131220 }, { "epoch": 6.12, - "learning_rate": 7.790070788992547e-06, - "loss": 0.0693, + "learning_rate": 1.7809121120871235e-05, + "loss": 0.1105, "step": 131225 }, { "epoch": 6.12, - "learning_rate": 7.789601987717407e-06, - "loss": 0.0859, + "learning_rate": 1.7808653051035215e-05, + "loss": 0.1188, "step": 131230 }, { "epoch": 6.12, - "learning_rate": 7.789133186442269e-06, - "loss": 0.064, + "learning_rate": 1.7808184981199195e-05, + "loss": 0.1702, "step": 131235 }, { "epoch": 6.12, - "learning_rate": 7.788664385167128e-06, - "loss": 0.1775, + "learning_rate": 1.7807716911363175e-05, + "loss": 0.2249, "step": 131240 }, { "epoch": 6.12, - "learning_rate": 7.788195583891988e-06, - "loss": 0.0237, + "learning_rate": 1.7807248841527154e-05, + "loss": 0.0136, "step": 131245 }, { "epoch": 6.12, - "learning_rate": 7.787726782616848e-06, - "loss": 0.0238, + "learning_rate": 1.7806780771691138e-05, + "loss": 0.0451, "step": 131250 }, { "epoch": 6.12, - "learning_rate": 7.78725798134171e-06, - "loss": 0.0362, + "learning_rate": 1.7806312701855117e-05, + "loss": 0.0247, "step": 131255 }, { "epoch": 6.12, - "learning_rate": 7.78678918006657e-06, - "loss": 0.0536, + "learning_rate": 1.7805844632019097e-05, + "loss": 0.0601, "step": 131260 }, { "epoch": 6.13, - "learning_rate": 7.786320378791431e-06, - "loss": 0.0576, + "learning_rate": 1.7805376562183077e-05, + "loss": 0.0287, "step": 131265 }, { "epoch": 6.13, - "learning_rate": 7.785851577516291e-06, - "loss": 0.0852, + "learning_rate": 1.780490849234706e-05, + "loss": 0.0731, "step": 131270 }, { "epoch": 6.13, - "learning_rate": 7.785382776241151e-06, - "loss": 0.11, + "learning_rate": 1.780444042251104e-05, + "loss": 0.0793, "step": 131275 }, { "epoch": 6.13, - "learning_rate": 7.784913974966013e-06, - "loss": 0.0901, + "learning_rate": 1.780397235267502e-05, + "loss": 0.1529, "step": 131280 }, { "epoch": 6.13, - "learning_rate": 7.784445173690873e-06, - "loss": 0.213, + "learning_rate": 1.7803504282839003e-05, + "loss": 0.1588, "step": 131285 }, { "epoch": 6.13, - "learning_rate": 7.783976372415733e-06, - "loss": 0.1125, + "learning_rate": 1.780303621300298e-05, + "loss": 0.1392, "step": 131290 }, { "epoch": 6.13, - "learning_rate": 7.783507571140594e-06, - "loss": 0.0577, + "learning_rate": 1.780256814316696e-05, + "loss": 0.0073, "step": 131295 }, { "epoch": 6.13, - "learning_rate": 7.783038769865454e-06, - "loss": 0.0143, + "learning_rate": 1.780210007333094e-05, + "loss": 0.0268, "step": 131300 }, { "epoch": 6.13, - "learning_rate": 7.782569968590316e-06, - "loss": 0.0097, + "learning_rate": 1.7801632003494922e-05, + "loss": 0.015, "step": 131305 }, { "epoch": 6.13, - "learning_rate": 7.782101167315176e-06, - "loss": 0.0281, + "learning_rate": 1.7801163933658902e-05, + "loss": 0.0589, "step": 131310 }, { "epoch": 6.13, - "learning_rate": 7.781632366040036e-06, - "loss": 0.061, + "learning_rate": 1.7800695863822882e-05, + "loss": 0.0354, "step": 131315 }, { "epoch": 6.13, - "learning_rate": 7.781163564764896e-06, - "loss": 0.0352, + "learning_rate": 1.7800227793986862e-05, + "loss": 0.0583, "step": 131320 }, { "epoch": 6.13, - "learning_rate": 7.780694763489757e-06, - "loss": 0.0722, + "learning_rate": 1.7799759724150845e-05, + "loss": 0.0367, "step": 131325 }, { "epoch": 6.13, - "learning_rate": 7.780225962214617e-06, - "loss": 0.1799, + "learning_rate": 1.7799291654314825e-05, + "loss": 0.0911, "step": 131330 }, { "epoch": 6.13, - "learning_rate": 7.779757160939479e-06, - "loss": 0.2216, + "learning_rate": 1.7798823584478805e-05, + "loss": 0.1846, "step": 131335 }, { "epoch": 6.13, - "learning_rate": 7.779288359664339e-06, - "loss": 0.0907, + "learning_rate": 1.7798355514642788e-05, + "loss": 0.1074, "step": 131340 }, { "epoch": 6.13, - "learning_rate": 7.778819558389199e-06, - "loss": 0.0128, + "learning_rate": 1.7797887444806768e-05, + "loss": 0.0041, "step": 131345 }, { "epoch": 6.13, - "learning_rate": 7.77835075711406e-06, - "loss": 0.0121, + "learning_rate": 1.7797419374970747e-05, + "loss": 0.0237, "step": 131350 }, { "epoch": 6.13, - "learning_rate": 7.77788195583892e-06, - "loss": 0.0367, + "learning_rate": 1.7796951305134724e-05, + "loss": 0.0127, "step": 131355 }, { "epoch": 6.13, - "learning_rate": 7.77741315456378e-06, - "loss": 0.0531, + "learning_rate": 1.7796483235298707e-05, + "loss": 0.0689, "step": 131360 }, { "epoch": 6.13, - "learning_rate": 7.776944353288642e-06, - "loss": 0.0462, + "learning_rate": 1.7796015165462687e-05, + "loss": 0.043, "step": 131365 }, { "epoch": 6.13, - "learning_rate": 7.776475552013502e-06, - "loss": 0.1162, + "learning_rate": 1.7795547095626667e-05, + "loss": 0.0626, "step": 131370 }, { "epoch": 6.13, - "learning_rate": 7.776006750738364e-06, - "loss": 0.1355, + "learning_rate": 1.7795079025790647e-05, + "loss": 0.0596, "step": 131375 }, { "epoch": 6.13, - "learning_rate": 7.775537949463224e-06, - "loss": 0.0688, + "learning_rate": 1.779461095595463e-05, + "loss": 0.143, "step": 131380 }, { "epoch": 6.13, - "learning_rate": 7.775069148188083e-06, - "loss": 0.2452, + "learning_rate": 1.779414288611861e-05, + "loss": 0.1777, "step": 131385 }, { "epoch": 6.13, - "learning_rate": 7.774600346912943e-06, - "loss": 0.1479, + "learning_rate": 1.779367481628259e-05, + "loss": 0.1251, "step": 131390 }, { "epoch": 6.13, - "learning_rate": 7.774131545637805e-06, - "loss": 0.0446, + "learning_rate": 1.779320674644657e-05, + "loss": 0.0301, "step": 131395 }, { "epoch": 6.13, - "learning_rate": 7.773662744362665e-06, - "loss": 0.0156, + "learning_rate": 1.7792738676610552e-05, + "loss": 0.0387, "step": 131400 }, { "epoch": 6.13, - "learning_rate": 7.773193943087527e-06, - "loss": 0.0097, + "learning_rate": 1.7792270606774532e-05, + "loss": 0.0478, "step": 131405 }, { "epoch": 6.13, - "learning_rate": 7.772725141812387e-06, - "loss": 0.0448, + "learning_rate": 1.7791802536938512e-05, + "loss": 0.0274, "step": 131410 }, { "epoch": 6.13, - "learning_rate": 7.772256340537246e-06, - "loss": 0.0453, + "learning_rate": 1.7791334467102495e-05, + "loss": 0.0355, "step": 131415 }, { "epoch": 6.13, - "learning_rate": 7.771787539262108e-06, - "loss": 0.0914, + "learning_rate": 1.779086639726647e-05, + "loss": 0.0661, "step": 131420 }, { "epoch": 6.13, - "learning_rate": 7.771318737986968e-06, - "loss": 0.04, + "learning_rate": 1.779039832743045e-05, + "loss": 0.0863, "step": 131425 }, { "epoch": 6.13, - "learning_rate": 7.770849936711828e-06, - "loss": 0.1168, + "learning_rate": 1.778993025759443e-05, + "loss": 0.1057, "step": 131430 }, { "epoch": 6.13, - "learning_rate": 7.770381135436688e-06, - "loss": 0.2018, + "learning_rate": 1.7789462187758415e-05, + "loss": 0.2531, "step": 131435 }, { "epoch": 6.13, - "learning_rate": 7.76991233416155e-06, - "loss": 0.1739, + "learning_rate": 1.7788994117922394e-05, + "loss": 0.1536, "step": 131440 }, { "epoch": 6.13, - "learning_rate": 7.769443532886411e-06, - "loss": 0.0382, + "learning_rate": 1.7788526048086374e-05, + "loss": 0.0008, "step": 131445 }, { "epoch": 6.13, - "learning_rate": 7.768974731611271e-06, - "loss": 0.01, + "learning_rate": 1.7788057978250354e-05, + "loss": 0.0075, "step": 131450 }, { "epoch": 6.13, - "learning_rate": 7.768505930336131e-06, - "loss": 0.012, + "learning_rate": 1.7787589908414337e-05, + "loss": 0.034, "step": 131455 }, { "epoch": 6.13, - "learning_rate": 7.768037129060991e-06, - "loss": 0.05, + "learning_rate": 1.7787121838578317e-05, + "loss": 0.0354, "step": 131460 }, { "epoch": 6.13, - "learning_rate": 7.767568327785853e-06, - "loss": 0.0595, + "learning_rate": 1.7786653768742297e-05, + "loss": 0.0421, "step": 131465 }, { "epoch": 6.13, - "learning_rate": 7.767099526510712e-06, - "loss": 0.1012, + "learning_rate": 1.778618569890628e-05, + "loss": 0.049, "step": 131470 }, { "epoch": 6.13, - "learning_rate": 7.766630725235572e-06, - "loss": 0.0497, + "learning_rate": 1.778571762907026e-05, + "loss": 0.0422, "step": 131475 }, { "epoch": 6.14, - "learning_rate": 7.766161923960434e-06, - "loss": 0.0834, + "learning_rate": 1.7785249559234236e-05, + "loss": 0.0979, "step": 131480 }, { "epoch": 6.14, - "learning_rate": 7.765693122685294e-06, - "loss": 0.2924, + "learning_rate": 1.7784781489398216e-05, + "loss": 0.0965, "step": 131485 }, { "epoch": 6.14, - "learning_rate": 7.765224321410156e-06, - "loss": 0.175, + "learning_rate": 1.77843134195622e-05, + "loss": 0.1399, "step": 131490 }, { "epoch": 6.14, - "learning_rate": 7.764755520135016e-06, - "loss": 0.0238, + "learning_rate": 1.778384534972618e-05, + "loss": 0.0047, "step": 131495 }, { "epoch": 6.14, - "learning_rate": 7.764286718859875e-06, - "loss": 0.009, + "learning_rate": 1.778337727989016e-05, + "loss": 0.0219, "step": 131500 }, { "epoch": 6.14, - "learning_rate": 7.763817917584735e-06, - "loss": 0.0228, + "learning_rate": 1.778290921005414e-05, + "loss": 0.0245, "step": 131505 }, { "epoch": 6.14, - "learning_rate": 7.763349116309597e-06, - "loss": 0.0341, + "learning_rate": 1.7782441140218122e-05, + "loss": 0.0223, "step": 131510 }, { "epoch": 6.14, - "learning_rate": 7.762880315034459e-06, - "loss": 0.0234, + "learning_rate": 1.7781973070382102e-05, + "loss": 0.034, "step": 131515 }, { "epoch": 6.14, - "learning_rate": 7.762411513759319e-06, - "loss": 0.0485, + "learning_rate": 1.778150500054608e-05, + "loss": 0.0413, "step": 131520 }, { "epoch": 6.14, - "learning_rate": 7.761942712484179e-06, - "loss": 0.1032, + "learning_rate": 1.7781036930710065e-05, + "loss": 0.0608, "step": 131525 }, { "epoch": 6.14, - "learning_rate": 7.761473911209038e-06, - "loss": 0.0934, + "learning_rate": 1.7780568860874045e-05, + "loss": 0.117, "step": 131530 }, { "epoch": 6.14, - "learning_rate": 7.7610051099339e-06, - "loss": 0.2171, + "learning_rate": 1.7780100791038024e-05, + "loss": 0.2069, "step": 131535 }, { "epoch": 6.14, - "learning_rate": 7.76053630865876e-06, - "loss": 0.1753, + "learning_rate": 1.7779632721202004e-05, + "loss": 0.1882, "step": 131540 }, { "epoch": 6.14, - "learning_rate": 7.76006750738362e-06, - "loss": 0.0433, + "learning_rate": 1.7779164651365984e-05, + "loss": 0.0183, "step": 131545 }, { "epoch": 6.14, - "learning_rate": 7.759598706108482e-06, - "loss": 0.0085, + "learning_rate": 1.7778696581529964e-05, + "loss": 0.0208, "step": 131550 }, { "epoch": 6.14, - "learning_rate": 7.759129904833342e-06, - "loss": 0.0301, + "learning_rate": 1.7778228511693944e-05, + "loss": 0.0086, "step": 131555 }, { "epoch": 6.14, - "learning_rate": 7.758661103558203e-06, - "loss": 0.0223, + "learning_rate": 1.7777760441857923e-05, + "loss": 0.0347, "step": 131560 }, { "epoch": 6.14, - "learning_rate": 7.758192302283063e-06, - "loss": 0.0813, + "learning_rate": 1.7777292372021907e-05, + "loss": 0.0349, "step": 131565 }, { "epoch": 6.14, - "learning_rate": 7.757723501007923e-06, - "loss": 0.0423, + "learning_rate": 1.7776824302185887e-05, + "loss": 0.0576, "step": 131570 }, { "epoch": 6.14, - "learning_rate": 7.757254699732783e-06, - "loss": 0.1051, + "learning_rate": 1.7776356232349866e-05, + "loss": 0.0671, "step": 131575 }, { "epoch": 6.14, - "learning_rate": 7.756785898457645e-06, - "loss": 0.1165, + "learning_rate": 1.7775888162513846e-05, + "loss": 0.0546, "step": 131580 }, { "epoch": 6.14, - "learning_rate": 7.756317097182505e-06, - "loss": 0.2156, + "learning_rate": 1.777542009267783e-05, + "loss": 0.1035, "step": 131585 }, { "epoch": 6.14, - "learning_rate": 7.755848295907366e-06, - "loss": 0.2347, + "learning_rate": 1.777495202284181e-05, + "loss": 0.1175, "step": 131590 }, { "epoch": 6.14, - "learning_rate": 7.755379494632226e-06, - "loss": 0.0294, + "learning_rate": 1.777448395300579e-05, + "loss": 0.0416, "step": 131595 }, { "epoch": 6.14, - "learning_rate": 7.754910693357086e-06, - "loss": 0.0274, + "learning_rate": 1.7774015883169772e-05, + "loss": 0.0403, "step": 131600 }, { "epoch": 6.14, - "learning_rate": 7.754441892081948e-06, - "loss": 0.0249, + "learning_rate": 1.7773547813333752e-05, + "loss": 0.0258, "step": 131605 }, { "epoch": 6.14, - "learning_rate": 7.753973090806808e-06, - "loss": 0.1807, + "learning_rate": 1.777307974349773e-05, + "loss": 0.0202, "step": 131610 }, { "epoch": 6.14, - "learning_rate": 7.753504289531668e-06, - "loss": 0.0922, + "learning_rate": 1.7772611673661708e-05, + "loss": 0.0473, "step": 131615 }, { "epoch": 6.14, - "learning_rate": 7.753035488256529e-06, - "loss": 0.0587, + "learning_rate": 1.777214360382569e-05, + "loss": 0.0144, "step": 131620 }, { "epoch": 6.14, - "learning_rate": 7.752566686981389e-06, - "loss": 0.0732, + "learning_rate": 1.777167553398967e-05, + "loss": 0.0644, "step": 131625 }, { "epoch": 6.14, - "learning_rate": 7.75209788570625e-06, - "loss": 0.1231, + "learning_rate": 1.777120746415365e-05, + "loss": 0.101, "step": 131630 }, { "epoch": 6.14, - "learning_rate": 7.75162908443111e-06, - "loss": 0.2604, + "learning_rate": 1.777073939431763e-05, + "loss": 0.175, "step": 131635 }, { "epoch": 6.14, - "learning_rate": 7.75116028315597e-06, - "loss": 0.1333, + "learning_rate": 1.7770271324481614e-05, + "loss": 0.1112, "step": 131640 }, { "epoch": 6.14, - "learning_rate": 7.75069148188083e-06, - "loss": 0.0279, + "learning_rate": 1.7769803254645594e-05, + "loss": 0.0329, "step": 131645 }, { "epoch": 6.14, - "learning_rate": 7.750222680605692e-06, - "loss": 0.0032, + "learning_rate": 1.7769335184809574e-05, + "loss": 0.0608, "step": 131650 }, { "epoch": 6.14, - "learning_rate": 7.749753879330552e-06, - "loss": 0.0099, + "learning_rate": 1.7768867114973557e-05, + "loss": 0.0702, "step": 131655 }, { "epoch": 6.14, - "learning_rate": 7.749285078055414e-06, - "loss": 0.0326, + "learning_rate": 1.7768399045137537e-05, + "loss": 0.0278, "step": 131660 }, { "epoch": 6.14, - "learning_rate": 7.748816276780274e-06, - "loss": 0.0361, + "learning_rate": 1.7767930975301517e-05, + "loss": 0.0432, "step": 131665 }, { "epoch": 6.14, - "learning_rate": 7.748347475505134e-06, - "loss": 0.0854, + "learning_rate": 1.7767462905465493e-05, + "loss": 0.0629, "step": 131670 }, { "epoch": 6.14, - "learning_rate": 7.747878674229995e-06, - "loss": 0.0972, + "learning_rate": 1.7766994835629476e-05, + "loss": 0.0917, "step": 131675 }, { "epoch": 6.14, - "learning_rate": 7.747409872954855e-06, - "loss": 0.149, + "learning_rate": 1.7766526765793456e-05, + "loss": 0.1218, "step": 131680 }, { "epoch": 6.14, - "learning_rate": 7.746941071679715e-06, - "loss": 0.2045, + "learning_rate": 1.7766058695957436e-05, + "loss": 0.1894, "step": 131685 }, { "epoch": 6.14, - "learning_rate": 7.746472270404577e-06, - "loss": 0.1092, + "learning_rate": 1.7765590626121416e-05, + "loss": 0.1737, "step": 131690 }, { "epoch": 6.15, - "learning_rate": 7.746003469129437e-06, - "loss": 0.0187, + "learning_rate": 1.77651225562854e-05, + "loss": 0.0195, "step": 131695 }, { "epoch": 6.15, - "learning_rate": 7.745534667854298e-06, - "loss": 0.0223, + "learning_rate": 1.776465448644938e-05, + "loss": 0.0309, "step": 131700 }, { "epoch": 6.15, - "learning_rate": 7.745065866579158e-06, - "loss": 0.0163, + "learning_rate": 1.776418641661336e-05, + "loss": 0.0172, "step": 131705 }, { "epoch": 6.15, - "learning_rate": 7.744597065304018e-06, - "loss": 0.0584, + "learning_rate": 1.7763718346777342e-05, + "loss": 0.0438, "step": 131710 }, { "epoch": 6.15, - "learning_rate": 7.744128264028878e-06, - "loss": 0.0746, + "learning_rate": 1.776325027694132e-05, + "loss": 0.0087, "step": 131715 }, { "epoch": 6.15, - "learning_rate": 7.74365946275374e-06, - "loss": 0.0548, + "learning_rate": 1.77627822071053e-05, + "loss": 0.0572, "step": 131720 }, { "epoch": 6.15, - "learning_rate": 7.7431906614786e-06, - "loss": 0.0433, + "learning_rate": 1.776231413726928e-05, + "loss": 0.0263, "step": 131725 }, { "epoch": 6.15, - "learning_rate": 7.742721860203461e-06, - "loss": 0.0411, + "learning_rate": 1.7761846067433264e-05, + "loss": 0.0913, "step": 131730 }, { "epoch": 6.15, - "learning_rate": 7.742253058928321e-06, - "loss": 0.1781, + "learning_rate": 1.776137799759724e-05, + "loss": 0.137, "step": 131735 }, { "epoch": 6.15, - "learning_rate": 7.741784257653181e-06, - "loss": 0.1264, + "learning_rate": 1.776090992776122e-05, + "loss": 0.2443, "step": 131740 }, { "epoch": 6.15, - "learning_rate": 7.741315456378043e-06, - "loss": 0.0723, + "learning_rate": 1.77604418579252e-05, + "loss": 0.0321, "step": 131745 }, { "epoch": 6.15, - "learning_rate": 7.740846655102903e-06, - "loss": 0.0351, + "learning_rate": 1.7759973788089184e-05, + "loss": 0.0261, "step": 131750 }, { "epoch": 6.15, - "learning_rate": 7.740377853827763e-06, - "loss": 0.028, + "learning_rate": 1.7759505718253163e-05, + "loss": 0.0289, "step": 131755 }, { "epoch": 6.15, - "learning_rate": 7.739909052552623e-06, - "loss": 0.0641, + "learning_rate": 1.7759037648417143e-05, + "loss": 0.0365, "step": 131760 }, { "epoch": 6.15, - "learning_rate": 7.739440251277484e-06, - "loss": 0.0071, + "learning_rate": 1.7758569578581127e-05, + "loss": 0.043, "step": 131765 }, { "epoch": 6.15, - "learning_rate": 7.738971450002346e-06, - "loss": 0.1151, + "learning_rate": 1.7758101508745106e-05, + "loss": 0.1274, "step": 131770 }, { "epoch": 6.15, - "learning_rate": 7.738502648727206e-06, - "loss": 0.0464, + "learning_rate": 1.7757633438909086e-05, + "loss": 0.0849, "step": 131775 }, { "epoch": 6.15, - "learning_rate": 7.738033847452066e-06, - "loss": 0.0863, + "learning_rate": 1.7757165369073066e-05, + "loss": 0.1915, "step": 131780 }, { "epoch": 6.15, - "learning_rate": 7.737565046176926e-06, - "loss": 0.1714, + "learning_rate": 1.775669729923705e-05, + "loss": 0.1717, "step": 131785 }, { "epoch": 6.15, - "learning_rate": 7.737096244901787e-06, - "loss": 0.2527, + "learning_rate": 1.775622922940103e-05, + "loss": 0.2031, "step": 131790 }, { "epoch": 6.15, - "learning_rate": 7.736627443626647e-06, - "loss": 0.019, + "learning_rate": 1.775576115956501e-05, + "loss": 0.0125, "step": 131795 }, { "epoch": 6.15, - "learning_rate": 7.736158642351507e-06, - "loss": 0.0331, + "learning_rate": 1.7755293089728985e-05, + "loss": 0.0423, "step": 131800 }, { "epoch": 6.15, - "learning_rate": 7.735689841076369e-06, - "loss": 0.02, + "learning_rate": 1.775482501989297e-05, + "loss": 0.0137, "step": 131805 }, { "epoch": 6.15, - "learning_rate": 7.735221039801229e-06, - "loss": 0.0317, + "learning_rate": 1.7754356950056948e-05, + "loss": 0.0532, "step": 131810 }, { "epoch": 6.15, - "learning_rate": 7.73475223852609e-06, - "loss": 0.0481, + "learning_rate": 1.7753888880220928e-05, + "loss": 0.0117, "step": 131815 }, { "epoch": 6.15, - "learning_rate": 7.73428343725095e-06, - "loss": 0.0728, + "learning_rate": 1.7753420810384908e-05, + "loss": 0.0176, "step": 131820 }, { "epoch": 6.15, - "learning_rate": 7.73381463597581e-06, - "loss": 0.0657, + "learning_rate": 1.775295274054889e-05, + "loss": 0.0731, "step": 131825 }, { "epoch": 6.15, - "learning_rate": 7.73334583470067e-06, - "loss": 0.0881, + "learning_rate": 1.775248467071287e-05, + "loss": 0.0713, "step": 131830 }, { "epoch": 6.15, - "learning_rate": 7.732877033425532e-06, - "loss": 0.1716, + "learning_rate": 1.775201660087685e-05, + "loss": 0.1553, "step": 131835 }, { "epoch": 6.15, - "learning_rate": 7.732408232150392e-06, - "loss": 0.1607, + "learning_rate": 1.7751548531040834e-05, + "loss": 0.1297, "step": 131840 }, { "epoch": 6.15, - "learning_rate": 7.731939430875253e-06, - "loss": 0.0071, + "learning_rate": 1.7751080461204814e-05, + "loss": 0.0332, "step": 131845 }, { "epoch": 6.15, - "learning_rate": 7.731470629600113e-06, - "loss": 0.0201, + "learning_rate": 1.7750612391368794e-05, + "loss": 0.0295, "step": 131850 }, { "epoch": 6.15, - "learning_rate": 7.731001828324973e-06, - "loss": 0.0196, + "learning_rate": 1.7750144321532773e-05, + "loss": 0.0393, "step": 131855 }, { "epoch": 6.15, - "learning_rate": 7.730533027049835e-06, - "loss": 0.0886, + "learning_rate": 1.7749676251696753e-05, + "loss": 0.117, "step": 131860 }, { "epoch": 6.15, - "learning_rate": 7.730064225774695e-06, - "loss": 0.0527, + "learning_rate": 1.7749208181860733e-05, + "loss": 0.0479, "step": 131865 }, { "epoch": 6.15, - "learning_rate": 7.729595424499555e-06, - "loss": 0.0978, + "learning_rate": 1.7748740112024713e-05, + "loss": 0.0592, "step": 131870 }, { "epoch": 6.15, - "learning_rate": 7.729126623224416e-06, - "loss": 0.1202, + "learning_rate": 1.7748272042188693e-05, + "loss": 0.1511, "step": 131875 }, { "epoch": 6.15, - "learning_rate": 7.728657821949276e-06, - "loss": 0.1637, + "learning_rate": 1.7747803972352676e-05, + "loss": 0.1131, "step": 131880 }, { "epoch": 6.15, - "learning_rate": 7.728189020674138e-06, - "loss": 0.2558, + "learning_rate": 1.7747335902516656e-05, + "loss": 0.1963, "step": 131885 }, { "epoch": 6.15, - "learning_rate": 7.727720219398998e-06, - "loss": 0.2253, + "learning_rate": 1.7746867832680636e-05, + "loss": 0.0989, "step": 131890 }, { "epoch": 6.15, - "learning_rate": 7.727251418123858e-06, - "loss": 0.0084, + "learning_rate": 1.774639976284462e-05, + "loss": 0.0527, "step": 131895 }, { "epoch": 6.15, - "learning_rate": 7.726782616848718e-06, - "loss": 0.0162, + "learning_rate": 1.77459316930086e-05, + "loss": 0.0227, "step": 131900 }, { "epoch": 6.15, - "learning_rate": 7.72631381557358e-06, - "loss": 0.0214, + "learning_rate": 1.774546362317258e-05, + "loss": 0.0776, "step": 131905 }, { "epoch": 6.16, - "learning_rate": 7.72584501429844e-06, - "loss": 0.066, + "learning_rate": 1.7744995553336558e-05, + "loss": 0.0769, "step": 131910 }, { "epoch": 6.16, - "learning_rate": 7.7253762130233e-06, - "loss": 0.0112, + "learning_rate": 1.774452748350054e-05, + "loss": 0.051, "step": 131915 }, { "epoch": 6.16, - "learning_rate": 7.72490741174816e-06, - "loss": 0.0937, + "learning_rate": 1.774405941366452e-05, + "loss": 0.0342, "step": 131920 }, { "epoch": 6.16, - "learning_rate": 7.72443861047302e-06, - "loss": 0.1067, + "learning_rate": 1.7743591343828498e-05, + "loss": 0.0734, "step": 131925 }, { "epoch": 6.16, - "learning_rate": 7.723969809197882e-06, - "loss": 0.2206, + "learning_rate": 1.7743123273992477e-05, + "loss": 0.0517, "step": 131930 }, { "epoch": 6.16, - "learning_rate": 7.723501007922742e-06, - "loss": 0.1921, + "learning_rate": 1.774265520415646e-05, + "loss": 0.155, "step": 131935 }, { "epoch": 6.16, - "learning_rate": 7.723032206647602e-06, - "loss": 0.1565, + "learning_rate": 1.774218713432044e-05, + "loss": 0.1867, "step": 131940 }, { "epoch": 6.16, - "learning_rate": 7.722563405372464e-06, - "loss": 0.0228, + "learning_rate": 1.774171906448442e-05, + "loss": 0.0065, "step": 131945 }, { "epoch": 6.16, - "learning_rate": 7.722094604097324e-06, - "loss": 0.0034, + "learning_rate": 1.7741250994648403e-05, + "loss": 0.0103, "step": 131950 }, { "epoch": 6.16, - "learning_rate": 7.721625802822185e-06, - "loss": 0.0338, + "learning_rate": 1.7740782924812383e-05, + "loss": 0.0145, "step": 131955 }, { "epoch": 6.16, - "learning_rate": 7.721157001547045e-06, - "loss": 0.0225, + "learning_rate": 1.7740314854976363e-05, + "loss": 0.0442, "step": 131960 }, { "epoch": 6.16, - "learning_rate": 7.720688200271905e-06, - "loss": 0.0536, + "learning_rate": 1.7739846785140343e-05, + "loss": 0.0508, "step": 131965 }, { "epoch": 6.16, - "learning_rate": 7.720219398996765e-06, - "loss": 0.1118, + "learning_rate": 1.7739378715304326e-05, + "loss": 0.0549, "step": 131970 }, { "epoch": 6.16, - "learning_rate": 7.719750597721627e-06, - "loss": 0.0971, + "learning_rate": 1.7738910645468306e-05, + "loss": 0.0701, "step": 131975 }, { "epoch": 6.16, - "learning_rate": 7.719281796446487e-06, - "loss": 0.1512, + "learning_rate": 1.7738442575632286e-05, + "loss": 0.0725, "step": 131980 }, { "epoch": 6.16, - "learning_rate": 7.718812995171348e-06, - "loss": 0.2908, + "learning_rate": 1.7737974505796262e-05, + "loss": 0.2387, "step": 131985 }, { "epoch": 6.16, - "learning_rate": 7.718344193896208e-06, - "loss": 0.1658, + "learning_rate": 1.7737506435960245e-05, + "loss": 0.1555, "step": 131990 }, { "epoch": 6.16, - "learning_rate": 7.717875392621068e-06, - "loss": 0.0628, + "learning_rate": 1.7737038366124225e-05, + "loss": 0.0226, "step": 131995 }, { "epoch": 6.16, - "learning_rate": 7.71740659134593e-06, - "loss": 0.016, + "learning_rate": 1.7736570296288205e-05, + "loss": 0.0271, "step": 132000 }, { "epoch": 6.16, - "learning_rate": 7.71693779007079e-06, - "loss": 0.0242, + "learning_rate": 1.7736102226452185e-05, + "loss": 0.021, "step": 132005 }, { "epoch": 6.16, - "learning_rate": 7.71646898879565e-06, - "loss": 0.0592, + "learning_rate": 1.7735634156616168e-05, + "loss": 0.0397, "step": 132010 }, { "epoch": 6.16, - "learning_rate": 7.71600018752051e-06, - "loss": 0.0792, + "learning_rate": 1.7735166086780148e-05, + "loss": 0.0651, "step": 132015 }, { "epoch": 6.16, - "learning_rate": 7.715531386245371e-06, - "loss": 0.0965, + "learning_rate": 1.7734698016944128e-05, + "loss": 0.081, "step": 132020 }, { "epoch": 6.16, - "learning_rate": 7.715062584970233e-06, - "loss": 0.09, + "learning_rate": 1.773422994710811e-05, + "loss": 0.0395, "step": 132025 }, { "epoch": 6.16, - "learning_rate": 7.714593783695093e-06, - "loss": 0.0736, + "learning_rate": 1.773376187727209e-05, + "loss": 0.095, "step": 132030 }, { "epoch": 6.16, - "learning_rate": 7.714124982419953e-06, - "loss": 0.2015, + "learning_rate": 1.773329380743607e-05, + "loss": 0.2262, "step": 132035 }, { "epoch": 6.16, - "learning_rate": 7.713656181144813e-06, - "loss": 0.1421, + "learning_rate": 1.773282573760005e-05, + "loss": 0.2525, "step": 132040 }, { "epoch": 6.16, - "learning_rate": 7.713187379869674e-06, - "loss": 0.0028, + "learning_rate": 1.7732357667764034e-05, + "loss": 0.0205, "step": 132045 }, { "epoch": 6.16, - "learning_rate": 7.712718578594534e-06, - "loss": 0.0345, + "learning_rate": 1.773188959792801e-05, + "loss": 0.0192, "step": 132050 }, { "epoch": 6.16, - "learning_rate": 7.712249777319394e-06, - "loss": 0.0391, + "learning_rate": 1.773142152809199e-05, + "loss": 0.0367, "step": 132055 }, { "epoch": 6.16, - "learning_rate": 7.711780976044256e-06, - "loss": 0.1021, + "learning_rate": 1.773095345825597e-05, + "loss": 0.0186, "step": 132060 }, { "epoch": 6.16, - "learning_rate": 7.711312174769116e-06, - "loss": 0.0237, + "learning_rate": 1.7730485388419953e-05, + "loss": 0.1018, "step": 132065 }, { "epoch": 6.16, - "learning_rate": 7.710843373493977e-06, - "loss": 0.0784, + "learning_rate": 1.7730017318583933e-05, + "loss": 0.0507, "step": 132070 }, { "epoch": 6.16, - "learning_rate": 7.710374572218837e-06, - "loss": 0.0809, + "learning_rate": 1.7729549248747912e-05, + "loss": 0.1903, "step": 132075 }, { "epoch": 6.16, - "learning_rate": 7.709905770943697e-06, - "loss": 0.0514, + "learning_rate": 1.7729081178911896e-05, + "loss": 0.0741, "step": 132080 }, { "epoch": 6.16, - "learning_rate": 7.709436969668557e-06, - "loss": 0.2114, + "learning_rate": 1.7728613109075876e-05, + "loss": 0.1574, "step": 132085 }, { "epoch": 6.16, - "learning_rate": 7.708968168393419e-06, - "loss": 0.0977, + "learning_rate": 1.7728145039239855e-05, + "loss": 0.0733, "step": 132090 }, { "epoch": 6.16, - "learning_rate": 7.708499367118279e-06, - "loss": 0.0148, + "learning_rate": 1.7727676969403835e-05, + "loss": 0.025, "step": 132095 }, { "epoch": 6.16, - "learning_rate": 7.70803056584314e-06, - "loss": 0.0206, + "learning_rate": 1.772720889956782e-05, + "loss": 0.019, "step": 132100 }, { "epoch": 6.16, - "learning_rate": 7.707561764568e-06, - "loss": 0.0108, + "learning_rate": 1.7726740829731798e-05, + "loss": 0.0169, "step": 132105 }, { "epoch": 6.16, - "learning_rate": 7.70709296329286e-06, - "loss": 0.043, + "learning_rate": 1.7726272759895778e-05, + "loss": 0.0603, "step": 132110 }, { "epoch": 6.16, - "learning_rate": 7.706624162017722e-06, - "loss": 0.0202, + "learning_rate": 1.7725804690059754e-05, + "loss": 0.069, "step": 132115 }, { "epoch": 6.16, - "learning_rate": 7.706155360742582e-06, - "loss": 0.1238, + "learning_rate": 1.7725336620223738e-05, + "loss": 0.1139, "step": 132120 }, { "epoch": 6.17, - "learning_rate": 7.705686559467442e-06, - "loss": 0.0708, + "learning_rate": 1.7724868550387717e-05, + "loss": 0.1095, "step": 132125 }, { "epoch": 6.17, - "learning_rate": 7.705217758192303e-06, - "loss": 0.1646, + "learning_rate": 1.7724400480551697e-05, + "loss": 0.1369, "step": 132130 }, { "epoch": 6.17, - "learning_rate": 7.704748956917163e-06, - "loss": 0.2095, + "learning_rate": 1.772393241071568e-05, + "loss": 0.0984, "step": 132135 }, { "epoch": 6.17, - "learning_rate": 7.704280155642025e-06, - "loss": 0.1398, + "learning_rate": 1.772346434087966e-05, + "loss": 0.1524, "step": 132140 }, { "epoch": 6.17, - "learning_rate": 7.703811354366885e-06, - "loss": 0.0411, + "learning_rate": 1.772299627104364e-05, + "loss": 0.0229, "step": 132145 }, { "epoch": 6.17, - "learning_rate": 7.703342553091745e-06, - "loss": 0.0214, + "learning_rate": 1.772252820120762e-05, + "loss": 0.0059, "step": 132150 }, { "epoch": 6.17, - "learning_rate": 7.702873751816605e-06, - "loss": 0.0398, + "learning_rate": 1.7722060131371603e-05, + "loss": 0.0255, "step": 132155 }, { "epoch": 6.17, - "learning_rate": 7.702404950541466e-06, - "loss": 0.0327, + "learning_rate": 1.7721592061535583e-05, + "loss": 0.0308, "step": 132160 }, { "epoch": 6.17, - "learning_rate": 7.701936149266326e-06, - "loss": 0.0624, + "learning_rate": 1.7721123991699563e-05, + "loss": 0.0666, "step": 132165 }, { "epoch": 6.17, - "learning_rate": 7.701467347991188e-06, - "loss": 0.0665, + "learning_rate": 1.7720655921863543e-05, + "loss": 0.0304, "step": 132170 }, { "epoch": 6.17, - "learning_rate": 7.700998546716048e-06, - "loss": 0.0824, + "learning_rate": 1.7720187852027522e-05, + "loss": 0.0423, "step": 132175 }, { "epoch": 6.17, - "learning_rate": 7.700529745440908e-06, - "loss": 0.0805, + "learning_rate": 1.7719719782191502e-05, + "loss": 0.1913, "step": 132180 }, { "epoch": 6.17, - "learning_rate": 7.70006094416577e-06, - "loss": 0.1826, + "learning_rate": 1.7719251712355482e-05, + "loss": 0.1793, "step": 132185 }, { "epoch": 6.17, - "learning_rate": 7.69959214289063e-06, - "loss": 0.2215, + "learning_rate": 1.7718783642519462e-05, + "loss": 0.1273, "step": 132190 }, { "epoch": 6.17, - "learning_rate": 7.69912334161549e-06, - "loss": 0.0024, + "learning_rate": 1.7718315572683445e-05, + "loss": 0.0019, "step": 132195 }, { "epoch": 6.17, - "learning_rate": 7.698654540340351e-06, - "loss": 0.0276, + "learning_rate": 1.7717847502847425e-05, + "loss": 0.0007, "step": 132200 }, { "epoch": 6.17, - "learning_rate": 7.69818573906521e-06, - "loss": 0.0334, + "learning_rate": 1.7717379433011405e-05, + "loss": 0.0456, "step": 132205 }, { "epoch": 6.17, - "learning_rate": 7.697716937790072e-06, - "loss": 0.0509, + "learning_rate": 1.7716911363175388e-05, + "loss": 0.0324, "step": 132210 }, { "epoch": 6.17, - "learning_rate": 7.697248136514932e-06, - "loss": 0.0919, + "learning_rate": 1.7716443293339368e-05, + "loss": 0.0558, "step": 132215 }, { "epoch": 6.17, - "learning_rate": 7.696779335239792e-06, - "loss": 0.0486, + "learning_rate": 1.7715975223503348e-05, + "loss": 0.0235, "step": 132220 }, { "epoch": 6.17, - "learning_rate": 7.696310533964652e-06, - "loss": 0.0929, + "learning_rate": 1.7715507153667327e-05, + "loss": 0.0264, "step": 132225 }, { "epoch": 6.17, - "learning_rate": 7.695841732689514e-06, - "loss": 0.0942, + "learning_rate": 1.771503908383131e-05, + "loss": 0.0884, "step": 132230 }, { "epoch": 6.17, - "learning_rate": 7.695372931414374e-06, - "loss": 0.1698, + "learning_rate": 1.771457101399529e-05, + "loss": 0.0857, "step": 132235 }, { "epoch": 6.17, - "learning_rate": 7.694904130139235e-06, - "loss": 0.1252, + "learning_rate": 1.7714102944159267e-05, + "loss": 0.1379, "step": 132240 }, { "epoch": 6.17, - "learning_rate": 7.694435328864095e-06, - "loss": 0.0094, + "learning_rate": 1.7713634874323247e-05, + "loss": 0.0228, "step": 132245 }, { "epoch": 6.17, - "learning_rate": 7.693966527588955e-06, - "loss": 0.004, + "learning_rate": 1.771316680448723e-05, + "loss": 0.007, "step": 132250 }, { "epoch": 6.17, - "learning_rate": 7.693497726313817e-06, - "loss": 0.0389, + "learning_rate": 1.771269873465121e-05, + "loss": 0.0472, "step": 132255 }, { "epoch": 6.17, - "learning_rate": 7.693028925038677e-06, - "loss": 0.0163, + "learning_rate": 1.771223066481519e-05, + "loss": 0.0744, "step": 132260 }, { "epoch": 6.17, - "learning_rate": 7.692560123763537e-06, - "loss": 0.0317, + "learning_rate": 1.7711762594979173e-05, + "loss": 0.105, "step": 132265 }, { "epoch": 6.17, - "learning_rate": 7.692091322488397e-06, - "loss": 0.0829, + "learning_rate": 1.7711294525143152e-05, + "loss": 0.0171, "step": 132270 }, { "epoch": 6.17, - "learning_rate": 7.691622521213258e-06, - "loss": 0.1656, + "learning_rate": 1.7710826455307132e-05, + "loss": 0.0979, "step": 132275 }, { "epoch": 6.17, - "learning_rate": 7.69115371993812e-06, - "loss": 0.0581, + "learning_rate": 1.7710358385471112e-05, + "loss": 0.0256, "step": 132280 }, { "epoch": 6.17, - "learning_rate": 7.69068491866298e-06, - "loss": 0.258, + "learning_rate": 1.7709890315635095e-05, + "loss": 0.1495, "step": 132285 }, { "epoch": 6.17, - "learning_rate": 7.69021611738784e-06, - "loss": 0.2679, + "learning_rate": 1.7709422245799075e-05, + "loss": 0.1307, "step": 132290 }, { "epoch": 6.17, - "learning_rate": 7.6897473161127e-06, - "loss": 0.0236, + "learning_rate": 1.7708954175963055e-05, + "loss": 0.0136, "step": 132295 }, { "epoch": 6.17, - "learning_rate": 7.689278514837561e-06, - "loss": 0.038, + "learning_rate": 1.7708486106127035e-05, + "loss": 0.0141, "step": 132300 }, { "epoch": 6.17, - "learning_rate": 7.688809713562421e-06, - "loss": 0.0228, + "learning_rate": 1.7708018036291015e-05, + "loss": 0.0044, "step": 132305 }, { "epoch": 6.17, - "learning_rate": 7.688340912287281e-06, - "loss": 0.0533, + "learning_rate": 1.7707549966454994e-05, + "loss": 0.0327, "step": 132310 }, { "epoch": 6.17, - "learning_rate": 7.687872111012143e-06, - "loss": 0.0633, + "learning_rate": 1.7707081896618974e-05, + "loss": 0.0387, "step": 132315 }, { "epoch": 6.17, - "learning_rate": 7.687403309737003e-06, - "loss": 0.0568, + "learning_rate": 1.7706613826782957e-05, + "loss": 0.057, "step": 132320 }, { "epoch": 6.17, - "learning_rate": 7.686934508461864e-06, - "loss": 0.1108, + "learning_rate": 1.7706145756946937e-05, + "loss": 0.0858, "step": 132325 }, { "epoch": 6.17, - "learning_rate": 7.686465707186724e-06, - "loss": 0.0799, + "learning_rate": 1.7705677687110917e-05, + "loss": 0.0911, "step": 132330 }, { "epoch": 6.17, - "learning_rate": 7.685996905911584e-06, - "loss": 0.1629, + "learning_rate": 1.7705209617274897e-05, + "loss": 0.1488, "step": 132335 }, { "epoch": 6.18, - "learning_rate": 7.685528104636444e-06, - "loss": 0.152, + "learning_rate": 1.770474154743888e-05, + "loss": 0.1908, "step": 132340 }, { "epoch": 6.18, - "learning_rate": 7.685059303361306e-06, - "loss": 0.0186, + "learning_rate": 1.770427347760286e-05, + "loss": 0.0064, "step": 132345 }, { "epoch": 6.18, - "learning_rate": 7.684590502086166e-06, - "loss": 0.0307, + "learning_rate": 1.770380540776684e-05, + "loss": 0.0313, "step": 132350 }, { "epoch": 6.18, - "learning_rate": 7.684121700811027e-06, - "loss": 0.0268, + "learning_rate": 1.770333733793082e-05, + "loss": 0.0122, "step": 132355 }, { "epoch": 6.18, - "learning_rate": 7.683652899535887e-06, - "loss": 0.0215, + "learning_rate": 1.7702869268094803e-05, + "loss": 0.0046, "step": 132360 }, { "epoch": 6.18, - "learning_rate": 7.683184098260747e-06, - "loss": 0.0641, + "learning_rate": 1.770240119825878e-05, + "loss": 0.0431, "step": 132365 }, { "epoch": 6.18, - "learning_rate": 7.682715296985609e-06, - "loss": 0.0384, + "learning_rate": 1.770193312842276e-05, + "loss": 0.0079, "step": 132370 }, { "epoch": 6.18, - "learning_rate": 7.682246495710469e-06, - "loss": 0.0621, + "learning_rate": 1.7701465058586742e-05, + "loss": 0.0137, "step": 132375 }, { "epoch": 6.18, - "learning_rate": 7.681777694435329e-06, - "loss": 0.1454, + "learning_rate": 1.7700996988750722e-05, + "loss": 0.177, "step": 132380 }, { "epoch": 6.18, - "learning_rate": 7.68130889316019e-06, - "loss": 0.1247, + "learning_rate": 1.7700528918914702e-05, + "loss": 0.136, "step": 132385 }, { "epoch": 6.18, - "learning_rate": 7.68084009188505e-06, - "loss": 0.1878, + "learning_rate": 1.770006084907868e-05, + "loss": 0.1353, "step": 132390 }, { "epoch": 6.18, - "learning_rate": 7.680371290609912e-06, - "loss": 0.0167, + "learning_rate": 1.7699592779242665e-05, + "loss": 0.037, "step": 132395 }, { "epoch": 6.18, - "learning_rate": 7.679902489334772e-06, - "loss": 0.0276, + "learning_rate": 1.7699124709406645e-05, + "loss": 0.0306, "step": 132400 }, { "epoch": 6.18, - "learning_rate": 7.679433688059632e-06, - "loss": 0.0226, + "learning_rate": 1.7698656639570624e-05, + "loss": 0.044, "step": 132405 }, { "epoch": 6.18, - "learning_rate": 7.678964886784492e-06, - "loss": 0.0136, + "learning_rate": 1.7698188569734604e-05, + "loss": 0.0392, "step": 132410 }, { "epoch": 6.18, - "learning_rate": 7.678496085509353e-06, - "loss": 0.0322, + "learning_rate": 1.7697720499898588e-05, + "loss": 0.0443, "step": 132415 }, { "epoch": 6.18, - "learning_rate": 7.678027284234213e-06, - "loss": 0.0834, + "learning_rate": 1.7697252430062567e-05, + "loss": 0.0865, "step": 132420 }, { "epoch": 6.18, - "learning_rate": 7.677558482959075e-06, - "loss": 0.0456, + "learning_rate": 1.7696784360226547e-05, + "loss": 0.0587, "step": 132425 }, { "epoch": 6.18, - "learning_rate": 7.677089681683935e-06, - "loss": 0.1489, + "learning_rate": 1.7696316290390524e-05, + "loss": 0.0692, "step": 132430 }, { "epoch": 6.18, - "learning_rate": 7.676620880408795e-06, - "loss": 0.1913, + "learning_rate": 1.7695848220554507e-05, + "loss": 0.1569, "step": 132435 }, { "epoch": 6.18, - "learning_rate": 7.676152079133657e-06, - "loss": 0.1548, + "learning_rate": 1.7695380150718487e-05, + "loss": 0.0925, "step": 132440 }, { "epoch": 6.18, - "learning_rate": 7.675683277858516e-06, - "loss": 0.0077, + "learning_rate": 1.7694912080882466e-05, + "loss": 0.0349, "step": 132445 }, { "epoch": 6.18, - "learning_rate": 7.675214476583376e-06, - "loss": 0.0563, + "learning_rate": 1.769444401104645e-05, + "loss": 0.0246, "step": 132450 }, { "epoch": 6.18, - "learning_rate": 7.674745675308238e-06, - "loss": 0.0205, + "learning_rate": 1.769397594121043e-05, + "loss": 0.1093, "step": 132455 }, { "epoch": 6.18, - "learning_rate": 7.674276874033098e-06, - "loss": 0.0249, + "learning_rate": 1.769350787137441e-05, + "loss": 0.0389, "step": 132460 }, { "epoch": 6.18, - "learning_rate": 7.67380807275796e-06, - "loss": 0.0698, + "learning_rate": 1.769303980153839e-05, + "loss": 0.0306, "step": 132465 }, { "epoch": 6.18, - "learning_rate": 7.67333927148282e-06, - "loss": 0.0361, + "learning_rate": 1.7692571731702372e-05, + "loss": 0.0587, "step": 132470 }, { "epoch": 6.18, - "learning_rate": 7.67287047020768e-06, - "loss": 0.0634, + "learning_rate": 1.7692103661866352e-05, + "loss": 0.0239, "step": 132475 }, { "epoch": 6.18, - "learning_rate": 7.67240166893254e-06, - "loss": 0.0888, + "learning_rate": 1.7691635592030332e-05, + "loss": 0.0467, "step": 132480 }, { "epoch": 6.18, - "learning_rate": 7.671932867657401e-06, - "loss": 0.199, + "learning_rate": 1.7691167522194312e-05, + "loss": 0.1795, "step": 132485 }, { "epoch": 6.18, - "learning_rate": 7.671464066382261e-06, - "loss": 0.1187, + "learning_rate": 1.769069945235829e-05, + "loss": 0.2285, "step": 132490 }, { "epoch": 6.18, - "learning_rate": 7.670995265107123e-06, - "loss": 0.0098, + "learning_rate": 1.769023138252227e-05, + "loss": 0.0444, "step": 132495 }, { "epoch": 6.18, - "learning_rate": 7.670526463831982e-06, - "loss": 0.0406, + "learning_rate": 1.768976331268625e-05, + "loss": 0.0217, "step": 132500 }, { "epoch": 6.18, - "learning_rate": 7.670057662556842e-06, - "loss": 0.0169, + "learning_rate": 1.7689295242850234e-05, + "loss": 0.0388, "step": 132505 }, { "epoch": 6.18, - "learning_rate": 7.669588861281704e-06, - "loss": 0.0398, + "learning_rate": 1.7688827173014214e-05, + "loss": 0.0247, "step": 132510 }, { "epoch": 6.18, - "learning_rate": 7.669120060006564e-06, - "loss": 0.0346, + "learning_rate": 1.7688359103178194e-05, + "loss": 0.057, "step": 132515 }, { "epoch": 6.18, - "learning_rate": 7.668651258731424e-06, - "loss": 0.0347, + "learning_rate": 1.7687891033342174e-05, + "loss": 0.013, "step": 132520 }, { "epoch": 6.18, - "learning_rate": 7.668182457456284e-06, - "loss": 0.0469, + "learning_rate": 1.7687422963506157e-05, + "loss": 0.0719, "step": 132525 }, { "epoch": 6.18, - "learning_rate": 7.667713656181145e-06, - "loss": 0.0944, + "learning_rate": 1.7686954893670137e-05, + "loss": 0.0659, "step": 132530 }, { "epoch": 6.18, - "learning_rate": 7.667244854906007e-06, - "loss": 0.1642, + "learning_rate": 1.7686486823834117e-05, + "loss": 0.1582, "step": 132535 }, { "epoch": 6.18, - "learning_rate": 7.666776053630867e-06, - "loss": 0.1483, + "learning_rate": 1.7686018753998097e-05, + "loss": 0.1316, "step": 132540 }, { "epoch": 6.18, - "learning_rate": 7.666307252355727e-06, - "loss": 0.0328, + "learning_rate": 1.768555068416208e-05, + "loss": 0.0053, "step": 132545 }, { "epoch": 6.18, - "learning_rate": 7.665838451080587e-06, - "loss": 0.0012, + "learning_rate": 1.768508261432606e-05, + "loss": 0.0137, "step": 132550 }, { "epoch": 6.19, - "learning_rate": 7.665369649805449e-06, - "loss": 0.0853, + "learning_rate": 1.7684614544490036e-05, + "loss": 0.0466, "step": 132555 }, { "epoch": 6.19, - "learning_rate": 7.664900848530308e-06, - "loss": 0.0217, + "learning_rate": 1.768414647465402e-05, + "loss": 0.0783, "step": 132560 }, { "epoch": 6.19, - "learning_rate": 7.664432047255168e-06, - "loss": 0.0308, + "learning_rate": 1.7683678404818e-05, + "loss": 0.059, "step": 132565 }, { "epoch": 6.19, - "learning_rate": 7.66396324598003e-06, - "loss": 0.0468, + "learning_rate": 1.768321033498198e-05, + "loss": 0.0216, "step": 132570 }, { "epoch": 6.19, - "learning_rate": 7.66349444470489e-06, - "loss": 0.0367, + "learning_rate": 1.768274226514596e-05, + "loss": 0.0568, "step": 132575 }, { "epoch": 6.19, - "learning_rate": 7.663025643429752e-06, - "loss": 0.0612, + "learning_rate": 1.7682274195309942e-05, + "loss": 0.1286, "step": 132580 }, { "epoch": 6.19, - "learning_rate": 7.662556842154612e-06, - "loss": 0.1906, + "learning_rate": 1.768180612547392e-05, + "loss": 0.3361, "step": 132585 }, { "epoch": 6.19, - "learning_rate": 7.662088040879471e-06, - "loss": 0.1645, + "learning_rate": 1.76813380556379e-05, + "loss": 0.106, "step": 132590 }, { "epoch": 6.19, - "learning_rate": 7.661619239604331e-06, - "loss": 0.0235, + "learning_rate": 1.768086998580188e-05, + "loss": 0.0468, "step": 132595 }, { "epoch": 6.19, - "learning_rate": 7.661150438329193e-06, - "loss": 0.0121, + "learning_rate": 1.7680401915965864e-05, + "loss": 0.0429, "step": 132600 }, { "epoch": 6.19, - "learning_rate": 7.660681637054053e-06, - "loss": 0.0174, + "learning_rate": 1.7679933846129844e-05, + "loss": 0.0513, "step": 132605 }, { "epoch": 6.19, - "learning_rate": 7.660212835778915e-06, - "loss": 0.0324, + "learning_rate": 1.7679465776293824e-05, + "loss": 0.0518, "step": 132610 }, { "epoch": 6.19, - "learning_rate": 7.659744034503775e-06, - "loss": 0.0628, + "learning_rate": 1.7678997706457804e-05, + "loss": 0.0403, "step": 132615 }, { "epoch": 6.19, - "learning_rate": 7.659275233228634e-06, - "loss": 0.0562, + "learning_rate": 1.7678529636621784e-05, + "loss": 0.0712, "step": 132620 }, { "epoch": 6.19, - "learning_rate": 7.658806431953496e-06, - "loss": 0.1024, + "learning_rate": 1.7678061566785764e-05, + "loss": 0.0759, "step": 132625 }, { "epoch": 6.19, - "learning_rate": 7.658337630678356e-06, - "loss": 0.0822, + "learning_rate": 1.7677593496949743e-05, + "loss": 0.2405, "step": 132630 }, { "epoch": 6.19, - "learning_rate": 7.657868829403216e-06, - "loss": 0.1248, + "learning_rate": 1.7677125427113727e-05, + "loss": 0.2575, "step": 132635 }, { "epoch": 6.19, - "learning_rate": 7.657400028128078e-06, - "loss": 0.1948, + "learning_rate": 1.7676657357277706e-05, + "loss": 0.1719, "step": 132640 }, { "epoch": 6.19, - "learning_rate": 7.656931226852938e-06, - "loss": 0.0214, + "learning_rate": 1.7676189287441686e-05, + "loss": 0.0456, "step": 132645 }, { "epoch": 6.19, - "learning_rate": 7.656462425577799e-06, - "loss": 0.03, + "learning_rate": 1.7675721217605666e-05, + "loss": 0.0468, "step": 132650 }, { "epoch": 6.19, - "learning_rate": 7.655993624302659e-06, - "loss": 0.0126, + "learning_rate": 1.767525314776965e-05, + "loss": 0.0272, "step": 132655 }, { "epoch": 6.19, - "learning_rate": 7.655524823027519e-06, - "loss": 0.0389, + "learning_rate": 1.767478507793363e-05, + "loss": 0.046, "step": 132660 }, { "epoch": 6.19, - "learning_rate": 7.655056021752379e-06, - "loss": 0.051, + "learning_rate": 1.767431700809761e-05, + "loss": 0.0123, "step": 132665 }, { "epoch": 6.19, - "learning_rate": 7.65458722047724e-06, - "loss": 0.0991, + "learning_rate": 1.767384893826159e-05, + "loss": 0.0703, "step": 132670 }, { "epoch": 6.19, - "learning_rate": 7.6541184192021e-06, - "loss": 0.0861, + "learning_rate": 1.7673380868425572e-05, + "loss": 0.096, "step": 132675 }, { "epoch": 6.19, - "learning_rate": 7.653649617926962e-06, - "loss": 0.1701, + "learning_rate": 1.767291279858955e-05, + "loss": 0.0837, "step": 132680 }, { "epoch": 6.19, - "learning_rate": 7.653180816651822e-06, - "loss": 0.1963, + "learning_rate": 1.7672444728753528e-05, + "loss": 0.075, "step": 132685 }, { "epoch": 6.19, - "learning_rate": 7.652712015376682e-06, - "loss": 0.1536, + "learning_rate": 1.767197665891751e-05, + "loss": 0.1727, "step": 132690 }, { "epoch": 6.19, - "learning_rate": 7.652243214101544e-06, - "loss": 0.0883, + "learning_rate": 1.767150858908149e-05, + "loss": 0.0106, "step": 132695 }, { "epoch": 6.19, - "learning_rate": 7.651774412826404e-06, - "loss": 0.0154, + "learning_rate": 1.767104051924547e-05, + "loss": 0.0169, "step": 132700 }, { "epoch": 6.19, - "learning_rate": 7.651305611551263e-06, - "loss": 0.0678, + "learning_rate": 1.767057244940945e-05, + "loss": 0.0243, "step": 132705 }, { "epoch": 6.19, - "learning_rate": 7.650836810276125e-06, - "loss": 0.0521, + "learning_rate": 1.7670104379573434e-05, + "loss": 0.0318, "step": 132710 }, { "epoch": 6.19, - "learning_rate": 7.650368009000985e-06, - "loss": 0.0212, + "learning_rate": 1.7669636309737414e-05, + "loss": 0.031, "step": 132715 }, { "epoch": 6.19, - "learning_rate": 7.649899207725847e-06, - "loss": 0.0771, + "learning_rate": 1.7669168239901394e-05, + "loss": 0.1057, "step": 132720 }, { "epoch": 6.19, - "learning_rate": 7.649430406450707e-06, - "loss": 0.0444, + "learning_rate": 1.7668700170065373e-05, + "loss": 0.0797, "step": 132725 }, { "epoch": 6.19, - "learning_rate": 7.648961605175567e-06, - "loss": 0.0749, + "learning_rate": 1.7668232100229357e-05, + "loss": 0.1065, "step": 132730 }, { "epoch": 6.19, - "learning_rate": 7.648492803900426e-06, - "loss": 0.1257, + "learning_rate": 1.7667764030393337e-05, + "loss": 0.1455, "step": 132735 }, { "epoch": 6.19, - "learning_rate": 7.648024002625286e-06, - "loss": 0.1229, + "learning_rate": 1.7667295960557316e-05, + "loss": 0.1116, "step": 132740 }, { "epoch": 6.19, - "learning_rate": 7.647555201350148e-06, - "loss": 0.0162, + "learning_rate": 1.7666827890721296e-05, + "loss": 0.0085, "step": 132745 }, { "epoch": 6.19, - "learning_rate": 7.64708640007501e-06, - "loss": 0.0361, + "learning_rate": 1.7666359820885276e-05, + "loss": 0.0207, "step": 132750 }, { "epoch": 6.19, - "learning_rate": 7.64661759879987e-06, - "loss": 0.0037, + "learning_rate": 1.7665891751049256e-05, + "loss": 0.0249, "step": 132755 }, { "epoch": 6.19, - "learning_rate": 7.64614879752473e-06, - "loss": 0.0213, + "learning_rate": 1.7665423681213236e-05, + "loss": 0.0391, "step": 132760 }, { "epoch": 6.19, - "learning_rate": 7.645679996249591e-06, - "loss": 0.0297, + "learning_rate": 1.766495561137722e-05, + "loss": 0.0521, "step": 132765 }, { "epoch": 6.2, - "learning_rate": 7.645211194974451e-06, - "loss": 0.0338, + "learning_rate": 1.76644875415412e-05, + "loss": 0.0584, "step": 132770 }, { "epoch": 6.2, - "learning_rate": 7.644742393699311e-06, - "loss": 0.0434, + "learning_rate": 1.766401947170518e-05, + "loss": 0.0905, "step": 132775 }, { "epoch": 6.2, - "learning_rate": 7.644273592424171e-06, - "loss": 0.0444, + "learning_rate": 1.7663551401869158e-05, + "loss": 0.0712, "step": 132780 }, { "epoch": 6.2, - "learning_rate": 7.643804791149033e-06, - "loss": 0.0896, + "learning_rate": 1.766308333203314e-05, + "loss": 0.149, "step": 132785 }, { "epoch": 6.2, - "learning_rate": 7.643335989873894e-06, - "loss": 0.0988, + "learning_rate": 1.766261526219712e-05, + "loss": 0.151, "step": 132790 }, { "epoch": 6.2, - "learning_rate": 7.642867188598754e-06, - "loss": 0.0445, + "learning_rate": 1.76621471923611e-05, + "loss": 0.0141, "step": 132795 }, { "epoch": 6.2, - "learning_rate": 7.642398387323614e-06, - "loss": 0.0394, + "learning_rate": 1.766167912252508e-05, + "loss": 0.017, "step": 132800 }, { "epoch": 6.2, - "learning_rate": 7.641929586048474e-06, - "loss": 0.0327, + "learning_rate": 1.7661211052689064e-05, + "loss": 0.0211, "step": 132805 }, { "epoch": 6.2, - "learning_rate": 7.641460784773336e-06, - "loss": 0.0097, + "learning_rate": 1.766074298285304e-05, + "loss": 0.078, "step": 132810 }, { "epoch": 6.2, - "learning_rate": 7.640991983498196e-06, - "loss": 0.0363, + "learning_rate": 1.766027491301702e-05, + "loss": 0.0511, "step": 132815 }, { "epoch": 6.2, - "learning_rate": 7.640523182223056e-06, - "loss": 0.0586, + "learning_rate": 1.7659806843181004e-05, + "loss": 0.0597, "step": 132820 }, { "epoch": 6.2, - "learning_rate": 7.640054380947917e-06, - "loss": 0.0787, + "learning_rate": 1.7659338773344983e-05, + "loss": 0.0621, "step": 132825 }, { "epoch": 6.2, - "learning_rate": 7.639585579672777e-06, - "loss": 0.1005, + "learning_rate": 1.7658870703508963e-05, + "loss": 0.131, "step": 132830 }, { "epoch": 6.2, - "learning_rate": 7.639116778397639e-06, - "loss": 0.0994, + "learning_rate": 1.7658402633672943e-05, + "loss": 0.1778, "step": 132835 }, { "epoch": 6.2, - "learning_rate": 7.638647977122499e-06, - "loss": 0.2076, + "learning_rate": 1.7657934563836926e-05, + "loss": 0.0958, "step": 132840 }, { "epoch": 6.2, - "learning_rate": 7.638179175847359e-06, - "loss": 0.0289, + "learning_rate": 1.7657466494000906e-05, + "loss": 0.0199, "step": 132845 }, { "epoch": 6.2, - "learning_rate": 7.637710374572218e-06, - "loss": 0.0074, + "learning_rate": 1.7656998424164886e-05, + "loss": 0.0119, "step": 132850 }, { "epoch": 6.2, - "learning_rate": 7.63724157329708e-06, - "loss": 0.0464, + "learning_rate": 1.7656530354328866e-05, + "loss": 0.0075, "step": 132855 }, { "epoch": 6.2, - "learning_rate": 7.63677277202194e-06, - "loss": 0.0472, + "learning_rate": 1.765606228449285e-05, + "loss": 0.0391, "step": 132860 }, { "epoch": 6.2, - "learning_rate": 7.636303970746802e-06, - "loss": 0.0514, + "learning_rate": 1.765559421465683e-05, + "loss": 0.0132, "step": 132865 }, { "epoch": 6.2, - "learning_rate": 7.635835169471662e-06, - "loss": 0.2009, + "learning_rate": 1.7655126144820805e-05, + "loss": 0.0484, "step": 132870 }, { "epoch": 6.2, - "learning_rate": 7.635366368196522e-06, - "loss": 0.0674, + "learning_rate": 1.765465807498479e-05, + "loss": 0.0814, "step": 132875 }, { "epoch": 6.2, - "learning_rate": 7.634897566921383e-06, - "loss": 0.1488, + "learning_rate": 1.7654190005148768e-05, + "loss": 0.081, "step": 132880 }, { "epoch": 6.2, - "learning_rate": 7.634428765646243e-06, - "loss": 0.2675, + "learning_rate": 1.7653721935312748e-05, + "loss": 0.1177, "step": 132885 }, { "epoch": 6.2, - "learning_rate": 7.633959964371103e-06, - "loss": 0.1144, + "learning_rate": 1.7653253865476728e-05, + "loss": 0.1413, "step": 132890 }, { "epoch": 6.2, - "learning_rate": 7.633491163095965e-06, - "loss": 0.009, + "learning_rate": 1.765278579564071e-05, + "loss": 0.0485, "step": 132895 }, { "epoch": 6.2, - "learning_rate": 7.633022361820825e-06, - "loss": 0.0389, + "learning_rate": 1.765231772580469e-05, + "loss": 0.0472, "step": 132900 }, { "epoch": 6.2, - "learning_rate": 7.632553560545686e-06, - "loss": 0.0336, + "learning_rate": 1.765184965596867e-05, + "loss": 0.0451, "step": 132905 }, { "epoch": 6.2, - "learning_rate": 7.632084759270546e-06, - "loss": 0.0424, + "learning_rate": 1.765138158613265e-05, + "loss": 0.0939, "step": 132910 }, { "epoch": 6.2, - "learning_rate": 7.631615957995406e-06, - "loss": 0.0792, + "learning_rate": 1.7650913516296634e-05, + "loss": 0.0642, "step": 132915 }, { "epoch": 6.2, - "learning_rate": 7.631147156720266e-06, - "loss": 0.0349, + "learning_rate": 1.7650445446460613e-05, + "loss": 0.0681, "step": 132920 }, { "epoch": 6.2, - "learning_rate": 7.630678355445128e-06, - "loss": 0.0647, + "learning_rate": 1.7649977376624593e-05, + "loss": 0.0452, "step": 132925 }, { "epoch": 6.2, - "learning_rate": 7.630209554169988e-06, - "loss": 0.1355, + "learning_rate": 1.7649509306788576e-05, + "loss": 0.1012, "step": 132930 }, { "epoch": 6.2, - "learning_rate": 7.62974075289485e-06, - "loss": 0.1954, + "learning_rate": 1.7649041236952553e-05, + "loss": 0.1509, "step": 132935 }, { "epoch": 6.2, - "learning_rate": 7.629271951619709e-06, - "loss": 0.1271, + "learning_rate": 1.7648573167116533e-05, + "loss": 0.1968, "step": 132940 }, { "epoch": 6.2, - "learning_rate": 7.628803150344569e-06, - "loss": 0.0271, + "learning_rate": 1.7648105097280513e-05, + "loss": 0.0416, "step": 132945 }, { "epoch": 6.2, - "learning_rate": 7.628334349069431e-06, - "loss": 0.0087, + "learning_rate": 1.7647637027444496e-05, + "loss": 0.0287, "step": 132950 }, { "epoch": 6.2, - "learning_rate": 7.627865547794291e-06, - "loss": 0.0117, + "learning_rate": 1.7647168957608476e-05, + "loss": 0.0036, "step": 132955 }, { "epoch": 6.2, - "learning_rate": 7.6273967465191514e-06, - "loss": 0.0636, + "learning_rate": 1.7646700887772455e-05, + "loss": 0.0743, "step": 132960 }, { "epoch": 6.2, - "learning_rate": 7.626927945244011e-06, - "loss": 0.0702, + "learning_rate": 1.7646232817936435e-05, + "loss": 0.0766, "step": 132965 }, { "epoch": 6.2, - "learning_rate": 7.626459143968872e-06, - "loss": 0.04, + "learning_rate": 1.764576474810042e-05, + "loss": 0.0512, "step": 132970 }, { "epoch": 6.2, - "learning_rate": 7.625990342693733e-06, - "loss": 0.0728, + "learning_rate": 1.7645296678264398e-05, + "loss": 0.1153, "step": 132975 }, { "epoch": 6.21, - "learning_rate": 7.625521541418594e-06, - "loss": 0.0563, + "learning_rate": 1.7644828608428378e-05, + "loss": 0.0603, "step": 132980 }, { "epoch": 6.21, - "learning_rate": 7.625052740143454e-06, - "loss": 0.2476, + "learning_rate": 1.764436053859236e-05, + "loss": 0.1317, "step": 132985 }, { "epoch": 6.21, - "learning_rate": 7.6245839388683144e-06, - "loss": 0.1505, + "learning_rate": 1.764389246875634e-05, + "loss": 0.1437, "step": 132990 }, { "epoch": 6.21, - "learning_rate": 7.624115137593174e-06, - "loss": 0.0382, + "learning_rate": 1.764342439892032e-05, + "loss": 0.067, "step": 132995 }, { "epoch": 6.21, - "learning_rate": 7.623646336318036e-06, - "loss": 0.0211, + "learning_rate": 1.7642956329084297e-05, + "loss": 0.011, "step": 133000 }, { "epoch": 6.21, - "learning_rate": 7.623177535042896e-06, - "loss": 0.0124, + "learning_rate": 1.764248825924828e-05, + "loss": 0.0264, "step": 133005 }, { "epoch": 6.21, - "learning_rate": 7.622708733767757e-06, - "loss": 0.0164, + "learning_rate": 1.764202018941226e-05, + "loss": 0.0546, "step": 133010 }, { "epoch": 6.21, - "learning_rate": 7.622239932492617e-06, - "loss": 0.039, + "learning_rate": 1.764155211957624e-05, + "loss": 0.0763, "step": 133015 }, { "epoch": 6.21, - "learning_rate": 7.621771131217478e-06, - "loss": 0.0849, + "learning_rate": 1.764108404974022e-05, + "loss": 0.1039, "step": 133020 }, { "epoch": 6.21, - "learning_rate": 7.621302329942338e-06, - "loss": 0.0141, + "learning_rate": 1.7640615979904203e-05, + "loss": 0.1582, "step": 133025 }, { "epoch": 6.21, - "learning_rate": 7.620833528667199e-06, - "loss": 0.1076, + "learning_rate": 1.7640147910068183e-05, + "loss": 0.127, "step": 133030 }, { "epoch": 6.21, - "learning_rate": 7.620364727392059e-06, - "loss": 0.2131, + "learning_rate": 1.7639679840232163e-05, + "loss": 0.2806, "step": 133035 }, { "epoch": 6.21, - "learning_rate": 7.619895926116919e-06, - "loss": 0.1174, + "learning_rate": 1.7639211770396143e-05, + "loss": 0.1792, "step": 133040 }, { "epoch": 6.21, - "learning_rate": 7.6194271248417805e-06, - "loss": 0.0097, + "learning_rate": 1.7638743700560126e-05, + "loss": 0.0054, "step": 133045 }, { "epoch": 6.21, - "learning_rate": 7.618958323566641e-06, - "loss": 0.0146, + "learning_rate": 1.7638275630724106e-05, + "loss": 0.0253, "step": 133050 }, { "epoch": 6.21, - "learning_rate": 7.618489522291501e-06, - "loss": 0.0662, + "learning_rate": 1.7637807560888085e-05, + "loss": 0.0142, "step": 133055 }, { "epoch": 6.21, - "learning_rate": 7.618020721016361e-06, - "loss": 0.0695, + "learning_rate": 1.7637339491052065e-05, + "loss": 0.0185, "step": 133060 }, { "epoch": 6.21, - "learning_rate": 7.617551919741222e-06, - "loss": 0.0601, + "learning_rate": 1.7636871421216045e-05, + "loss": 0.0522, "step": 133065 }, { "epoch": 6.21, - "learning_rate": 7.6170831184660835e-06, - "loss": 0.0446, + "learning_rate": 1.7636403351380025e-05, + "loss": 0.0362, "step": 133070 }, { "epoch": 6.21, - "learning_rate": 7.6166143171909435e-06, - "loss": 0.0742, + "learning_rate": 1.7635935281544005e-05, + "loss": 0.0888, "step": 133075 }, { "epoch": 6.21, - "learning_rate": 7.616145515915803e-06, - "loss": 0.1054, + "learning_rate": 1.7635467211707988e-05, + "loss": 0.0363, "step": 133080 }, { "epoch": 6.21, - "learning_rate": 7.615676714640664e-06, - "loss": 0.1595, + "learning_rate": 1.7634999141871968e-05, + "loss": 0.1429, "step": 133085 }, { "epoch": 6.21, - "learning_rate": 7.615207913365526e-06, - "loss": 0.1137, + "learning_rate": 1.7634531072035948e-05, + "loss": 0.1539, "step": 133090 }, { "epoch": 6.21, - "learning_rate": 7.614739112090386e-06, - "loss": 0.0029, + "learning_rate": 1.7634063002199927e-05, + "loss": 0.0036, "step": 133095 }, { "epoch": 6.21, - "learning_rate": 7.614270310815246e-06, - "loss": 0.0213, + "learning_rate": 1.763359493236391e-05, + "loss": 0.0349, "step": 133100 }, { "epoch": 6.21, - "learning_rate": 7.6138015095401065e-06, - "loss": 0.0242, + "learning_rate": 1.763312686252789e-05, + "loss": 0.0334, "step": 133105 }, { "epoch": 6.21, - "learning_rate": 7.613332708264966e-06, - "loss": 0.0807, + "learning_rate": 1.763265879269187e-05, + "loss": 0.0827, "step": 133110 }, { "epoch": 6.21, - "learning_rate": 7.612863906989828e-06, - "loss": 0.0264, + "learning_rate": 1.7632190722855853e-05, + "loss": 0.0739, "step": 133115 }, { "epoch": 6.21, - "learning_rate": 7.612395105714688e-06, - "loss": 0.0558, + "learning_rate": 1.7631722653019833e-05, + "loss": 0.0411, "step": 133120 }, { "epoch": 6.21, - "learning_rate": 7.611926304439549e-06, - "loss": 0.0902, + "learning_rate": 1.763125458318381e-05, + "loss": 0.0921, "step": 133125 }, { "epoch": 6.21, - "learning_rate": 7.611457503164409e-06, - "loss": 0.0988, + "learning_rate": 1.763078651334779e-05, + "loss": 0.132, "step": 133130 }, { "epoch": 6.21, - "learning_rate": 7.61098870188927e-06, - "loss": 0.2618, + "learning_rate": 1.7630318443511773e-05, + "loss": 0.2191, "step": 133135 }, { "epoch": 6.21, - "learning_rate": 7.61051990061413e-06, - "loss": 0.1692, + "learning_rate": 1.7629850373675753e-05, + "loss": 0.3232, "step": 133140 }, { "epoch": 6.21, - "learning_rate": 7.610051099338991e-06, - "loss": 0.012, + "learning_rate": 1.7629382303839732e-05, + "loss": 0.059, "step": 133145 }, { "epoch": 6.21, - "learning_rate": 7.609582298063851e-06, - "loss": 0.02, + "learning_rate": 1.7628914234003712e-05, + "loss": 0.0267, "step": 133150 }, { "epoch": 6.21, - "learning_rate": 7.609113496788712e-06, - "loss": 0.0459, + "learning_rate": 1.7628446164167695e-05, + "loss": 0.0311, "step": 133155 }, { "epoch": 6.21, - "learning_rate": 7.6086446955135725e-06, - "loss": 0.0565, + "learning_rate": 1.7627978094331675e-05, + "loss": 0.0073, "step": 133160 }, { "epoch": 6.21, - "learning_rate": 7.608175894238433e-06, - "loss": 0.062, + "learning_rate": 1.7627510024495655e-05, + "loss": 0.036, "step": 133165 }, { "epoch": 6.21, - "learning_rate": 7.607707092963293e-06, - "loss": 0.0952, + "learning_rate": 1.7627041954659638e-05, + "loss": 0.0389, "step": 133170 }, { "epoch": 6.21, - "learning_rate": 7.607238291688154e-06, - "loss": 0.1256, + "learning_rate": 1.7626573884823618e-05, + "loss": 0.0549, "step": 133175 }, { "epoch": 6.21, - "learning_rate": 7.606769490413014e-06, - "loss": 0.0523, + "learning_rate": 1.7626105814987598e-05, + "loss": 0.0732, "step": 133180 }, { "epoch": 6.21, - "learning_rate": 7.6063006891378756e-06, - "loss": 0.2347, + "learning_rate": 1.7625637745151578e-05, + "loss": 0.1107, "step": 133185 }, { "epoch": 6.21, - "learning_rate": 7.6058318878627355e-06, - "loss": 0.1569, + "learning_rate": 1.7625169675315558e-05, + "loss": 0.1161, "step": 133190 }, { "epoch": 6.22, - "learning_rate": 7.605363086587596e-06, - "loss": 0.0237, + "learning_rate": 1.7624701605479537e-05, + "loss": 0.0272, "step": 133195 }, { "epoch": 6.22, - "learning_rate": 7.604894285312456e-06, - "loss": 0.0127, + "learning_rate": 1.7624233535643517e-05, + "loss": 0.042, "step": 133200 }, { "epoch": 6.22, - "learning_rate": 7.604425484037318e-06, - "loss": 0.012, + "learning_rate": 1.7623765465807497e-05, + "loss": 0.1117, "step": 133205 }, { "epoch": 6.22, - "learning_rate": 7.603956682762178e-06, - "loss": 0.017, + "learning_rate": 1.762329739597148e-05, + "loss": 0.0133, "step": 133210 }, { "epoch": 6.22, - "learning_rate": 7.6034878814870385e-06, - "loss": 0.0526, + "learning_rate": 1.762282932613546e-05, + "loss": 0.02, "step": 133215 }, { "epoch": 6.22, - "learning_rate": 7.6030190802118985e-06, - "loss": 0.0565, + "learning_rate": 1.762236125629944e-05, + "loss": 0.0623, "step": 133220 }, { "epoch": 6.22, - "learning_rate": 7.602550278936759e-06, - "loss": 0.0496, + "learning_rate": 1.762189318646342e-05, + "loss": 0.0713, "step": 133225 }, { "epoch": 6.22, - "learning_rate": 7.60208147766162e-06, - "loss": 0.0761, + "learning_rate": 1.7621425116627403e-05, + "loss": 0.0402, "step": 133230 }, { "epoch": 6.22, - "learning_rate": 7.601612676386481e-06, - "loss": 0.2174, + "learning_rate": 1.7620957046791383e-05, + "loss": 0.1408, "step": 133235 }, { "epoch": 6.22, - "learning_rate": 7.601143875111341e-06, - "loss": 0.093, + "learning_rate": 1.7620488976955362e-05, + "loss": 0.1379, "step": 133240 }, { "epoch": 6.22, - "learning_rate": 7.6006750738362015e-06, - "loss": 0.0837, + "learning_rate": 1.7620020907119346e-05, + "loss": 0.0167, "step": 133245 }, { "epoch": 6.22, - "learning_rate": 7.6002062725610615e-06, - "loss": 0.0185, + "learning_rate": 1.7619552837283322e-05, + "loss": 0.0276, "step": 133250 }, { "epoch": 6.22, - "learning_rate": 7.599737471285923e-06, - "loss": 0.0264, + "learning_rate": 1.7619084767447302e-05, + "loss": 0.0221, "step": 133255 }, { "epoch": 6.22, - "learning_rate": 7.599268670010783e-06, - "loss": 0.0256, + "learning_rate": 1.7618616697611282e-05, + "loss": 0.0341, "step": 133260 }, { "epoch": 6.22, - "learning_rate": 7.598799868735644e-06, - "loss": 0.0331, + "learning_rate": 1.7618148627775265e-05, + "loss": 0.0611, "step": 133265 }, { "epoch": 6.22, - "learning_rate": 7.598331067460504e-06, - "loss": 0.0687, + "learning_rate": 1.7617680557939245e-05, + "loss": 0.0356, "step": 133270 }, { "epoch": 6.22, - "learning_rate": 7.597862266185365e-06, - "loss": 0.0613, + "learning_rate": 1.7617212488103225e-05, + "loss": 0.0438, "step": 133275 }, { "epoch": 6.22, - "learning_rate": 7.597393464910225e-06, - "loss": 0.2293, + "learning_rate": 1.7616744418267204e-05, + "loss": 0.0757, "step": 133280 }, { "epoch": 6.22, - "learning_rate": 7.596924663635086e-06, - "loss": 0.1545, + "learning_rate": 1.7616276348431188e-05, + "loss": 0.1861, "step": 133285 }, { "epoch": 6.22, - "learning_rate": 7.596455862359946e-06, - "loss": 0.1269, + "learning_rate": 1.7615808278595167e-05, + "loss": 0.1832, "step": 133290 }, { "epoch": 6.22, - "learning_rate": 7.595987061084806e-06, - "loss": 0.0451, + "learning_rate": 1.7615340208759147e-05, + "loss": 0.025, "step": 133295 }, { "epoch": 6.22, - "learning_rate": 7.595518259809668e-06, - "loss": 0.0253, + "learning_rate": 1.761487213892313e-05, + "loss": 0.0414, "step": 133300 }, { "epoch": 6.22, - "learning_rate": 7.595049458534528e-06, - "loss": 0.007, + "learning_rate": 1.761440406908711e-05, + "loss": 0.0334, "step": 133305 }, { "epoch": 6.22, - "learning_rate": 7.594580657259388e-06, - "loss": 0.0586, + "learning_rate": 1.761393599925109e-05, + "loss": 0.0483, "step": 133310 }, { "epoch": 6.22, - "learning_rate": 7.594111855984248e-06, - "loss": 0.1468, + "learning_rate": 1.7613467929415067e-05, + "loss": 0.0922, "step": 133315 }, { "epoch": 6.22, - "learning_rate": 7.593643054709109e-06, - "loss": 0.0696, + "learning_rate": 1.761299985957905e-05, + "loss": 0.0659, "step": 133320 }, { "epoch": 6.22, - "learning_rate": 7.593174253433971e-06, - "loss": 0.0674, + "learning_rate": 1.761253178974303e-05, + "loss": 0.0707, "step": 133325 }, { "epoch": 6.22, - "learning_rate": 7.5927054521588306e-06, - "loss": 0.0938, + "learning_rate": 1.761206371990701e-05, + "loss": 0.1227, "step": 133330 }, { "epoch": 6.22, - "learning_rate": 7.5922366508836905e-06, - "loss": 0.1425, + "learning_rate": 1.761159565007099e-05, + "loss": 0.1157, "step": 133335 }, { "epoch": 6.22, - "learning_rate": 7.591767849608551e-06, - "loss": 0.1192, + "learning_rate": 1.7611127580234972e-05, + "loss": 0.0908, "step": 133340 }, { "epoch": 6.22, - "learning_rate": 7.591299048333413e-06, - "loss": 0.0171, + "learning_rate": 1.7610659510398952e-05, + "loss": 0.0587, "step": 133345 }, { "epoch": 6.22, - "learning_rate": 7.590830247058273e-06, - "loss": 0.0257, + "learning_rate": 1.7610191440562932e-05, + "loss": 0.0385, "step": 133350 }, { "epoch": 6.22, - "learning_rate": 7.590361445783133e-06, - "loss": 0.0161, + "learning_rate": 1.7609723370726915e-05, + "loss": 0.0367, "step": 133355 }, { "epoch": 6.22, - "learning_rate": 7.5898926445079936e-06, - "loss": 0.0086, + "learning_rate": 1.7609255300890895e-05, + "loss": 0.0389, "step": 133360 }, { "epoch": 6.22, - "learning_rate": 7.5894238432328535e-06, - "loss": 0.0511, + "learning_rate": 1.7608787231054875e-05, + "loss": 0.0588, "step": 133365 }, { "epoch": 6.22, - "learning_rate": 7.588955041957715e-06, - "loss": 0.0696, + "learning_rate": 1.7608319161218855e-05, + "loss": 0.0706, "step": 133370 }, { "epoch": 6.22, - "learning_rate": 7.588486240682575e-06, - "loss": 0.1007, + "learning_rate": 1.7607851091382834e-05, + "loss": 0.0788, "step": 133375 }, { "epoch": 6.22, - "learning_rate": 7.588017439407436e-06, - "loss": 0.0784, + "learning_rate": 1.7607383021546814e-05, + "loss": 0.0604, "step": 133380 }, { "epoch": 6.22, - "learning_rate": 7.587548638132296e-06, - "loss": 0.206, + "learning_rate": 1.7606914951710794e-05, + "loss": 0.126, "step": 133385 }, { "epoch": 6.22, - "learning_rate": 7.5870798368571566e-06, - "loss": 0.1128, + "learning_rate": 1.7606446881874774e-05, + "loss": 0.2236, "step": 133390 }, { "epoch": 6.22, - "learning_rate": 7.586611035582017e-06, - "loss": 0.0011, + "learning_rate": 1.7605978812038757e-05, + "loss": 0.0054, "step": 133395 }, { "epoch": 6.22, - "learning_rate": 7.586142234306878e-06, - "loss": 0.0193, + "learning_rate": 1.7605510742202737e-05, + "loss": 0.0151, "step": 133400 }, { "epoch": 6.22, - "learning_rate": 7.585673433031738e-06, - "loss": 0.0121, + "learning_rate": 1.7605042672366717e-05, + "loss": 0.0118, "step": 133405 }, { "epoch": 6.23, - "learning_rate": 7.585204631756599e-06, - "loss": 0.0477, + "learning_rate": 1.7604574602530697e-05, + "loss": 0.0242, "step": 133410 }, { "epoch": 6.23, - "learning_rate": 7.58473583048146e-06, - "loss": 0.0539, + "learning_rate": 1.760410653269468e-05, + "loss": 0.0249, "step": 133415 }, { "epoch": 6.23, - "learning_rate": 7.58426702920632e-06, - "loss": 0.0218, + "learning_rate": 1.760363846285866e-05, + "loss": 0.0377, "step": 133420 }, { "epoch": 6.23, - "learning_rate": 7.58379822793118e-06, - "loss": 0.1327, + "learning_rate": 1.760317039302264e-05, + "loss": 0.0404, "step": 133425 }, { "epoch": 6.23, - "learning_rate": 7.583329426656041e-06, - "loss": 0.1603, + "learning_rate": 1.7602702323186623e-05, + "loss": 0.0822, "step": 133430 }, { "epoch": 6.23, - "learning_rate": 7.582860625380901e-06, - "loss": 0.1997, + "learning_rate": 1.7602234253350602e-05, + "loss": 0.2045, "step": 133435 }, { "epoch": 6.23, - "learning_rate": 7.582391824105763e-06, - "loss": 0.1416, + "learning_rate": 1.760176618351458e-05, + "loss": 0.1759, "step": 133440 }, { "epoch": 6.23, - "learning_rate": 7.581923022830623e-06, - "loss": 0.0601, + "learning_rate": 1.760129811367856e-05, + "loss": 0.0035, "step": 133445 }, { "epoch": 6.23, - "learning_rate": 7.581454221555483e-06, - "loss": 0.0306, + "learning_rate": 1.7600830043842542e-05, + "loss": 0.016, "step": 133450 }, { "epoch": 6.23, - "learning_rate": 7.580985420280343e-06, - "loss": 0.0572, + "learning_rate": 1.7600361974006522e-05, + "loss": 0.0187, "step": 133455 }, { "epoch": 6.23, - "learning_rate": 7.580516619005205e-06, - "loss": 0.0479, + "learning_rate": 1.75998939041705e-05, + "loss": 0.0221, "step": 133460 }, { "epoch": 6.23, - "learning_rate": 7.580047817730065e-06, - "loss": 0.0902, + "learning_rate": 1.759942583433448e-05, + "loss": 0.0292, "step": 133465 }, { "epoch": 6.23, - "learning_rate": 7.579579016454926e-06, - "loss": 0.0314, + "learning_rate": 1.7598957764498465e-05, + "loss": 0.0426, "step": 133470 }, { "epoch": 6.23, - "learning_rate": 7.579110215179786e-06, - "loss": 0.1311, + "learning_rate": 1.7598489694662444e-05, + "loss": 0.0921, "step": 133475 }, { "epoch": 6.23, - "learning_rate": 7.578641413904646e-06, - "loss": 0.101, + "learning_rate": 1.7598021624826424e-05, + "loss": 0.1651, "step": 133480 }, { "epoch": 6.23, - "learning_rate": 7.578172612629507e-06, - "loss": 0.2035, + "learning_rate": 1.7597553554990407e-05, + "loss": 0.2633, "step": 133485 }, { "epoch": 6.23, - "learning_rate": 7.577703811354368e-06, - "loss": 0.1423, + "learning_rate": 1.7597085485154387e-05, + "loss": 0.1131, "step": 133490 }, { "epoch": 6.23, - "learning_rate": 7.577235010079228e-06, - "loss": 0.0676, + "learning_rate": 1.7596617415318367e-05, + "loss": 0.0095, "step": 133495 }, { "epoch": 6.23, - "learning_rate": 7.576766208804089e-06, - "loss": 0.0153, + "learning_rate": 1.7596149345482347e-05, + "loss": 0.0684, "step": 133500 }, { "epoch": 6.23, - "learning_rate": 7.576297407528949e-06, - "loss": 0.0555, + "learning_rate": 1.7595681275646327e-05, + "loss": 0.1288, "step": 133505 }, { "epoch": 6.23, - "learning_rate": 7.57582860625381e-06, - "loss": 0.0702, + "learning_rate": 1.7595213205810306e-05, + "loss": 0.0502, "step": 133510 }, { "epoch": 6.23, - "learning_rate": 7.57535980497867e-06, - "loss": 0.0337, + "learning_rate": 1.7594745135974286e-05, + "loss": 0.0542, "step": 133515 }, { "epoch": 6.23, - "learning_rate": 7.574891003703531e-06, - "loss": 0.0176, + "learning_rate": 1.7594277066138266e-05, + "loss": 0.0852, "step": 133520 }, { "epoch": 6.23, - "learning_rate": 7.574422202428391e-06, - "loss": 0.0902, + "learning_rate": 1.759380899630225e-05, + "loss": 0.071, "step": 133525 }, { "epoch": 6.23, - "learning_rate": 7.5739534011532525e-06, - "loss": 0.0904, + "learning_rate": 1.759334092646623e-05, + "loss": 0.0842, "step": 133530 }, { "epoch": 6.23, - "learning_rate": 7.573484599878112e-06, - "loss": 0.1625, + "learning_rate": 1.759287285663021e-05, + "loss": 0.1496, "step": 133535 }, { "epoch": 6.23, - "learning_rate": 7.573015798602973e-06, - "loss": 0.1866, + "learning_rate": 1.7592404786794192e-05, + "loss": 0.145, "step": 133540 }, { "epoch": 6.23, - "learning_rate": 7.572546997327833e-06, - "loss": 0.0585, + "learning_rate": 1.7591936716958172e-05, + "loss": 0.0055, "step": 133545 }, { "epoch": 6.23, - "learning_rate": 7.572078196052693e-06, - "loss": 0.0259, + "learning_rate": 1.7591468647122152e-05, + "loss": 0.0044, "step": 133550 }, { "epoch": 6.23, - "learning_rate": 7.571609394777555e-06, - "loss": 0.0117, + "learning_rate": 1.759100057728613e-05, + "loss": 0.0269, "step": 133555 }, { "epoch": 6.23, - "learning_rate": 7.5711405935024155e-06, - "loss": 0.0312, + "learning_rate": 1.7590532507450115e-05, + "loss": 0.0418, "step": 133560 }, { "epoch": 6.23, - "learning_rate": 7.570671792227275e-06, - "loss": 0.0422, + "learning_rate": 1.759006443761409e-05, + "loss": 0.0229, "step": 133565 }, { "epoch": 6.23, - "learning_rate": 7.570202990952135e-06, - "loss": 0.0631, + "learning_rate": 1.758959636777807e-05, + "loss": 0.1316, "step": 133570 }, { "epoch": 6.23, - "learning_rate": 7.569734189676996e-06, - "loss": 0.1029, + "learning_rate": 1.758912829794205e-05, + "loss": 0.131, "step": 133575 }, { "epoch": 6.23, - "learning_rate": 7.569265388401858e-06, - "loss": 0.1094, + "learning_rate": 1.7588660228106034e-05, + "loss": 0.0917, "step": 133580 }, { "epoch": 6.23, - "learning_rate": 7.568796587126718e-06, - "loss": 0.1962, + "learning_rate": 1.7588192158270014e-05, + "loss": 0.1015, "step": 133585 }, { "epoch": 6.23, - "learning_rate": 7.568327785851578e-06, - "loss": 0.1879, + "learning_rate": 1.7587724088433994e-05, + "loss": 0.0728, "step": 133590 }, { "epoch": 6.23, - "learning_rate": 7.567858984576438e-06, - "loss": 0.0079, + "learning_rate": 1.7587256018597974e-05, + "loss": 0.0112, "step": 133595 }, { "epoch": 6.23, - "learning_rate": 7.5673901833013e-06, - "loss": 0.0356, + "learning_rate": 1.7586787948761957e-05, + "loss": 0.0199, "step": 133600 }, { "epoch": 6.23, - "learning_rate": 7.56692138202616e-06, - "loss": 0.0434, + "learning_rate": 1.7586319878925937e-05, + "loss": 0.0083, "step": 133605 }, { "epoch": 6.23, - "learning_rate": 7.56645258075102e-06, - "loss": 0.0623, + "learning_rate": 1.7585851809089916e-05, + "loss": 0.0325, "step": 133610 }, { "epoch": 6.23, - "learning_rate": 7.565983779475881e-06, - "loss": 0.0298, + "learning_rate": 1.75853837392539e-05, + "loss": 0.0344, "step": 133615 }, { "epoch": 6.23, - "learning_rate": 7.565514978200741e-06, - "loss": 0.0585, + "learning_rate": 1.758491566941788e-05, + "loss": 0.0362, "step": 133620 }, { "epoch": 6.24, - "learning_rate": 7.565046176925602e-06, - "loss": 0.0661, + "learning_rate": 1.758444759958186e-05, + "loss": 0.0512, "step": 133625 }, { "epoch": 6.24, - "learning_rate": 7.564577375650462e-06, - "loss": 0.0485, + "learning_rate": 1.7583979529745836e-05, + "loss": 0.1822, "step": 133630 }, { "epoch": 6.24, - "learning_rate": 7.564108574375323e-06, - "loss": 0.1878, + "learning_rate": 1.758351145990982e-05, + "loss": 0.1147, "step": 133635 }, { "epoch": 6.24, - "learning_rate": 7.563639773100183e-06, - "loss": 0.186, + "learning_rate": 1.75830433900738e-05, + "loss": 0.1853, "step": 133640 }, { "epoch": 6.24, - "learning_rate": 7.563170971825044e-06, - "loss": 0.0237, + "learning_rate": 1.758257532023778e-05, + "loss": 0.0024, "step": 133645 }, { "epoch": 6.24, - "learning_rate": 7.5627021705499044e-06, - "loss": 0.017, + "learning_rate": 1.758210725040176e-05, + "loss": 0.0317, "step": 133650 }, { "epoch": 6.24, - "learning_rate": 7.562233369274765e-06, - "loss": 0.0461, + "learning_rate": 1.758163918056574e-05, + "loss": 0.0374, "step": 133655 }, { "epoch": 6.24, - "learning_rate": 7.561764567999625e-06, - "loss": 0.0369, + "learning_rate": 1.758117111072972e-05, + "loss": 0.0717, "step": 133660 }, { "epoch": 6.24, - "learning_rate": 7.561295766724486e-06, - "loss": 0.0162, + "learning_rate": 1.75807030408937e-05, + "loss": 0.0355, "step": 133665 }, { "epoch": 6.24, - "learning_rate": 7.560826965449347e-06, - "loss": 0.0825, + "learning_rate": 1.7580234971057684e-05, + "loss": 0.0168, "step": 133670 }, { "epoch": 6.24, - "learning_rate": 7.5603581641742075e-06, - "loss": 0.1246, + "learning_rate": 1.7579766901221664e-05, + "loss": 0.0242, "step": 133675 }, { "epoch": 6.24, - "learning_rate": 7.5598893628990674e-06, - "loss": 0.1923, + "learning_rate": 1.7579298831385644e-05, + "loss": 0.158, "step": 133680 }, { "epoch": 6.24, - "learning_rate": 7.559420561623928e-06, - "loss": 0.1873, + "learning_rate": 1.7578830761549624e-05, + "loss": 0.108, "step": 133685 }, { "epoch": 6.24, - "learning_rate": 7.558951760348788e-06, - "loss": 0.2094, + "learning_rate": 1.7578362691713604e-05, + "loss": 0.2246, "step": 133690 }, { "epoch": 6.24, - "learning_rate": 7.55848295907365e-06, - "loss": 0.0123, + "learning_rate": 1.7577894621877583e-05, + "loss": 0.0005, "step": 133695 }, { "epoch": 6.24, - "learning_rate": 7.55801415779851e-06, - "loss": 0.0507, + "learning_rate": 1.7577426552041563e-05, + "loss": 0.0308, "step": 133700 }, { "epoch": 6.24, - "learning_rate": 7.5575453565233705e-06, - "loss": 0.0217, + "learning_rate": 1.7576958482205543e-05, + "loss": 0.0315, "step": 133705 }, { "epoch": 6.24, - "learning_rate": 7.5570765552482304e-06, - "loss": 0.0624, + "learning_rate": 1.7576490412369526e-05, + "loss": 0.0971, "step": 133710 }, { "epoch": 6.24, - "learning_rate": 7.556607753973091e-06, - "loss": 0.0294, + "learning_rate": 1.7576022342533506e-05, + "loss": 0.0682, "step": 133715 }, { "epoch": 6.24, - "learning_rate": 7.556138952697952e-06, - "loss": 0.0975, + "learning_rate": 1.7575554272697486e-05, + "loss": 0.1058, "step": 133720 }, { "epoch": 6.24, - "learning_rate": 7.555670151422813e-06, - "loss": 0.109, + "learning_rate": 1.757508620286147e-05, + "loss": 0.0437, "step": 133725 }, { "epoch": 6.24, - "learning_rate": 7.555201350147673e-06, - "loss": 0.0737, + "learning_rate": 1.757461813302545e-05, + "loss": 0.0689, "step": 133730 }, { "epoch": 6.24, - "learning_rate": 7.5547325488725335e-06, - "loss": 0.1987, + "learning_rate": 1.757415006318943e-05, + "loss": 0.1217, "step": 133735 }, { "epoch": 6.24, - "learning_rate": 7.554263747597394e-06, - "loss": 0.2115, + "learning_rate": 1.757368199335341e-05, + "loss": 0.0872, "step": 133740 }, { "epoch": 6.24, - "learning_rate": 7.553794946322255e-06, - "loss": 0.0264, + "learning_rate": 1.7573213923517392e-05, + "loss": 0.024, "step": 133745 }, { "epoch": 6.24, - "learning_rate": 7.553326145047115e-06, - "loss": 0.0142, + "learning_rate": 1.757274585368137e-05, + "loss": 0.0273, "step": 133750 }, { "epoch": 6.24, - "learning_rate": 7.552857343771976e-06, - "loss": 0.0714, + "learning_rate": 1.7572277783845348e-05, + "loss": 0.0451, "step": 133755 }, { "epoch": 6.24, - "learning_rate": 7.552388542496836e-06, - "loss": 0.0378, + "learning_rate": 1.7571809714009328e-05, + "loss": 0.0423, "step": 133760 }, { "epoch": 6.24, - "learning_rate": 7.551919741221697e-06, - "loss": 0.06, + "learning_rate": 1.757134164417331e-05, + "loss": 0.0508, "step": 133765 }, { "epoch": 6.24, - "learning_rate": 7.551450939946557e-06, - "loss": 0.0382, + "learning_rate": 1.757087357433729e-05, + "loss": 0.0593, "step": 133770 }, { "epoch": 6.24, - "learning_rate": 7.550982138671418e-06, - "loss": 0.0954, + "learning_rate": 1.757040550450127e-05, + "loss": 0.11, "step": 133775 }, { "epoch": 6.24, - "learning_rate": 7.550513337396278e-06, - "loss": 0.1519, + "learning_rate": 1.7569937434665254e-05, + "loss": 0.0784, "step": 133780 }, { "epoch": 6.24, - "learning_rate": 7.55004453612114e-06, - "loss": 0.1925, + "learning_rate": 1.7569469364829234e-05, + "loss": 0.3361, "step": 133785 }, { "epoch": 6.24, - "learning_rate": 7.5495757348459995e-06, - "loss": 0.1588, + "learning_rate": 1.7569001294993214e-05, + "loss": 0.2491, "step": 133790 }, { "epoch": 6.24, - "learning_rate": 7.54910693357086e-06, - "loss": 0.0153, + "learning_rate": 1.7568533225157193e-05, + "loss": 0.0064, "step": 133795 }, { "epoch": 6.24, - "learning_rate": 7.54863813229572e-06, - "loss": 0.0216, + "learning_rate": 1.7568065155321177e-05, + "loss": 0.024, "step": 133800 }, { "epoch": 6.24, - "learning_rate": 7.54816933102058e-06, - "loss": 0.0136, + "learning_rate": 1.7567597085485156e-05, + "loss": 0.0499, "step": 133805 }, { "epoch": 6.24, - "learning_rate": 7.547700529745442e-06, - "loss": 0.0548, + "learning_rate": 1.7567129015649136e-05, + "loss": 0.0709, "step": 133810 }, { "epoch": 6.24, - "learning_rate": 7.547231728470303e-06, - "loss": 0.0287, + "learning_rate": 1.7566660945813116e-05, + "loss": 0.0418, "step": 133815 }, { "epoch": 6.24, - "learning_rate": 7.5467629271951625e-06, - "loss": 0.0252, + "learning_rate": 1.7566192875977096e-05, + "loss": 0.1056, "step": 133820 }, { "epoch": 6.24, - "learning_rate": 7.5462941259200225e-06, - "loss": 0.0793, + "learning_rate": 1.7565724806141076e-05, + "loss": 0.1619, "step": 133825 }, { "epoch": 6.24, - "learning_rate": 7.545825324644883e-06, - "loss": 0.0966, + "learning_rate": 1.7565256736305055e-05, + "loss": 0.2061, "step": 133830 }, { "epoch": 6.24, - "learning_rate": 7.545356523369745e-06, - "loss": 0.1591, + "learning_rate": 1.7564788666469035e-05, + "loss": 0.1557, "step": 133835 }, { "epoch": 6.25, - "learning_rate": 7.544887722094605e-06, - "loss": 0.3229, + "learning_rate": 1.756432059663302e-05, + "loss": 0.093, "step": 133840 }, { "epoch": 6.25, - "learning_rate": 7.544418920819465e-06, - "loss": 0.0121, + "learning_rate": 1.7563852526797e-05, + "loss": 0.0019, "step": 133845 }, { "epoch": 6.25, - "learning_rate": 7.5439501195443255e-06, + "learning_rate": 1.7563384456960978e-05, "loss": 0.0105, "step": 133850 }, { "epoch": 6.25, - "learning_rate": 7.543481318269187e-06, - "loss": 0.0436, + "learning_rate": 1.756291638712496e-05, + "loss": 0.0261, "step": 133855 }, { "epoch": 6.25, - "learning_rate": 7.543012516994047e-06, - "loss": 0.0555, + "learning_rate": 1.756244831728894e-05, + "loss": 0.0596, "step": 133860 }, { "epoch": 6.25, - "learning_rate": 7.542543715718907e-06, - "loss": 0.0443, + "learning_rate": 1.756198024745292e-05, + "loss": 0.0613, "step": 133865 }, { "epoch": 6.25, - "learning_rate": 7.542074914443768e-06, - "loss": 0.0745, + "learning_rate": 1.75615121776169e-05, + "loss": 0.0798, "step": 133870 }, { "epoch": 6.25, - "learning_rate": 7.541606113168628e-06, - "loss": 0.0498, + "learning_rate": 1.7561044107780884e-05, + "loss": 0.0954, "step": 133875 }, { "epoch": 6.25, - "learning_rate": 7.541137311893489e-06, - "loss": 0.1648, + "learning_rate": 1.756057603794486e-05, + "loss": 0.1024, "step": 133880 }, { "epoch": 6.25, - "learning_rate": 7.54066851061835e-06, - "loss": 0.194, + "learning_rate": 1.756010796810884e-05, + "loss": 0.1814, "step": 133885 }, { "epoch": 6.25, - "learning_rate": 7.54019970934321e-06, - "loss": 0.1173, + "learning_rate": 1.755963989827282e-05, + "loss": 0.2058, "step": 133890 }, { "epoch": 6.25, - "learning_rate": 7.53973090806807e-06, - "loss": 0.0142, + "learning_rate": 1.7559171828436803e-05, + "loss": 0.0141, "step": 133895 }, { "epoch": 6.25, - "learning_rate": 7.539262106792931e-06, - "loss": 0.0183, + "learning_rate": 1.7558703758600783e-05, + "loss": 0.0228, "step": 133900 }, { "epoch": 6.25, - "learning_rate": 7.538793305517792e-06, - "loss": 0.049, + "learning_rate": 1.7558235688764763e-05, + "loss": 0.0089, "step": 133905 }, { "epoch": 6.25, - "learning_rate": 7.538324504242652e-06, - "loss": 0.0576, + "learning_rate": 1.7557767618928746e-05, + "loss": 0.0496, "step": 133910 }, { "epoch": 6.25, - "learning_rate": 7.537855702967512e-06, - "loss": 0.0699, + "learning_rate": 1.7557299549092726e-05, + "loss": 0.0146, "step": 133915 }, { "epoch": 6.25, - "learning_rate": 7.537386901692373e-06, - "loss": 0.0707, + "learning_rate": 1.7556831479256706e-05, + "loss": 0.0503, "step": 133920 }, { "epoch": 6.25, - "learning_rate": 7.536918100417235e-06, - "loss": 0.1092, + "learning_rate": 1.7556363409420686e-05, + "loss": 0.0517, "step": 133925 }, { "epoch": 6.25, - "learning_rate": 7.536449299142095e-06, - "loss": 0.0751, + "learning_rate": 1.755589533958467e-05, + "loss": 0.0711, "step": 133930 }, { "epoch": 6.25, - "learning_rate": 7.5359804978669545e-06, - "loss": 0.1287, + "learning_rate": 1.755542726974865e-05, + "loss": 0.1623, "step": 133935 }, { "epoch": 6.25, - "learning_rate": 7.535511696591815e-06, - "loss": 0.2029, + "learning_rate": 1.755495919991263e-05, + "loss": 0.1115, "step": 133940 }, { "epoch": 6.25, - "learning_rate": 7.535042895316675e-06, - "loss": 0.0272, + "learning_rate": 1.7554491130076605e-05, + "loss": 0.0456, "step": 133945 }, { "epoch": 6.25, - "learning_rate": 7.534574094041537e-06, - "loss": 0.0106, + "learning_rate": 1.7554023060240588e-05, + "loss": 0.0164, "step": 133950 }, { "epoch": 6.25, - "learning_rate": 7.534105292766397e-06, - "loss": 0.0224, + "learning_rate": 1.7553554990404568e-05, + "loss": 0.0085, "step": 133955 }, { "epoch": 6.25, - "learning_rate": 7.533636491491258e-06, - "loss": 0.0846, + "learning_rate": 1.7553086920568548e-05, + "loss": 0.0329, "step": 133960 }, { "epoch": 6.25, - "learning_rate": 7.5331676902161175e-06, - "loss": 0.0429, + "learning_rate": 1.755261885073253e-05, + "loss": 0.0732, "step": 133965 }, { "epoch": 6.25, - "learning_rate": 7.532698888940978e-06, - "loss": 0.147, + "learning_rate": 1.755215078089651e-05, + "loss": 0.0136, "step": 133970 }, { "epoch": 6.25, - "learning_rate": 7.532230087665839e-06, - "loss": 0.0575, + "learning_rate": 1.755168271106049e-05, + "loss": 0.0523, "step": 133975 }, { "epoch": 6.25, - "learning_rate": 7.5317612863907e-06, - "loss": 0.0873, + "learning_rate": 1.755121464122447e-05, + "loss": 0.1505, "step": 133980 }, { "epoch": 6.25, - "learning_rate": 7.53129248511556e-06, - "loss": 0.1459, + "learning_rate": 1.7550746571388454e-05, + "loss": 0.0816, "step": 133985 }, { "epoch": 6.25, - "learning_rate": 7.530823683840421e-06, - "loss": 0.1365, + "learning_rate": 1.7550278501552433e-05, + "loss": 0.1279, "step": 133990 }, { "epoch": 6.25, - "learning_rate": 7.530354882565281e-06, - "loss": 0.0068, + "learning_rate": 1.7549810431716413e-05, + "loss": 0.0082, "step": 133995 }, { "epoch": 6.25, - "learning_rate": 7.529886081290142e-06, - "loss": 0.003, + "learning_rate": 1.7549342361880393e-05, + "loss": 0.0145, "step": 134000 }, { "epoch": 6.25, - "learning_rate": 7.529417280015002e-06, - "loss": 0.0258, + "learning_rate": 1.7548874292044376e-05, + "loss": 0.0186, "step": 134005 }, { "epoch": 6.25, - "learning_rate": 7.528948478739863e-06, - "loss": 0.0453, + "learning_rate": 1.7548406222208353e-05, + "loss": 0.0119, "step": 134010 }, { "epoch": 6.25, - "learning_rate": 7.528479677464723e-06, - "loss": 0.0414, + "learning_rate": 1.7547938152372332e-05, + "loss": 0.0496, "step": 134015 }, { "epoch": 6.25, - "learning_rate": 7.5280108761895844e-06, - "loss": 0.0285, + "learning_rate": 1.7547470082536312e-05, + "loss": 0.0643, "step": 134020 }, { "epoch": 6.25, - "learning_rate": 7.527542074914444e-06, - "loss": 0.1138, + "learning_rate": 1.7547002012700295e-05, + "loss": 0.0776, "step": 134025 }, { "epoch": 6.25, - "learning_rate": 7.527073273639305e-06, - "loss": 0.1778, + "learning_rate": 1.7546533942864275e-05, + "loss": 0.0575, "step": 134030 }, { "epoch": 6.25, - "learning_rate": 7.526604472364165e-06, - "loss": 0.209, + "learning_rate": 1.7546065873028255e-05, + "loss": 0.1225, "step": 134035 }, { "epoch": 6.25, - "learning_rate": 7.526135671089025e-06, - "loss": 0.191, + "learning_rate": 1.754559780319224e-05, + "loss": 0.1443, "step": 134040 }, { "epoch": 6.25, - "learning_rate": 7.525666869813887e-06, - "loss": 0.0065, + "learning_rate": 1.7545129733356218e-05, + "loss": 0.0072, "step": 134045 }, { "epoch": 6.25, - "learning_rate": 7.525198068538747e-06, - "loss": 0.0186, + "learning_rate": 1.7544661663520198e-05, + "loss": 0.0101, "step": 134050 }, { "epoch": 6.26, - "learning_rate": 7.524729267263607e-06, - "loss": 0.0604, + "learning_rate": 1.7544193593684178e-05, + "loss": 0.039, "step": 134055 }, { "epoch": 6.26, - "learning_rate": 7.524260465988468e-06, - "loss": 0.0296, + "learning_rate": 1.754372552384816e-05, + "loss": 0.065, "step": 134060 }, { "epoch": 6.26, - "learning_rate": 7.523791664713329e-06, - "loss": 0.0571, + "learning_rate": 1.754325745401214e-05, + "loss": 0.086, "step": 134065 }, { "epoch": 6.26, - "learning_rate": 7.52332286343819e-06, - "loss": 0.0576, + "learning_rate": 1.7542789384176117e-05, + "loss": 0.0535, "step": 134070 }, { "epoch": 6.26, - "learning_rate": 7.52285406216305e-06, - "loss": 0.0779, + "learning_rate": 1.7542321314340097e-05, + "loss": 0.0649, "step": 134075 }, { "epoch": 6.26, - "learning_rate": 7.52238526088791e-06, - "loss": 0.1583, + "learning_rate": 1.754185324450408e-05, + "loss": 0.0715, "step": 134080 }, { "epoch": 6.26, - "learning_rate": 7.52191645961277e-06, - "loss": 0.1847, + "learning_rate": 1.754138517466806e-05, + "loss": 0.2096, "step": 134085 }, { "epoch": 6.26, - "learning_rate": 7.521447658337632e-06, - "loss": 0.1818, + "learning_rate": 1.754091710483204e-05, + "loss": 0.3257, "step": 134090 }, { "epoch": 6.26, - "learning_rate": 7.520978857062492e-06, - "loss": 0.0065, + "learning_rate": 1.7540449034996023e-05, + "loss": 0.0125, "step": 134095 }, { "epoch": 6.26, - "learning_rate": 7.520510055787353e-06, - "loss": 0.0292, + "learning_rate": 1.7539980965160003e-05, + "loss": 0.0126, "step": 134100 }, { "epoch": 6.26, - "learning_rate": 7.520041254512213e-06, - "loss": 0.0175, + "learning_rate": 1.7539512895323983e-05, + "loss": 0.0077, "step": 134105 }, { "epoch": 6.26, - "learning_rate": 7.519572453237074e-06, - "loss": 0.0181, + "learning_rate": 1.7539044825487963e-05, + "loss": 0.0252, "step": 134110 }, { "epoch": 6.26, - "learning_rate": 7.519103651961934e-06, - "loss": 0.048, + "learning_rate": 1.7538576755651946e-05, + "loss": 0.0455, "step": 134115 }, { "epoch": 6.26, - "learning_rate": 7.518634850686795e-06, - "loss": 0.063, + "learning_rate": 1.7538108685815926e-05, + "loss": 0.0365, "step": 134120 }, { "epoch": 6.26, - "learning_rate": 7.518166049411655e-06, - "loss": 0.067, + "learning_rate": 1.7537640615979905e-05, + "loss": 0.0548, "step": 134125 }, { "epoch": 6.26, - "learning_rate": 7.517697248136515e-06, - "loss": 0.1133, + "learning_rate": 1.7537172546143885e-05, + "loss": 0.0788, "step": 134130 }, { "epoch": 6.26, - "learning_rate": 7.5172284468613765e-06, - "loss": 0.2967, + "learning_rate": 1.7536704476307865e-05, + "loss": 0.1121, "step": 134135 }, { "epoch": 6.26, - "learning_rate": 7.516759645586237e-06, - "loss": 0.1978, + "learning_rate": 1.7536236406471845e-05, + "loss": 0.1068, "step": 134140 }, { "epoch": 6.26, - "learning_rate": 7.516290844311097e-06, - "loss": 0.0385, + "learning_rate": 1.7535768336635825e-05, + "loss": 0.0076, "step": 134145 }, { "epoch": 6.26, - "learning_rate": 7.515822043035957e-06, - "loss": 0.0103, + "learning_rate": 1.7535300266799808e-05, + "loss": 0.0203, "step": 134150 }, { "epoch": 6.26, - "learning_rate": 7.515353241760818e-06, - "loss": 0.0311, + "learning_rate": 1.7534832196963788e-05, + "loss": 0.0499, "step": 134155 }, { "epoch": 6.26, - "learning_rate": 7.5148844404856795e-06, - "loss": 0.0522, + "learning_rate": 1.7534364127127767e-05, + "loss": 0.0287, "step": 134160 }, { "epoch": 6.26, - "learning_rate": 7.5144156392105394e-06, - "loss": 0.0761, + "learning_rate": 1.7533896057291747e-05, + "loss": 0.0357, "step": 134165 }, { "epoch": 6.26, - "learning_rate": 7.513946837935399e-06, - "loss": 0.0938, + "learning_rate": 1.753342798745573e-05, + "loss": 0.0888, "step": 134170 }, { "epoch": 6.26, - "learning_rate": 7.51347803666026e-06, - "loss": 0.0465, + "learning_rate": 1.753295991761971e-05, + "loss": 0.074, "step": 134175 }, { "epoch": 6.26, - "learning_rate": 7.513009235385122e-06, - "loss": 0.1468, + "learning_rate": 1.753249184778369e-05, + "loss": 0.1074, "step": 134180 }, { "epoch": 6.26, - "learning_rate": 7.512540434109982e-06, - "loss": 0.1477, + "learning_rate": 1.753202377794767e-05, + "loss": 0.2266, "step": 134185 }, { "epoch": 6.26, - "learning_rate": 7.512071632834842e-06, - "loss": 0.166, + "learning_rate": 1.7531555708111653e-05, + "loss": 0.1436, "step": 134190 }, { "epoch": 6.26, - "learning_rate": 7.5116028315597024e-06, - "loss": 0.0456, + "learning_rate": 1.7531087638275633e-05, + "loss": 0.0328, "step": 134195 }, { "epoch": 6.26, - "learning_rate": 7.511134030284562e-06, - "loss": 0.0595, + "learning_rate": 1.753061956843961e-05, + "loss": 0.0435, "step": 134200 }, { "epoch": 6.26, - "learning_rate": 7.510665229009424e-06, - "loss": 0.0618, + "learning_rate": 1.753015149860359e-05, + "loss": 0.012, "step": 134205 }, { "epoch": 6.26, - "learning_rate": 7.510196427734284e-06, - "loss": 0.0635, + "learning_rate": 1.7529683428767572e-05, + "loss": 0.0742, "step": 134210 }, { "epoch": 6.26, - "learning_rate": 7.509727626459145e-06, - "loss": 0.0221, + "learning_rate": 1.7529215358931552e-05, + "loss": 0.0577, "step": 134215 }, { "epoch": 6.26, - "learning_rate": 7.509258825184005e-06, - "loss": 0.0588, + "learning_rate": 1.7528747289095532e-05, + "loss": 0.079, "step": 134220 }, { "epoch": 6.26, - "learning_rate": 7.508790023908865e-06, - "loss": 0.0935, + "learning_rate": 1.7528279219259515e-05, + "loss": 0.0528, "step": 134225 }, { "epoch": 6.26, - "learning_rate": 7.508321222633726e-06, - "loss": 0.0952, + "learning_rate": 1.7527811149423495e-05, + "loss": 0.1035, "step": 134230 }, { "epoch": 6.26, - "learning_rate": 7.507852421358587e-06, - "loss": 0.1824, + "learning_rate": 1.7527343079587475e-05, + "loss": 0.2028, "step": 134235 }, { "epoch": 6.26, - "learning_rate": 7.507383620083447e-06, - "loss": 0.1142, + "learning_rate": 1.7526875009751455e-05, + "loss": 0.0741, "step": 134240 }, { "epoch": 6.26, - "learning_rate": 7.506914818808308e-06, - "loss": 0.0057, + "learning_rate": 1.7526406939915438e-05, + "loss": 0.0323, "step": 134245 }, { "epoch": 6.26, - "learning_rate": 7.5064460175331685e-06, - "loss": 0.0392, + "learning_rate": 1.7525938870079418e-05, + "loss": 0.0294, "step": 134250 }, { "epoch": 6.26, - "learning_rate": 7.505977216258029e-06, - "loss": 0.0193, + "learning_rate": 1.7525470800243398e-05, + "loss": 0.0109, "step": 134255 }, { "epoch": 6.26, - "learning_rate": 7.505508414982889e-06, - "loss": 0.0203, + "learning_rate": 1.7525002730407374e-05, + "loss": 0.0205, "step": 134260 }, { "epoch": 6.26, - "learning_rate": 7.50503961370775e-06, - "loss": 0.009, + "learning_rate": 1.7524534660571357e-05, + "loss": 0.0407, "step": 134265 }, { "epoch": 6.27, - "learning_rate": 7.50457081243261e-06, - "loss": 0.1062, + "learning_rate": 1.7524066590735337e-05, + "loss": 0.0401, "step": 134270 }, { "epoch": 6.27, - "learning_rate": 7.5041020111574715e-06, - "loss": 0.0586, + "learning_rate": 1.7523598520899317e-05, + "loss": 0.1082, "step": 134275 }, { "epoch": 6.27, - "learning_rate": 7.5036332098823315e-06, - "loss": 0.1337, + "learning_rate": 1.75231304510633e-05, + "loss": 0.1699, "step": 134280 }, { "epoch": 6.27, - "learning_rate": 7.503164408607192e-06, - "loss": 0.1436, + "learning_rate": 1.752266238122728e-05, + "loss": 0.129, "step": 134285 }, { "epoch": 6.27, - "learning_rate": 7.502695607332052e-06, - "loss": 0.1626, + "learning_rate": 1.752219431139126e-05, + "loss": 0.1591, "step": 134290 }, { "epoch": 6.27, - "learning_rate": 7.502226806056913e-06, - "loss": 0.0455, + "learning_rate": 1.752172624155524e-05, + "loss": 0.0074, "step": 134295 }, { "epoch": 6.27, - "learning_rate": 7.501758004781774e-06, - "loss": 0.0562, + "learning_rate": 1.7521258171719223e-05, + "loss": 0.0251, "step": 134300 }, { "epoch": 6.27, - "learning_rate": 7.5012892035066345e-06, - "loss": 0.0105, + "learning_rate": 1.7520790101883203e-05, + "loss": 0.0424, "step": 134305 }, { "epoch": 6.27, - "learning_rate": 7.5008204022314945e-06, - "loss": 0.0154, + "learning_rate": 1.7520322032047182e-05, + "loss": 0.0157, "step": 134310 }, { "epoch": 6.27, - "learning_rate": 7.500351600956355e-06, - "loss": 0.0204, + "learning_rate": 1.7519853962211162e-05, + "loss": 0.0691, "step": 134315 }, { "epoch": 6.27, - "learning_rate": 7.499882799681216e-06, - "loss": 0.0479, + "learning_rate": 1.7519385892375145e-05, + "loss": 0.0251, "step": 134320 }, { "epoch": 6.27, - "learning_rate": 7.499413998406077e-06, - "loss": 0.0779, + "learning_rate": 1.7518917822539122e-05, + "loss": 0.0705, "step": 134325 }, { "epoch": 6.27, - "learning_rate": 7.498945197130937e-06, - "loss": 0.0903, + "learning_rate": 1.75184497527031e-05, + "loss": 0.0958, "step": 134330 }, { "epoch": 6.27, - "learning_rate": 7.4984763958557975e-06, - "loss": 0.2031, + "learning_rate": 1.7517981682867085e-05, + "loss": 0.1789, "step": 134335 }, { "epoch": 6.27, - "learning_rate": 7.4980075945806574e-06, - "loss": 0.0618, + "learning_rate": 1.7517513613031065e-05, + "loss": 0.1986, "step": 134340 }, { "epoch": 6.27, - "learning_rate": 7.497538793305519e-06, - "loss": 0.0218, + "learning_rate": 1.7517045543195044e-05, + "loss": 0.0167, "step": 134345 }, { "epoch": 6.27, - "learning_rate": 7.497069992030379e-06, - "loss": 0.019, + "learning_rate": 1.7516577473359024e-05, + "loss": 0.0191, "step": 134350 }, { "epoch": 6.27, - "learning_rate": 7.49660119075524e-06, - "loss": 0.0416, + "learning_rate": 1.7516109403523007e-05, + "loss": 0.0245, "step": 134355 }, { "epoch": 6.27, - "learning_rate": 7.4961323894801e-06, - "loss": 0.0195, + "learning_rate": 1.7515641333686987e-05, + "loss": 0.0285, "step": 134360 }, { "epoch": 6.27, - "learning_rate": 7.49566358820496e-06, - "loss": 0.025, + "learning_rate": 1.7515173263850967e-05, + "loss": 0.0783, "step": 134365 }, { "epoch": 6.27, - "learning_rate": 7.495194786929821e-06, - "loss": 0.0572, + "learning_rate": 1.7514705194014947e-05, + "loss": 0.1142, "step": 134370 }, { "epoch": 6.27, - "learning_rate": 7.494725985654682e-06, - "loss": 0.0562, + "learning_rate": 1.751423712417893e-05, + "loss": 0.0996, "step": 134375 }, { "epoch": 6.27, - "learning_rate": 7.494257184379542e-06, - "loss": 0.0865, + "learning_rate": 1.751376905434291e-05, + "loss": 0.0765, "step": 134380 }, { "epoch": 6.27, - "learning_rate": 7.493788383104402e-06, - "loss": 0.2577, + "learning_rate": 1.751330098450689e-05, + "loss": 0.1823, "step": 134385 }, { "epoch": 6.27, - "learning_rate": 7.4933195818292636e-06, - "loss": 0.2453, + "learning_rate": 1.751283291467087e-05, + "loss": 0.1568, "step": 134390 }, { "epoch": 6.27, - "learning_rate": 7.492850780554124e-06, - "loss": 0.0239, + "learning_rate": 1.751236484483485e-05, + "loss": 0.0196, "step": 134395 }, { "epoch": 6.27, - "learning_rate": 7.492381979278984e-06, - "loss": 0.0104, + "learning_rate": 1.751189677499883e-05, + "loss": 0.0319, "step": 134400 }, { "epoch": 6.27, - "learning_rate": 7.491913178003844e-06, - "loss": 0.0191, + "learning_rate": 1.751142870516281e-05, + "loss": 0.0199, "step": 134405 }, { "epoch": 6.27, - "learning_rate": 7.491444376728705e-06, - "loss": 0.0098, + "learning_rate": 1.7510960635326792e-05, + "loss": 0.0151, "step": 134410 }, { "epoch": 6.27, - "learning_rate": 7.490975575453567e-06, - "loss": 0.0463, + "learning_rate": 1.7510492565490772e-05, + "loss": 0.0227, "step": 134415 }, { "epoch": 6.27, - "learning_rate": 7.4905067741784266e-06, - "loss": 0.0876, + "learning_rate": 1.7510024495654752e-05, + "loss": 0.0453, "step": 134420 }, { "epoch": 6.27, - "learning_rate": 7.4900379729032865e-06, - "loss": 0.0474, + "learning_rate": 1.7509556425818732e-05, + "loss": 0.0736, "step": 134425 }, { "epoch": 6.27, - "learning_rate": 7.489569171628147e-06, - "loss": 0.0642, + "learning_rate": 1.7509088355982715e-05, + "loss": 0.1617, "step": 134430 }, { "epoch": 6.27, - "learning_rate": 7.489100370353009e-06, - "loss": 0.1597, + "learning_rate": 1.7508620286146695e-05, + "loss": 0.1858, "step": 134435 }, { "epoch": 6.27, - "learning_rate": 7.488631569077869e-06, - "loss": 0.1651, + "learning_rate": 1.7508152216310675e-05, + "loss": 0.1698, "step": 134440 }, { "epoch": 6.27, - "learning_rate": 7.488162767802729e-06, - "loss": 0.0005, + "learning_rate": 1.7507684146474654e-05, + "loss": 0.0245, "step": 134445 }, { "epoch": 6.27, - "learning_rate": 7.4876939665275895e-06, - "loss": 0.0231, + "learning_rate": 1.7507216076638634e-05, + "loss": 0.0415, "step": 134450 }, { "epoch": 6.27, - "learning_rate": 7.4872251652524495e-06, - "loss": 0.0282, + "learning_rate": 1.7506748006802614e-05, + "loss": 0.0192, "step": 134455 }, { "epoch": 6.27, - "learning_rate": 7.486756363977311e-06, - "loss": 0.046, + "learning_rate": 1.7506279936966594e-05, + "loss": 0.022, "step": 134460 }, { "epoch": 6.27, - "learning_rate": 7.486287562702171e-06, - "loss": 0.0106, + "learning_rate": 1.7505811867130577e-05, + "loss": 0.0029, "step": 134465 }, { "epoch": 6.27, - "learning_rate": 7.485818761427032e-06, - "loss": 0.052, + "learning_rate": 1.7505343797294557e-05, + "loss": 0.0967, "step": 134470 }, { "epoch": 6.27, - "learning_rate": 7.485349960151892e-06, - "loss": 0.0438, + "learning_rate": 1.7504875727458537e-05, + "loss": 0.0478, "step": 134475 }, { "epoch": 6.28, - "learning_rate": 7.4848811588767525e-06, - "loss": 0.1282, + "learning_rate": 1.7504407657622516e-05, + "loss": 0.059, "step": 134480 }, { "epoch": 6.28, - "learning_rate": 7.484412357601613e-06, - "loss": 0.1227, + "learning_rate": 1.75039395877865e-05, + "loss": 0.1919, "step": 134485 }, { "epoch": 6.28, - "learning_rate": 7.483943556326474e-06, - "loss": 0.2339, + "learning_rate": 1.750347151795048e-05, + "loss": 0.2474, "step": 134490 }, { "epoch": 6.28, - "learning_rate": 7.483474755051334e-06, - "loss": 0.0295, + "learning_rate": 1.750300344811446e-05, + "loss": 0.0078, "step": 134495 }, { "epoch": 6.28, - "learning_rate": 7.483005953776195e-06, - "loss": 0.019, + "learning_rate": 1.750253537827844e-05, + "loss": 0.0321, "step": 134500 }, { "epoch": 6.28, - "learning_rate": 7.482537152501056e-06, - "loss": 0.0197, + "learning_rate": 1.7502067308442422e-05, + "loss": 0.0147, "step": 134505 }, { "epoch": 6.28, - "learning_rate": 7.482068351225916e-06, - "loss": 0.0201, + "learning_rate": 1.7501599238606402e-05, + "loss": 0.0386, "step": 134510 }, { "epoch": 6.28, - "learning_rate": 7.481599549950776e-06, - "loss": 0.0629, + "learning_rate": 1.750113116877038e-05, + "loss": 0.0435, "step": 134515 }, { "epoch": 6.28, - "learning_rate": 7.481130748675637e-06, - "loss": 0.0477, + "learning_rate": 1.7500663098934362e-05, + "loss": 0.0579, "step": 134520 }, { "epoch": 6.28, - "learning_rate": 7.480661947400497e-06, - "loss": 0.0723, + "learning_rate": 1.750019502909834e-05, + "loss": 0.1343, "step": 134525 }, { "epoch": 6.28, - "learning_rate": 7.480193146125359e-06, - "loss": 0.1134, + "learning_rate": 1.749972695926232e-05, + "loss": 0.108, "step": 134530 }, { "epoch": 6.28, - "learning_rate": 7.479724344850219e-06, - "loss": 0.0888, + "learning_rate": 1.74992588894263e-05, + "loss": 0.1585, "step": 134535 }, { "epoch": 6.28, - "learning_rate": 7.479255543575079e-06, - "loss": 0.2173, + "learning_rate": 1.7498790819590284e-05, + "loss": 0.0791, "step": 134540 }, { "epoch": 6.28, - "learning_rate": 7.478786742299939e-06, - "loss": 0.0066, + "learning_rate": 1.7498322749754264e-05, + "loss": 0.0298, "step": 134545 }, { "epoch": 6.28, - "learning_rate": 7.4783179410248e-06, - "loss": 0.018, + "learning_rate": 1.7497854679918244e-05, + "loss": 0.0113, "step": 134550 }, { "epoch": 6.28, - "learning_rate": 7.477849139749661e-06, - "loss": 0.0135, + "learning_rate": 1.7497386610082224e-05, + "loss": 0.0527, "step": 134555 }, { "epoch": 6.28, - "learning_rate": 7.477380338474522e-06, - "loss": 0.0211, + "learning_rate": 1.7496918540246207e-05, + "loss": 0.0641, "step": 134560 }, { "epoch": 6.28, - "learning_rate": 7.4769115371993816e-06, - "loss": 0.0364, + "learning_rate": 1.7496450470410187e-05, + "loss": 0.0455, "step": 134565 }, { "epoch": 6.28, - "learning_rate": 7.476442735924242e-06, - "loss": 0.1335, + "learning_rate": 1.7495982400574167e-05, + "loss": 0.0554, "step": 134570 }, { "epoch": 6.28, - "learning_rate": 7.475973934649103e-06, - "loss": 0.0307, + "learning_rate": 1.7495514330738147e-05, + "loss": 0.1149, "step": 134575 }, { "epoch": 6.28, - "learning_rate": 7.475505133373964e-06, - "loss": 0.1224, + "learning_rate": 1.7495046260902126e-05, + "loss": 0.0528, "step": 134580 }, { "epoch": 6.28, - "learning_rate": 7.475036332098824e-06, - "loss": 0.1067, + "learning_rate": 1.7494578191066106e-05, + "loss": 0.1861, "step": 134585 }, { "epoch": 6.28, - "learning_rate": 7.474567530823685e-06, - "loss": 0.0995, + "learning_rate": 1.7494110121230086e-05, + "loss": 0.1661, "step": 134590 }, { "epoch": 6.28, - "learning_rate": 7.4740987295485446e-06, - "loss": 0.0217, + "learning_rate": 1.749364205139407e-05, + "loss": 0.0025, "step": 134595 }, { "epoch": 6.28, - "learning_rate": 7.473629928273406e-06, - "loss": 0.0136, + "learning_rate": 1.749317398155805e-05, + "loss": 0.0036, "step": 134600 }, { "epoch": 6.28, - "learning_rate": 7.473161126998266e-06, - "loss": 0.0622, + "learning_rate": 1.749270591172203e-05, + "loss": 0.0097, "step": 134605 }, { "epoch": 6.28, - "learning_rate": 7.472692325723127e-06, - "loss": 0.0299, + "learning_rate": 1.749223784188601e-05, + "loss": 0.0578, "step": 134610 }, { "epoch": 6.28, - "learning_rate": 7.472223524447987e-06, - "loss": 0.0437, + "learning_rate": 1.7491769772049992e-05, + "loss": 0.0677, "step": 134615 }, { "epoch": 6.28, - "learning_rate": 7.471754723172847e-06, - "loss": 0.0664, + "learning_rate": 1.7491301702213972e-05, + "loss": 0.0261, "step": 134620 }, { "epoch": 6.28, - "learning_rate": 7.471285921897708e-06, - "loss": 0.114, + "learning_rate": 1.749083363237795e-05, + "loss": 0.0965, "step": 134625 }, { "epoch": 6.28, - "learning_rate": 7.470817120622569e-06, - "loss": 0.1854, + "learning_rate": 1.749036556254193e-05, + "loss": 0.1707, "step": 134630 }, { "epoch": 6.28, - "learning_rate": 7.470348319347429e-06, - "loss": 0.2335, + "learning_rate": 1.7489897492705915e-05, + "loss": 0.1385, "step": 134635 }, { "epoch": 6.28, - "learning_rate": 7.469879518072289e-06, - "loss": 0.1651, + "learning_rate": 1.748942942286989e-05, + "loss": 0.1525, "step": 134640 }, { "epoch": 6.28, - "learning_rate": 7.469410716797151e-06, - "loss": 0.0119, + "learning_rate": 1.748896135303387e-05, + "loss": 0.0092, "step": 134645 }, { "epoch": 6.28, - "learning_rate": 7.4689419155220115e-06, - "loss": 0.0134, + "learning_rate": 1.7488493283197854e-05, + "loss": 0.0405, "step": 134650 }, { "epoch": 6.28, - "learning_rate": 7.468473114246871e-06, - "loss": 0.0242, + "learning_rate": 1.7488025213361834e-05, + "loss": 0.0478, "step": 134655 }, { "epoch": 6.28, - "learning_rate": 7.468004312971731e-06, - "loss": 0.052, + "learning_rate": 1.7487557143525814e-05, + "loss": 0.0187, "step": 134660 }, { "epoch": 6.28, - "learning_rate": 7.467535511696592e-06, - "loss": 0.1046, + "learning_rate": 1.7487089073689793e-05, + "loss": 0.0566, "step": 134665 }, { "epoch": 6.28, - "learning_rate": 7.467066710421454e-06, - "loss": 0.0502, + "learning_rate": 1.7486621003853777e-05, + "loss": 0.1328, "step": 134670 }, { "epoch": 6.28, - "learning_rate": 7.466597909146314e-06, - "loss": 0.0556, + "learning_rate": 1.7486152934017756e-05, + "loss": 0.0533, "step": 134675 }, { "epoch": 6.28, - "learning_rate": 7.466129107871174e-06, - "loss": 0.0916, + "learning_rate": 1.7485684864181736e-05, + "loss": 0.0688, "step": 134680 }, { "epoch": 6.28, - "learning_rate": 7.465660306596034e-06, - "loss": 0.1562, + "learning_rate": 1.7485216794345716e-05, + "loss": 0.1578, "step": 134685 }, { "epoch": 6.28, - "learning_rate": 7.465191505320894e-06, - "loss": 0.1358, + "learning_rate": 1.74847487245097e-05, + "loss": 0.3123, "step": 134690 }, { "epoch": 6.29, - "learning_rate": 7.464722704045756e-06, - "loss": 0.0136, + "learning_rate": 1.748428065467368e-05, + "loss": 0.0134, "step": 134695 }, { "epoch": 6.29, - "learning_rate": 7.464253902770616e-06, - "loss": 0.0101, + "learning_rate": 1.748381258483766e-05, + "loss": 0.022, "step": 134700 }, { "epoch": 6.29, - "learning_rate": 7.463785101495477e-06, - "loss": 0.017, + "learning_rate": 1.748334451500164e-05, + "loss": 0.0434, "step": 134705 }, { "epoch": 6.29, - "learning_rate": 7.463316300220337e-06, - "loss": 0.132, + "learning_rate": 1.748287644516562e-05, + "loss": 0.0185, "step": 134710 }, { "epoch": 6.29, - "learning_rate": 7.462847498945198e-06, - "loss": 0.0393, + "learning_rate": 1.74824083753296e-05, + "loss": 0.0838, "step": 134715 }, { "epoch": 6.29, - "learning_rate": 7.462378697670058e-06, - "loss": 0.0166, + "learning_rate": 1.7481940305493578e-05, + "loss": 0.0419, "step": 134720 }, { "epoch": 6.29, - "learning_rate": 7.461909896394919e-06, - "loss": 0.0594, + "learning_rate": 1.748147223565756e-05, + "loss": 0.1098, "step": 134725 }, { "epoch": 6.29, - "learning_rate": 7.461441095119779e-06, - "loss": 0.0743, + "learning_rate": 1.748100416582154e-05, + "loss": 0.0681, "step": 134730 }, { "epoch": 6.29, - "learning_rate": 7.46097229384464e-06, - "loss": 0.2154, + "learning_rate": 1.748053609598552e-05, + "loss": 0.1184, "step": 134735 }, { "epoch": 6.29, - "learning_rate": 7.4605034925695e-06, - "loss": 0.186, + "learning_rate": 1.74800680261495e-05, + "loss": 0.1384, "step": 134740 }, { "epoch": 6.29, - "learning_rate": 7.460034691294361e-06, - "loss": 0.0086, + "learning_rate": 1.7479599956313484e-05, + "loss": 0.0041, "step": 134745 }, { "epoch": 6.29, - "learning_rate": 7.459565890019221e-06, - "loss": 0.0254, + "learning_rate": 1.7479131886477464e-05, + "loss": 0.0194, "step": 134750 }, { "epoch": 6.29, - "learning_rate": 7.459097088744082e-06, - "loss": 0.0499, + "learning_rate": 1.7478663816641444e-05, + "loss": 0.0208, "step": 134755 }, { "epoch": 6.29, - "learning_rate": 7.458628287468943e-06, - "loss": 0.0611, + "learning_rate": 1.7478195746805427e-05, + "loss": 0.077, "step": 134760 }, { "epoch": 6.29, - "learning_rate": 7.4581594861938035e-06, - "loss": 0.0463, + "learning_rate": 1.7477727676969403e-05, + "loss": 0.0732, "step": 134765 }, { "epoch": 6.29, - "learning_rate": 7.457690684918663e-06, - "loss": 0.017, + "learning_rate": 1.7477259607133383e-05, + "loss": 0.0354, "step": 134770 }, { "epoch": 6.29, - "learning_rate": 7.457221883643524e-06, - "loss": 0.102, + "learning_rate": 1.7476791537297363e-05, + "loss": 0.176, "step": 134775 }, { "epoch": 6.29, - "learning_rate": 7.456753082368384e-06, - "loss": 0.2041, + "learning_rate": 1.7476323467461346e-05, + "loss": 0.0649, "step": 134780 }, { "epoch": 6.29, - "learning_rate": 7.456284281093246e-06, - "loss": 0.2131, + "learning_rate": 1.7475855397625326e-05, + "loss": 0.1371, "step": 134785 }, { "epoch": 6.29, - "learning_rate": 7.455815479818106e-06, - "loss": 0.1423, + "learning_rate": 1.7475387327789306e-05, + "loss": 0.1551, "step": 134790 }, { "epoch": 6.29, - "learning_rate": 7.4553466785429665e-06, - "loss": 0.0194, + "learning_rate": 1.7474919257953286e-05, + "loss": 0.0145, "step": 134795 }, { "epoch": 6.29, - "learning_rate": 7.454877877267826e-06, - "loss": 0.0082, + "learning_rate": 1.747445118811727e-05, + "loss": 0.0036, "step": 134800 }, { "epoch": 6.29, - "learning_rate": 7.454409075992687e-06, - "loss": 0.0259, + "learning_rate": 1.747398311828125e-05, + "loss": 0.0385, "step": 134805 }, { "epoch": 6.29, - "learning_rate": 7.453940274717548e-06, - "loss": 0.0381, + "learning_rate": 1.747351504844523e-05, + "loss": 0.0208, "step": 134810 }, { "epoch": 6.29, - "learning_rate": 7.453471473442409e-06, - "loss": 0.0639, + "learning_rate": 1.747304697860921e-05, + "loss": 0.0513, "step": 134815 }, { "epoch": 6.29, - "learning_rate": 7.453002672167269e-06, - "loss": 0.056, + "learning_rate": 1.747257890877319e-05, + "loss": 0.0432, "step": 134820 }, { "epoch": 6.29, - "learning_rate": 7.4525338708921295e-06, - "loss": 0.0398, + "learning_rate": 1.747211083893717e-05, + "loss": 0.0185, "step": 134825 }, { "epoch": 6.29, - "learning_rate": 7.45206506961699e-06, - "loss": 0.0708, + "learning_rate": 1.7471642769101148e-05, + "loss": 0.1325, "step": 134830 }, { "epoch": 6.29, - "learning_rate": 7.451596268341851e-06, - "loss": 0.1331, + "learning_rate": 1.747117469926513e-05, + "loss": 0.1885, "step": 134835 }, { "epoch": 6.29, - "learning_rate": 7.451127467066711e-06, - "loss": 0.1845, + "learning_rate": 1.747070662942911e-05, + "loss": 0.1129, "step": 134840 }, { "epoch": 6.29, - "learning_rate": 7.450658665791572e-06, - "loss": 0.0237, + "learning_rate": 1.747023855959309e-05, + "loss": 0.0074, "step": 134845 }, { "epoch": 6.29, - "learning_rate": 7.450189864516432e-06, - "loss": 0.0248, + "learning_rate": 1.746977048975707e-05, + "loss": 0.0232, "step": 134850 }, { "epoch": 6.29, - "learning_rate": 7.449721063241293e-06, - "loss": 0.0293, + "learning_rate": 1.7469302419921054e-05, + "loss": 0.0077, "step": 134855 }, { "epoch": 6.29, - "learning_rate": 7.449252261966153e-06, - "loss": 0.0442, + "learning_rate": 1.7468834350085033e-05, + "loss": 0.0266, "step": 134860 }, { "epoch": 6.29, - "learning_rate": 7.448783460691014e-06, - "loss": 0.0387, + "learning_rate": 1.7468366280249013e-05, + "loss": 0.0652, "step": 134865 }, { "epoch": 6.29, - "learning_rate": 7.448314659415874e-06, - "loss": 0.0564, + "learning_rate": 1.7467898210412993e-05, + "loss": 0.0703, "step": 134870 }, { "epoch": 6.29, - "learning_rate": 7.447845858140734e-06, - "loss": 0.0813, + "learning_rate": 1.7467430140576976e-05, + "loss": 0.0327, "step": 134875 }, { "epoch": 6.29, - "learning_rate": 7.4473770568655955e-06, - "loss": 0.1202, + "learning_rate": 1.7466962070740956e-05, + "loss": 0.0986, "step": 134880 }, { "epoch": 6.29, - "learning_rate": 7.446908255590456e-06, - "loss": 0.2029, + "learning_rate": 1.7466494000904936e-05, + "loss": 0.1353, "step": 134885 }, { "epoch": 6.29, - "learning_rate": 7.446439454315316e-06, - "loss": 0.256, + "learning_rate": 1.7466025931068916e-05, + "loss": 0.1387, "step": 134890 }, { "epoch": 6.29, - "learning_rate": 7.445970653040176e-06, - "loss": 0.0252, + "learning_rate": 1.7465557861232896e-05, + "loss": 0.007, "step": 134895 }, { "epoch": 6.29, - "learning_rate": 7.445501851765038e-06, - "loss": 0.0301, + "learning_rate": 1.7465089791396875e-05, + "loss": 0.0131, "step": 134900 }, { "epoch": 6.29, - "learning_rate": 7.4450330504898986e-06, - "loss": 0.0459, + "learning_rate": 1.7464621721560855e-05, + "loss": 0.0523, "step": 134905 }, { "epoch": 6.3, - "learning_rate": 7.4445642492147585e-06, - "loss": 0.0367, + "learning_rate": 1.746415365172484e-05, + "loss": 0.0261, "step": 134910 }, { "epoch": 6.3, - "learning_rate": 7.4440954479396184e-06, - "loss": 0.067, + "learning_rate": 1.7463685581888818e-05, + "loss": 0.0234, "step": 134915 }, { "epoch": 6.3, - "learning_rate": 7.443626646664479e-06, - "loss": 0.0569, + "learning_rate": 1.7463217512052798e-05, + "loss": 0.0767, "step": 134920 }, { "epoch": 6.3, - "learning_rate": 7.443157845389341e-06, - "loss": 0.158, + "learning_rate": 1.7462749442216778e-05, + "loss": 0.05, "step": 134925 }, { "epoch": 6.3, - "learning_rate": 7.442689044114201e-06, - "loss": 0.1235, + "learning_rate": 1.746228137238076e-05, + "loss": 0.0791, "step": 134930 }, { "epoch": 6.3, - "learning_rate": 7.442220242839061e-06, - "loss": 0.2057, + "learning_rate": 1.746181330254474e-05, + "loss": 0.182, "step": 134935 }, { "epoch": 6.3, - "learning_rate": 7.4417514415639215e-06, - "loss": 0.1392, + "learning_rate": 1.746134523270872e-05, + "loss": 0.1883, "step": 134940 }, { "epoch": 6.3, - "learning_rate": 7.441282640288781e-06, - "loss": 0.0146, + "learning_rate": 1.7460877162872704e-05, + "loss": 0.0084, "step": 134945 }, { "epoch": 6.3, - "learning_rate": 7.440813839013643e-06, - "loss": 0.0045, + "learning_rate": 1.7460409093036684e-05, + "loss": 0.0244, "step": 134950 }, { "epoch": 6.3, - "learning_rate": 7.440345037738503e-06, - "loss": 0.0425, + "learning_rate": 1.745994102320066e-05, + "loss": 0.0088, "step": 134955 }, { "epoch": 6.3, - "learning_rate": 7.439876236463364e-06, - "loss": 0.0561, + "learning_rate": 1.745947295336464e-05, + "loss": 0.0275, "step": 134960 }, { "epoch": 6.3, - "learning_rate": 7.439407435188224e-06, - "loss": 0.1275, + "learning_rate": 1.7459004883528623e-05, + "loss": 0.0745, "step": 134965 }, { "epoch": 6.3, - "learning_rate": 7.438938633913085e-06, - "loss": 0.0563, + "learning_rate": 1.7458536813692603e-05, + "loss": 0.0487, "step": 134970 }, { "epoch": 6.3, - "learning_rate": 7.438469832637945e-06, - "loss": 0.0383, + "learning_rate": 1.7458068743856583e-05, + "loss": 0.0921, "step": 134975 }, { "epoch": 6.3, - "learning_rate": 7.438001031362806e-06, - "loss": 0.0852, + "learning_rate": 1.7457600674020563e-05, + "loss": 0.1209, "step": 134980 }, { "epoch": 6.3, - "learning_rate": 7.437532230087666e-06, - "loss": 0.177, + "learning_rate": 1.7457132604184546e-05, + "loss": 0.234, "step": 134985 }, { "epoch": 6.3, - "learning_rate": 7.437063428812527e-06, - "loss": 0.1636, + "learning_rate": 1.7456664534348526e-05, + "loss": 0.1272, "step": 134990 }, { "epoch": 6.3, - "learning_rate": 7.4365946275373875e-06, - "loss": 0.0222, + "learning_rate": 1.7456196464512505e-05, + "loss": 0.0243, "step": 134995 }, { "epoch": 6.3, - "learning_rate": 7.436125826262248e-06, - "loss": 0.0333, + "learning_rate": 1.7455728394676485e-05, + "loss": 0.0228, "step": 135000 }, { "epoch": 6.3, - "learning_rate": 7.435657024987108e-06, - "loss": 0.0928, + "learning_rate": 1.745526032484047e-05, + "loss": 0.026, "step": 135005 }, { "epoch": 6.3, - "learning_rate": 7.435188223711969e-06, - "loss": 0.0797, + "learning_rate": 1.745479225500445e-05, + "loss": 0.0705, "step": 135010 }, { "epoch": 6.3, - "learning_rate": 7.434719422436829e-06, - "loss": 0.0209, + "learning_rate": 1.7454324185168428e-05, + "loss": 0.118, "step": 135015 }, { "epoch": 6.3, - "learning_rate": 7.434250621161691e-06, - "loss": 0.0712, + "learning_rate": 1.7453856115332408e-05, + "loss": 0.0397, "step": 135020 }, { "epoch": 6.3, - "learning_rate": 7.4337818198865505e-06, - "loss": 0.085, + "learning_rate": 1.7453388045496388e-05, + "loss": 0.0808, "step": 135025 }, { "epoch": 6.3, - "learning_rate": 7.433313018611411e-06, - "loss": 0.2167, + "learning_rate": 1.7452919975660368e-05, + "loss": 0.1561, "step": 135030 }, { "epoch": 6.3, - "learning_rate": 7.432844217336271e-06, - "loss": 0.2129, + "learning_rate": 1.7452451905824347e-05, + "loss": 0.1503, "step": 135035 }, { "epoch": 6.3, - "learning_rate": 7.432375416061133e-06, - "loss": 0.183, + "learning_rate": 1.745198383598833e-05, + "loss": 0.1, "step": 135040 }, { "epoch": 6.3, - "learning_rate": 7.431906614785993e-06, - "loss": 0.0212, + "learning_rate": 1.745151576615231e-05, + "loss": 0.0177, "step": 135045 }, { "epoch": 6.3, - "learning_rate": 7.431437813510854e-06, - "loss": 0.0235, + "learning_rate": 1.745104769631629e-05, + "loss": 0.0016, "step": 135050 }, { "epoch": 6.3, - "learning_rate": 7.4309690122357135e-06, - "loss": 0.0231, + "learning_rate": 1.745057962648027e-05, + "loss": 0.0809, "step": 135055 }, { "epoch": 6.3, - "learning_rate": 7.430500210960574e-06, - "loss": 0.0056, + "learning_rate": 1.7450111556644253e-05, + "loss": 0.0268, "step": 135060 }, { "epoch": 6.3, - "learning_rate": 7.430031409685435e-06, - "loss": 0.0533, + "learning_rate": 1.7449643486808233e-05, + "loss": 0.0165, "step": 135065 }, { "epoch": 6.3, - "learning_rate": 7.429562608410296e-06, - "loss": 0.0333, + "learning_rate": 1.7449175416972213e-05, + "loss": 0.035, "step": 135070 }, { "epoch": 6.3, - "learning_rate": 7.429093807135156e-06, - "loss": 0.0593, + "learning_rate": 1.7448707347136196e-05, + "loss": 0.0764, "step": 135075 }, { "epoch": 6.3, - "learning_rate": 7.4286250058600166e-06, - "loss": 0.0831, + "learning_rate": 1.7448239277300173e-05, + "loss": 0.1526, "step": 135080 }, { "epoch": 6.3, - "learning_rate": 7.428156204584877e-06, - "loss": 0.1439, + "learning_rate": 1.7447771207464152e-05, + "loss": 0.3038, "step": 135085 }, { "epoch": 6.3, - "learning_rate": 7.427687403309738e-06, - "loss": 0.2736, + "learning_rate": 1.7447303137628132e-05, + "loss": 0.1792, "step": 135090 }, { "epoch": 6.3, - "learning_rate": 7.427218602034598e-06, - "loss": 0.033, + "learning_rate": 1.7446835067792115e-05, + "loss": 0.0179, "step": 135095 }, { "epoch": 6.3, - "learning_rate": 7.426749800759459e-06, - "loss": 0.0291, + "learning_rate": 1.7446366997956095e-05, + "loss": 0.0264, "step": 135100 }, { "epoch": 6.3, - "learning_rate": 7.426280999484319e-06, - "loss": 0.0186, + "learning_rate": 1.7445898928120075e-05, + "loss": 0.0389, "step": 135105 }, { "epoch": 6.3, - "learning_rate": 7.42581219820918e-06, - "loss": 0.0183, + "learning_rate": 1.7445430858284055e-05, + "loss": 0.0458, "step": 135110 }, { "epoch": 6.3, - "learning_rate": 7.42534339693404e-06, - "loss": 0.0665, + "learning_rate": 1.7444962788448038e-05, + "loss": 0.0162, "step": 135115 }, { "epoch": 6.3, - "learning_rate": 7.424874595658901e-06, - "loss": 0.1257, + "learning_rate": 1.7444494718612018e-05, + "loss": 0.0545, "step": 135120 }, { "epoch": 6.31, - "learning_rate": 7.424405794383761e-06, - "loss": 0.0888, + "learning_rate": 1.7444026648775998e-05, + "loss": 0.0452, "step": 135125 }, { "epoch": 6.31, - "learning_rate": 7.423936993108621e-06, - "loss": 0.0775, + "learning_rate": 1.744355857893998e-05, + "loss": 0.1616, "step": 135130 }, { "epoch": 6.31, - "learning_rate": 7.423468191833483e-06, - "loss": 0.0901, + "learning_rate": 1.744309050910396e-05, + "loss": 0.1094, "step": 135135 }, { "epoch": 6.31, - "learning_rate": 7.422999390558343e-06, - "loss": 0.1193, + "learning_rate": 1.744262243926794e-05, + "loss": 0.1086, "step": 135140 }, { "epoch": 6.31, - "learning_rate": 7.422530589283203e-06, - "loss": 0.0243, + "learning_rate": 1.7442154369431917e-05, + "loss": 0.0072, "step": 135145 }, { "epoch": 6.31, - "learning_rate": 7.422061788008063e-06, - "loss": 0.0187, + "learning_rate": 1.74416862995959e-05, + "loss": 0.014, "step": 135150 }, { "epoch": 6.31, - "learning_rate": 7.421592986732925e-06, - "loss": 0.0162, + "learning_rate": 1.744121822975988e-05, + "loss": 0.0449, "step": 135155 }, { "epoch": 6.31, - "learning_rate": 7.421124185457786e-06, - "loss": 0.01, + "learning_rate": 1.744075015992386e-05, + "loss": 0.0268, "step": 135160 }, { "epoch": 6.31, - "learning_rate": 7.420655384182646e-06, - "loss": 0.0491, + "learning_rate": 1.744028209008784e-05, + "loss": 0.046, "step": 135165 }, { "epoch": 6.31, - "learning_rate": 7.4201865829075055e-06, - "loss": 0.0557, + "learning_rate": 1.7439814020251823e-05, + "loss": 0.044, "step": 135170 }, { "epoch": 6.31, - "learning_rate": 7.419717781632366e-06, - "loss": 0.0637, + "learning_rate": 1.7439345950415803e-05, + "loss": 0.0277, "step": 135175 }, { "epoch": 6.31, - "learning_rate": 7.419248980357228e-06, - "loss": 0.0968, + "learning_rate": 1.7438877880579782e-05, + "loss": 0.1087, "step": 135180 }, { "epoch": 6.31, - "learning_rate": 7.418780179082088e-06, - "loss": 0.2632, + "learning_rate": 1.7438409810743766e-05, + "loss": 0.1321, "step": 135185 }, { "epoch": 6.31, - "learning_rate": 7.418311377806948e-06, - "loss": 0.1837, + "learning_rate": 1.7437941740907745e-05, + "loss": 0.0747, "step": 135190 }, { "epoch": 6.31, - "learning_rate": 7.417842576531809e-06, - "loss": 0.0365, + "learning_rate": 1.7437473671071725e-05, + "loss": 0.0207, "step": 135195 }, { "epoch": 6.31, - "learning_rate": 7.4173737752566685e-06, - "loss": 0.029, + "learning_rate": 1.7437005601235705e-05, + "loss": 0.0213, "step": 135200 }, { "epoch": 6.31, - "learning_rate": 7.41690497398153e-06, - "loss": 0.0224, + "learning_rate": 1.7436537531399688e-05, + "loss": 0.016, "step": 135205 }, { "epoch": 6.31, - "learning_rate": 7.41643617270639e-06, - "loss": 0.0387, + "learning_rate": 1.7436069461563665e-05, + "loss": 0.0358, "step": 135210 }, { "epoch": 6.31, - "learning_rate": 7.415967371431251e-06, - "loss": 0.0758, + "learning_rate": 1.7435601391727645e-05, + "loss": 0.0304, "step": 135215 }, { "epoch": 6.31, - "learning_rate": 7.415498570156111e-06, - "loss": 0.0703, + "learning_rate": 1.7435133321891624e-05, + "loss": 0.0573, "step": 135220 }, { "epoch": 6.31, - "learning_rate": 7.4150297688809724e-06, - "loss": 0.072, + "learning_rate": 1.7434665252055608e-05, + "loss": 0.0178, "step": 135225 }, { "epoch": 6.31, - "learning_rate": 7.414560967605832e-06, - "loss": 0.0952, + "learning_rate": 1.7434197182219587e-05, + "loss": 0.1199, "step": 135230 }, { "epoch": 6.31, - "learning_rate": 7.414092166330693e-06, - "loss": 0.141, + "learning_rate": 1.7433729112383567e-05, + "loss": 0.1341, "step": 135235 }, { "epoch": 6.31, - "learning_rate": 7.413623365055553e-06, - "loss": 0.1903, + "learning_rate": 1.7433261042547547e-05, + "loss": 0.0985, "step": 135240 }, { "epoch": 6.31, - "learning_rate": 7.413154563780414e-06, - "loss": 0.0301, + "learning_rate": 1.743279297271153e-05, + "loss": 0.0135, "step": 135245 }, { "epoch": 6.31, - "learning_rate": 7.412685762505275e-06, - "loss": 0.0046, + "learning_rate": 1.743232490287551e-05, + "loss": 0.0484, "step": 135250 }, { "epoch": 6.31, - "learning_rate": 7.412216961230135e-06, - "loss": 0.0096, + "learning_rate": 1.743185683303949e-05, + "loss": 0.0678, "step": 135255 }, { "epoch": 6.31, - "learning_rate": 7.411748159954995e-06, - "loss": 0.0228, + "learning_rate": 1.7431388763203473e-05, + "loss": 0.0314, "step": 135260 }, { "epoch": 6.31, - "learning_rate": 7.411279358679856e-06, - "loss": 0.0712, + "learning_rate": 1.7430920693367453e-05, + "loss": 0.072, "step": 135265 }, { "epoch": 6.31, - "learning_rate": 7.410810557404716e-06, - "loss": 0.0402, + "learning_rate": 1.743045262353143e-05, + "loss": 0.1761, "step": 135270 }, { "epoch": 6.31, - "learning_rate": 7.410341756129578e-06, - "loss": 0.0457, + "learning_rate": 1.742998455369541e-05, + "loss": 0.0629, "step": 135275 }, { "epoch": 6.31, - "learning_rate": 7.409872954854438e-06, - "loss": 0.11, + "learning_rate": 1.7429516483859392e-05, + "loss": 0.0562, "step": 135280 }, { "epoch": 6.31, - "learning_rate": 7.409404153579298e-06, - "loss": 0.1502, + "learning_rate": 1.7429048414023372e-05, + "loss": 0.0687, "step": 135285 }, { "epoch": 6.31, - "learning_rate": 7.408935352304158e-06, - "loss": 0.1213, + "learning_rate": 1.7428580344187352e-05, + "loss": 0.1551, "step": 135290 }, { "epoch": 6.31, - "learning_rate": 7.40846655102902e-06, - "loss": 0.0432, + "learning_rate": 1.7428112274351332e-05, + "loss": 0.0013, "step": 135295 }, { "epoch": 6.31, - "learning_rate": 7.40799774975388e-06, - "loss": 0.0361, + "learning_rate": 1.7427644204515315e-05, + "loss": 0.0098, "step": 135300 }, { "epoch": 6.31, - "learning_rate": 7.407528948478741e-06, - "loss": 0.0072, + "learning_rate": 1.7427176134679295e-05, + "loss": 0.033, "step": 135305 }, { "epoch": 6.31, - "learning_rate": 7.407060147203601e-06, - "loss": 0.042, + "learning_rate": 1.7426708064843275e-05, + "loss": 0.0361, "step": 135310 }, { "epoch": 6.31, - "learning_rate": 7.406591345928461e-06, - "loss": 0.07, + "learning_rate": 1.7426239995007258e-05, + "loss": 0.039, "step": 135315 }, { "epoch": 6.31, - "learning_rate": 7.406122544653322e-06, - "loss": 0.024, + "learning_rate": 1.7425771925171238e-05, + "loss": 0.0993, "step": 135320 }, { "epoch": 6.31, - "learning_rate": 7.405653743378183e-06, - "loss": 0.051, + "learning_rate": 1.7425303855335217e-05, + "loss": 0.0817, "step": 135325 }, { "epoch": 6.31, - "learning_rate": 7.405184942103043e-06, - "loss": 0.1072, + "learning_rate": 1.7424835785499197e-05, + "loss": 0.2171, "step": 135330 }, { "epoch": 6.31, - "learning_rate": 7.404716140827904e-06, - "loss": 0.2696, + "learning_rate": 1.7424367715663177e-05, + "loss": 0.2169, "step": 135335 }, { "epoch": 6.32, - "learning_rate": 7.404247339552764e-06, - "loss": 0.1093, + "learning_rate": 1.7423899645827157e-05, + "loss": 0.1503, "step": 135340 }, { "epoch": 6.32, - "learning_rate": 7.403778538277625e-06, - "loss": 0.0137, + "learning_rate": 1.7423431575991137e-05, + "loss": 0.0301, "step": 135345 }, { "epoch": 6.32, - "learning_rate": 7.403309737002485e-06, - "loss": 0.0446, + "learning_rate": 1.7422963506155117e-05, + "loss": 0.0167, "step": 135350 }, { "epoch": 6.32, - "learning_rate": 7.402840935727346e-06, - "loss": 0.0315, + "learning_rate": 1.74224954363191e-05, + "loss": 0.0403, "step": 135355 }, { "epoch": 6.32, - "learning_rate": 7.402372134452206e-06, - "loss": 0.0122, + "learning_rate": 1.742202736648308e-05, + "loss": 0.0456, "step": 135360 }, { "epoch": 6.32, - "learning_rate": 7.4019033331770675e-06, - "loss": 0.0697, + "learning_rate": 1.742155929664706e-05, + "loss": 0.0571, "step": 135365 }, { "epoch": 6.32, - "learning_rate": 7.4014345319019274e-06, - "loss": 0.0401, + "learning_rate": 1.7421091226811043e-05, + "loss": 0.0171, "step": 135370 }, { "epoch": 6.32, - "learning_rate": 7.400965730626788e-06, - "loss": 0.0338, + "learning_rate": 1.7420623156975022e-05, + "loss": 0.0849, "step": 135375 }, { "epoch": 6.32, - "learning_rate": 7.400496929351648e-06, - "loss": 0.0885, + "learning_rate": 1.7420155087139002e-05, + "loss": 0.0476, "step": 135380 }, { "epoch": 6.32, - "learning_rate": 7.400028128076508e-06, - "loss": 0.1956, + "learning_rate": 1.7419687017302982e-05, + "loss": 0.179, "step": 135385 }, { "epoch": 6.32, - "learning_rate": 7.39955932680137e-06, - "loss": 0.0954, + "learning_rate": 1.7419218947466965e-05, + "loss": 0.1278, "step": 135390 }, { "epoch": 6.32, - "learning_rate": 7.3990905255262305e-06, - "loss": 0.0041, + "learning_rate": 1.7418750877630945e-05, + "loss": 0.0155, "step": 135395 }, { "epoch": 6.32, - "learning_rate": 7.3986217242510904e-06, - "loss": 0.0082, + "learning_rate": 1.741828280779492e-05, + "loss": 0.0067, "step": 135400 }, { "epoch": 6.32, - "learning_rate": 7.39815292297595e-06, - "loss": 0.0441, + "learning_rate": 1.74178147379589e-05, + "loss": 0.032, "step": 135405 }, { "epoch": 6.32, - "learning_rate": 7.397684121700812e-06, - "loss": 0.0203, + "learning_rate": 1.7417346668122885e-05, + "loss": 0.0324, "step": 135410 }, { "epoch": 6.32, - "learning_rate": 7.397215320425673e-06, - "loss": 0.0701, + "learning_rate": 1.7416878598286864e-05, + "loss": 0.0115, "step": 135415 }, { "epoch": 6.32, - "learning_rate": 7.396746519150533e-06, - "loss": 0.0413, + "learning_rate": 1.7416410528450844e-05, + "loss": 0.0789, "step": 135420 }, { "epoch": 6.32, - "learning_rate": 7.396277717875393e-06, - "loss": 0.082, + "learning_rate": 1.7415942458614824e-05, + "loss": 0.0554, "step": 135425 }, { "epoch": 6.32, - "learning_rate": 7.3958089166002534e-06, - "loss": 0.0647, + "learning_rate": 1.7415474388778807e-05, + "loss": 0.642, "step": 135430 }, { "epoch": 6.32, - "learning_rate": 7.395340115325115e-06, - "loss": 0.08, + "learning_rate": 1.7415006318942787e-05, + "loss": 0.126, "step": 135435 }, { "epoch": 6.32, - "learning_rate": 7.394871314049975e-06, - "loss": 0.1308, + "learning_rate": 1.7414538249106767e-05, + "loss": 0.1776, "step": 135440 }, { "epoch": 6.32, - "learning_rate": 7.394402512774835e-06, - "loss": 0.0351, + "learning_rate": 1.741407017927075e-05, + "loss": 0.0116, "step": 135445 }, { "epoch": 6.32, - "learning_rate": 7.393933711499696e-06, - "loss": 0.0142, + "learning_rate": 1.741360210943473e-05, + "loss": 0.0175, "step": 135450 }, { "epoch": 6.32, - "learning_rate": 7.393464910224556e-06, - "loss": 0.0133, + "learning_rate": 1.741313403959871e-05, + "loss": 0.0239, "step": 135455 }, { "epoch": 6.32, - "learning_rate": 7.392996108949417e-06, - "loss": 0.0297, + "learning_rate": 1.7412665969762686e-05, + "loss": 0.0327, "step": 135460 }, { "epoch": 6.32, - "learning_rate": 7.392527307674277e-06, - "loss": 0.0306, + "learning_rate": 1.741219789992667e-05, + "loss": 0.041, "step": 135465 }, { "epoch": 6.32, - "learning_rate": 7.392058506399138e-06, - "loss": 0.0246, + "learning_rate": 1.741172983009065e-05, + "loss": 0.0876, "step": 135470 }, { "epoch": 6.32, - "learning_rate": 7.391589705123998e-06, - "loss": 0.1374, + "learning_rate": 1.741126176025463e-05, + "loss": 0.03, "step": 135475 }, { "epoch": 6.32, - "learning_rate": 7.3911209038488595e-06, - "loss": 0.1056, + "learning_rate": 1.741079369041861e-05, + "loss": 0.0558, "step": 135480 }, { "epoch": 6.32, - "learning_rate": 7.39065210257372e-06, - "loss": 0.0834, + "learning_rate": 1.7410325620582592e-05, + "loss": 0.1345, "step": 135485 }, { "epoch": 6.32, - "learning_rate": 7.39018330129858e-06, - "loss": 0.101, + "learning_rate": 1.7409857550746572e-05, + "loss": 0.1261, "step": 135490 }, { "epoch": 6.32, - "learning_rate": 7.38971450002344e-06, - "loss": 0.0123, + "learning_rate": 1.740938948091055e-05, + "loss": 0.0651, "step": 135495 }, { "epoch": 6.32, - "learning_rate": 7.389245698748301e-06, - "loss": 0.0034, + "learning_rate": 1.7408921411074535e-05, + "loss": 0.0416, "step": 135500 }, { "epoch": 6.32, - "learning_rate": 7.388776897473163e-06, - "loss": 0.0574, + "learning_rate": 1.7408453341238515e-05, + "loss": 0.0052, "step": 135505 }, { "epoch": 6.32, - "learning_rate": 7.3883080961980225e-06, - "loss": 0.0578, + "learning_rate": 1.7407985271402494e-05, + "loss": 0.103, "step": 135510 }, { "epoch": 6.32, - "learning_rate": 7.3878392949228825e-06, - "loss": 0.0315, + "learning_rate": 1.7407517201566474e-05, + "loss": 0.025, "step": 135515 }, { "epoch": 6.32, - "learning_rate": 7.387370493647743e-06, - "loss": 0.0861, + "learning_rate": 1.7407049131730457e-05, + "loss": 0.0403, "step": 135520 }, { "epoch": 6.32, - "learning_rate": 7.386901692372603e-06, - "loss": 0.1086, + "learning_rate": 1.7406581061894434e-05, + "loss": 0.0195, "step": 135525 }, { "epoch": 6.32, - "learning_rate": 7.386432891097465e-06, - "loss": 0.1063, + "learning_rate": 1.7406112992058414e-05, + "loss": 0.1739, "step": 135530 }, { "epoch": 6.32, - "learning_rate": 7.385964089822325e-06, - "loss": 0.2057, + "learning_rate": 1.7405644922222394e-05, + "loss": 0.3685, "step": 135535 }, { "epoch": 6.32, - "learning_rate": 7.3854952885471855e-06, - "loss": 0.1805, + "learning_rate": 1.7405176852386377e-05, + "loss": 0.1131, "step": 135540 }, { "epoch": 6.32, - "learning_rate": 7.3850264872720455e-06, - "loss": 0.02, + "learning_rate": 1.7404708782550357e-05, + "loss": 0.0228, "step": 135545 }, { "epoch": 6.32, - "learning_rate": 7.384557685996907e-06, - "loss": 0.0039, + "learning_rate": 1.7404240712714336e-05, + "loss": 0.0319, "step": 135550 }, { "epoch": 6.33, - "learning_rate": 7.384088884721767e-06, - "loss": 0.0508, + "learning_rate": 1.740377264287832e-05, + "loss": 0.0231, "step": 135555 }, { "epoch": 6.33, - "learning_rate": 7.383620083446628e-06, - "loss": 0.0351, + "learning_rate": 1.74033045730423e-05, + "loss": 0.0401, "step": 135560 }, { "epoch": 6.33, - "learning_rate": 7.383151282171488e-06, - "loss": 0.0462, + "learning_rate": 1.740283650320628e-05, + "loss": 0.0372, "step": 135565 }, { "epoch": 6.33, - "learning_rate": 7.3826824808963485e-06, - "loss": 0.06, + "learning_rate": 1.740236843337026e-05, + "loss": 0.0206, "step": 135570 }, { "epoch": 6.33, - "learning_rate": 7.382213679621209e-06, - "loss": 0.0655, + "learning_rate": 1.7401900363534242e-05, + "loss": 0.092, "step": 135575 }, { "epoch": 6.33, - "learning_rate": 7.38174487834607e-06, - "loss": 0.1491, + "learning_rate": 1.7401432293698222e-05, + "loss": 0.0646, "step": 135580 }, { "epoch": 6.33, - "learning_rate": 7.38127607707093e-06, - "loss": 0.0839, + "learning_rate": 1.7400964223862202e-05, + "loss": 0.2227, "step": 135585 }, { "epoch": 6.33, - "learning_rate": 7.380807275795791e-06, - "loss": 0.1435, + "learning_rate": 1.740049615402618e-05, + "loss": 0.1738, "step": 135590 }, { "epoch": 6.33, - "learning_rate": 7.380338474520651e-06, - "loss": 0.0393, + "learning_rate": 1.740002808419016e-05, + "loss": 0.0148, "step": 135595 }, { "epoch": 6.33, - "learning_rate": 7.379869673245512e-06, - "loss": 0.0303, + "learning_rate": 1.739956001435414e-05, + "loss": 0.0325, "step": 135600 }, { "epoch": 6.33, - "learning_rate": 7.379400871970372e-06, - "loss": 0.004, + "learning_rate": 1.739909194451812e-05, + "loss": 0.0185, "step": 135605 }, { "epoch": 6.33, - "learning_rate": 7.378932070695233e-06, - "loss": 0.0198, + "learning_rate": 1.73986238746821e-05, + "loss": 0.034, "step": 135610 }, { "epoch": 6.33, - "learning_rate": 7.378463269420093e-06, - "loss": 0.0608, + "learning_rate": 1.7398155804846084e-05, + "loss": 0.0143, "step": 135615 }, { "epoch": 6.33, - "learning_rate": 7.377994468144955e-06, - "loss": 0.0264, + "learning_rate": 1.7397687735010064e-05, + "loss": 0.0458, "step": 135620 }, { "epoch": 6.33, - "learning_rate": 7.3775256668698146e-06, - "loss": 0.0619, + "learning_rate": 1.7397219665174044e-05, + "loss": 0.0485, "step": 135625 }, { "epoch": 6.33, - "learning_rate": 7.377056865594675e-06, - "loss": 0.0811, + "learning_rate": 1.7396751595338027e-05, + "loss": 0.1025, "step": 135630 }, { "epoch": 6.33, - "learning_rate": 7.376588064319535e-06, - "loss": 0.1385, + "learning_rate": 1.7396283525502007e-05, + "loss": 0.1468, "step": 135635 }, { "epoch": 6.33, - "learning_rate": 7.376119263044396e-06, - "loss": 0.1366, + "learning_rate": 1.7395815455665987e-05, + "loss": 0.2728, "step": 135640 }, { "epoch": 6.33, - "learning_rate": 7.375650461769257e-06, - "loss": 0.0224, + "learning_rate": 1.7395347385829966e-05, + "loss": 0.0318, "step": 135645 }, { "epoch": 6.33, - "learning_rate": 7.375181660494118e-06, - "loss": 0.0081, + "learning_rate": 1.7394879315993946e-05, + "loss": 0.0126, "step": 135650 }, { "epoch": 6.33, - "learning_rate": 7.3747128592189775e-06, - "loss": 0.0305, + "learning_rate": 1.7394411246157926e-05, + "loss": 0.0317, "step": 135655 }, { "epoch": 6.33, - "learning_rate": 7.374244057943838e-06, - "loss": 0.0646, + "learning_rate": 1.7393943176321906e-05, + "loss": 0.0304, "step": 135660 }, { "epoch": 6.33, - "learning_rate": 7.373775256668698e-06, - "loss": 0.0258, + "learning_rate": 1.7393475106485886e-05, + "loss": 0.0728, "step": 135665 }, { "epoch": 6.33, - "learning_rate": 7.37330645539356e-06, - "loss": 0.0447, + "learning_rate": 1.739300703664987e-05, + "loss": 0.085, "step": 135670 }, { "epoch": 6.33, - "learning_rate": 7.37283765411842e-06, - "loss": 0.1055, + "learning_rate": 1.739253896681385e-05, + "loss": 0.0636, "step": 135675 }, { "epoch": 6.33, - "learning_rate": 7.372368852843281e-06, - "loss": 0.0647, + "learning_rate": 1.739207089697783e-05, + "loss": 0.0598, "step": 135680 }, { "epoch": 6.33, - "learning_rate": 7.3719000515681405e-06, - "loss": 0.1458, + "learning_rate": 1.7391602827141812e-05, + "loss": 0.1668, "step": 135685 }, { "epoch": 6.33, - "learning_rate": 7.371431250293002e-06, - "loss": 0.1177, + "learning_rate": 1.739113475730579e-05, + "loss": 0.1149, "step": 135690 }, { "epoch": 6.33, - "learning_rate": 7.370962449017862e-06, - "loss": 0.0316, + "learning_rate": 1.739066668746977e-05, + "loss": 0.0024, "step": 135695 }, { "epoch": 6.33, - "learning_rate": 7.370493647742723e-06, - "loss": 0.0164, + "learning_rate": 1.739019861763375e-05, + "loss": 0.0217, "step": 135700 }, { "epoch": 6.33, - "learning_rate": 7.370024846467583e-06, - "loss": 0.0278, + "learning_rate": 1.7389730547797734e-05, + "loss": 0.0099, "step": 135705 }, { "epoch": 6.33, - "learning_rate": 7.369556045192443e-06, - "loss": 0.0372, + "learning_rate": 1.7389262477961714e-05, + "loss": 0.0174, "step": 135710 }, { "epoch": 6.33, - "learning_rate": 7.369087243917304e-06, - "loss": 0.0622, + "learning_rate": 1.738879440812569e-05, + "loss": 0.0342, "step": 135715 }, { "epoch": 6.33, - "learning_rate": 7.368618442642165e-06, - "loss": 0.0196, + "learning_rate": 1.738832633828967e-05, + "loss": 0.0353, "step": 135720 }, { "epoch": 6.33, - "learning_rate": 7.368149641367025e-06, - "loss": 0.0536, + "learning_rate": 1.7387858268453654e-05, + "loss": 0.0994, "step": 135725 }, { "epoch": 6.33, - "learning_rate": 7.367680840091885e-06, - "loss": 0.1215, + "learning_rate": 1.7387390198617634e-05, + "loss": 0.1653, "step": 135730 }, { "epoch": 6.33, - "learning_rate": 7.367212038816747e-06, - "loss": 0.1779, + "learning_rate": 1.7386922128781613e-05, + "loss": 0.188, "step": 135735 }, { "epoch": 6.33, - "learning_rate": 7.3667432375416074e-06, - "loss": 0.1972, + "learning_rate": 1.7386454058945597e-05, + "loss": 0.1204, "step": 135740 }, { "epoch": 6.33, - "learning_rate": 7.366274436266467e-06, - "loss": 0.0323, + "learning_rate": 1.7385985989109576e-05, + "loss": 0.0038, "step": 135745 }, { "epoch": 6.33, - "learning_rate": 7.365805634991327e-06, - "loss": 0.0206, + "learning_rate": 1.7385517919273556e-05, + "loss": 0.0144, "step": 135750 }, { "epoch": 6.33, - "learning_rate": 7.365336833716188e-06, - "loss": 0.038, + "learning_rate": 1.7385049849437536e-05, + "loss": 0.027, "step": 135755 }, { "epoch": 6.33, - "learning_rate": 7.36486803244105e-06, - "loss": 0.0701, + "learning_rate": 1.738458177960152e-05, + "loss": 0.0484, "step": 135760 }, { "epoch": 6.33, - "learning_rate": 7.36439923116591e-06, - "loss": 0.0594, + "learning_rate": 1.73841137097655e-05, + "loss": 0.0944, "step": 135765 }, { "epoch": 6.34, - "learning_rate": 7.3639304298907696e-06, - "loss": 0.0412, + "learning_rate": 1.738364563992948e-05, + "loss": 0.0588, "step": 135770 }, { "epoch": 6.34, - "learning_rate": 7.36346162861563e-06, - "loss": 0.0829, + "learning_rate": 1.738317757009346e-05, + "loss": 0.0482, "step": 135775 }, { "epoch": 6.34, - "learning_rate": 7.36299282734049e-06, - "loss": 0.1552, + "learning_rate": 1.738270950025744e-05, + "loss": 0.1419, "step": 135780 }, { "epoch": 6.34, - "learning_rate": 7.362524026065352e-06, - "loss": 0.2017, + "learning_rate": 1.7382241430421418e-05, + "loss": 0.1637, "step": 135785 }, { "epoch": 6.34, - "learning_rate": 7.362055224790212e-06, - "loss": 0.1636, + "learning_rate": 1.7381773360585398e-05, + "loss": 0.1135, "step": 135790 }, { "epoch": 6.34, - "learning_rate": 7.361586423515073e-06, - "loss": 0.02, + "learning_rate": 1.738130529074938e-05, + "loss": 0.0526, "step": 135795 }, { "epoch": 6.34, - "learning_rate": 7.3611176222399326e-06, - "loss": 0.0059, + "learning_rate": 1.738083722091336e-05, + "loss": 0.0421, "step": 135800 }, { "epoch": 6.34, - "learning_rate": 7.360648820964794e-06, - "loss": 0.0178, + "learning_rate": 1.738036915107734e-05, + "loss": 0.0358, "step": 135805 }, { "epoch": 6.34, - "learning_rate": 7.360180019689654e-06, - "loss": 0.0681, + "learning_rate": 1.737990108124132e-05, + "loss": 0.0238, "step": 135810 }, { "epoch": 6.34, - "learning_rate": 7.359711218414515e-06, - "loss": 0.1091, + "learning_rate": 1.7379433011405304e-05, + "loss": 0.0771, "step": 135815 }, { "epoch": 6.34, - "learning_rate": 7.359242417139375e-06, - "loss": 0.0198, + "learning_rate": 1.7378964941569284e-05, + "loss": 0.0517, "step": 135820 }, { "epoch": 6.34, - "learning_rate": 7.358773615864236e-06, - "loss": 0.128, + "learning_rate": 1.7378496871733264e-05, + "loss": 0.0466, "step": 135825 }, { "epoch": 6.34, - "learning_rate": 7.358304814589096e-06, - "loss": 0.0806, + "learning_rate": 1.7378028801897243e-05, + "loss": 0.0539, "step": 135830 }, { "epoch": 6.34, - "learning_rate": 7.357836013313957e-06, - "loss": 0.1906, + "learning_rate": 1.7377560732061227e-05, + "loss": 0.1282, "step": 135835 }, { "epoch": 6.34, - "learning_rate": 7.357367212038817e-06, - "loss": 0.1134, + "learning_rate": 1.7377092662225203e-05, + "loss": 0.1826, "step": 135840 }, { "epoch": 6.34, - "learning_rate": 7.356898410763678e-06, - "loss": 0.0135, + "learning_rate": 1.7376624592389183e-05, + "loss": 0.006, "step": 135845 }, { "epoch": 6.34, - "learning_rate": 7.356429609488538e-06, - "loss": 0.0097, + "learning_rate": 1.7376156522553163e-05, + "loss": 0.0217, "step": 135850 }, { "epoch": 6.34, - "learning_rate": 7.3559608082133995e-06, - "loss": 0.0477, + "learning_rate": 1.7375688452717146e-05, + "loss": 0.0205, "step": 135855 }, { "epoch": 6.34, - "learning_rate": 7.355492006938259e-06, - "loss": 0.0111, + "learning_rate": 1.7375220382881126e-05, + "loss": 0.0471, "step": 135860 }, { "epoch": 6.34, - "learning_rate": 7.35502320566312e-06, - "loss": 0.0384, + "learning_rate": 1.7374752313045106e-05, + "loss": 0.0205, "step": 135865 }, { "epoch": 6.34, - "learning_rate": 7.35455440438798e-06, - "loss": 0.0779, + "learning_rate": 1.737428424320909e-05, + "loss": 0.0424, "step": 135870 }, { "epoch": 6.34, - "learning_rate": 7.354085603112842e-06, - "loss": 0.0905, + "learning_rate": 1.737381617337307e-05, + "loss": 0.0517, "step": 135875 }, { "epoch": 6.34, - "learning_rate": 7.353616801837702e-06, - "loss": 0.0991, + "learning_rate": 1.737334810353705e-05, + "loss": 0.0881, "step": 135880 }, { "epoch": 6.34, - "learning_rate": 7.3531480005625624e-06, - "loss": 0.2245, + "learning_rate": 1.7372880033701028e-05, + "loss": 0.188, "step": 135885 }, { "epoch": 6.34, - "learning_rate": 7.352679199287422e-06, - "loss": 0.2038, + "learning_rate": 1.737241196386501e-05, + "loss": 0.2446, "step": 135890 }, { "epoch": 6.34, - "learning_rate": 7.352210398012283e-06, - "loss": 0.0305, + "learning_rate": 1.737194389402899e-05, + "loss": 0.012, "step": 135895 }, { "epoch": 6.34, - "learning_rate": 7.351741596737144e-06, - "loss": 0.0242, + "learning_rate": 1.737147582419297e-05, + "loss": 0.0075, "step": 135900 }, { "epoch": 6.34, - "learning_rate": 7.351272795462005e-06, - "loss": 0.0221, + "learning_rate": 1.7371007754356947e-05, + "loss": 0.0231, "step": 135905 }, { "epoch": 6.34, - "learning_rate": 7.350803994186865e-06, - "loss": 0.0433, + "learning_rate": 1.737053968452093e-05, + "loss": 0.0022, "step": 135910 }, { "epoch": 6.34, - "learning_rate": 7.3503351929117254e-06, - "loss": 0.0442, + "learning_rate": 1.737007161468491e-05, + "loss": 0.0918, "step": 135915 }, { "epoch": 6.34, - "learning_rate": 7.349866391636585e-06, - "loss": 0.0573, + "learning_rate": 1.736960354484889e-05, + "loss": 0.0815, "step": 135920 }, { "epoch": 6.34, - "learning_rate": 7.349397590361447e-06, - "loss": 0.1495, + "learning_rate": 1.7369135475012874e-05, + "loss": 0.0676, "step": 135925 }, { "epoch": 6.34, - "learning_rate": 7.348928789086307e-06, - "loss": 0.1174, + "learning_rate": 1.7368667405176853e-05, + "loss": 0.0869, "step": 135930 }, { "epoch": 6.34, - "learning_rate": 7.348459987811168e-06, - "loss": 0.1544, + "learning_rate": 1.7368199335340833e-05, + "loss": 0.219, "step": 135935 }, { "epoch": 6.34, - "learning_rate": 7.347991186536028e-06, - "loss": 0.1699, + "learning_rate": 1.7367731265504813e-05, + "loss": 0.1389, "step": 135940 }, { "epoch": 6.34, - "learning_rate": 7.347522385260889e-06, - "loss": 0.0028, + "learning_rate": 1.7367263195668796e-05, + "loss": 0.0203, "step": 135945 }, { "epoch": 6.34, - "learning_rate": 7.347053583985749e-06, - "loss": 0.014, + "learning_rate": 1.7366795125832776e-05, + "loss": 0.0412, "step": 135950 }, { "epoch": 6.34, - "learning_rate": 7.34658478271061e-06, - "loss": 0.0183, + "learning_rate": 1.7366327055996756e-05, + "loss": 0.0221, "step": 135955 }, { "epoch": 6.34, - "learning_rate": 7.34611598143547e-06, - "loss": 0.0449, + "learning_rate": 1.7365858986160736e-05, + "loss": 0.0417, "step": 135960 }, { "epoch": 6.34, - "learning_rate": 7.34564718016033e-06, - "loss": 0.1091, + "learning_rate": 1.7365390916324715e-05, + "loss": 0.0469, "step": 135965 }, { "epoch": 6.34, - "learning_rate": 7.3451783788851915e-06, - "loss": 0.0812, + "learning_rate": 1.7364922846488695e-05, + "loss": 0.0589, "step": 135970 }, { "epoch": 6.34, - "learning_rate": 7.344709577610052e-06, - "loss": 0.1366, + "learning_rate": 1.7364454776652675e-05, + "loss": 0.065, "step": 135975 }, { "epoch": 6.35, - "learning_rate": 7.344240776334912e-06, - "loss": 0.0405, + "learning_rate": 1.7363986706816658e-05, + "loss": 0.1088, "step": 135980 }, { "epoch": 6.35, - "learning_rate": 7.343771975059772e-06, - "loss": 0.1326, + "learning_rate": 1.7363518636980638e-05, + "loss": 0.1592, "step": 135985 }, { "epoch": 6.35, - "learning_rate": 7.343303173784633e-06, - "loss": 0.2499, + "learning_rate": 1.7363050567144618e-05, + "loss": 0.1425, "step": 135990 }, { "epoch": 6.35, - "learning_rate": 7.3428343725094945e-06, - "loss": 0.0035, + "learning_rate": 1.7362582497308598e-05, + "loss": 0.0143, "step": 135995 }, { "epoch": 6.35, - "learning_rate": 7.3423655712343545e-06, - "loss": 0.0461, + "learning_rate": 1.736211442747258e-05, + "loss": 0.0175, "step": 136000 }, { "epoch": 6.35, - "learning_rate": 7.341896769959214e-06, - "loss": 0.032, + "learning_rate": 1.736164635763656e-05, + "loss": 0.0236, "step": 136005 }, { "epoch": 6.35, - "learning_rate": 7.341427968684075e-06, - "loss": 0.0294, + "learning_rate": 1.736117828780054e-05, + "loss": 0.0122, "step": 136010 }, { "epoch": 6.35, - "learning_rate": 7.340959167408937e-06, - "loss": 0.0401, + "learning_rate": 1.736071021796452e-05, + "loss": 0.0935, "step": 136015 }, { "epoch": 6.35, - "learning_rate": 7.340490366133797e-06, - "loss": 0.0441, + "learning_rate": 1.7360242148128504e-05, + "loss": 0.0725, "step": 136020 }, { "epoch": 6.35, - "learning_rate": 7.340021564858657e-06, - "loss": 0.061, + "learning_rate": 1.7359774078292483e-05, + "loss": 0.0121, "step": 136025 }, { "epoch": 6.35, - "learning_rate": 7.3395527635835175e-06, - "loss": 0.0781, + "learning_rate": 1.735930600845646e-05, + "loss": 0.0455, "step": 136030 }, { "epoch": 6.35, - "learning_rate": 7.339083962308377e-06, - "loss": 0.1089, + "learning_rate": 1.735883793862044e-05, + "loss": 0.2396, "step": 136035 }, { "epoch": 6.35, - "learning_rate": 7.338615161033239e-06, - "loss": 0.177, + "learning_rate": 1.7358369868784423e-05, + "loss": 0.0927, "step": 136040 }, { "epoch": 6.35, - "learning_rate": 7.338146359758099e-06, - "loss": 0.014, + "learning_rate": 1.7357901798948403e-05, + "loss": 0.024, "step": 136045 }, { "epoch": 6.35, - "learning_rate": 7.33767755848296e-06, - "loss": 0.0249, + "learning_rate": 1.7357433729112383e-05, + "loss": 0.0323, "step": 136050 }, { "epoch": 6.35, - "learning_rate": 7.33720875720782e-06, - "loss": 0.0387, + "learning_rate": 1.7356965659276366e-05, + "loss": 0.005, "step": 136055 }, { "epoch": 6.35, - "learning_rate": 7.336739955932681e-06, - "loss": 0.0325, + "learning_rate": 1.7356497589440346e-05, + "loss": 0.0264, "step": 136060 }, { "epoch": 6.35, - "learning_rate": 7.336271154657541e-06, - "loss": 0.0504, + "learning_rate": 1.7356029519604325e-05, + "loss": 0.0577, "step": 136065 }, { "epoch": 6.35, - "learning_rate": 7.335802353382402e-06, - "loss": 0.0534, + "learning_rate": 1.7355561449768305e-05, + "loss": 0.0531, "step": 136070 }, { "epoch": 6.35, - "learning_rate": 7.335333552107262e-06, - "loss": 0.0699, + "learning_rate": 1.735509337993229e-05, + "loss": 0.0257, "step": 136075 }, { "epoch": 6.35, - "learning_rate": 7.334864750832123e-06, - "loss": 0.0475, + "learning_rate": 1.7354625310096268e-05, + "loss": 0.0339, "step": 136080 }, { "epoch": 6.35, - "learning_rate": 7.3343959495569835e-06, - "loss": 0.1626, + "learning_rate": 1.7354157240260248e-05, + "loss": 0.1915, "step": 136085 }, { "epoch": 6.35, - "learning_rate": 7.333927148281844e-06, - "loss": 0.2486, + "learning_rate": 1.7353689170424228e-05, + "loss": 0.092, "step": 136090 }, { "epoch": 6.35, - "learning_rate": 7.333458347006704e-06, - "loss": 0.0095, + "learning_rate": 1.7353221100588208e-05, + "loss": 0.0142, "step": 136095 }, { "epoch": 6.35, - "learning_rate": 7.332989545731565e-06, - "loss": 0.0184, + "learning_rate": 1.7352753030752187e-05, + "loss": 0.0107, "step": 136100 }, { "epoch": 6.35, - "learning_rate": 7.332520744456425e-06, - "loss": 0.0289, + "learning_rate": 1.7352284960916167e-05, + "loss": 0.0492, "step": 136105 }, { "epoch": 6.35, - "learning_rate": 7.3320519431812866e-06, - "loss": 0.0366, + "learning_rate": 1.735181689108015e-05, + "loss": 0.0374, "step": 136110 }, { "epoch": 6.35, - "learning_rate": 7.3315831419061465e-06, - "loss": 0.0546, + "learning_rate": 1.735134882124413e-05, + "loss": 0.0486, "step": 136115 }, { "epoch": 6.35, - "learning_rate": 7.331114340631007e-06, - "loss": 0.063, + "learning_rate": 1.735088075140811e-05, + "loss": 0.12, "step": 136120 }, { "epoch": 6.35, - "learning_rate": 7.330645539355867e-06, - "loss": 0.0715, + "learning_rate": 1.735041268157209e-05, + "loss": 0.1262, "step": 136125 }, { "epoch": 6.35, - "learning_rate": 7.330176738080729e-06, - "loss": 0.0718, + "learning_rate": 1.7349944611736073e-05, + "loss": 0.142, "step": 136130 }, { "epoch": 6.35, - "learning_rate": 7.329707936805589e-06, - "loss": 0.2484, + "learning_rate": 1.7349476541900053e-05, + "loss": 0.2932, "step": 136135 }, { "epoch": 6.35, - "learning_rate": 7.3292391355304496e-06, - "loss": 0.1396, + "learning_rate": 1.7349008472064033e-05, + "loss": 0.1785, "step": 136140 }, { "epoch": 6.35, - "learning_rate": 7.3287703342553095e-06, - "loss": 0.0183, + "learning_rate": 1.7348540402228013e-05, + "loss": 0.0293, "step": 136145 }, { "epoch": 6.35, - "learning_rate": 7.32830153298017e-06, - "loss": 0.0222, + "learning_rate": 1.7348072332391996e-05, + "loss": 0.0115, "step": 136150 }, { "epoch": 6.35, - "learning_rate": 7.327832731705031e-06, - "loss": 0.0269, + "learning_rate": 1.7347604262555972e-05, + "loss": 0.0453, "step": 136155 }, { "epoch": 6.35, - "learning_rate": 7.327363930429892e-06, - "loss": 0.0064, + "learning_rate": 1.7347136192719952e-05, + "loss": 0.0205, "step": 136160 }, { "epoch": 6.35, - "learning_rate": 7.326895129154752e-06, - "loss": 0.0228, + "learning_rate": 1.7346668122883935e-05, + "loss": 0.0487, "step": 136165 }, { "epoch": 6.35, - "learning_rate": 7.3264263278796125e-06, - "loss": 0.0457, + "learning_rate": 1.7346200053047915e-05, + "loss": 0.0679, "step": 136170 }, { "epoch": 6.35, - "learning_rate": 7.3259575266044725e-06, - "loss": 0.0484, + "learning_rate": 1.7345731983211895e-05, + "loss": 0.1132, "step": 136175 }, { "epoch": 6.35, - "learning_rate": 7.325488725329334e-06, - "loss": 0.1158, + "learning_rate": 1.7345263913375875e-05, + "loss": 0.1159, "step": 136180 }, { "epoch": 6.35, - "learning_rate": 7.325019924054194e-06, - "loss": 0.1829, + "learning_rate": 1.7344795843539858e-05, + "loss": 0.1015, "step": 136185 }, { "epoch": 6.35, - "learning_rate": 7.324551122779055e-06, - "loss": 0.1177, + "learning_rate": 1.7344327773703838e-05, + "loss": 0.1206, "step": 136190 }, { "epoch": 6.36, - "learning_rate": 7.324082321503915e-06, - "loss": 0.0037, + "learning_rate": 1.7343859703867818e-05, + "loss": 0.0237, "step": 136195 }, { "epoch": 6.36, - "learning_rate": 7.323613520228776e-06, - "loss": 0.0024, + "learning_rate": 1.7343391634031797e-05, + "loss": 0.006, "step": 136200 }, { "epoch": 6.36, - "learning_rate": 7.323144718953636e-06, - "loss": 0.0252, + "learning_rate": 1.734292356419578e-05, + "loss": 0.0097, "step": 136205 }, { "epoch": 6.36, - "learning_rate": 7.322675917678497e-06, - "loss": 0.0599, + "learning_rate": 1.734245549435976e-05, + "loss": 0.0565, "step": 136210 }, { "epoch": 6.36, - "learning_rate": 7.322207116403357e-06, - "loss": 0.0495, + "learning_rate": 1.734198742452374e-05, + "loss": 0.0672, "step": 136215 }, { "epoch": 6.36, - "learning_rate": 7.321738315128217e-06, - "loss": 0.0722, + "learning_rate": 1.7341519354687717e-05, + "loss": 0.1205, "step": 136220 }, { "epoch": 6.36, - "learning_rate": 7.321269513853079e-06, - "loss": 0.0862, + "learning_rate": 1.73410512848517e-05, + "loss": 0.1562, "step": 136225 }, { "epoch": 6.36, - "learning_rate": 7.320800712577939e-06, - "loss": 0.1453, + "learning_rate": 1.734058321501568e-05, + "loss": 0.0481, "step": 136230 }, { "epoch": 6.36, - "learning_rate": 7.320331911302799e-06, - "loss": 0.154, + "learning_rate": 1.734011514517966e-05, + "loss": 0.1499, "step": 136235 }, { "epoch": 6.36, - "learning_rate": 7.319863110027659e-06, - "loss": 0.1201, + "learning_rate": 1.7339647075343643e-05, + "loss": 0.1798, "step": 136240 }, { "epoch": 6.36, - "learning_rate": 7.31939430875252e-06, - "loss": 0.0051, + "learning_rate": 1.7339179005507623e-05, + "loss": 0.0159, "step": 136245 }, { "epoch": 6.36, - "learning_rate": 7.318925507477382e-06, - "loss": 0.0141, + "learning_rate": 1.7338710935671602e-05, + "loss": 0.0209, "step": 136250 }, { "epoch": 6.36, - "learning_rate": 7.318456706202242e-06, - "loss": 0.0653, + "learning_rate": 1.7338242865835582e-05, + "loss": 0.0298, "step": 136255 }, { "epoch": 6.36, - "learning_rate": 7.3179879049271015e-06, - "loss": 0.0233, + "learning_rate": 1.7337774795999565e-05, + "loss": 0.0435, "step": 136260 }, { "epoch": 6.36, - "learning_rate": 7.317519103651962e-06, - "loss": 0.0463, + "learning_rate": 1.7337306726163545e-05, + "loss": 0.0136, "step": 136265 }, { "epoch": 6.36, - "learning_rate": 7.317050302376824e-06, - "loss": 0.0982, + "learning_rate": 1.7336838656327525e-05, + "loss": 0.1087, "step": 136270 }, { "epoch": 6.36, - "learning_rate": 7.316581501101684e-06, - "loss": 0.0695, + "learning_rate": 1.7336370586491505e-05, + "loss": 0.0699, "step": 136275 }, { "epoch": 6.36, - "learning_rate": 7.316112699826544e-06, - "loss": 0.0994, + "learning_rate": 1.7335902516655485e-05, + "loss": 0.1077, "step": 136280 }, { "epoch": 6.36, - "learning_rate": 7.3156438985514046e-06, - "loss": 0.1525, + "learning_rate": 1.7335434446819464e-05, + "loss": 0.3255, "step": 136285 }, { "epoch": 6.36, - "learning_rate": 7.3151750972762645e-06, - "loss": 0.256, + "learning_rate": 1.7334966376983444e-05, + "loss": 0.2521, "step": 136290 }, { "epoch": 6.36, - "learning_rate": 7.314706296001126e-06, - "loss": 0.0353, + "learning_rate": 1.7334498307147427e-05, + "loss": 0.0238, "step": 136295 }, { "epoch": 6.36, - "learning_rate": 7.314237494725986e-06, - "loss": 0.0327, + "learning_rate": 1.7334030237311407e-05, + "loss": 0.0104, "step": 136300 }, { "epoch": 6.36, - "learning_rate": 7.313768693450847e-06, - "loss": 0.022, + "learning_rate": 1.7333562167475387e-05, + "loss": 0.0143, "step": 136305 }, { "epoch": 6.36, - "learning_rate": 7.313299892175707e-06, - "loss": 0.107, + "learning_rate": 1.7333094097639367e-05, + "loss": 0.0696, "step": 136310 }, { "epoch": 6.36, - "learning_rate": 7.3128310909005676e-06, - "loss": 0.0364, + "learning_rate": 1.733262602780335e-05, + "loss": 0.0704, "step": 136315 }, { "epoch": 6.36, - "learning_rate": 7.312362289625428e-06, - "loss": 0.0453, + "learning_rate": 1.733215795796733e-05, + "loss": 0.0652, "step": 136320 }, { "epoch": 6.36, - "learning_rate": 7.311893488350289e-06, - "loss": 0.076, + "learning_rate": 1.733168988813131e-05, + "loss": 0.0928, "step": 136325 }, { "epoch": 6.36, - "learning_rate": 7.311424687075149e-06, - "loss": 0.1698, + "learning_rate": 1.733122181829529e-05, + "loss": 0.1213, "step": 136330 }, { "epoch": 6.36, - "learning_rate": 7.31095588580001e-06, - "loss": 0.143, + "learning_rate": 1.7330753748459273e-05, + "loss": 0.2608, "step": 136335 }, { "epoch": 6.36, - "learning_rate": 7.310487084524871e-06, - "loss": 0.112, + "learning_rate": 1.7330285678623253e-05, + "loss": 0.195, "step": 136340 }, { "epoch": 6.36, - "learning_rate": 7.310018283249731e-06, - "loss": 0.0704, + "learning_rate": 1.732981760878723e-05, + "loss": 0.0096, "step": 136345 }, { "epoch": 6.36, - "learning_rate": 7.309549481974591e-06, - "loss": 0.0023, + "learning_rate": 1.7329349538951212e-05, + "loss": 0.0024, "step": 136350 }, { "epoch": 6.36, - "learning_rate": 7.309080680699452e-06, - "loss": 0.0191, + "learning_rate": 1.7328881469115192e-05, + "loss": 0.0284, "step": 136355 }, { "epoch": 6.36, - "learning_rate": 7.308611879424312e-06, - "loss": 0.0299, + "learning_rate": 1.7328413399279172e-05, + "loss": 0.0955, "step": 136360 }, { "epoch": 6.36, - "learning_rate": 7.308143078149174e-06, - "loss": 0.0468, + "learning_rate": 1.732794532944315e-05, + "loss": 0.0144, "step": 136365 }, { "epoch": 6.36, - "learning_rate": 7.307674276874034e-06, - "loss": 0.0598, + "learning_rate": 1.7327477259607135e-05, + "loss": 0.026, "step": 136370 }, { "epoch": 6.36, - "learning_rate": 7.307205475598894e-06, - "loss": 0.0523, + "learning_rate": 1.7327009189771115e-05, + "loss": 0.1294, "step": 136375 }, { "epoch": 6.36, - "learning_rate": 7.306736674323754e-06, - "loss": 0.0848, + "learning_rate": 1.7326541119935095e-05, + "loss": 0.0681, "step": 136380 }, { "epoch": 6.36, - "learning_rate": 7.306267873048616e-06, - "loss": 0.1925, + "learning_rate": 1.7326073050099074e-05, + "loss": 0.2244, "step": 136385 }, { "epoch": 6.36, - "learning_rate": 7.305799071773476e-06, - "loss": 0.1284, + "learning_rate": 1.7325604980263058e-05, + "loss": 0.2057, "step": 136390 }, { "epoch": 6.36, - "learning_rate": 7.305330270498337e-06, - "loss": 0.022, + "learning_rate": 1.7325136910427037e-05, + "loss": 0.0505, "step": 136395 }, { "epoch": 6.36, - "learning_rate": 7.304861469223197e-06, - "loss": 0.0051, + "learning_rate": 1.7324668840591017e-05, + "loss": 0.0458, "step": 136400 }, { "epoch": 6.36, - "learning_rate": 7.304392667948057e-06, - "loss": 0.0218, + "learning_rate": 1.7324200770754997e-05, + "loss": 0.0516, "step": 136405 }, { "epoch": 6.37, - "learning_rate": 7.303923866672918e-06, - "loss": 0.0249, + "learning_rate": 1.7323732700918977e-05, + "loss": 0.0242, "step": 136410 }, { "epoch": 6.37, - "learning_rate": 7.303455065397779e-06, - "loss": 0.0658, + "learning_rate": 1.7323264631082957e-05, + "loss": 0.0228, "step": 136415 }, { "epoch": 6.37, - "learning_rate": 7.302986264122639e-06, - "loss": 0.0886, + "learning_rate": 1.7322796561246936e-05, + "loss": 0.1059, "step": 136420 }, { "epoch": 6.37, - "learning_rate": 7.3025174628475e-06, - "loss": 0.065, + "learning_rate": 1.732232849141092e-05, + "loss": 0.0353, "step": 136425 }, { "epoch": 6.37, - "learning_rate": 7.30204866157236e-06, - "loss": 0.0955, + "learning_rate": 1.73218604215749e-05, + "loss": 0.0748, "step": 136430 }, { "epoch": 6.37, - "learning_rate": 7.301579860297221e-06, - "loss": 0.2888, + "learning_rate": 1.732139235173888e-05, + "loss": 0.1267, "step": 136435 }, { "epoch": 6.37, - "learning_rate": 7.301111059022081e-06, - "loss": 0.1836, + "learning_rate": 1.732092428190286e-05, + "loss": 0.1068, "step": 136440 }, { "epoch": 6.37, - "learning_rate": 7.300642257746942e-06, - "loss": 0.0135, + "learning_rate": 1.7320456212066842e-05, + "loss": 0.0167, "step": 136445 }, { "epoch": 6.37, - "learning_rate": 7.300173456471802e-06, - "loss": 0.024, + "learning_rate": 1.7319988142230822e-05, + "loss": 0.0223, "step": 136450 }, { "epoch": 6.37, - "learning_rate": 7.2997046551966635e-06, - "loss": 0.0346, + "learning_rate": 1.7319520072394802e-05, + "loss": 0.017, "step": 136455 }, { "epoch": 6.37, - "learning_rate": 7.2992358539215234e-06, - "loss": 0.0747, + "learning_rate": 1.7319052002558782e-05, + "loss": 0.0544, "step": 136460 }, { "epoch": 6.37, - "learning_rate": 7.298767052646384e-06, - "loss": 0.043, + "learning_rate": 1.7318583932722765e-05, + "loss": 0.0193, "step": 136465 }, { "epoch": 6.37, - "learning_rate": 7.298298251371244e-06, - "loss": 0.0615, + "learning_rate": 1.731811586288674e-05, + "loss": 0.0233, "step": 136470 }, { "epoch": 6.37, - "learning_rate": 7.297829450096104e-06, - "loss": 0.1146, + "learning_rate": 1.731764779305072e-05, + "loss": 0.0594, "step": 136475 }, { "epoch": 6.37, - "learning_rate": 7.297360648820966e-06, - "loss": 0.1898, + "learning_rate": 1.7317179723214704e-05, + "loss": 0.1822, "step": 136480 }, { "epoch": 6.37, - "learning_rate": 7.2968918475458265e-06, - "loss": 0.2005, + "learning_rate": 1.7316711653378684e-05, + "loss": 0.1996, "step": 136485 }, { "epoch": 6.37, - "learning_rate": 7.296423046270686e-06, - "loss": 0.1998, + "learning_rate": 1.7316243583542664e-05, + "loss": 0.0977, "step": 136490 }, { "epoch": 6.37, - "learning_rate": 7.295954244995546e-06, - "loss": 0.2515, + "learning_rate": 1.7315775513706644e-05, + "loss": 0.0249, "step": 136495 }, { "epoch": 6.37, - "learning_rate": 7.295485443720407e-06, - "loss": 0.0051, + "learning_rate": 1.7315307443870627e-05, + "loss": 0.0208, "step": 136500 }, { "epoch": 6.37, - "learning_rate": 7.295016642445269e-06, - "loss": 0.048, + "learning_rate": 1.7314839374034607e-05, + "loss": 0.0415, "step": 136505 }, { "epoch": 6.37, - "learning_rate": 7.294547841170129e-06, - "loss": 0.0081, + "learning_rate": 1.7314371304198587e-05, + "loss": 0.0127, "step": 136510 }, { "epoch": 6.37, - "learning_rate": 7.294079039894989e-06, - "loss": 0.0414, + "learning_rate": 1.7313903234362567e-05, + "loss": 0.0571, "step": 136515 }, { "epoch": 6.37, - "learning_rate": 7.293610238619849e-06, - "loss": 0.0827, + "learning_rate": 1.731343516452655e-05, + "loss": 0.0254, "step": 136520 }, { "epoch": 6.37, - "learning_rate": 7.293141437344711e-06, - "loss": 0.1401, + "learning_rate": 1.731296709469053e-05, + "loss": 0.0826, "step": 136525 }, { "epoch": 6.37, - "learning_rate": 7.292672636069571e-06, - "loss": 0.1645, + "learning_rate": 1.731249902485451e-05, + "loss": 0.0963, "step": 136530 }, { "epoch": 6.37, - "learning_rate": 7.292203834794431e-06, - "loss": 0.299, + "learning_rate": 1.731203095501849e-05, + "loss": 0.1964, "step": 136535 }, { "epoch": 6.37, - "learning_rate": 7.291735033519292e-06, - "loss": 0.237, + "learning_rate": 1.731156288518247e-05, + "loss": 0.0837, "step": 136540 }, { "epoch": 6.37, - "learning_rate": 7.291266232244152e-06, - "loss": 0.0252, + "learning_rate": 1.731109481534645e-05, + "loss": 0.0275, "step": 136545 }, { "epoch": 6.37, - "learning_rate": 7.290797430969013e-06, - "loss": 0.0185, + "learning_rate": 1.731062674551043e-05, + "loss": 0.0247, "step": 136550 }, { "epoch": 6.37, - "learning_rate": 7.290328629693873e-06, - "loss": 0.0346, + "learning_rate": 1.7310158675674412e-05, + "loss": 0.0276, "step": 136555 }, { "epoch": 6.37, - "learning_rate": 7.289859828418734e-06, - "loss": 0.0349, + "learning_rate": 1.730969060583839e-05, + "loss": 0.0389, "step": 136560 }, { "epoch": 6.37, - "learning_rate": 7.289391027143594e-06, - "loss": 0.043, + "learning_rate": 1.730922253600237e-05, + "loss": 0.0801, "step": 136565 }, { "epoch": 6.37, - "learning_rate": 7.288922225868455e-06, - "loss": 0.1162, + "learning_rate": 1.730875446616635e-05, + "loss": 0.0499, "step": 136570 }, { "epoch": 6.37, - "learning_rate": 7.2884534245933155e-06, - "loss": 0.1033, + "learning_rate": 1.7308286396330335e-05, + "loss": 0.0221, "step": 136575 }, { "epoch": 6.37, - "learning_rate": 7.287984623318176e-06, - "loss": 0.1138, + "learning_rate": 1.7307818326494314e-05, + "loss": 0.113, "step": 136580 }, { "epoch": 6.37, - "learning_rate": 7.287515822043036e-06, - "loss": 0.329, + "learning_rate": 1.7307350256658294e-05, + "loss": 0.1466, "step": 136585 }, { "epoch": 6.37, - "learning_rate": 7.287047020767897e-06, - "loss": 0.1681, + "learning_rate": 1.7306882186822277e-05, + "loss": 0.2604, "step": 136590 }, { "epoch": 6.37, - "learning_rate": 7.286578219492758e-06, - "loss": 0.0203, + "learning_rate": 1.7306414116986257e-05, + "loss": 0.0214, "step": 136595 }, { "epoch": 6.37, - "learning_rate": 7.2861094182176185e-06, - "loss": 0.022, + "learning_rate": 1.7305946047150234e-05, + "loss": 0.0517, "step": 136600 }, { "epoch": 6.37, - "learning_rate": 7.2856406169424784e-06, - "loss": 0.0055, + "learning_rate": 1.7305477977314213e-05, + "loss": 0.0701, "step": 136605 }, { "epoch": 6.37, - "learning_rate": 7.285171815667339e-06, - "loss": 0.0381, + "learning_rate": 1.7305009907478197e-05, + "loss": 0.0499, "step": 136610 }, { "epoch": 6.37, - "learning_rate": 7.284703014392199e-06, - "loss": 0.0247, + "learning_rate": 1.7304541837642176e-05, + "loss": 0.0258, "step": 136615 }, { "epoch": 6.37, - "learning_rate": 7.284234213117061e-06, - "loss": 0.1053, + "learning_rate": 1.7304073767806156e-05, + "loss": 0.0456, "step": 136620 }, { "epoch": 6.38, - "learning_rate": 7.283765411841921e-06, - "loss": 0.049, + "learning_rate": 1.7303605697970136e-05, + "loss": 0.0429, "step": 136625 }, { "epoch": 6.38, - "learning_rate": 7.2832966105667815e-06, - "loss": 0.1378, + "learning_rate": 1.730313762813412e-05, + "loss": 0.101, "step": 136630 }, { "epoch": 6.38, - "learning_rate": 7.2828278092916414e-06, - "loss": 0.1215, + "learning_rate": 1.73026695582981e-05, + "loss": 0.0835, "step": 136635 }, { "epoch": 6.38, - "learning_rate": 7.282359008016502e-06, - "loss": 0.1522, + "learning_rate": 1.730220148846208e-05, + "loss": 0.2592, "step": 136640 }, { "epoch": 6.38, - "learning_rate": 7.281890206741363e-06, - "loss": 0.0522, + "learning_rate": 1.730173341862606e-05, + "loss": 0.0061, "step": 136645 }, { "epoch": 6.38, - "learning_rate": 7.281421405466224e-06, - "loss": 0.0155, + "learning_rate": 1.7301265348790042e-05, + "loss": 0.0006, "step": 136650 }, { "epoch": 6.38, - "learning_rate": 7.280952604191084e-06, - "loss": 0.0128, + "learning_rate": 1.7300797278954022e-05, + "loss": 0.0388, "step": 136655 }, { "epoch": 6.38, - "learning_rate": 7.2804838029159445e-06, - "loss": 0.0327, + "learning_rate": 1.7300329209117998e-05, + "loss": 0.0702, "step": 136660 }, { "epoch": 6.38, - "learning_rate": 7.280015001640805e-06, - "loss": 0.0992, + "learning_rate": 1.729986113928198e-05, + "loss": 0.0564, "step": 136665 }, { "epoch": 6.38, - "learning_rate": 7.279546200365666e-06, - "loss": 0.0527, + "learning_rate": 1.729939306944596e-05, + "loss": 0.0309, "step": 136670 }, { "epoch": 6.38, - "learning_rate": 7.279077399090526e-06, - "loss": 0.0731, + "learning_rate": 1.729892499960994e-05, + "loss": 0.0925, "step": 136675 }, { "epoch": 6.38, - "learning_rate": 7.278608597815387e-06, - "loss": 0.0837, + "learning_rate": 1.729845692977392e-05, + "loss": 0.0667, "step": 136680 }, { "epoch": 6.38, - "learning_rate": 7.278139796540247e-06, - "loss": 0.2225, + "learning_rate": 1.7297988859937904e-05, + "loss": 0.1123, "step": 136685 }, { "epoch": 6.38, - "learning_rate": 7.277670995265108e-06, - "loss": 0.1933, + "learning_rate": 1.7297520790101884e-05, + "loss": 0.1001, "step": 136690 }, { "epoch": 6.38, - "learning_rate": 7.277202193989968e-06, - "loss": 0.0071, + "learning_rate": 1.7297052720265864e-05, + "loss": 0.0144, "step": 136695 }, { "epoch": 6.38, - "learning_rate": 7.276733392714829e-06, - "loss": 0.0219, + "learning_rate": 1.7296584650429844e-05, + "loss": 0.0394, "step": 136700 }, { "epoch": 6.38, - "learning_rate": 7.276264591439689e-06, - "loss": 0.0128, + "learning_rate": 1.7296116580593827e-05, + "loss": 0.0276, "step": 136705 }, { "epoch": 6.38, - "learning_rate": 7.275795790164549e-06, - "loss": 0.0136, + "learning_rate": 1.7295648510757807e-05, + "loss": 0.0468, "step": 136710 }, { "epoch": 6.38, - "learning_rate": 7.2753269888894105e-06, - "loss": 0.0396, + "learning_rate": 1.7295180440921786e-05, + "loss": 0.0499, "step": 136715 }, { "epoch": 6.38, - "learning_rate": 7.274858187614271e-06, - "loss": 0.0864, + "learning_rate": 1.729471237108577e-05, + "loss": 0.0308, "step": 136720 }, { "epoch": 6.38, - "learning_rate": 7.274389386339131e-06, - "loss": 0.0804, + "learning_rate": 1.7294244301249746e-05, + "loss": 0.082, "step": 136725 }, { "epoch": 6.38, - "learning_rate": 7.273920585063991e-06, - "loss": 0.087, + "learning_rate": 1.7293776231413726e-05, + "loss": 0.162, "step": 136730 }, { "epoch": 6.38, - "learning_rate": 7.273451783788853e-06, - "loss": 0.2555, + "learning_rate": 1.7293308161577706e-05, + "loss": 0.1147, "step": 136735 }, { "epoch": 6.38, - "learning_rate": 7.272982982513714e-06, - "loss": 0.1012, + "learning_rate": 1.729284009174169e-05, + "loss": 0.1426, "step": 136740 }, { "epoch": 6.38, - "learning_rate": 7.2725141812385735e-06, - "loss": 0.0084, + "learning_rate": 1.729237202190567e-05, + "loss": 0.0107, "step": 136745 }, { "epoch": 6.38, - "learning_rate": 7.2720453799634335e-06, - "loss": 0.0261, + "learning_rate": 1.729190395206965e-05, + "loss": 0.0081, "step": 136750 }, { "epoch": 6.38, - "learning_rate": 7.271576578688294e-06, - "loss": 0.0404, + "learning_rate": 1.7291435882233628e-05, + "loss": 0.0169, "step": 136755 }, { "epoch": 6.38, - "learning_rate": 7.271107777413156e-06, - "loss": 0.0428, + "learning_rate": 1.729096781239761e-05, + "loss": 0.0086, "step": 136760 }, { "epoch": 6.38, - "learning_rate": 7.270638976138016e-06, - "loss": 0.0829, + "learning_rate": 1.729049974256159e-05, + "loss": 0.0426, "step": 136765 }, { "epoch": 6.38, - "learning_rate": 7.270170174862876e-06, - "loss": 0.0205, + "learning_rate": 1.729003167272557e-05, + "loss": 0.0525, "step": 136770 }, { "epoch": 6.38, - "learning_rate": 7.2697013735877365e-06, - "loss": 0.0986, + "learning_rate": 1.7289563602889554e-05, + "loss": 0.1006, "step": 136775 }, { "epoch": 6.38, - "learning_rate": 7.269232572312598e-06, - "loss": 0.1559, + "learning_rate": 1.7289095533053534e-05, + "loss": 0.0638, "step": 136780 }, { "epoch": 6.38, - "learning_rate": 7.268763771037458e-06, - "loss": 0.201, + "learning_rate": 1.7288627463217514e-05, + "loss": 0.2233, "step": 136785 }, { "epoch": 6.38, - "learning_rate": 7.268294969762318e-06, - "loss": 0.1959, + "learning_rate": 1.728815939338149e-05, + "loss": 0.1825, "step": 136790 }, { "epoch": 6.38, - "learning_rate": 7.267826168487179e-06, - "loss": 0.0307, + "learning_rate": 1.7287691323545474e-05, + "loss": 0.0203, "step": 136795 }, { "epoch": 6.38, - "learning_rate": 7.267357367212039e-06, - "loss": 0.0171, + "learning_rate": 1.7287223253709453e-05, + "loss": 0.0128, "step": 136800 }, { "epoch": 6.38, - "learning_rate": 7.2668885659369e-06, - "loss": 0.0132, + "learning_rate": 1.7286755183873433e-05, + "loss": 0.0645, "step": 136805 }, { "epoch": 6.38, - "learning_rate": 7.26641976466176e-06, - "loss": 0.0494, + "learning_rate": 1.7286287114037413e-05, + "loss": 0.0487, "step": 136810 }, { "epoch": 6.38, - "learning_rate": 7.265950963386621e-06, - "loss": 0.0268, + "learning_rate": 1.7285819044201396e-05, + "loss": 0.0355, "step": 136815 }, { "epoch": 6.38, - "learning_rate": 7.265482162111481e-06, - "loss": 0.066, + "learning_rate": 1.7285350974365376e-05, + "loss": 0.0346, "step": 136820 }, { "epoch": 6.38, - "learning_rate": 7.265013360836342e-06, - "loss": 0.1611, + "learning_rate": 1.7284882904529356e-05, + "loss": 0.0405, "step": 136825 }, { "epoch": 6.38, - "learning_rate": 7.2645445595612026e-06, - "loss": 0.0896, + "learning_rate": 1.7284414834693336e-05, + "loss": 0.0711, "step": 136830 }, { "epoch": 6.38, - "learning_rate": 7.264075758286063e-06, - "loss": 0.217, + "learning_rate": 1.728394676485732e-05, + "loss": 0.2351, "step": 136835 }, { "epoch": 6.39, - "learning_rate": 7.263606957010923e-06, - "loss": 0.2548, + "learning_rate": 1.72834786950213e-05, + "loss": 0.1839, "step": 136840 }, { "epoch": 6.39, - "learning_rate": 7.263138155735784e-06, - "loss": 0.0503, + "learning_rate": 1.728301062518528e-05, + "loss": 0.0109, "step": 136845 }, { "epoch": 6.39, - "learning_rate": 7.262669354460645e-06, - "loss": 0.037, + "learning_rate": 1.728254255534926e-05, + "loss": 0.0302, "step": 136850 }, { "epoch": 6.39, - "learning_rate": 7.262200553185506e-06, - "loss": 0.0213, + "learning_rate": 1.7282074485513238e-05, + "loss": 0.037, "step": 136855 }, { "epoch": 6.39, - "learning_rate": 7.2617317519103655e-06, - "loss": 0.0191, + "learning_rate": 1.7281606415677218e-05, + "loss": 0.0483, "step": 136860 }, { "epoch": 6.39, - "learning_rate": 7.261262950635226e-06, - "loss": 0.0359, + "learning_rate": 1.7281138345841198e-05, + "loss": 0.0214, "step": 136865 }, { "epoch": 6.39, - "learning_rate": 7.260794149360086e-06, - "loss": 0.0299, + "learning_rate": 1.728067027600518e-05, + "loss": 0.0989, "step": 136870 }, { "epoch": 6.39, - "learning_rate": 7.260325348084948e-06, - "loss": 0.0607, + "learning_rate": 1.728020220616916e-05, + "loss": 0.057, "step": 136875 }, { "epoch": 6.39, - "learning_rate": 7.259856546809808e-06, - "loss": 0.0738, + "learning_rate": 1.727973413633314e-05, + "loss": 0.0894, "step": 136880 }, { "epoch": 6.39, - "learning_rate": 7.259387745534669e-06, - "loss": 0.203, + "learning_rate": 1.727926606649712e-05, + "loss": 0.1519, "step": 136885 }, { "epoch": 6.39, - "learning_rate": 7.2589189442595285e-06, - "loss": 0.1721, + "learning_rate": 1.7278797996661104e-05, + "loss": 0.1125, "step": 136890 }, { "epoch": 6.39, - "learning_rate": 7.258450142984389e-06, - "loss": 0.0269, + "learning_rate": 1.7278329926825084e-05, + "loss": 0.0329, "step": 136895 }, { "epoch": 6.39, - "learning_rate": 7.25798134170925e-06, - "loss": 0.0149, + "learning_rate": 1.7277861856989063e-05, + "loss": 0.0431, "step": 136900 }, { "epoch": 6.39, - "learning_rate": 7.257512540434111e-06, - "loss": 0.0148, + "learning_rate": 1.7277393787153047e-05, + "loss": 0.0093, "step": 136905 }, { "epoch": 6.39, - "learning_rate": 7.257043739158971e-06, - "loss": 0.0266, + "learning_rate": 1.7276925717317026e-05, + "loss": 0.0347, "step": 136910 }, { "epoch": 6.39, - "learning_rate": 7.256574937883832e-06, - "loss": 0.0732, + "learning_rate": 1.7276457647481003e-05, + "loss": 0.0874, "step": 136915 }, { "epoch": 6.39, - "learning_rate": 7.256106136608692e-06, - "loss": 0.0564, + "learning_rate": 1.7275989577644983e-05, + "loss": 0.0473, "step": 136920 }, { "epoch": 6.39, - "learning_rate": 7.255637335333553e-06, - "loss": 0.0884, + "learning_rate": 1.7275521507808966e-05, + "loss": 0.0955, "step": 136925 }, { "epoch": 6.39, - "learning_rate": 7.255168534058413e-06, - "loss": 0.0996, + "learning_rate": 1.7275053437972946e-05, + "loss": 0.1298, "step": 136930 }, { "epoch": 6.39, - "learning_rate": 7.254699732783274e-06, - "loss": 0.1079, + "learning_rate": 1.7274585368136925e-05, + "loss": 0.1193, "step": 136935 }, { "epoch": 6.39, - "learning_rate": 7.254230931508134e-06, - "loss": 0.14, + "learning_rate": 1.7274117298300905e-05, + "loss": 0.1474, "step": 136940 }, { "epoch": 6.39, - "learning_rate": 7.2537621302329954e-06, - "loss": 0.0097, + "learning_rate": 1.727364922846489e-05, + "loss": 0.0113, "step": 136945 }, { "epoch": 6.39, - "learning_rate": 7.253293328957855e-06, - "loss": 0.0113, + "learning_rate": 1.7273181158628868e-05, + "loss": 0.036, "step": 136950 }, { "epoch": 6.39, - "learning_rate": 7.252824527682716e-06, - "loss": 0.0331, + "learning_rate": 1.7272713088792848e-05, + "loss": 0.0242, "step": 136955 }, { "epoch": 6.39, - "learning_rate": 7.252355726407576e-06, - "loss": 0.061, + "learning_rate": 1.727224501895683e-05, + "loss": 0.0271, "step": 136960 }, { "epoch": 6.39, - "learning_rate": 7.251886925132436e-06, - "loss": 0.0092, + "learning_rate": 1.727177694912081e-05, + "loss": 0.0536, "step": 136965 }, { "epoch": 6.39, - "learning_rate": 7.251418123857298e-06, - "loss": 0.0404, + "learning_rate": 1.727130887928479e-05, + "loss": 0.0605, "step": 136970 }, { "epoch": 6.39, - "learning_rate": 7.250949322582158e-06, - "loss": 0.0397, + "learning_rate": 1.727084080944877e-05, + "loss": 0.0517, "step": 136975 }, { "epoch": 6.39, - "learning_rate": 7.250480521307018e-06, - "loss": 0.1069, + "learning_rate": 1.727037273961275e-05, + "loss": 0.0537, "step": 136980 }, { "epoch": 6.39, - "learning_rate": 7.250011720031878e-06, - "loss": 0.2963, + "learning_rate": 1.726990466977673e-05, + "loss": 0.1395, "step": 136985 }, { "epoch": 6.39, - "learning_rate": 7.24954291875674e-06, - "loss": 0.0889, + "learning_rate": 1.726943659994071e-05, + "loss": 0.1342, "step": 136990 }, { "epoch": 6.39, - "learning_rate": 7.249074117481601e-06, - "loss": 0.0165, + "learning_rate": 1.726896853010469e-05, + "loss": 0.004, "step": 136995 }, { "epoch": 6.39, - "learning_rate": 7.248605316206461e-06, - "loss": 0.0224, + "learning_rate": 1.7268500460268673e-05, + "loss": 0.0149, "step": 137000 }, { "epoch": 6.39, - "learning_rate": 7.2481365149313206e-06, - "loss": 0.0547, + "learning_rate": 1.7268032390432653e-05, + "loss": 0.0065, "step": 137005 }, { "epoch": 6.39, - "learning_rate": 7.247667713656181e-06, - "loss": 0.0234, + "learning_rate": 1.7267564320596633e-05, + "loss": 0.0339, "step": 137010 }, { "epoch": 6.39, - "learning_rate": 7.247198912381043e-06, - "loss": 0.0408, + "learning_rate": 1.7267096250760613e-05, + "loss": 0.0463, "step": 137015 }, { "epoch": 6.39, - "learning_rate": 7.246730111105903e-06, - "loss": 0.0466, + "learning_rate": 1.7266628180924596e-05, + "loss": 0.092, "step": 137020 }, { "epoch": 6.39, - "learning_rate": 7.246261309830763e-06, - "loss": 0.084, + "learning_rate": 1.7266160111088576e-05, + "loss": 0.0534, "step": 137025 }, { "epoch": 6.39, - "learning_rate": 7.245792508555624e-06, - "loss": 0.0921, + "learning_rate": 1.7265692041252556e-05, + "loss": 0.019, "step": 137030 }, { "epoch": 6.39, - "learning_rate": 7.2453237072804836e-06, - "loss": 0.1336, + "learning_rate": 1.726522397141654e-05, + "loss": 0.1443, "step": 137035 }, { "epoch": 6.39, - "learning_rate": 7.244854906005345e-06, - "loss": 0.1361, + "learning_rate": 1.7264755901580515e-05, + "loss": 0.125, "step": 137040 }, { "epoch": 6.39, - "learning_rate": 7.244386104730205e-06, - "loss": 0.0084, + "learning_rate": 1.7264287831744495e-05, + "loss": 0.0044, "step": 137045 }, { "epoch": 6.39, - "learning_rate": 7.243917303455066e-06, - "loss": 0.0521, + "learning_rate": 1.7263819761908475e-05, + "loss": 0.0402, "step": 137050 }, { "epoch": 6.4, - "learning_rate": 7.243448502179926e-06, - "loss": 0.0485, + "learning_rate": 1.7263351692072458e-05, + "loss": 0.0337, "step": 137055 }, { "epoch": 6.4, - "learning_rate": 7.2429797009047875e-06, - "loss": 0.0475, + "learning_rate": 1.7262883622236438e-05, + "loss": 0.078, "step": 137060 }, { "epoch": 6.4, - "learning_rate": 7.242510899629648e-06, - "loss": 0.019, + "learning_rate": 1.7262415552400418e-05, + "loss": 0.0521, "step": 137065 }, { "epoch": 6.4, - "learning_rate": 7.242042098354508e-06, - "loss": 0.0328, + "learning_rate": 1.7261947482564397e-05, + "loss": 0.0627, "step": 137070 }, { "epoch": 6.4, - "learning_rate": 7.241573297079368e-06, - "loss": 0.0459, + "learning_rate": 1.726147941272838e-05, + "loss": 0.0426, "step": 137075 }, { "epoch": 6.4, - "learning_rate": 7.241104495804229e-06, - "loss": 0.0661, + "learning_rate": 1.726101134289236e-05, + "loss": 0.1473, "step": 137080 }, { "epoch": 6.4, - "learning_rate": 7.2406356945290905e-06, - "loss": 0.2055, + "learning_rate": 1.726054327305634e-05, + "loss": 0.145, "step": 137085 }, { "epoch": 6.4, - "learning_rate": 7.2401668932539504e-06, - "loss": 0.2181, + "learning_rate": 1.7260075203220324e-05, + "loss": 0.0955, "step": 137090 }, { "epoch": 6.4, - "learning_rate": 7.23969809197881e-06, - "loss": 0.024, + "learning_rate": 1.7259607133384303e-05, + "loss": 0.0285, "step": 137095 }, { "epoch": 6.4, - "learning_rate": 7.239229290703671e-06, - "loss": 0.0061, + "learning_rate": 1.7259139063548283e-05, + "loss": 0.0475, "step": 137100 }, { "epoch": 6.4, - "learning_rate": 7.238760489428533e-06, - "loss": 0.0103, + "learning_rate": 1.725867099371226e-05, + "loss": 0.0218, "step": 137105 }, { "epoch": 6.4, - "learning_rate": 7.238291688153393e-06, - "loss": 0.0211, + "learning_rate": 1.7258202923876243e-05, + "loss": 0.0337, "step": 137110 }, { "epoch": 6.4, - "learning_rate": 7.237822886878253e-06, - "loss": 0.0411, + "learning_rate": 1.7257734854040223e-05, + "loss": 0.0323, "step": 137115 }, { "epoch": 6.4, - "learning_rate": 7.2373540856031134e-06, - "loss": 0.0246, + "learning_rate": 1.7257266784204202e-05, + "loss": 0.0475, "step": 137120 }, { "epoch": 6.4, - "learning_rate": 7.236885284327973e-06, - "loss": 0.1575, + "learning_rate": 1.7256798714368182e-05, + "loss": 0.0694, "step": 137125 }, { "epoch": 6.4, - "learning_rate": 7.236416483052835e-06, - "loss": 0.1421, + "learning_rate": 1.7256330644532165e-05, + "loss": 0.1048, "step": 137130 }, { "epoch": 6.4, - "learning_rate": 7.235947681777695e-06, - "loss": 0.2175, + "learning_rate": 1.7255862574696145e-05, + "loss": 0.182, "step": 137135 }, { "epoch": 6.4, - "learning_rate": 7.235478880502556e-06, - "loss": 0.1378, + "learning_rate": 1.7255394504860125e-05, + "loss": 0.1993, "step": 137140 }, { "epoch": 6.4, - "learning_rate": 7.235010079227416e-06, - "loss": 0.0051, + "learning_rate": 1.7254926435024108e-05, + "loss": 0.0162, "step": 137145 }, { "epoch": 6.4, - "learning_rate": 7.2345412779522764e-06, - "loss": 0.0301, + "learning_rate": 1.7254458365188088e-05, + "loss": 0.0293, "step": 137150 }, { "epoch": 6.4, - "learning_rate": 7.234072476677137e-06, - "loss": 0.026, + "learning_rate": 1.7253990295352068e-05, + "loss": 0.0276, "step": 137155 }, { "epoch": 6.4, - "learning_rate": 7.233603675401998e-06, - "loss": 0.03, + "learning_rate": 1.7253522225516048e-05, + "loss": 0.0357, "step": 137160 }, { "epoch": 6.4, - "learning_rate": 7.233134874126858e-06, - "loss": 0.0265, + "learning_rate": 1.7253054155680028e-05, + "loss": 0.0374, "step": 137165 }, { "epoch": 6.4, - "learning_rate": 7.232666072851719e-06, - "loss": 0.1095, + "learning_rate": 1.7252586085844007e-05, + "loss": 0.0294, "step": 137170 }, { "epoch": 6.4, - "learning_rate": 7.2321972715765795e-06, - "loss": 0.0564, + "learning_rate": 1.7252118016007987e-05, + "loss": 0.0739, "step": 137175 }, { "epoch": 6.4, - "learning_rate": 7.23172847030144e-06, - "loss": 0.0949, + "learning_rate": 1.7251649946171967e-05, + "loss": 0.0845, "step": 137180 }, { "epoch": 6.4, - "learning_rate": 7.2312596690263e-06, - "loss": 0.1482, + "learning_rate": 1.725118187633595e-05, + "loss": 0.1071, "step": 137185 }, { "epoch": 6.4, - "learning_rate": 7.230790867751161e-06, - "loss": 0.1157, + "learning_rate": 1.725071380649993e-05, + "loss": 0.1498, "step": 137190 }, { "epoch": 6.4, - "learning_rate": 7.230322066476021e-06, - "loss": 0.0035, + "learning_rate": 1.725024573666391e-05, + "loss": 0.0017, "step": 137195 }, { "epoch": 6.4, - "learning_rate": 7.2298532652008825e-06, - "loss": 0.0213, + "learning_rate": 1.7249777666827893e-05, + "loss": 0.023, "step": 137200 }, { "epoch": 6.4, - "learning_rate": 7.2293844639257425e-06, - "loss": 0.0108, + "learning_rate": 1.7249309596991873e-05, + "loss": 0.0067, "step": 137205 }, { "epoch": 6.4, - "learning_rate": 7.228915662650603e-06, - "loss": 0.0426, + "learning_rate": 1.7248841527155853e-05, + "loss": 0.0461, "step": 137210 }, { "epoch": 6.4, - "learning_rate": 7.228446861375463e-06, - "loss": 0.0409, + "learning_rate": 1.7248373457319833e-05, + "loss": 0.0705, "step": 137215 }, { "epoch": 6.4, - "learning_rate": 7.227978060100323e-06, - "loss": 0.0411, + "learning_rate": 1.7247905387483816e-05, + "loss": 0.0532, "step": 137220 }, { "epoch": 6.4, - "learning_rate": 7.227509258825185e-06, - "loss": 0.0589, + "learning_rate": 1.7247437317647796e-05, + "loss": 0.0567, "step": 137225 }, { "epoch": 6.4, - "learning_rate": 7.2270404575500455e-06, - "loss": 0.1132, + "learning_rate": 1.7246969247811772e-05, + "loss": 0.1076, "step": 137230 }, { "epoch": 6.4, - "learning_rate": 7.2265716562749055e-06, - "loss": 0.1512, + "learning_rate": 1.7246501177975752e-05, + "loss": 0.2397, "step": 137235 }, { "epoch": 6.4, - "learning_rate": 7.226102854999766e-06, - "loss": 0.1347, + "learning_rate": 1.7246033108139735e-05, + "loss": 0.1462, "step": 137240 }, { "epoch": 6.4, - "learning_rate": 7.225634053724627e-06, - "loss": 0.0236, + "learning_rate": 1.7245565038303715e-05, + "loss": 0.0283, "step": 137245 }, { "epoch": 6.4, - "learning_rate": 7.225165252449488e-06, - "loss": 0.0138, + "learning_rate": 1.7245096968467695e-05, + "loss": 0.0149, "step": 137250 }, { "epoch": 6.4, - "learning_rate": 7.224696451174348e-06, - "loss": 0.0082, + "learning_rate": 1.7244628898631674e-05, + "loss": 0.0161, "step": 137255 }, { "epoch": 6.4, - "learning_rate": 7.2242276498992085e-06, - "loss": 0.0592, + "learning_rate": 1.7244160828795658e-05, + "loss": 0.0478, "step": 137260 }, { "epoch": 6.4, - "learning_rate": 7.2237588486240685e-06, - "loss": 0.0492, + "learning_rate": 1.7243692758959637e-05, + "loss": 0.1007, "step": 137265 }, { "epoch": 6.41, - "learning_rate": 7.22329004734893e-06, - "loss": 0.1011, + "learning_rate": 1.7243224689123617e-05, + "loss": 0.0433, "step": 137270 }, { "epoch": 6.41, - "learning_rate": 7.22282124607379e-06, - "loss": 0.0684, + "learning_rate": 1.72427566192876e-05, + "loss": 0.0502, "step": 137275 }, { "epoch": 6.41, - "learning_rate": 7.222352444798651e-06, - "loss": 0.0449, + "learning_rate": 1.724228854945158e-05, + "loss": 0.1536, "step": 137280 }, { "epoch": 6.41, - "learning_rate": 7.221883643523511e-06, - "loss": 0.097, + "learning_rate": 1.724182047961556e-05, + "loss": 0.0925, "step": 137285 }, { "epoch": 6.41, - "learning_rate": 7.221414842248371e-06, - "loss": 0.1143, + "learning_rate": 1.724135240977954e-05, + "loss": 0.1602, "step": 137290 }, { "epoch": 6.41, - "learning_rate": 7.220946040973232e-06, - "loss": 0.0116, + "learning_rate": 1.724088433994352e-05, + "loss": 0.0224, "step": 137295 }, { "epoch": 6.41, - "learning_rate": 7.220477239698093e-06, - "loss": 0.0159, + "learning_rate": 1.72404162701075e-05, + "loss": 0.0042, "step": 137300 }, { "epoch": 6.41, - "learning_rate": 7.220008438422953e-06, - "loss": 0.051, + "learning_rate": 1.723994820027148e-05, + "loss": 0.0443, "step": 137305 }, { "epoch": 6.41, - "learning_rate": 7.219539637147813e-06, - "loss": 0.0289, + "learning_rate": 1.723948013043546e-05, + "loss": 0.0338, "step": 137310 }, { "epoch": 6.41, - "learning_rate": 7.2190708358726746e-06, - "loss": 0.0192, + "learning_rate": 1.7239012060599442e-05, + "loss": 0.0286, "step": 137315 }, { "epoch": 6.41, - "learning_rate": 7.218602034597535e-06, - "loss": 0.0458, + "learning_rate": 1.7238543990763422e-05, + "loss": 0.0723, "step": 137320 }, { "epoch": 6.41, - "learning_rate": 7.218133233322395e-06, - "loss": 0.0414, + "learning_rate": 1.7238075920927402e-05, + "loss": 0.0852, "step": 137325 }, { "epoch": 6.41, - "learning_rate": 7.217664432047255e-06, - "loss": 0.2136, + "learning_rate": 1.7237607851091385e-05, + "loss": 0.0899, "step": 137330 }, { "epoch": 6.41, - "learning_rate": 7.217195630772116e-06, - "loss": 0.1915, + "learning_rate": 1.7237139781255365e-05, + "loss": 0.0844, "step": 137335 }, { "epoch": 6.41, - "learning_rate": 7.216726829496978e-06, - "loss": 0.1576, + "learning_rate": 1.7236671711419345e-05, + "loss": 0.08, "step": 137340 }, { "epoch": 6.41, - "learning_rate": 7.2162580282218376e-06, - "loss": 0.0328, + "learning_rate": 1.7236203641583325e-05, + "loss": 0.0036, "step": 137345 }, { "epoch": 6.41, - "learning_rate": 7.2157892269466975e-06, - "loss": 0.0092, + "learning_rate": 1.7235735571747308e-05, + "loss": 0.0638, "step": 137350 }, { "epoch": 6.41, - "learning_rate": 7.215320425671558e-06, - "loss": 0.0202, + "learning_rate": 1.7235267501911284e-05, + "loss": 0.0585, "step": 137355 }, { "epoch": 6.41, - "learning_rate": 7.214851624396418e-06, - "loss": 0.0204, + "learning_rate": 1.7234799432075264e-05, + "loss": 0.037, "step": 137360 }, { "epoch": 6.41, - "learning_rate": 7.21438282312128e-06, - "loss": 0.0359, + "learning_rate": 1.7234331362239244e-05, + "loss": 0.0141, "step": 137365 }, { "epoch": 6.41, - "learning_rate": 7.21391402184614e-06, - "loss": 0.0339, + "learning_rate": 1.7233863292403227e-05, + "loss": 0.0625, "step": 137370 }, { "epoch": 6.41, - "learning_rate": 7.2134452205710005e-06, - "loss": 0.0514, + "learning_rate": 1.7233395222567207e-05, + "loss": 0.0604, "step": 137375 }, { "epoch": 6.41, - "learning_rate": 7.2129764192958605e-06, - "loss": 0.0978, + "learning_rate": 1.7232927152731187e-05, + "loss": 0.097, "step": 137380 }, { "epoch": 6.41, - "learning_rate": 7.212507618020722e-06, - "loss": 0.1486, + "learning_rate": 1.723245908289517e-05, + "loss": 0.2116, "step": 137385 }, { "epoch": 6.41, - "learning_rate": 7.212038816745582e-06, - "loss": 0.1293, + "learning_rate": 1.723199101305915e-05, + "loss": 0.1776, "step": 137390 }, { "epoch": 6.41, - "learning_rate": 7.211570015470443e-06, - "loss": 0.0107, + "learning_rate": 1.723152294322313e-05, + "loss": 0.026, "step": 137395 }, { "epoch": 6.41, - "learning_rate": 7.211101214195303e-06, - "loss": 0.018, + "learning_rate": 1.723105487338711e-05, + "loss": 0.0228, "step": 137400 }, { "epoch": 6.41, - "learning_rate": 7.2106324129201635e-06, - "loss": 0.0157, + "learning_rate": 1.7230586803551093e-05, + "loss": 0.0794, "step": 137405 }, { "epoch": 6.41, - "learning_rate": 7.210163611645024e-06, - "loss": 0.0653, + "learning_rate": 1.7230118733715072e-05, + "loss": 0.0037, "step": 137410 }, { "epoch": 6.41, - "learning_rate": 7.209694810369885e-06, - "loss": 0.0928, + "learning_rate": 1.7229650663879052e-05, + "loss": 0.0236, "step": 137415 }, { "epoch": 6.41, - "learning_rate": 7.209226009094745e-06, - "loss": 0.0401, + "learning_rate": 1.722918259404303e-05, + "loss": 0.047, "step": 137420 }, { "epoch": 6.41, - "learning_rate": 7.208757207819606e-06, - "loss": 0.0579, + "learning_rate": 1.7228714524207012e-05, + "loss": 0.0515, "step": 137425 }, { "epoch": 6.41, - "learning_rate": 7.208288406544467e-06, - "loss": 0.0582, + "learning_rate": 1.7228246454370992e-05, + "loss": 0.0518, "step": 137430 }, { "epoch": 6.41, - "learning_rate": 7.207819605269327e-06, - "loss": 0.2023, + "learning_rate": 1.722777838453497e-05, + "loss": 0.1182, "step": 137435 }, { "epoch": 6.41, - "learning_rate": 7.207350803994187e-06, - "loss": 0.129, + "learning_rate": 1.722731031469895e-05, + "loss": 0.1134, "step": 137440 }, { "epoch": 6.41, - "learning_rate": 7.206882002719048e-06, - "loss": 0.0053, + "learning_rate": 1.7226842244862935e-05, + "loss": 0.0057, "step": 137445 }, { "epoch": 6.41, - "learning_rate": 7.206413201443908e-06, - "loss": 0.01, + "learning_rate": 1.7226374175026914e-05, + "loss": 0.0307, "step": 137450 }, { "epoch": 6.41, - "learning_rate": 7.20594440016877e-06, - "loss": 0.011, + "learning_rate": 1.7225906105190894e-05, + "loss": 0.0224, "step": 137455 }, { "epoch": 6.41, - "learning_rate": 7.20547559889363e-06, - "loss": 0.0242, + "learning_rate": 1.7225438035354877e-05, + "loss": 0.0265, "step": 137460 }, { "epoch": 6.41, - "learning_rate": 7.20500679761849e-06, - "loss": 0.0448, + "learning_rate": 1.7224969965518857e-05, + "loss": 0.0634, "step": 137465 }, { "epoch": 6.41, - "learning_rate": 7.20453799634335e-06, - "loss": 0.0292, + "learning_rate": 1.7224501895682837e-05, + "loss": 0.1081, "step": 137470 }, { "epoch": 6.41, - "learning_rate": 7.204069195068211e-06, - "loss": 0.0443, + "learning_rate": 1.7224033825846817e-05, + "loss": 0.0363, "step": 137475 }, { "epoch": 6.42, - "learning_rate": 7.203600393793072e-06, - "loss": 0.0951, + "learning_rate": 1.7223565756010797e-05, + "loss": 0.0452, "step": 137480 }, { "epoch": 6.42, - "learning_rate": 7.203131592517933e-06, - "loss": 0.2894, + "learning_rate": 1.7223097686174777e-05, + "loss": 0.1042, "step": 137485 }, { "epoch": 6.42, - "learning_rate": 7.2026627912427926e-06, - "loss": 0.1521, + "learning_rate": 1.7222629616338756e-05, + "loss": 0.1824, "step": 137490 }, { "epoch": 6.42, - "learning_rate": 7.202193989967653e-06, - "loss": 0.0433, + "learning_rate": 1.7222161546502736e-05, + "loss": 0.0182, "step": 137495 }, { "epoch": 6.42, - "learning_rate": 7.201725188692514e-06, - "loss": 0.0095, + "learning_rate": 1.722169347666672e-05, + "loss": 0.0209, "step": 137500 }, { "epoch": 6.42, - "learning_rate": 7.201256387417375e-06, - "loss": 0.0636, + "learning_rate": 1.72212254068307e-05, + "loss": 0.0014, "step": 137505 }, { "epoch": 6.42, - "learning_rate": 7.200787586142235e-06, - "loss": 0.0438, + "learning_rate": 1.722075733699468e-05, + "loss": 0.0307, "step": 137510 }, { "epoch": 6.42, - "learning_rate": 7.200318784867096e-06, - "loss": 0.0398, + "learning_rate": 1.7220289267158662e-05, + "loss": 0.0168, "step": 137515 }, { "epoch": 6.42, - "learning_rate": 7.1998499835919556e-06, - "loss": 0.0427, + "learning_rate": 1.7219821197322642e-05, + "loss": 0.0152, "step": 137520 }, { "epoch": 6.42, - "learning_rate": 7.199381182316817e-06, - "loss": 0.0427, + "learning_rate": 1.7219353127486622e-05, + "loss": 0.1146, "step": 137525 }, { "epoch": 6.42, - "learning_rate": 7.198912381041677e-06, - "loss": 0.145, + "learning_rate": 1.72188850576506e-05, + "loss": 0.1033, "step": 137530 }, { "epoch": 6.42, - "learning_rate": 7.198443579766538e-06, - "loss": 0.1617, + "learning_rate": 1.7218416987814585e-05, + "loss": 0.1042, "step": 137535 }, { "epoch": 6.42, - "learning_rate": 7.197974778491398e-06, - "loss": 0.1172, + "learning_rate": 1.7217948917978565e-05, + "loss": 0.1637, "step": 137540 }, { "epoch": 6.42, - "learning_rate": 7.197505977216258e-06, - "loss": 0.0357, + "learning_rate": 1.721748084814254e-05, + "loss": 0.022, "step": 137545 }, { "epoch": 6.42, - "learning_rate": 7.197037175941119e-06, - "loss": 0.0343, + "learning_rate": 1.721701277830652e-05, + "loss": 0.0248, "step": 137550 }, { "epoch": 6.42, - "learning_rate": 7.19656837466598e-06, - "loss": 0.0246, + "learning_rate": 1.7216544708470504e-05, + "loss": 0.0084, "step": 137555 }, { "epoch": 6.42, - "learning_rate": 7.19609957339084e-06, - "loss": 0.0232, + "learning_rate": 1.7216076638634484e-05, + "loss": 0.021, "step": 137560 }, { "epoch": 6.42, - "learning_rate": 7.1956307721157e-06, - "loss": 0.0197, + "learning_rate": 1.7215608568798464e-05, + "loss": 0.0127, "step": 137565 }, { "epoch": 6.42, - "learning_rate": 7.195161970840562e-06, - "loss": 0.0873, + "learning_rate": 1.7215140498962447e-05, + "loss": 0.0467, "step": 137570 }, { "epoch": 6.42, - "learning_rate": 7.1946931695654225e-06, - "loss": 0.0661, + "learning_rate": 1.7214672429126427e-05, + "loss": 0.0249, "step": 137575 }, { "epoch": 6.42, - "learning_rate": 7.194224368290282e-06, - "loss": 0.0965, + "learning_rate": 1.7214204359290407e-05, + "loss": 0.056, "step": 137580 }, { "epoch": 6.42, - "learning_rate": 7.193755567015142e-06, - "loss": 0.0775, + "learning_rate": 1.7213736289454386e-05, + "loss": 0.2061, "step": 137585 }, { "epoch": 6.42, - "learning_rate": 7.193286765740003e-06, - "loss": 0.1387, + "learning_rate": 1.721326821961837e-05, + "loss": 0.202, "step": 137590 }, { "epoch": 6.42, - "learning_rate": 7.192817964464865e-06, - "loss": 0.0225, + "learning_rate": 1.721280014978235e-05, + "loss": 0.0353, "step": 137595 }, { "epoch": 6.42, - "learning_rate": 7.192349163189725e-06, - "loss": 0.0217, + "learning_rate": 1.721233207994633e-05, + "loss": 0.0691, "step": 137600 }, { "epoch": 6.42, - "learning_rate": 7.191880361914585e-06, - "loss": 0.0166, + "learning_rate": 1.721186401011031e-05, + "loss": 0.0555, "step": 137605 }, { "epoch": 6.42, - "learning_rate": 7.191411560639445e-06, - "loss": 0.0636, + "learning_rate": 1.721139594027429e-05, + "loss": 0.0592, "step": 137610 }, { "epoch": 6.42, - "learning_rate": 7.190942759364305e-06, - "loss": 0.0412, + "learning_rate": 1.721092787043827e-05, + "loss": 0.0434, "step": 137615 }, { "epoch": 6.42, - "learning_rate": 7.190473958089167e-06, - "loss": 0.0924, + "learning_rate": 1.721045980060225e-05, + "loss": 0.1091, "step": 137620 }, { "epoch": 6.42, - "learning_rate": 7.190005156814027e-06, - "loss": 0.0674, + "learning_rate": 1.720999173076623e-05, + "loss": 0.0873, "step": 137625 }, { "epoch": 6.42, - "learning_rate": 7.189536355538888e-06, - "loss": 0.0746, + "learning_rate": 1.720952366093021e-05, + "loss": 0.0719, "step": 137630 }, { "epoch": 6.42, - "learning_rate": 7.189067554263748e-06, - "loss": 0.2229, + "learning_rate": 1.720905559109419e-05, + "loss": 0.2304, "step": 137635 }, { "epoch": 6.42, - "learning_rate": 7.188598752988609e-06, - "loss": 0.2061, + "learning_rate": 1.720858752125817e-05, + "loss": 0.1233, "step": 137640 }, { "epoch": 6.42, - "learning_rate": 7.188129951713469e-06, - "loss": 0.0389, + "learning_rate": 1.7208119451422154e-05, + "loss": 0.007, "step": 137645 }, { "epoch": 6.42, - "learning_rate": 7.18766115043833e-06, - "loss": 0.0788, + "learning_rate": 1.7207651381586134e-05, + "loss": 0.0151, "step": 137650 }, { "epoch": 6.42, - "learning_rate": 7.18719234916319e-06, - "loss": 0.0515, + "learning_rate": 1.7207183311750114e-05, + "loss": 0.0644, "step": 137655 }, { "epoch": 6.42, - "learning_rate": 7.186723547888051e-06, - "loss": 0.0453, + "learning_rate": 1.7206715241914094e-05, + "loss": 0.0297, "step": 137660 }, { "epoch": 6.42, - "learning_rate": 7.1862547466129114e-06, - "loss": 0.0583, + "learning_rate": 1.7206247172078077e-05, + "loss": 0.0345, "step": 137665 }, { "epoch": 6.42, - "learning_rate": 7.185785945337772e-06, - "loss": 0.0867, + "learning_rate": 1.7205779102242054e-05, + "loss": 0.0913, "step": 137670 }, { "epoch": 6.42, - "learning_rate": 7.185317144062632e-06, - "loss": 0.0529, + "learning_rate": 1.7205311032406033e-05, + "loss": 0.0384, "step": 137675 }, { "epoch": 6.42, - "learning_rate": 7.184848342787493e-06, - "loss": 0.1169, + "learning_rate": 1.7204842962570013e-05, + "loss": 0.2433, "step": 137680 }, { "epoch": 6.42, - "learning_rate": 7.184379541512353e-06, - "loss": 0.1253, + "learning_rate": 1.7204374892733996e-05, + "loss": 0.1612, "step": 137685 }, { "epoch": 6.42, - "learning_rate": 7.1839107402372145e-06, - "loss": 0.1685, + "learning_rate": 1.7203906822897976e-05, + "loss": 0.1726, "step": 137690 }, { "epoch": 6.43, - "learning_rate": 7.183441938962074e-06, - "loss": 0.022, + "learning_rate": 1.7203438753061956e-05, + "loss": 0.0117, "step": 137695 }, { "epoch": 6.43, - "learning_rate": 7.182973137686935e-06, - "loss": 0.0403, + "learning_rate": 1.720297068322594e-05, + "loss": 0.0772, "step": 137700 }, { "epoch": 6.43, - "learning_rate": 7.182504336411795e-06, - "loss": 0.0214, + "learning_rate": 1.720250261338992e-05, + "loss": 0.0172, "step": 137705 }, { "epoch": 6.43, - "learning_rate": 7.182035535136657e-06, - "loss": 0.0112, + "learning_rate": 1.72020345435539e-05, + "loss": 0.0571, "step": 137710 }, { "epoch": 6.43, - "learning_rate": 7.181566733861517e-06, - "loss": 0.0307, + "learning_rate": 1.720156647371788e-05, + "loss": 0.0174, "step": 137715 }, { "epoch": 6.43, - "learning_rate": 7.1810979325863775e-06, - "loss": 0.0457, + "learning_rate": 1.7201098403881862e-05, + "loss": 0.0754, "step": 137720 }, { "epoch": 6.43, - "learning_rate": 7.180629131311237e-06, - "loss": 0.0582, + "learning_rate": 1.720063033404584e-05, + "loss": 0.1945, "step": 137725 }, { "epoch": 6.43, - "learning_rate": 7.180160330036098e-06, - "loss": 0.0823, + "learning_rate": 1.720016226420982e-05, + "loss": 0.101, "step": 137730 }, { "epoch": 6.43, - "learning_rate": 7.179691528760959e-06, - "loss": 0.1798, + "learning_rate": 1.7199694194373798e-05, + "loss": 0.3091, "step": 137735 }, { "epoch": 6.43, - "learning_rate": 7.17922272748582e-06, - "loss": 0.1305, + "learning_rate": 1.719922612453778e-05, + "loss": 0.0809, "step": 137740 }, { "epoch": 6.43, - "learning_rate": 7.17875392621068e-06, - "loss": 0.0015, + "learning_rate": 1.719875805470176e-05, + "loss": 0.0039, "step": 137745 }, { "epoch": 6.43, - "learning_rate": 7.1782851249355405e-06, - "loss": 0.039, + "learning_rate": 1.719828998486574e-05, + "loss": 0.0156, "step": 137750 }, { "epoch": 6.43, - "learning_rate": 7.177816323660401e-06, - "loss": 0.0333, + "learning_rate": 1.7197821915029724e-05, + "loss": 0.0052, "step": 137755 }, { "epoch": 6.43, - "learning_rate": 7.177347522385262e-06, - "loss": 0.0685, + "learning_rate": 1.7197353845193704e-05, + "loss": 0.0316, "step": 137760 }, { "epoch": 6.43, - "learning_rate": 7.176878721110122e-06, - "loss": 0.0716, + "learning_rate": 1.7196885775357684e-05, + "loss": 0.0139, "step": 137765 }, { "epoch": 6.43, - "learning_rate": 7.176409919834983e-06, - "loss": 0.1209, + "learning_rate": 1.7196417705521663e-05, + "loss": 0.0349, "step": 137770 }, { "epoch": 6.43, - "learning_rate": 7.175941118559843e-06, - "loss": 0.0988, + "learning_rate": 1.7195949635685647e-05, + "loss": 0.0674, "step": 137775 }, { "epoch": 6.43, - "learning_rate": 7.175472317284704e-06, - "loss": 0.0681, + "learning_rate": 1.7195481565849626e-05, + "loss": 0.1295, "step": 137780 }, { "epoch": 6.43, - "learning_rate": 7.175003516009564e-06, - "loss": 0.2648, + "learning_rate": 1.7195013496013606e-05, + "loss": 0.2022, "step": 137785 }, { "epoch": 6.43, - "learning_rate": 7.174534714734425e-06, - "loss": 0.1942, + "learning_rate": 1.7194545426177586e-05, + "loss": 0.1179, "step": 137790 }, { "epoch": 6.43, - "learning_rate": 7.174065913459285e-06, - "loss": 0.0133, + "learning_rate": 1.719407735634157e-05, + "loss": 0.0196, "step": 137795 }, { "epoch": 6.43, - "learning_rate": 7.173597112184145e-06, - "loss": 0.0373, + "learning_rate": 1.7193609286505546e-05, + "loss": 0.0353, "step": 137800 }, { "epoch": 6.43, - "learning_rate": 7.1731283109090065e-06, - "loss": 0.0119, + "learning_rate": 1.7193141216669526e-05, + "loss": 0.0394, "step": 137805 }, { "epoch": 6.43, - "learning_rate": 7.172659509633867e-06, - "loss": 0.1166, + "learning_rate": 1.719267314683351e-05, + "loss": 0.0867, "step": 137810 }, { "epoch": 6.43, - "learning_rate": 7.172190708358727e-06, - "loss": 0.0667, + "learning_rate": 1.719220507699749e-05, + "loss": 0.0247, "step": 137815 }, { "epoch": 6.43, - "learning_rate": 7.171721907083587e-06, - "loss": 0.0628, + "learning_rate": 1.719173700716147e-05, + "loss": 0.0248, "step": 137820 }, { "epoch": 6.43, - "learning_rate": 7.171253105808449e-06, - "loss": 0.0597, + "learning_rate": 1.7191268937325448e-05, + "loss": 0.0641, "step": 137825 }, { "epoch": 6.43, - "learning_rate": 7.1707843045333096e-06, - "loss": 0.0609, + "learning_rate": 1.719080086748943e-05, + "loss": 0.5097, "step": 137830 }, { "epoch": 6.43, - "learning_rate": 7.1703155032581695e-06, - "loss": 0.1948, + "learning_rate": 1.719033279765341e-05, + "loss": 0.1692, "step": 137835 }, { "epoch": 6.43, - "learning_rate": 7.1698467019830294e-06, - "loss": 0.0867, + "learning_rate": 1.718986472781739e-05, + "loss": 0.2023, "step": 137840 }, { "epoch": 6.43, - "learning_rate": 7.16937790070789e-06, - "loss": 0.0147, + "learning_rate": 1.718939665798137e-05, + "loss": 0.0017, "step": 137845 }, { "epoch": 6.43, - "learning_rate": 7.168909099432752e-06, - "loss": 0.0647, + "learning_rate": 1.7188928588145354e-05, + "loss": 0.0056, "step": 137850 }, { "epoch": 6.43, - "learning_rate": 7.168440298157612e-06, - "loss": 0.0726, + "learning_rate": 1.7188460518309334e-05, + "loss": 0.0308, "step": 137855 }, { "epoch": 6.43, - "learning_rate": 7.167971496882472e-06, - "loss": 0.0406, + "learning_rate": 1.718799244847331e-05, + "loss": 0.0381, "step": 137860 }, { "epoch": 6.43, - "learning_rate": 7.1675026956073325e-06, - "loss": 0.0529, + "learning_rate": 1.718752437863729e-05, + "loss": 0.0566, "step": 137865 }, { "epoch": 6.43, - "learning_rate": 7.1670338943321924e-06, - "loss": 0.0378, + "learning_rate": 1.7187056308801273e-05, + "loss": 0.0496, "step": 137870 }, { "epoch": 6.43, - "learning_rate": 7.166565093057054e-06, - "loss": 0.0333, + "learning_rate": 1.7186588238965253e-05, + "loss": 0.0583, "step": 137875 }, { "epoch": 6.43, - "learning_rate": 7.166096291781914e-06, - "loss": 0.1046, + "learning_rate": 1.7186120169129233e-05, + "loss": 0.1147, "step": 137880 }, { "epoch": 6.43, - "learning_rate": 7.165627490506775e-06, - "loss": 0.1621, + "learning_rate": 1.7185652099293216e-05, + "loss": 0.192, "step": 137885 }, { "epoch": 6.43, - "learning_rate": 7.165158689231635e-06, - "loss": 0.1102, + "learning_rate": 1.7185184029457196e-05, + "loss": 0.1734, "step": 137890 }, { "epoch": 6.43, - "learning_rate": 7.164689887956496e-06, - "loss": 0.013, + "learning_rate": 1.7184715959621176e-05, + "loss": 0.0095, "step": 137895 }, { "epoch": 6.43, - "learning_rate": 7.164221086681356e-06, - "loss": 0.0398, + "learning_rate": 1.7184247889785156e-05, + "loss": 0.034, "step": 137900 }, { "epoch": 6.43, - "learning_rate": 7.163752285406217e-06, - "loss": 0.0131, + "learning_rate": 1.718377981994914e-05, + "loss": 0.0101, "step": 137905 }, { "epoch": 6.44, - "learning_rate": 7.163283484131077e-06, - "loss": 0.0182, + "learning_rate": 1.718331175011312e-05, + "loss": 0.0249, "step": 137910 }, { "epoch": 6.44, - "learning_rate": 7.162814682855938e-06, - "loss": 0.0966, + "learning_rate": 1.71828436802771e-05, + "loss": 0.1106, "step": 137915 }, { "epoch": 6.44, - "learning_rate": 7.1623458815807985e-06, - "loss": 0.0608, + "learning_rate": 1.7182375610441078e-05, + "loss": 0.0827, "step": 137920 }, { "epoch": 6.44, - "learning_rate": 7.161877080305659e-06, - "loss": 0.0624, + "learning_rate": 1.7181907540605058e-05, + "loss": 0.0313, "step": 137925 }, { "epoch": 6.44, - "learning_rate": 7.161408279030519e-06, - "loss": 0.1464, + "learning_rate": 1.7181439470769038e-05, + "loss": 0.1075, "step": 137930 }, { "epoch": 6.44, - "learning_rate": 7.16093947775538e-06, - "loss": 0.2466, + "learning_rate": 1.7180971400933018e-05, + "loss": 0.199, "step": 137935 }, { "epoch": 6.44, - "learning_rate": 7.16047067648024e-06, - "loss": 0.1397, + "learning_rate": 1.7180503331097e-05, + "loss": 0.1312, "step": 137940 }, { "epoch": 6.44, - "learning_rate": 7.160001875205102e-06, - "loss": 0.0257, + "learning_rate": 1.718003526126098e-05, + "loss": 0.0074, "step": 137945 }, { "epoch": 6.44, - "learning_rate": 7.1595330739299615e-06, - "loss": 0.0644, + "learning_rate": 1.717956719142496e-05, + "loss": 0.0166, "step": 137950 }, { "epoch": 6.44, - "learning_rate": 7.159064272654822e-06, - "loss": 0.0558, + "learning_rate": 1.717909912158894e-05, + "loss": 0.0176, "step": 137955 }, { "epoch": 6.44, - "learning_rate": 7.158595471379682e-06, - "loss": 0.0278, + "learning_rate": 1.7178631051752924e-05, + "loss": 0.0224, "step": 137960 }, { "epoch": 6.44, - "learning_rate": 7.158126670104544e-06, - "loss": 0.0268, + "learning_rate": 1.7178162981916903e-05, + "loss": 0.0667, "step": 137965 }, { "epoch": 6.44, - "learning_rate": 7.157657868829404e-06, - "loss": 0.05, + "learning_rate": 1.7177694912080883e-05, + "loss": 0.044, "step": 137970 }, { "epoch": 6.44, - "learning_rate": 7.157189067554265e-06, - "loss": 0.0371, + "learning_rate": 1.7177226842244863e-05, + "loss": 0.0406, "step": 137975 }, { "epoch": 6.44, - "learning_rate": 7.1567202662791245e-06, - "loss": 0.0972, + "learning_rate": 1.7176758772408846e-05, + "loss": 0.1073, "step": 137980 }, { "epoch": 6.44, - "learning_rate": 7.156251465003985e-06, - "loss": 0.1465, + "learning_rate": 1.7176290702572826e-05, + "loss": 0.1402, "step": 137985 }, { "epoch": 6.44, - "learning_rate": 7.155782663728846e-06, - "loss": 0.1849, + "learning_rate": 1.7175822632736803e-05, + "loss": 0.1374, "step": 137990 }, { "epoch": 6.44, - "learning_rate": 7.155313862453707e-06, - "loss": 0.0132, + "learning_rate": 1.7175354562900786e-05, + "loss": 0.0103, "step": 137995 }, { "epoch": 6.44, - "learning_rate": 7.154845061178567e-06, - "loss": 0.0011, + "learning_rate": 1.7174886493064766e-05, + "loss": 0.0289, "step": 138000 }, { "epoch": 6.44, - "learning_rate": 7.1543762599034276e-06, - "loss": 0.0116, + "learning_rate": 1.7174418423228745e-05, + "loss": 0.0293, "step": 138005 }, { "epoch": 6.44, - "learning_rate": 7.1539074586282875e-06, - "loss": 0.0362, + "learning_rate": 1.7173950353392725e-05, + "loss": 0.0446, "step": 138010 }, { "epoch": 6.44, - "learning_rate": 7.153438657353149e-06, - "loss": 0.0609, + "learning_rate": 1.717348228355671e-05, + "loss": 0.0374, "step": 138015 }, { "epoch": 6.44, - "learning_rate": 7.152969856078009e-06, - "loss": 0.0563, + "learning_rate": 1.7173014213720688e-05, + "loss": 0.0691, "step": 138020 }, { "epoch": 6.44, - "learning_rate": 7.15250105480287e-06, - "loss": 0.0441, + "learning_rate": 1.7172546143884668e-05, + "loss": 0.1005, "step": 138025 }, { "epoch": 6.44, - "learning_rate": 7.15203225352773e-06, - "loss": 0.0875, + "learning_rate": 1.7172078074048648e-05, + "loss": 0.0736, "step": 138030 }, { "epoch": 6.44, - "learning_rate": 7.151563452252591e-06, - "loss": 0.2359, + "learning_rate": 1.717161000421263e-05, + "loss": 0.1706, "step": 138035 }, { "epoch": 6.44, - "learning_rate": 7.151094650977451e-06, - "loss": 0.0949, + "learning_rate": 1.717114193437661e-05, + "loss": 0.1082, "step": 138040 }, { "epoch": 6.44, - "learning_rate": 7.150625849702312e-06, - "loss": 0.0208, + "learning_rate": 1.717067386454059e-05, + "loss": 0.0237, "step": 138045 }, { "epoch": 6.44, - "learning_rate": 7.150157048427172e-06, - "loss": 0.0229, + "learning_rate": 1.7170205794704567e-05, + "loss": 0.0105, "step": 138050 }, { "epoch": 6.44, - "learning_rate": 7.149688247152032e-06, - "loss": 0.0692, + "learning_rate": 1.716973772486855e-05, + "loss": 0.0243, "step": 138055 }, { "epoch": 6.44, - "learning_rate": 7.149219445876894e-06, - "loss": 0.0252, + "learning_rate": 1.716926965503253e-05, + "loss": 0.0376, "step": 138060 }, { "epoch": 6.44, - "learning_rate": 7.148750644601754e-06, - "loss": 0.0733, + "learning_rate": 1.716880158519651e-05, + "loss": 0.0251, "step": 138065 }, { "epoch": 6.44, - "learning_rate": 7.148281843326614e-06, - "loss": 0.0392, + "learning_rate": 1.7168333515360493e-05, + "loss": 0.061, "step": 138070 }, { "epoch": 6.44, - "learning_rate": 7.147813042051474e-06, - "loss": 0.0893, + "learning_rate": 1.7167865445524473e-05, + "loss": 0.0625, "step": 138075 }, { "epoch": 6.44, - "learning_rate": 7.147344240776336e-06, - "loss": 0.0772, + "learning_rate": 1.7167397375688453e-05, + "loss": 0.147, "step": 138080 }, { "epoch": 6.44, - "learning_rate": 7.146875439501197e-06, - "loss": 0.1383, + "learning_rate": 1.7166929305852433e-05, + "loss": 0.1245, "step": 138085 }, { "epoch": 6.44, - "learning_rate": 7.146406638226057e-06, - "loss": 0.1858, + "learning_rate": 1.7166461236016416e-05, + "loss": 0.1855, "step": 138090 }, { "epoch": 6.44, - "learning_rate": 7.1459378369509165e-06, - "loss": 0.0267, + "learning_rate": 1.7165993166180396e-05, + "loss": 0.0187, "step": 138095 }, { "epoch": 6.44, - "learning_rate": 7.145469035675777e-06, - "loss": 0.0267, + "learning_rate": 1.7165525096344375e-05, + "loss": 0.019, "step": 138100 }, { "epoch": 6.44, - "learning_rate": 7.145000234400639e-06, - "loss": 0.0259, + "learning_rate": 1.7165057026508355e-05, + "loss": 0.018, "step": 138105 }, { "epoch": 6.44, - "learning_rate": 7.144531433125499e-06, - "loss": 0.0291, + "learning_rate": 1.716458895667234e-05, + "loss": 0.0119, "step": 138110 }, { "epoch": 6.44, - "learning_rate": 7.144062631850359e-06, - "loss": 0.0974, + "learning_rate": 1.7164120886836315e-05, + "loss": 0.0324, "step": 138115 }, { "epoch": 6.44, - "learning_rate": 7.14359383057522e-06, - "loss": 0.1118, + "learning_rate": 1.7163652817000295e-05, + "loss": 0.0681, "step": 138120 }, { "epoch": 6.45, - "learning_rate": 7.1431250293000795e-06, - "loss": 0.0918, + "learning_rate": 1.7163184747164278e-05, + "loss": 0.0521, "step": 138125 }, { "epoch": 6.45, - "learning_rate": 7.142656228024941e-06, - "loss": 0.1048, + "learning_rate": 1.7162716677328258e-05, + "loss": 0.1452, "step": 138130 }, { "epoch": 6.45, - "learning_rate": 7.142187426749801e-06, - "loss": 0.1312, + "learning_rate": 1.7162248607492238e-05, + "loss": 0.2331, "step": 138135 }, { "epoch": 6.45, - "learning_rate": 7.141718625474662e-06, - "loss": 0.1658, + "learning_rate": 1.7161780537656217e-05, + "loss": 0.1744, "step": 138140 }, { "epoch": 6.45, - "learning_rate": 7.141249824199522e-06, - "loss": 0.0414, + "learning_rate": 1.71613124678202e-05, + "loss": 0.0089, "step": 138145 }, { "epoch": 6.45, - "learning_rate": 7.1407810229243834e-06, - "loss": 0.012, + "learning_rate": 1.716084439798418e-05, + "loss": 0.0129, "step": 138150 }, { "epoch": 6.45, - "learning_rate": 7.140312221649243e-06, - "loss": 0.0114, + "learning_rate": 1.716037632814816e-05, + "loss": 0.0187, "step": 138155 }, { "epoch": 6.45, - "learning_rate": 7.139843420374104e-06, - "loss": 0.039, + "learning_rate": 1.715990825831214e-05, + "loss": 0.0241, "step": 138160 }, { "epoch": 6.45, - "learning_rate": 7.139374619098964e-06, - "loss": 0.0156, + "learning_rate": 1.7159440188476123e-05, + "loss": 0.0272, "step": 138165 }, { "epoch": 6.45, - "learning_rate": 7.138905817823825e-06, - "loss": 0.0397, + "learning_rate": 1.7158972118640103e-05, + "loss": 0.0742, "step": 138170 }, { "epoch": 6.45, - "learning_rate": 7.138437016548686e-06, - "loss": 0.0912, + "learning_rate": 1.7158504048804083e-05, + "loss": 0.1031, "step": 138175 }, { "epoch": 6.45, - "learning_rate": 7.1379682152735464e-06, - "loss": 0.0685, + "learning_rate": 1.7158035978968063e-05, + "loss": 0.1104, "step": 138180 }, { "epoch": 6.45, - "learning_rate": 7.137499413998406e-06, - "loss": 0.2109, + "learning_rate": 1.7157567909132042e-05, + "loss": 0.2068, "step": 138185 }, { "epoch": 6.45, - "learning_rate": 7.137030612723267e-06, - "loss": 0.1265, + "learning_rate": 1.7157099839296022e-05, + "loss": 0.18, "step": 138190 }, { "epoch": 6.45, - "learning_rate": 7.136561811448127e-06, - "loss": 0.0434, + "learning_rate": 1.7156631769460002e-05, + "loss": 0.0222, "step": 138195 }, { "epoch": 6.45, - "learning_rate": 7.136093010172989e-06, - "loss": 0.0108, + "learning_rate": 1.7156163699623985e-05, + "loss": 0.0224, "step": 138200 }, { "epoch": 6.45, - "learning_rate": 7.135624208897849e-06, - "loss": 0.0523, + "learning_rate": 1.7155695629787965e-05, + "loss": 0.0402, "step": 138205 }, { "epoch": 6.45, - "learning_rate": 7.135155407622709e-06, - "loss": 0.0073, + "learning_rate": 1.7155227559951945e-05, + "loss": 0.0317, "step": 138210 }, { "epoch": 6.45, - "learning_rate": 7.134686606347569e-06, - "loss": 0.0663, + "learning_rate": 1.7154759490115925e-05, + "loss": 0.0156, "step": 138215 }, { "epoch": 6.45, - "learning_rate": 7.134217805072431e-06, - "loss": 0.054, + "learning_rate": 1.7154291420279908e-05, + "loss": 0.0433, "step": 138220 }, { "epoch": 6.45, - "learning_rate": 7.133749003797291e-06, - "loss": 0.0882, + "learning_rate": 1.7153823350443888e-05, + "loss": 0.0491, "step": 138225 }, { "epoch": 6.45, - "learning_rate": 7.133280202522152e-06, - "loss": 0.0688, + "learning_rate": 1.7153355280607868e-05, + "loss": 0.142, "step": 138230 }, { "epoch": 6.45, - "learning_rate": 7.132811401247012e-06, - "loss": 0.1425, + "learning_rate": 1.7152887210771847e-05, + "loss": 0.1285, "step": 138235 }, { "epoch": 6.45, - "learning_rate": 7.132342599971872e-06, - "loss": 0.118, + "learning_rate": 1.7152419140935827e-05, + "loss": 0.1566, "step": 138240 }, { "epoch": 6.45, - "learning_rate": 7.131873798696733e-06, - "loss": 0.0004, + "learning_rate": 1.7151951071099807e-05, + "loss": 0.0048, "step": 138245 }, { "epoch": 6.45, - "learning_rate": 7.131404997421594e-06, - "loss": 0.0114, + "learning_rate": 1.7151483001263787e-05, + "loss": 0.0186, "step": 138250 }, { "epoch": 6.45, - "learning_rate": 7.130936196146454e-06, - "loss": 0.02, + "learning_rate": 1.715101493142777e-05, + "loss": 0.0236, "step": 138255 }, { "epoch": 6.45, - "learning_rate": 7.130467394871315e-06, - "loss": 0.0236, + "learning_rate": 1.715054686159175e-05, + "loss": 0.0043, "step": 138260 }, { "epoch": 6.45, - "learning_rate": 7.129998593596175e-06, - "loss": 0.0358, + "learning_rate": 1.715007879175573e-05, + "loss": 0.0343, "step": 138265 }, { "epoch": 6.45, - "learning_rate": 7.129529792321036e-06, - "loss": 0.0433, + "learning_rate": 1.714961072191971e-05, + "loss": 0.0507, "step": 138270 }, { "epoch": 6.45, - "learning_rate": 7.129060991045896e-06, - "loss": 0.0359, + "learning_rate": 1.7149142652083693e-05, + "loss": 0.0373, "step": 138275 }, { "epoch": 6.45, - "learning_rate": 7.128592189770757e-06, - "loss": 0.2135, + "learning_rate": 1.7148674582247673e-05, + "loss": 0.044, "step": 138280 }, { "epoch": 6.45, - "learning_rate": 7.128123388495617e-06, - "loss": 0.1351, + "learning_rate": 1.7148206512411652e-05, + "loss": 0.1315, "step": 138285 }, { "epoch": 6.45, - "learning_rate": 7.1276545872204785e-06, - "loss": 0.2258, + "learning_rate": 1.7147738442575632e-05, + "loss": 0.158, "step": 138290 }, { "epoch": 6.45, - "learning_rate": 7.1271857859453385e-06, - "loss": 0.0545, + "learning_rate": 1.7147270372739615e-05, + "loss": 0.0169, "step": 138295 }, { "epoch": 6.45, - "learning_rate": 7.126716984670199e-06, - "loss": 0.0203, + "learning_rate": 1.7146802302903595e-05, + "loss": 0.0018, "step": 138300 }, { "epoch": 6.45, - "learning_rate": 7.126248183395059e-06, - "loss": 0.0338, + "learning_rate": 1.714633423306757e-05, + "loss": 0.0523, "step": 138305 }, { "epoch": 6.45, - "learning_rate": 7.125779382119919e-06, - "loss": 0.0641, + "learning_rate": 1.7145866163231555e-05, + "loss": 0.0371, "step": 138310 }, { "epoch": 6.45, - "learning_rate": 7.125310580844781e-06, - "loss": 0.0341, + "learning_rate": 1.7145398093395535e-05, + "loss": 0.0916, "step": 138315 }, { "epoch": 6.45, - "learning_rate": 7.1248417795696415e-06, - "loss": 0.0851, + "learning_rate": 1.7144930023559515e-05, + "loss": 0.0503, "step": 138320 }, { "epoch": 6.45, - "learning_rate": 7.1243729782945014e-06, - "loss": 0.0328, + "learning_rate": 1.7144461953723494e-05, + "loss": 0.0214, "step": 138325 }, { "epoch": 6.45, - "learning_rate": 7.123904177019361e-06, - "loss": 0.0686, + "learning_rate": 1.7143993883887478e-05, + "loss": 0.0865, "step": 138330 }, { "epoch": 6.45, - "learning_rate": 7.123435375744222e-06, - "loss": 0.1625, + "learning_rate": 1.7143525814051457e-05, + "loss": 0.097, "step": 138335 }, { "epoch": 6.46, - "learning_rate": 7.122966574469084e-06, - "loss": 0.1409, + "learning_rate": 1.7143057744215437e-05, + "loss": 0.218, "step": 138340 }, { "epoch": 6.46, - "learning_rate": 7.122497773193944e-06, - "loss": 0.0136, + "learning_rate": 1.7142589674379417e-05, + "loss": 0.0352, "step": 138345 }, { "epoch": 6.46, - "learning_rate": 7.122028971918804e-06, - "loss": 0.0262, + "learning_rate": 1.71421216045434e-05, + "loss": 0.0147, "step": 138350 }, { "epoch": 6.46, - "learning_rate": 7.1215601706436644e-06, - "loss": 0.0275, + "learning_rate": 1.714165353470738e-05, + "loss": 0.046, "step": 138355 }, { "epoch": 6.46, - "learning_rate": 7.121091369368526e-06, - "loss": 0.0269, + "learning_rate": 1.714118546487136e-05, + "loss": 0.0241, "step": 138360 }, { "epoch": 6.46, - "learning_rate": 7.120622568093386e-06, - "loss": 0.0407, + "learning_rate": 1.714071739503534e-05, + "loss": 0.0413, "step": 138365 }, { "epoch": 6.46, - "learning_rate": 7.120153766818246e-06, - "loss": 0.0706, + "learning_rate": 1.714024932519932e-05, + "loss": 0.0349, "step": 138370 }, { "epoch": 6.46, - "learning_rate": 7.119684965543107e-06, - "loss": 0.0743, + "learning_rate": 1.71397812553633e-05, + "loss": 0.0396, "step": 138375 }, { "epoch": 6.46, - "learning_rate": 7.119216164267967e-06, - "loss": 0.1196, + "learning_rate": 1.713931318552728e-05, + "loss": 0.0526, "step": 138380 }, { "epoch": 6.46, - "learning_rate": 7.118747362992828e-06, - "loss": 0.2095, + "learning_rate": 1.7138845115691262e-05, + "loss": 0.1322, "step": 138385 }, { "epoch": 6.46, - "learning_rate": 7.118278561717688e-06, - "loss": 0.1527, + "learning_rate": 1.7138377045855242e-05, + "loss": 0.2002, "step": 138390 }, { "epoch": 6.46, - "learning_rate": 7.117809760442549e-06, - "loss": 0.0021, + "learning_rate": 1.7137908976019222e-05, + "loss": 0.0046, "step": 138395 }, { "epoch": 6.46, - "learning_rate": 7.117340959167409e-06, - "loss": 0.0202, + "learning_rate": 1.7137440906183202e-05, + "loss": 0.0363, "step": 138400 }, { "epoch": 6.46, - "learning_rate": 7.1168721578922705e-06, - "loss": 0.0139, + "learning_rate": 1.7136972836347185e-05, + "loss": 0.0138, "step": 138405 }, { "epoch": 6.46, - "learning_rate": 7.1164033566171305e-06, - "loss": 0.0233, + "learning_rate": 1.7136504766511165e-05, + "loss": 0.0761, "step": 138410 }, { "epoch": 6.46, - "learning_rate": 7.115934555341991e-06, - "loss": 0.0311, + "learning_rate": 1.7136036696675145e-05, + "loss": 0.0559, "step": 138415 }, { "epoch": 6.46, - "learning_rate": 7.115465754066851e-06, - "loss": 0.0418, + "learning_rate": 1.7135568626839124e-05, + "loss": 0.0247, "step": 138420 }, { "epoch": 6.46, - "learning_rate": 7.114996952791712e-06, - "loss": 0.157, + "learning_rate": 1.7135100557003108e-05, + "loss": 0.0376, "step": 138425 }, { "epoch": 6.46, - "learning_rate": 7.114528151516573e-06, - "loss": 0.122, + "learning_rate": 1.7134632487167084e-05, + "loss": 0.0228, "step": 138430 }, { "epoch": 6.46, - "learning_rate": 7.1140593502414335e-06, - "loss": 0.154, + "learning_rate": 1.7134164417331064e-05, + "loss": 0.1586, "step": 138435 }, { "epoch": 6.46, - "learning_rate": 7.1135905489662935e-06, - "loss": 0.2288, + "learning_rate": 1.7133696347495047e-05, + "loss": 0.0772, "step": 138440 }, { "epoch": 6.46, - "learning_rate": 7.113121747691154e-06, - "loss": 0.0046, + "learning_rate": 1.7133228277659027e-05, + "loss": 0.022, "step": 138445 }, { "epoch": 6.46, - "learning_rate": 7.112652946416014e-06, - "loss": 0.0381, + "learning_rate": 1.7132760207823007e-05, + "loss": 0.0175, "step": 138450 }, { "epoch": 6.46, - "learning_rate": 7.112184145140876e-06, - "loss": 0.0096, + "learning_rate": 1.7132292137986987e-05, + "loss": 0.0362, "step": 138455 }, { "epoch": 6.46, - "learning_rate": 7.111715343865736e-06, - "loss": 0.0733, + "learning_rate": 1.713182406815097e-05, + "loss": 0.015, "step": 138460 }, { "epoch": 6.46, - "learning_rate": 7.1112465425905965e-06, - "loss": 0.0174, + "learning_rate": 1.713135599831495e-05, + "loss": 0.0386, "step": 138465 }, { "epoch": 6.46, - "learning_rate": 7.1107777413154565e-06, - "loss": 0.0414, + "learning_rate": 1.713088792847893e-05, + "loss": 0.0456, "step": 138470 }, { "epoch": 6.46, - "learning_rate": 7.110308940040318e-06, - "loss": 0.017, + "learning_rate": 1.713041985864291e-05, + "loss": 0.0346, "step": 138475 }, { "epoch": 6.46, - "learning_rate": 7.109840138765178e-06, - "loss": 0.1516, + "learning_rate": 1.7129951788806892e-05, + "loss": 0.058, "step": 138480 }, { "epoch": 6.46, - "learning_rate": 7.109371337490039e-06, - "loss": 0.2079, + "learning_rate": 1.7129483718970872e-05, + "loss": 0.1466, "step": 138485 }, { "epoch": 6.46, - "learning_rate": 7.108902536214899e-06, - "loss": 0.1469, + "learning_rate": 1.7129015649134852e-05, + "loss": 0.1365, "step": 138490 }, { "epoch": 6.46, - "learning_rate": 7.1084337349397595e-06, - "loss": 0.0149, + "learning_rate": 1.7128547579298832e-05, + "loss": 0.0285, "step": 138495 }, { "epoch": 6.46, - "learning_rate": 7.10796493366462e-06, - "loss": 0.017, + "learning_rate": 1.712807950946281e-05, + "loss": 0.0334, "step": 138500 }, { "epoch": 6.46, - "learning_rate": 7.107496132389481e-06, - "loss": 0.0319, + "learning_rate": 1.712761143962679e-05, + "loss": 0.0085, "step": 138505 }, { "epoch": 6.46, - "learning_rate": 7.107027331114341e-06, - "loss": 0.0184, + "learning_rate": 1.712714336979077e-05, + "loss": 0.0347, "step": 138510 }, { "epoch": 6.46, - "learning_rate": 7.106558529839202e-06, - "loss": 0.0153, + "learning_rate": 1.7126675299954755e-05, + "loss": 0.0731, "step": 138515 }, { "epoch": 6.46, - "learning_rate": 7.106089728564062e-06, - "loss": 0.0374, + "learning_rate": 1.7126207230118734e-05, + "loss": 0.0856, "step": 138520 }, { "epoch": 6.46, - "learning_rate": 7.105620927288923e-06, - "loss": 0.1152, + "learning_rate": 1.7125739160282714e-05, + "loss": 0.069, "step": 138525 }, { "epoch": 6.46, - "learning_rate": 7.105152126013783e-06, - "loss": 0.148, + "learning_rate": 1.7125271090446694e-05, + "loss": 0.0404, "step": 138530 }, { "epoch": 6.46, - "learning_rate": 7.104683324738644e-06, - "loss": 0.1725, + "learning_rate": 1.7124803020610677e-05, + "loss": 0.1394, "step": 138535 }, { "epoch": 6.46, - "learning_rate": 7.104214523463504e-06, - "loss": 0.3024, + "learning_rate": 1.7124334950774657e-05, + "loss": 0.2293, "step": 138540 }, { "epoch": 6.46, - "learning_rate": 7.103745722188366e-06, - "loss": 0.0022, + "learning_rate": 1.7123866880938637e-05, + "loss": 0.0258, "step": 138545 }, { "epoch": 6.46, - "learning_rate": 7.1032769209132256e-06, - "loss": 0.0194, + "learning_rate": 1.712339881110262e-05, + "loss": 0.0067, "step": 138550 }, { "epoch": 6.47, - "learning_rate": 7.102808119638086e-06, - "loss": 0.0192, + "learning_rate": 1.7122930741266596e-05, + "loss": 0.0013, "step": 138555 }, { "epoch": 6.47, - "learning_rate": 7.102339318362946e-06, - "loss": 0.0401, + "learning_rate": 1.7122462671430576e-05, + "loss": 0.0194, "step": 138560 }, { "epoch": 6.47, - "learning_rate": 7.101870517087806e-06, - "loss": 0.0442, + "learning_rate": 1.7121994601594556e-05, + "loss": 0.071, "step": 138565 }, { "epoch": 6.47, - "learning_rate": 7.101401715812668e-06, - "loss": 0.0174, + "learning_rate": 1.712152653175854e-05, + "loss": 0.0307, "step": 138570 }, { "epoch": 6.47, - "learning_rate": 7.100932914537529e-06, - "loss": 0.0597, + "learning_rate": 1.712105846192252e-05, + "loss": 0.0539, "step": 138575 }, { "epoch": 6.47, - "learning_rate": 7.1004641132623886e-06, - "loss": 0.182, + "learning_rate": 1.71205903920865e-05, + "loss": 0.0662, "step": 138580 }, { "epoch": 6.47, - "learning_rate": 7.0999953119872485e-06, - "loss": 0.2193, + "learning_rate": 1.712012232225048e-05, + "loss": 0.31, "step": 138585 }, { "epoch": 6.47, - "learning_rate": 7.099526510712109e-06, - "loss": 0.1151, + "learning_rate": 1.7119654252414462e-05, + "loss": 0.1719, "step": 138590 }, { "epoch": 6.47, - "learning_rate": 7.099057709436971e-06, - "loss": 0.0244, + "learning_rate": 1.7119186182578442e-05, + "loss": 0.0139, "step": 138595 }, { "epoch": 6.47, - "learning_rate": 7.098588908161831e-06, - "loss": 0.0038, + "learning_rate": 1.711871811274242e-05, + "loss": 0.0197, "step": 138600 }, { "epoch": 6.47, - "learning_rate": 7.098120106886691e-06, - "loss": 0.0297, + "learning_rate": 1.7118250042906405e-05, + "loss": 0.0201, "step": 138605 }, { "epoch": 6.47, - "learning_rate": 7.0976513056115515e-06, - "loss": 0.016, + "learning_rate": 1.7117781973070385e-05, + "loss": 0.082, "step": 138610 }, { "epoch": 6.47, - "learning_rate": 7.097182504336413e-06, - "loss": 0.0173, + "learning_rate": 1.7117313903234364e-05, + "loss": 0.0362, "step": 138615 }, { "epoch": 6.47, - "learning_rate": 7.096713703061273e-06, - "loss": 0.0691, + "learning_rate": 1.711684583339834e-05, + "loss": 0.0214, "step": 138620 }, { "epoch": 6.47, - "learning_rate": 7.096244901786133e-06, - "loss": 0.0673, + "learning_rate": 1.7116377763562324e-05, + "loss": 0.0812, "step": 138625 }, { "epoch": 6.47, - "learning_rate": 7.095776100510994e-06, - "loss": 0.1336, + "learning_rate": 1.7115909693726304e-05, + "loss": 0.1242, "step": 138630 }, { "epoch": 6.47, - "learning_rate": 7.095307299235854e-06, - "loss": 0.2075, + "learning_rate": 1.7115441623890284e-05, + "loss": 0.1928, "step": 138635 }, { "epoch": 6.47, - "learning_rate": 7.094838497960715e-06, - "loss": 0.1942, + "learning_rate": 1.7114973554054264e-05, + "loss": 0.1089, "step": 138640 }, { "epoch": 6.47, - "learning_rate": 7.094369696685575e-06, - "loss": 0.0252, + "learning_rate": 1.7114505484218247e-05, + "loss": 0.009, "step": 138645 }, { "epoch": 6.47, - "learning_rate": 7.093900895410436e-06, - "loss": 0.0289, + "learning_rate": 1.7114037414382227e-05, + "loss": 0.0093, "step": 138650 }, { "epoch": 6.47, - "learning_rate": 7.093432094135296e-06, - "loss": 0.0371, + "learning_rate": 1.7113569344546206e-05, + "loss": 0.0546, "step": 138655 }, { "epoch": 6.47, - "learning_rate": 7.092963292860157e-06, - "loss": 0.0143, + "learning_rate": 1.7113101274710186e-05, + "loss": 0.074, "step": 138660 }, { "epoch": 6.47, - "learning_rate": 7.0924944915850184e-06, - "loss": 0.0355, + "learning_rate": 1.711263320487417e-05, + "loss": 0.0336, "step": 138665 }, { "epoch": 6.47, - "learning_rate": 7.092025690309878e-06, - "loss": 0.1051, + "learning_rate": 1.711216513503815e-05, + "loss": 0.126, "step": 138670 }, { "epoch": 6.47, - "learning_rate": 7.091556889034738e-06, - "loss": 0.0981, + "learning_rate": 1.711169706520213e-05, + "loss": 0.0547, "step": 138675 }, { "epoch": 6.47, - "learning_rate": 7.091088087759599e-06, - "loss": 0.0245, + "learning_rate": 1.7111228995366112e-05, + "loss": 0.0736, "step": 138680 }, { "epoch": 6.47, - "learning_rate": 7.090619286484461e-06, - "loss": 0.2269, + "learning_rate": 1.711076092553009e-05, + "loss": 0.1604, "step": 138685 }, { "epoch": 6.47, - "learning_rate": 7.090150485209321e-06, - "loss": 0.1048, + "learning_rate": 1.711029285569407e-05, + "loss": 0.1319, "step": 138690 }, { "epoch": 6.47, - "learning_rate": 7.089681683934181e-06, - "loss": 0.0111, + "learning_rate": 1.7109824785858048e-05, + "loss": 0.0162, "step": 138695 }, { "epoch": 6.47, - "learning_rate": 7.089212882659041e-06, - "loss": 0.0718, + "learning_rate": 1.710935671602203e-05, + "loss": 0.0105, "step": 138700 }, { "epoch": 6.47, - "learning_rate": 7.088744081383901e-06, - "loss": 0.0205, + "learning_rate": 1.710888864618601e-05, + "loss": 0.0067, "step": 138705 }, { "epoch": 6.47, - "learning_rate": 7.088275280108763e-06, - "loss": 0.0353, + "learning_rate": 1.710842057634999e-05, + "loss": 0.0561, "step": 138710 }, { "epoch": 6.47, - "learning_rate": 7.087806478833623e-06, - "loss": 0.1043, + "learning_rate": 1.710795250651397e-05, + "loss": 0.0313, "step": 138715 }, { "epoch": 6.47, - "learning_rate": 7.087337677558484e-06, - "loss": 0.0369, + "learning_rate": 1.7107484436677954e-05, + "loss": 0.0555, "step": 138720 }, { "epoch": 6.47, - "learning_rate": 7.0868688762833436e-06, - "loss": 0.0717, + "learning_rate": 1.7107016366841934e-05, + "loss": 0.0387, "step": 138725 }, { "epoch": 6.47, - "learning_rate": 7.086400075008205e-06, - "loss": 0.0458, + "learning_rate": 1.7106548297005914e-05, + "loss": 0.1077, "step": 138730 }, { "epoch": 6.47, - "learning_rate": 7.085931273733065e-06, - "loss": 0.1863, + "learning_rate": 1.7106080227169897e-05, + "loss": 0.171, "step": 138735 }, { "epoch": 6.47, - "learning_rate": 7.085462472457926e-06, - "loss": 0.1761, + "learning_rate": 1.7105612157333877e-05, + "loss": 0.1808, "step": 138740 }, { "epoch": 6.47, - "learning_rate": 7.084993671182786e-06, - "loss": 0.023, + "learning_rate": 1.7105144087497853e-05, + "loss": 0.0217, "step": 138745 }, { "epoch": 6.47, - "learning_rate": 7.084524869907647e-06, - "loss": 0.0476, + "learning_rate": 1.7104676017661833e-05, + "loss": 0.0162, "step": 138750 }, { "epoch": 6.47, - "learning_rate": 7.084056068632507e-06, - "loss": 0.0267, + "learning_rate": 1.7104207947825816e-05, + "loss": 0.0577, "step": 138755 }, { "epoch": 6.47, - "learning_rate": 7.083587267357368e-06, - "loss": 0.0075, + "learning_rate": 1.7103739877989796e-05, + "loss": 0.0178, "step": 138760 }, { "epoch": 6.47, - "learning_rate": 7.083118466082228e-06, - "loss": 0.0586, + "learning_rate": 1.7103271808153776e-05, + "loss": 0.0541, "step": 138765 }, { "epoch": 6.48, - "learning_rate": 7.082649664807089e-06, - "loss": 0.0969, + "learning_rate": 1.7102803738317756e-05, + "loss": 0.0898, "step": 138770 }, { "epoch": 6.48, - "learning_rate": 7.082180863531949e-06, - "loss": 0.0218, + "learning_rate": 1.710233566848174e-05, + "loss": 0.0806, "step": 138775 }, { "epoch": 6.48, - "learning_rate": 7.0817120622568105e-06, - "loss": 0.1589, + "learning_rate": 1.710186759864572e-05, + "loss": 0.0686, "step": 138780 }, { "epoch": 6.48, - "learning_rate": 7.08124326098167e-06, - "loss": 0.1703, + "learning_rate": 1.71013995288097e-05, + "loss": 0.204, "step": 138785 }, { "epoch": 6.48, - "learning_rate": 7.080774459706531e-06, - "loss": 0.1599, + "learning_rate": 1.7100931458973682e-05, + "loss": 0.2719, "step": 138790 }, { "epoch": 6.48, - "learning_rate": 7.080305658431391e-06, - "loss": 0.0219, + "learning_rate": 1.710046338913766e-05, + "loss": 0.0148, "step": 138795 }, { "epoch": 6.48, - "learning_rate": 7.079836857156253e-06, - "loss": 0.0356, + "learning_rate": 1.709999531930164e-05, + "loss": 0.0654, "step": 138800 }, { "epoch": 6.48, - "learning_rate": 7.079368055881113e-06, - "loss": 0.0503, + "learning_rate": 1.709952724946562e-05, + "loss": 0.0173, "step": 138805 }, { "epoch": 6.48, - "learning_rate": 7.0788992546059735e-06, - "loss": 0.0449, + "learning_rate": 1.70990591796296e-05, + "loss": 0.0203, "step": 138810 }, { "epoch": 6.48, - "learning_rate": 7.078430453330833e-06, - "loss": 0.0518, + "learning_rate": 1.709859110979358e-05, + "loss": 0.0293, "step": 138815 }, { "epoch": 6.48, - "learning_rate": 7.077961652055694e-06, - "loss": 0.0586, + "learning_rate": 1.709812303995756e-05, + "loss": 0.0235, "step": 138820 }, { "epoch": 6.48, - "learning_rate": 7.077492850780555e-06, - "loss": 0.0672, + "learning_rate": 1.709765497012154e-05, + "loss": 0.1045, "step": 138825 }, { "epoch": 6.48, - "learning_rate": 7.077024049505416e-06, - "loss": 0.1514, + "learning_rate": 1.7097186900285524e-05, + "loss": 0.1112, "step": 138830 }, { "epoch": 6.48, - "learning_rate": 7.076555248230276e-06, - "loss": 0.0801, + "learning_rate": 1.7096718830449503e-05, + "loss": 0.0931, "step": 138835 }, { "epoch": 6.48, - "learning_rate": 7.0760864469551364e-06, - "loss": 0.1871, + "learning_rate": 1.7096250760613483e-05, + "loss": 0.1145, "step": 138840 }, { "epoch": 6.48, - "learning_rate": 7.075617645679996e-06, - "loss": 0.0281, + "learning_rate": 1.7095782690777463e-05, + "loss": 0.0161, "step": 138845 }, { "epoch": 6.48, - "learning_rate": 7.075148844404858e-06, - "loss": 0.0246, + "learning_rate": 1.7095314620941446e-05, + "loss": 0.0249, "step": 138850 }, { "epoch": 6.48, - "learning_rate": 7.074680043129718e-06, - "loss": 0.054, + "learning_rate": 1.7094846551105426e-05, + "loss": 0.0091, "step": 138855 }, { "epoch": 6.48, - "learning_rate": 7.074211241854579e-06, - "loss": 0.0346, + "learning_rate": 1.7094378481269406e-05, + "loss": 0.0568, "step": 138860 }, { "epoch": 6.48, - "learning_rate": 7.073742440579439e-06, - "loss": 0.0627, + "learning_rate": 1.709391041143339e-05, + "loss": 0.0532, "step": 138865 }, { "epoch": 6.48, - "learning_rate": 7.0732736393043e-06, - "loss": 0.0396, + "learning_rate": 1.7093442341597366e-05, + "loss": 0.0527, "step": 138870 }, { "epoch": 6.48, - "learning_rate": 7.07280483802916e-06, - "loss": 0.0533, + "learning_rate": 1.7092974271761345e-05, + "loss": 0.0662, "step": 138875 }, { "epoch": 6.48, - "learning_rate": 7.072336036754021e-06, - "loss": 0.0668, + "learning_rate": 1.7092506201925325e-05, + "loss": 0.1185, "step": 138880 }, { "epoch": 6.48, - "learning_rate": 7.071867235478881e-06, - "loss": 0.2498, + "learning_rate": 1.709203813208931e-05, + "loss": 0.1718, "step": 138885 }, { "epoch": 6.48, - "learning_rate": 7.071398434203741e-06, - "loss": 0.1561, + "learning_rate": 1.7091570062253288e-05, + "loss": 0.1213, "step": 138890 }, { "epoch": 6.48, - "learning_rate": 7.0709296329286025e-06, - "loss": 0.0325, + "learning_rate": 1.7091101992417268e-05, + "loss": 0.0267, "step": 138895 }, { "epoch": 6.48, - "learning_rate": 7.070460831653463e-06, - "loss": 0.0432, + "learning_rate": 1.7090633922581248e-05, + "loss": 0.0135, "step": 138900 }, { "epoch": 6.48, - "learning_rate": 7.069992030378323e-06, - "loss": 0.0295, + "learning_rate": 1.709016585274523e-05, + "loss": 0.0434, "step": 138905 }, { "epoch": 6.48, - "learning_rate": 7.069523229103183e-06, - "loss": 0.0443, + "learning_rate": 1.708969778290921e-05, + "loss": 0.0095, "step": 138910 }, { "epoch": 6.48, - "learning_rate": 7.069054427828044e-06, - "loss": 0.0282, + "learning_rate": 1.708922971307319e-05, + "loss": 0.0274, "step": 138915 }, { "epoch": 6.48, - "learning_rate": 7.0685856265529055e-06, - "loss": 0.0544, + "learning_rate": 1.7088761643237174e-05, + "loss": 0.0466, "step": 138920 }, { "epoch": 6.48, - "learning_rate": 7.0681168252777655e-06, - "loss": 0.0471, + "learning_rate": 1.7088293573401154e-05, + "loss": 0.0518, "step": 138925 }, { "epoch": 6.48, - "learning_rate": 7.067648024002625e-06, - "loss": 0.0835, + "learning_rate": 1.7087825503565134e-05, + "loss": 0.0754, "step": 138930 }, { "epoch": 6.48, - "learning_rate": 7.067179222727486e-06, - "loss": 0.1078, + "learning_rate": 1.708735743372911e-05, + "loss": 0.206, "step": 138935 }, { "epoch": 6.48, - "learning_rate": 7.066710421452348e-06, - "loss": 0.1133, + "learning_rate": 1.7086889363893093e-05, + "loss": 0.1918, "step": 138940 }, { "epoch": 6.48, - "learning_rate": 7.066241620177208e-06, - "loss": 0.0274, + "learning_rate": 1.7086421294057073e-05, + "loss": 0.0011, "step": 138945 }, { "epoch": 6.48, - "learning_rate": 7.065772818902068e-06, - "loss": 0.0183, + "learning_rate": 1.7085953224221053e-05, + "loss": 0.0141, "step": 138950 }, { "epoch": 6.48, - "learning_rate": 7.0653040176269285e-06, - "loss": 0.0097, + "learning_rate": 1.7085485154385033e-05, + "loss": 0.0174, "step": 138955 }, { "epoch": 6.48, - "learning_rate": 7.064835216351788e-06, - "loss": 0.0738, + "learning_rate": 1.7085017084549016e-05, + "loss": 0.0032, "step": 138960 }, { "epoch": 6.48, - "learning_rate": 7.06436641507665e-06, - "loss": 0.0527, + "learning_rate": 1.7084549014712996e-05, + "loss": 0.1033, "step": 138965 }, { "epoch": 6.48, - "learning_rate": 7.06389761380151e-06, - "loss": 0.107, + "learning_rate": 1.7084080944876976e-05, + "loss": 0.038, "step": 138970 }, { "epoch": 6.48, - "learning_rate": 7.063428812526371e-06, - "loss": 0.1027, + "learning_rate": 1.708361287504096e-05, + "loss": 0.0729, "step": 138975 }, { "epoch": 6.48, - "learning_rate": 7.062960011251231e-06, - "loss": 0.0907, + "learning_rate": 1.708314480520494e-05, + "loss": 0.0093, "step": 138980 }, { "epoch": 6.49, - "learning_rate": 7.0624912099760915e-06, - "loss": 0.1455, + "learning_rate": 1.708267673536892e-05, + "loss": 0.2548, "step": 138985 }, { "epoch": 6.49, - "learning_rate": 7.062022408700952e-06, - "loss": 0.1326, + "learning_rate": 1.7082208665532898e-05, + "loss": 0.1918, "step": 138990 }, { "epoch": 6.49, - "learning_rate": 7.061553607425813e-06, - "loss": 0.0035, + "learning_rate": 1.708174059569688e-05, + "loss": 0.0286, "step": 138995 }, { "epoch": 6.49, - "learning_rate": 7.061084806150673e-06, - "loss": 0.0095, + "learning_rate": 1.7081272525860858e-05, + "loss": 0.0053, "step": 139000 }, { "epoch": 6.49, - "learning_rate": 7.060616004875534e-06, - "loss": 0.0264, + "learning_rate": 1.7080804456024838e-05, + "loss": 0.0361, "step": 139005 }, { "epoch": 6.49, - "learning_rate": 7.0601472036003945e-06, - "loss": 0.0108, + "learning_rate": 1.7080336386188817e-05, + "loss": 0.0217, "step": 139010 }, { "epoch": 6.49, - "learning_rate": 7.059678402325255e-06, - "loss": 0.0352, + "learning_rate": 1.70798683163528e-05, + "loss": 0.0469, "step": 139015 }, { "epoch": 6.49, - "learning_rate": 7.059209601050115e-06, - "loss": 0.0232, + "learning_rate": 1.707940024651678e-05, + "loss": 0.0812, "step": 139020 }, { "epoch": 6.49, - "learning_rate": 7.058740799774976e-06, - "loss": 0.0415, + "learning_rate": 1.707893217668076e-05, + "loss": 0.0344, "step": 139025 }, { "epoch": 6.49, - "learning_rate": 7.058271998499836e-06, - "loss": 0.1432, + "learning_rate": 1.707846410684474e-05, + "loss": 0.1607, "step": 139030 }, { "epoch": 6.49, - "learning_rate": 7.0578031972246976e-06, - "loss": 0.1881, + "learning_rate": 1.7077996037008723e-05, + "loss": 0.2141, "step": 139035 }, { "epoch": 6.49, - "learning_rate": 7.0573343959495575e-06, - "loss": 0.1022, + "learning_rate": 1.7077527967172703e-05, + "loss": 0.2282, "step": 139040 }, { "epoch": 6.49, - "learning_rate": 7.056865594674418e-06, - "loss": 0.0022, + "learning_rate": 1.7077059897336683e-05, + "loss": 0.0151, "step": 139045 }, { "epoch": 6.49, - "learning_rate": 7.056396793399278e-06, - "loss": 0.0068, + "learning_rate": 1.7076591827500666e-05, + "loss": 0.0529, "step": 139050 }, { "epoch": 6.49, - "learning_rate": 7.05592799212414e-06, - "loss": 0.0079, + "learning_rate": 1.7076123757664646e-05, + "loss": 0.0649, "step": 139055 }, { "epoch": 6.49, - "learning_rate": 7.055459190849e-06, - "loss": 0.0295, + "learning_rate": 1.7075655687828622e-05, + "loss": 0.0508, "step": 139060 }, { "epoch": 6.49, - "learning_rate": 7.0549903895738606e-06, - "loss": 0.0273, + "learning_rate": 1.7075187617992602e-05, + "loss": 0.0696, "step": 139065 }, { "epoch": 6.49, - "learning_rate": 7.0545215882987205e-06, - "loss": 0.0632, + "learning_rate": 1.7074719548156585e-05, + "loss": 0.0275, "step": 139070 }, { "epoch": 6.49, - "learning_rate": 7.054052787023581e-06, - "loss": 0.0681, + "learning_rate": 1.7074251478320565e-05, + "loss": 0.0467, "step": 139075 }, { "epoch": 6.49, - "learning_rate": 7.053583985748442e-06, - "loss": 0.0766, + "learning_rate": 1.7073783408484545e-05, + "loss": 0.1225, "step": 139080 }, { "epoch": 6.49, - "learning_rate": 7.053115184473303e-06, - "loss": 0.2042, + "learning_rate": 1.7073315338648525e-05, + "loss": 0.1439, "step": 139085 }, { "epoch": 6.49, - "learning_rate": 7.052646383198163e-06, - "loss": 0.2046, + "learning_rate": 1.7072847268812508e-05, + "loss": 0.1855, "step": 139090 }, { "epoch": 6.49, - "learning_rate": 7.0521775819230236e-06, - "loss": 0.0068, + "learning_rate": 1.7072379198976488e-05, + "loss": 0.0129, "step": 139095 }, { "epoch": 6.49, - "learning_rate": 7.0517087806478835e-06, - "loss": 0.0192, + "learning_rate": 1.7071911129140468e-05, + "loss": 0.0114, "step": 139100 }, { "epoch": 6.49, - "learning_rate": 7.051239979372745e-06, - "loss": 0.0163, + "learning_rate": 1.707144305930445e-05, + "loss": 0.0493, "step": 139105 }, { "epoch": 6.49, - "learning_rate": 7.050771178097605e-06, - "loss": 0.0252, + "learning_rate": 1.707097498946843e-05, + "loss": 0.0765, "step": 139110 }, { "epoch": 6.49, - "learning_rate": 7.050302376822466e-06, - "loss": 0.075, + "learning_rate": 1.707050691963241e-05, + "loss": 0.0293, "step": 139115 }, { "epoch": 6.49, - "learning_rate": 7.049833575547326e-06, - "loss": 0.1039, + "learning_rate": 1.707003884979639e-05, + "loss": 0.0592, "step": 139120 }, { "epoch": 6.49, - "learning_rate": 7.049364774272187e-06, - "loss": 0.0939, + "learning_rate": 1.706957077996037e-05, + "loss": 0.0515, "step": 139125 }, { "epoch": 6.49, - "learning_rate": 7.048895972997047e-06, - "loss": 0.0643, + "learning_rate": 1.706910271012435e-05, + "loss": 0.1266, "step": 139130 }, { "epoch": 6.49, - "learning_rate": 7.048427171721908e-06, - "loss": 0.1631, + "learning_rate": 1.706863464028833e-05, + "loss": 0.1318, "step": 139135 }, { "epoch": 6.49, - "learning_rate": 7.047958370446768e-06, - "loss": 0.0808, + "learning_rate": 1.706816657045231e-05, + "loss": 0.0779, "step": 139140 }, { "epoch": 6.49, - "learning_rate": 7.047489569171628e-06, - "loss": 0.0066, + "learning_rate": 1.7067698500616293e-05, + "loss": 0.0124, "step": 139145 }, { "epoch": 6.49, - "learning_rate": 7.04702076789649e-06, - "loss": 0.0152, + "learning_rate": 1.7067230430780273e-05, + "loss": 0.0125, "step": 139150 }, { "epoch": 6.49, - "learning_rate": 7.04655196662135e-06, - "loss": 0.0341, + "learning_rate": 1.7066762360944252e-05, + "loss": 0.0227, "step": 139155 }, { "epoch": 6.49, - "learning_rate": 7.04608316534621e-06, - "loss": 0.0326, + "learning_rate": 1.7066294291108236e-05, + "loss": 0.0385, "step": 139160 }, { "epoch": 6.49, - "learning_rate": 7.04561436407107e-06, - "loss": 0.0233, + "learning_rate": 1.7065826221272216e-05, + "loss": 0.0232, "step": 139165 }, { "epoch": 6.49, - "learning_rate": 7.045145562795931e-06, - "loss": 0.0773, + "learning_rate": 1.7065358151436195e-05, + "loss": 0.1007, "step": 139170 }, { "epoch": 6.49, - "learning_rate": 7.044676761520793e-06, - "loss": 0.0569, + "learning_rate": 1.7064890081600175e-05, + "loss": 0.0288, "step": 139175 }, { "epoch": 6.49, - "learning_rate": 7.044207960245653e-06, - "loss": 0.0562, + "learning_rate": 1.706442201176416e-05, + "loss": 0.2197, "step": 139180 }, { "epoch": 6.49, - "learning_rate": 7.0437391589705125e-06, - "loss": 0.2528, + "learning_rate": 1.7063953941928138e-05, + "loss": 0.2388, "step": 139185 }, { "epoch": 6.49, - "learning_rate": 7.043270357695373e-06, - "loss": 0.2914, + "learning_rate": 1.7063485872092115e-05, + "loss": 0.1218, "step": 139190 }, { "epoch": 6.5, - "learning_rate": 7.042801556420235e-06, - "loss": 0.0439, + "learning_rate": 1.7063017802256094e-05, + "loss": 0.014, "step": 139195 }, { "epoch": 6.5, - "learning_rate": 7.042332755145095e-06, - "loss": 0.0166, + "learning_rate": 1.7062549732420078e-05, + "loss": 0.027, "step": 139200 }, { "epoch": 6.5, - "learning_rate": 7.041863953869955e-06, - "loss": 0.0319, + "learning_rate": 1.7062081662584057e-05, + "loss": 0.0175, "step": 139205 }, { "epoch": 6.5, - "learning_rate": 7.041395152594816e-06, - "loss": 0.0556, + "learning_rate": 1.7061613592748037e-05, + "loss": 0.0931, "step": 139210 }, { "epoch": 6.5, - "learning_rate": 7.0409263513196755e-06, - "loss": 0.0822, + "learning_rate": 1.706114552291202e-05, + "loss": 0.0566, "step": 139215 }, { "epoch": 6.5, - "learning_rate": 7.040457550044537e-06, - "loss": 0.0281, + "learning_rate": 1.7060677453076e-05, + "loss": 0.0537, "step": 139220 }, { "epoch": 6.5, - "learning_rate": 7.039988748769397e-06, - "loss": 0.0834, + "learning_rate": 1.706020938323998e-05, + "loss": 0.0222, "step": 139225 }, { "epoch": 6.5, - "learning_rate": 7.039519947494258e-06, - "loss": 0.0929, + "learning_rate": 1.705974131340396e-05, + "loss": 0.0652, "step": 139230 }, { "epoch": 6.5, - "learning_rate": 7.039051146219118e-06, - "loss": 0.2187, + "learning_rate": 1.7059273243567943e-05, + "loss": 0.2033, "step": 139235 }, { "epoch": 6.5, - "learning_rate": 7.0385823449439786e-06, - "loss": 0.2534, + "learning_rate": 1.7058805173731923e-05, + "loss": 0.1506, "step": 139240 }, { "epoch": 6.5, - "learning_rate": 7.038113543668839e-06, - "loss": 0.0155, + "learning_rate": 1.7058337103895903e-05, + "loss": 0.0299, "step": 139245 }, { "epoch": 6.5, - "learning_rate": 7.0376447423937e-06, - "loss": 0.0062, + "learning_rate": 1.705786903405988e-05, + "loss": 0.0027, "step": 139250 }, { "epoch": 6.5, - "learning_rate": 7.03717594111856e-06, - "loss": 0.0127, + "learning_rate": 1.7057400964223862e-05, + "loss": 0.008, "step": 139255 }, { "epoch": 6.5, - "learning_rate": 7.036707139843421e-06, - "loss": 0.0104, + "learning_rate": 1.7056932894387842e-05, + "loss": 0.0426, "step": 139260 }, { "epoch": 6.5, - "learning_rate": 7.036238338568282e-06, - "loss": 0.0574, + "learning_rate": 1.7056464824551822e-05, + "loss": 0.0274, "step": 139265 }, { "epoch": 6.5, - "learning_rate": 7.035769537293142e-06, - "loss": 0.0174, + "learning_rate": 1.7055996754715802e-05, + "loss": 0.0189, "step": 139270 }, { "epoch": 6.5, - "learning_rate": 7.035300736018002e-06, - "loss": 0.0394, + "learning_rate": 1.7055528684879785e-05, + "loss": 0.0921, "step": 139275 }, { "epoch": 6.5, - "learning_rate": 7.034831934742863e-06, - "loss": 0.1073, + "learning_rate": 1.7055060615043765e-05, + "loss": 0.1702, "step": 139280 }, { "epoch": 6.5, - "learning_rate": 7.034363133467723e-06, - "loss": 0.1512, + "learning_rate": 1.7054592545207745e-05, + "loss": 0.2721, "step": 139285 }, { "epoch": 6.5, - "learning_rate": 7.033894332192585e-06, - "loss": 0.1911, + "learning_rate": 1.7054124475371728e-05, + "loss": 0.1256, "step": 139290 }, { "epoch": 6.5, - "learning_rate": 7.033425530917445e-06, - "loss": 0.0113, + "learning_rate": 1.7053656405535708e-05, + "loss": 0.0271, "step": 139295 }, { "epoch": 6.5, - "learning_rate": 7.032956729642305e-06, - "loss": 0.0125, + "learning_rate": 1.7053188335699688e-05, + "loss": 0.0312, "step": 139300 }, { "epoch": 6.5, - "learning_rate": 7.032487928367165e-06, - "loss": 0.0391, + "learning_rate": 1.7052720265863667e-05, + "loss": 0.0174, "step": 139305 }, { "epoch": 6.5, - "learning_rate": 7.032019127092026e-06, - "loss": 0.0148, + "learning_rate": 1.705225219602765e-05, + "loss": 0.0426, "step": 139310 }, { "epoch": 6.5, - "learning_rate": 7.031550325816887e-06, - "loss": 0.0791, + "learning_rate": 1.7051784126191627e-05, + "loss": 0.034, "step": 139315 }, { "epoch": 6.5, - "learning_rate": 7.031081524541748e-06, - "loss": 0.0716, + "learning_rate": 1.7051316056355607e-05, + "loss": 0.0325, "step": 139320 }, { "epoch": 6.5, - "learning_rate": 7.030612723266608e-06, - "loss": 0.0668, + "learning_rate": 1.7050847986519587e-05, + "loss": 0.0203, "step": 139325 }, { "epoch": 6.5, - "learning_rate": 7.030143921991468e-06, - "loss": 0.0859, + "learning_rate": 1.705037991668357e-05, + "loss": 0.0426, "step": 139330 }, { "epoch": 6.5, - "learning_rate": 7.029675120716329e-06, - "loss": 0.2498, + "learning_rate": 1.704991184684755e-05, + "loss": 0.2565, "step": 139335 }, { "epoch": 6.5, - "learning_rate": 7.02920631944119e-06, - "loss": 0.1158, + "learning_rate": 1.704944377701153e-05, + "loss": 0.2095, "step": 139340 }, { "epoch": 6.5, - "learning_rate": 7.02873751816605e-06, - "loss": 0.0391, + "learning_rate": 1.7048975707175513e-05, + "loss": 0.0115, "step": 139345 }, { "epoch": 6.5, - "learning_rate": 7.028268716890911e-06, - "loss": 0.0043, + "learning_rate": 1.7048507637339492e-05, + "loss": 0.0094, "step": 139350 }, { "epoch": 6.5, - "learning_rate": 7.027799915615771e-06, - "loss": 0.0307, + "learning_rate": 1.7048039567503472e-05, + "loss": 0.093, "step": 139355 }, { "epoch": 6.5, - "learning_rate": 7.027331114340632e-06, - "loss": 0.0175, + "learning_rate": 1.7047571497667452e-05, + "loss": 0.0171, "step": 139360 }, { "epoch": 6.5, - "learning_rate": 7.026862313065492e-06, - "loss": 0.0628, + "learning_rate": 1.7047103427831435e-05, + "loss": 0.032, "step": 139365 }, { "epoch": 6.5, - "learning_rate": 7.026393511790353e-06, - "loss": 0.0502, + "learning_rate": 1.7046635357995415e-05, + "loss": 0.0901, "step": 139370 }, { "epoch": 6.5, - "learning_rate": 7.025924710515213e-06, - "loss": 0.0684, + "learning_rate": 1.7046167288159395e-05, + "loss": 0.0159, "step": 139375 }, { "epoch": 6.5, - "learning_rate": 7.0254559092400745e-06, - "loss": 0.0944, + "learning_rate": 1.704569921832337e-05, + "loss": 0.1003, "step": 139380 }, { "epoch": 6.5, - "learning_rate": 7.0249871079649344e-06, - "loss": 0.1566, + "learning_rate": 1.7045231148487355e-05, + "loss": 0.1318, "step": 139385 }, { "epoch": 6.5, - "learning_rate": 7.024518306689795e-06, - "loss": 0.0842, + "learning_rate": 1.7044763078651334e-05, + "loss": 0.1174, "step": 139390 }, { "epoch": 6.5, - "learning_rate": 7.024049505414655e-06, - "loss": 0.094, + "learning_rate": 1.7044295008815314e-05, + "loss": 0.0157, "step": 139395 }, { "epoch": 6.5, - "learning_rate": 7.023580704139515e-06, - "loss": 0.0299, + "learning_rate": 1.7043826938979297e-05, + "loss": 0.0341, "step": 139400 }, { "epoch": 6.5, - "learning_rate": 7.023111902864377e-06, - "loss": 0.0196, + "learning_rate": 1.7043358869143277e-05, + "loss": 0.0177, "step": 139405 }, { "epoch": 6.51, - "learning_rate": 7.0226431015892375e-06, - "loss": 0.0748, + "learning_rate": 1.7042890799307257e-05, + "loss": 0.0303, "step": 139410 }, { "epoch": 6.51, - "learning_rate": 7.022174300314097e-06, - "loss": 0.0864, + "learning_rate": 1.7042422729471237e-05, + "loss": 0.0126, "step": 139415 }, { "epoch": 6.51, - "learning_rate": 7.021705499038957e-06, - "loss": 0.052, + "learning_rate": 1.704195465963522e-05, + "loss": 0.0805, "step": 139420 }, { "epoch": 6.51, - "learning_rate": 7.021236697763818e-06, - "loss": 0.0499, + "learning_rate": 1.70414865897992e-05, + "loss": 0.0762, "step": 139425 }, { "epoch": 6.51, - "learning_rate": 7.02076789648868e-06, - "loss": 0.1539, + "learning_rate": 1.704101851996318e-05, + "loss": 0.0865, "step": 139430 }, { "epoch": 6.51, - "learning_rate": 7.02029909521354e-06, - "loss": 0.1958, + "learning_rate": 1.704055045012716e-05, + "loss": 0.1155, "step": 139435 }, { "epoch": 6.51, - "learning_rate": 7.0198302939384e-06, - "loss": 0.1359, + "learning_rate": 1.704008238029114e-05, + "loss": 0.1691, "step": 139440 }, { "epoch": 6.51, - "learning_rate": 7.01936149266326e-06, - "loss": 0.0161, + "learning_rate": 1.703961431045512e-05, + "loss": 0.017, "step": 139445 }, { "epoch": 6.51, - "learning_rate": 7.018892691388122e-06, - "loss": 0.0061, + "learning_rate": 1.70391462406191e-05, + "loss": 0.0332, "step": 139450 }, { "epoch": 6.51, - "learning_rate": 7.018423890112982e-06, - "loss": 0.0137, + "learning_rate": 1.703867817078308e-05, + "loss": 0.0223, "step": 139455 }, { "epoch": 6.51, - "learning_rate": 7.017955088837842e-06, - "loss": 0.0634, + "learning_rate": 1.7038210100947062e-05, + "loss": 0.0617, "step": 139460 }, { "epoch": 6.51, - "learning_rate": 7.017486287562703e-06, - "loss": 0.04, + "learning_rate": 1.7037742031111042e-05, + "loss": 0.0841, "step": 139465 }, { "epoch": 6.51, - "learning_rate": 7.017017486287563e-06, - "loss": 0.1068, + "learning_rate": 1.703727396127502e-05, + "loss": 0.0656, "step": 139470 }, { "epoch": 6.51, - "learning_rate": 7.016548685012424e-06, - "loss": 0.0299, + "learning_rate": 1.7036805891439005e-05, + "loss": 0.0565, "step": 139475 }, { "epoch": 6.51, - "learning_rate": 7.016079883737284e-06, - "loss": 0.1832, + "learning_rate": 1.7036337821602985e-05, + "loss": 0.0485, "step": 139480 }, { "epoch": 6.51, - "learning_rate": 7.015611082462145e-06, - "loss": 0.2653, + "learning_rate": 1.7035869751766964e-05, + "loss": 0.2955, "step": 139485 }, { "epoch": 6.51, - "learning_rate": 7.015142281187005e-06, - "loss": 0.1588, + "learning_rate": 1.7035401681930944e-05, + "loss": 0.1122, "step": 139490 }, { "epoch": 6.51, - "learning_rate": 7.014673479911866e-06, - "loss": 0.0004, + "learning_rate": 1.7034933612094928e-05, + "loss": 0.0275, "step": 139495 }, { "epoch": 6.51, - "learning_rate": 7.0142046786367265e-06, - "loss": 0.0286, + "learning_rate": 1.7034465542258907e-05, + "loss": 0.0052, "step": 139500 }, { "epoch": 6.51, - "learning_rate": 7.013735877361587e-06, - "loss": 0.0415, + "learning_rate": 1.7033997472422884e-05, + "loss": 0.0127, "step": 139505 }, { "epoch": 6.51, - "learning_rate": 7.013267076086447e-06, - "loss": 0.0257, + "learning_rate": 1.7033529402586864e-05, + "loss": 0.0783, "step": 139510 }, { "epoch": 6.51, - "learning_rate": 7.012798274811308e-06, - "loss": 0.0669, + "learning_rate": 1.7033061332750847e-05, + "loss": 0.0494, "step": 139515 }, { "epoch": 6.51, - "learning_rate": 7.012329473536169e-06, - "loss": 0.0593, + "learning_rate": 1.7032593262914827e-05, + "loss": 0.1155, "step": 139520 }, { "epoch": 6.51, - "learning_rate": 7.0118606722610295e-06, - "loss": 0.0554, + "learning_rate": 1.7032125193078806e-05, + "loss": 0.0882, "step": 139525 }, { "epoch": 6.51, - "learning_rate": 7.0113918709858894e-06, - "loss": 0.1022, + "learning_rate": 1.703165712324279e-05, + "loss": 0.0395, "step": 139530 }, { "epoch": 6.51, - "learning_rate": 7.01092306971075e-06, - "loss": 0.2378, + "learning_rate": 1.703118905340677e-05, + "loss": 0.0921, "step": 139535 }, { "epoch": 6.51, - "learning_rate": 7.01045426843561e-06, - "loss": 0.1587, + "learning_rate": 1.703072098357075e-05, + "loss": 0.1225, "step": 139540 }, { "epoch": 6.51, - "learning_rate": 7.009985467160472e-06, - "loss": 0.0031, + "learning_rate": 1.703025291373473e-05, + "loss": 0.0027, "step": 139545 }, { "epoch": 6.51, - "learning_rate": 7.009516665885332e-06, - "loss": 0.0132, + "learning_rate": 1.7029784843898712e-05, + "loss": 0.0135, "step": 139550 }, { "epoch": 6.51, - "learning_rate": 7.0090478646101925e-06, - "loss": 0.0284, + "learning_rate": 1.7029316774062692e-05, + "loss": 0.0424, "step": 139555 }, { "epoch": 6.51, - "learning_rate": 7.0085790633350524e-06, - "loss": 0.0393, + "learning_rate": 1.7028848704226672e-05, + "loss": 0.0309, "step": 139560 }, { "epoch": 6.51, - "learning_rate": 7.008110262059913e-06, - "loss": 0.0458, + "learning_rate": 1.7028380634390652e-05, + "loss": 0.0695, "step": 139565 }, { "epoch": 6.51, - "learning_rate": 7.007641460784774e-06, - "loss": 0.0606, + "learning_rate": 1.702791256455463e-05, + "loss": 0.0487, "step": 139570 }, { "epoch": 6.51, - "learning_rate": 7.007172659509635e-06, - "loss": 0.0774, + "learning_rate": 1.702744449471861e-05, + "loss": 0.059, "step": 139575 }, { "epoch": 6.51, - "learning_rate": 7.006703858234495e-06, - "loss": 0.1348, + "learning_rate": 1.702697642488259e-05, + "loss": 0.115, "step": 139580 }, { "epoch": 6.51, - "learning_rate": 7.0062350569593555e-06, - "loss": 0.1506, + "learning_rate": 1.7026508355046574e-05, + "loss": 0.2659, "step": 139585 }, { "epoch": 6.51, - "learning_rate": 7.005766255684216e-06, - "loss": 0.1978, + "learning_rate": 1.7026040285210554e-05, + "loss": 0.1381, "step": 139590 }, { "epoch": 6.51, - "learning_rate": 7.005297454409077e-06, - "loss": 0.0136, + "learning_rate": 1.7025572215374534e-05, + "loss": 0.0158, "step": 139595 }, { "epoch": 6.51, - "learning_rate": 7.004828653133937e-06, - "loss": 0.0288, + "learning_rate": 1.7025104145538514e-05, + "loss": 0.0075, "step": 139600 }, { "epoch": 6.51, - "learning_rate": 7.004359851858798e-06, - "loss": 0.0532, + "learning_rate": 1.7024636075702497e-05, + "loss": 0.0237, "step": 139605 }, { "epoch": 6.51, - "learning_rate": 7.003891050583658e-06, - "loss": 0.0191, + "learning_rate": 1.7024168005866477e-05, + "loss": 0.0268, "step": 139610 }, { "epoch": 6.51, - "learning_rate": 7.003422249308519e-06, - "loss": 0.0337, + "learning_rate": 1.7023699936030457e-05, + "loss": 0.0621, "step": 139615 }, { "epoch": 6.51, - "learning_rate": 7.002953448033379e-06, - "loss": 0.0298, + "learning_rate": 1.7023231866194437e-05, + "loss": 0.0489, "step": 139620 }, { "epoch": 6.52, - "learning_rate": 7.00248464675824e-06, - "loss": 0.083, + "learning_rate": 1.702276379635842e-05, + "loss": 0.1063, "step": 139625 }, { "epoch": 6.52, - "learning_rate": 7.0020158454831e-06, - "loss": 0.0646, + "learning_rate": 1.7022295726522396e-05, + "loss": 0.1161, "step": 139630 }, { "epoch": 6.52, - "learning_rate": 7.00154704420796e-06, - "loss": 0.0907, + "learning_rate": 1.7021827656686376e-05, + "loss": 0.1148, "step": 139635 }, { "epoch": 6.52, - "learning_rate": 7.0010782429328215e-06, - "loss": 0.1341, + "learning_rate": 1.7021359586850356e-05, + "loss": 0.1749, "step": 139640 }, { "epoch": 6.52, - "learning_rate": 7.000609441657682e-06, - "loss": 0.0053, + "learning_rate": 1.702089151701434e-05, + "loss": 0.022, "step": 139645 }, { "epoch": 6.52, - "learning_rate": 7.000140640382542e-06, - "loss": 0.0071, + "learning_rate": 1.702042344717832e-05, + "loss": 0.0161, "step": 139650 }, { "epoch": 6.52, - "learning_rate": 6.999671839107402e-06, - "loss": 0.0211, + "learning_rate": 1.70199553773423e-05, + "loss": 0.0123, "step": 139655 }, { "epoch": 6.52, - "learning_rate": 6.999203037832264e-06, - "loss": 0.0128, + "learning_rate": 1.7019487307506282e-05, + "loss": 0.0642, "step": 139660 }, { "epoch": 6.52, - "learning_rate": 6.998734236557125e-06, - "loss": 0.0506, + "learning_rate": 1.701901923767026e-05, + "loss": 0.0291, "step": 139665 }, { "epoch": 6.52, - "learning_rate": 6.9982654352819845e-06, - "loss": 0.115, + "learning_rate": 1.701855116783424e-05, + "loss": 0.0429, "step": 139670 }, { "epoch": 6.52, - "learning_rate": 6.9977966340068445e-06, - "loss": 0.0969, + "learning_rate": 1.701808309799822e-05, + "loss": 0.1196, "step": 139675 }, { "epoch": 6.52, - "learning_rate": 6.997327832731705e-06, - "loss": 0.1854, + "learning_rate": 1.7017615028162204e-05, + "loss": 0.135, "step": 139680 }, { "epoch": 6.52, - "learning_rate": 6.996859031456567e-06, - "loss": 0.1933, + "learning_rate": 1.7017146958326184e-05, + "loss": 0.2025, "step": 139685 }, { "epoch": 6.52, - "learning_rate": 6.996390230181427e-06, - "loss": 0.1478, + "learning_rate": 1.7016678888490164e-05, + "loss": 0.2621, "step": 139690 }, { "epoch": 6.52, - "learning_rate": 6.995921428906287e-06, - "loss": 0.0199, + "learning_rate": 1.701621081865414e-05, + "loss": 0.019, "step": 139695 }, { "epoch": 6.52, - "learning_rate": 6.9954526276311475e-06, - "loss": 0.0024, + "learning_rate": 1.7015742748818124e-05, + "loss": 0.0111, "step": 139700 }, { "epoch": 6.52, - "learning_rate": 6.994983826356009e-06, - "loss": 0.0213, + "learning_rate": 1.7015274678982104e-05, + "loss": 0.0341, "step": 139705 }, { "epoch": 6.52, - "learning_rate": 6.994515025080869e-06, - "loss": 0.0313, + "learning_rate": 1.7014806609146083e-05, + "loss": 0.0306, "step": 139710 }, { "epoch": 6.52, - "learning_rate": 6.994046223805729e-06, - "loss": 0.0558, + "learning_rate": 1.7014338539310067e-05, + "loss": 0.0127, "step": 139715 }, { "epoch": 6.52, - "learning_rate": 6.99357742253059e-06, - "loss": 0.0747, + "learning_rate": 1.7013870469474046e-05, + "loss": 0.0602, "step": 139720 }, { "epoch": 6.52, - "learning_rate": 6.99310862125545e-06, - "loss": 0.0701, + "learning_rate": 1.7013402399638026e-05, + "loss": 0.0983, "step": 139725 }, { "epoch": 6.52, - "learning_rate": 6.992639819980311e-06, - "loss": 0.0917, + "learning_rate": 1.7012934329802006e-05, + "loss": 0.0843, "step": 139730 }, { "epoch": 6.52, - "learning_rate": 6.992171018705171e-06, - "loss": 0.115, + "learning_rate": 1.701246625996599e-05, + "loss": 0.1552, "step": 139735 }, { "epoch": 6.52, - "learning_rate": 6.991702217430032e-06, - "loss": 0.1255, + "learning_rate": 1.701199819012997e-05, + "loss": 0.1802, "step": 139740 }, { "epoch": 6.52, - "learning_rate": 6.991233416154892e-06, - "loss": 0.0218, + "learning_rate": 1.701153012029395e-05, + "loss": 0.0117, "step": 139745 }, { "epoch": 6.52, - "learning_rate": 6.990764614879753e-06, - "loss": 0.0008, + "learning_rate": 1.701106205045793e-05, + "loss": 0.0014, "step": 139750 }, { "epoch": 6.52, - "learning_rate": 6.9902958136046136e-06, - "loss": 0.0487, + "learning_rate": 1.701059398062191e-05, + "loss": 0.0455, "step": 139755 }, { "epoch": 6.52, - "learning_rate": 6.989827012329474e-06, - "loss": 0.0201, + "learning_rate": 1.701012591078589e-05, + "loss": 0.026, "step": 139760 }, { "epoch": 6.52, - "learning_rate": 6.989358211054334e-06, - "loss": 0.0289, + "learning_rate": 1.7009657840949868e-05, + "loss": 0.0181, "step": 139765 }, { "epoch": 6.52, - "learning_rate": 6.988889409779195e-06, - "loss": 0.0605, + "learning_rate": 1.700918977111385e-05, + "loss": 0.017, "step": 139770 }, { "epoch": 6.52, - "learning_rate": 6.988420608504056e-06, - "loss": 0.0867, + "learning_rate": 1.700872170127783e-05, + "loss": 0.0605, "step": 139775 }, { "epoch": 6.52, - "learning_rate": 6.987951807228917e-06, - "loss": 0.1088, + "learning_rate": 1.700825363144181e-05, + "loss": 0.1047, "step": 139780 }, { "epoch": 6.52, - "learning_rate": 6.9874830059537766e-06, - "loss": 0.139, + "learning_rate": 1.700778556160579e-05, + "loss": 0.2485, "step": 139785 }, { "epoch": 6.52, - "learning_rate": 6.987014204678637e-06, - "loss": 0.0989, + "learning_rate": 1.7007317491769774e-05, + "loss": 0.1485, "step": 139790 }, { "epoch": 6.52, - "learning_rate": 6.986545403403497e-06, - "loss": 0.002, + "learning_rate": 1.7006849421933754e-05, + "loss": 0.0315, "step": 139795 }, { "epoch": 6.52, - "learning_rate": 6.986076602128359e-06, - "loss": 0.0056, + "learning_rate": 1.7006381352097734e-05, + "loss": 0.0441, "step": 139800 }, { "epoch": 6.52, - "learning_rate": 6.985607800853219e-06, - "loss": 0.0017, + "learning_rate": 1.7005913282261713e-05, + "loss": 0.0034, "step": 139805 }, { "epoch": 6.52, - "learning_rate": 6.98513899957808e-06, - "loss": 0.0251, + "learning_rate": 1.7005445212425697e-05, + "loss": 0.061, "step": 139810 }, { "epoch": 6.52, - "learning_rate": 6.9846701983029395e-06, - "loss": 0.0554, + "learning_rate": 1.7004977142589677e-05, + "loss": 0.101, "step": 139815 }, { "epoch": 6.52, - "learning_rate": 6.9842013970278e-06, - "loss": 0.0671, + "learning_rate": 1.7004509072753653e-05, + "loss": 0.0405, "step": 139820 }, { "epoch": 6.52, - "learning_rate": 6.983732595752661e-06, - "loss": 0.1737, + "learning_rate": 1.7004041002917633e-05, + "loss": 0.1025, "step": 139825 }, { "epoch": 6.52, - "learning_rate": 6.983263794477522e-06, - "loss": 0.077, + "learning_rate": 1.7003572933081616e-05, + "loss": 0.1239, "step": 139830 }, { "epoch": 6.52, - "learning_rate": 6.982794993202382e-06, - "loss": 0.1987, + "learning_rate": 1.7003104863245596e-05, + "loss": 0.1243, "step": 139835 }, { "epoch": 6.53, - "learning_rate": 6.982326191927243e-06, - "loss": 0.1951, + "learning_rate": 1.7002636793409576e-05, + "loss": 0.167, "step": 139840 }, { "epoch": 6.53, - "learning_rate": 6.981857390652103e-06, - "loss": 0.0025, + "learning_rate": 1.700216872357356e-05, + "loss": 0.0087, "step": 139845 }, { "epoch": 6.53, - "learning_rate": 6.981388589376964e-06, - "loss": 0.0271, + "learning_rate": 1.700170065373754e-05, + "loss": 0.0248, "step": 139850 }, { "epoch": 6.53, - "learning_rate": 6.980919788101824e-06, - "loss": 0.047, + "learning_rate": 1.700123258390152e-05, + "loss": 0.0039, "step": 139855 }, { "epoch": 6.53, - "learning_rate": 6.980450986826685e-06, - "loss": 0.0293, + "learning_rate": 1.7000764514065498e-05, + "loss": 0.0059, "step": 139860 }, { "epoch": 6.53, - "learning_rate": 6.979982185551545e-06, - "loss": 0.0274, + "learning_rate": 1.700029644422948e-05, + "loss": 0.0071, "step": 139865 }, { "epoch": 6.53, - "learning_rate": 6.9795133842764064e-06, - "loss": 0.0745, + "learning_rate": 1.699982837439346e-05, + "loss": 0.0858, "step": 139870 }, { "epoch": 6.53, - "learning_rate": 6.979044583001266e-06, - "loss": 0.0671, + "learning_rate": 1.699936030455744e-05, + "loss": 0.0919, "step": 139875 }, { "epoch": 6.53, - "learning_rate": 6.978575781726127e-06, - "loss": 0.0729, + "learning_rate": 1.699889223472142e-05, + "loss": 0.0891, "step": 139880 }, { "epoch": 6.53, - "learning_rate": 6.978106980450987e-06, - "loss": 0.1897, + "learning_rate": 1.69984241648854e-05, + "loss": 0.1415, "step": 139885 }, { "epoch": 6.53, - "learning_rate": 6.977638179175847e-06, - "loss": 0.183, + "learning_rate": 1.699795609504938e-05, + "loss": 0.1194, "step": 139890 }, { "epoch": 6.53, - "learning_rate": 6.977169377900709e-06, - "loss": 0.0088, + "learning_rate": 1.699748802521336e-05, + "loss": 0.0151, "step": 139895 }, { "epoch": 6.53, - "learning_rate": 6.9767005766255694e-06, - "loss": 0.0276, + "learning_rate": 1.6997019955377344e-05, + "loss": 0.0109, "step": 139900 }, { "epoch": 6.53, - "learning_rate": 6.976231775350429e-06, - "loss": 0.0237, + "learning_rate": 1.6996551885541323e-05, + "loss": 0.0188, "step": 139905 }, { "epoch": 6.53, - "learning_rate": 6.975762974075289e-06, - "loss": 0.0376, + "learning_rate": 1.6996083815705303e-05, + "loss": 0.0053, "step": 139910 }, { "epoch": 6.53, - "learning_rate": 6.975294172800151e-06, - "loss": 0.0346, + "learning_rate": 1.6995615745869283e-05, + "loss": 0.0514, "step": 139915 }, { "epoch": 6.53, - "learning_rate": 6.974825371525012e-06, - "loss": 0.1384, + "learning_rate": 1.6995147676033266e-05, + "loss": 0.0595, "step": 139920 }, { "epoch": 6.53, - "learning_rate": 6.974356570249872e-06, - "loss": 0.0573, + "learning_rate": 1.6994679606197246e-05, + "loss": 0.1609, "step": 139925 }, { "epoch": 6.53, - "learning_rate": 6.9738877689747316e-06, - "loss": 0.1743, + "learning_rate": 1.6994211536361226e-05, + "loss": 0.1033, "step": 139930 }, { "epoch": 6.53, - "learning_rate": 6.973418967699592e-06, - "loss": 0.2355, + "learning_rate": 1.6993743466525206e-05, + "loss": 0.0491, "step": 139935 }, { "epoch": 6.53, - "learning_rate": 6.972950166424454e-06, - "loss": 0.1832, + "learning_rate": 1.699327539668919e-05, + "loss": 0.1039, "step": 139940 }, { "epoch": 6.53, - "learning_rate": 6.972481365149314e-06, - "loss": 0.0413, + "learning_rate": 1.6992807326853165e-05, + "loss": 0.0025, "step": 139945 }, { "epoch": 6.53, - "learning_rate": 6.972012563874174e-06, - "loss": 0.0419, + "learning_rate": 1.6992339257017145e-05, + "loss": 0.0176, "step": 139950 }, { "epoch": 6.53, - "learning_rate": 6.971543762599035e-06, - "loss": 0.0141, + "learning_rate": 1.699187118718113e-05, + "loss": 0.5368, "step": 139955 }, { "epoch": 6.53, - "learning_rate": 6.9710749613238946e-06, - "loss": 0.0454, + "learning_rate": 1.6991403117345108e-05, + "loss": 0.0456, "step": 139960 }, { "epoch": 6.53, - "learning_rate": 6.970606160048756e-06, - "loss": 0.0534, + "learning_rate": 1.6990935047509088e-05, + "loss": 0.023, "step": 139965 }, { "epoch": 6.53, - "learning_rate": 6.970137358773616e-06, - "loss": 0.0526, + "learning_rate": 1.6990466977673068e-05, + "loss": 0.0616, "step": 139970 }, { "epoch": 6.53, - "learning_rate": 6.969668557498477e-06, - "loss": 0.1037, + "learning_rate": 1.698999890783705e-05, + "loss": 0.0542, "step": 139975 }, { "epoch": 6.53, - "learning_rate": 6.969199756223337e-06, - "loss": 0.0695, + "learning_rate": 1.698953083800103e-05, + "loss": 0.0774, "step": 139980 }, { "epoch": 6.53, - "learning_rate": 6.9687309549481985e-06, - "loss": 0.2342, + "learning_rate": 1.698906276816501e-05, + "loss": 0.3795, "step": 139985 }, { "epoch": 6.53, - "learning_rate": 6.968262153673058e-06, - "loss": 0.1429, + "learning_rate": 1.698859469832899e-05, + "loss": 0.1854, "step": 139990 }, { "epoch": 6.53, - "learning_rate": 6.967793352397919e-06, - "loss": 0.0407, + "learning_rate": 1.6988126628492974e-05, + "loss": 0.0368, "step": 139995 }, { "epoch": 6.53, - "learning_rate": 6.967324551122779e-06, - "loss": 0.0348, + "learning_rate": 1.6987658558656953e-05, + "loss": 0.0115, "step": 140000 }, { "epoch": 6.53, - "learning_rate": 6.96685574984764e-06, - "loss": 0.0111, + "learning_rate": 1.6987190488820933e-05, + "loss": 0.0098, "step": 140005 }, { "epoch": 6.53, - "learning_rate": 6.966386948572501e-06, - "loss": 0.0143, + "learning_rate": 1.6986722418984913e-05, + "loss": 0.018, "step": 140010 }, { "epoch": 6.53, - "learning_rate": 6.9659181472973615e-06, - "loss": 0.0376, + "learning_rate": 1.6986254349148893e-05, + "loss": 0.0438, "step": 140015 }, { "epoch": 6.53, - "learning_rate": 6.965449346022221e-06, - "loss": 0.0533, + "learning_rate": 1.6985786279312873e-05, + "loss": 0.1173, "step": 140020 }, { "epoch": 6.53, - "learning_rate": 6.964980544747082e-06, - "loss": 0.0727, + "learning_rate": 1.6985318209476853e-05, + "loss": 0.0586, "step": 140025 }, { "epoch": 6.53, - "learning_rate": 6.964511743471943e-06, - "loss": 0.1093, + "learning_rate": 1.6984850139640836e-05, + "loss": 0.0735, "step": 140030 }, { "epoch": 6.53, - "learning_rate": 6.964042942196804e-06, - "loss": 0.106, + "learning_rate": 1.6984382069804816e-05, + "loss": 0.1568, "step": 140035 }, { "epoch": 6.53, - "learning_rate": 6.963574140921664e-06, - "loss": 0.2457, + "learning_rate": 1.6983913999968795e-05, + "loss": 0.1764, "step": 140040 }, { "epoch": 6.53, - "learning_rate": 6.9631053396465244e-06, - "loss": 0.0137, + "learning_rate": 1.6983445930132775e-05, + "loss": 0.0478, "step": 140045 }, { "epoch": 6.53, - "learning_rate": 6.962636538371384e-06, - "loss": 0.002, + "learning_rate": 1.698297786029676e-05, + "loss": 0.0577, "step": 140050 }, { "epoch": 6.54, - "learning_rate": 6.962167737096246e-06, - "loss": 0.0344, + "learning_rate": 1.6982509790460738e-05, + "loss": 0.0032, "step": 140055 }, { "epoch": 6.54, - "learning_rate": 6.961698935821106e-06, - "loss": 0.041, + "learning_rate": 1.6982041720624718e-05, + "loss": 0.0293, "step": 140060 }, { "epoch": 6.54, - "learning_rate": 6.961230134545967e-06, - "loss": 0.0504, + "learning_rate": 1.6981573650788698e-05, + "loss": 0.0191, "step": 140065 }, { "epoch": 6.54, - "learning_rate": 6.960761333270827e-06, - "loss": 0.0562, + "learning_rate": 1.698110558095268e-05, + "loss": 0.0578, "step": 140070 }, { "epoch": 6.54, - "learning_rate": 6.9602925319956874e-06, - "loss": 0.0512, + "learning_rate": 1.6980637511116658e-05, + "loss": 0.1019, "step": 140075 }, { "epoch": 6.54, - "learning_rate": 6.959823730720548e-06, - "loss": 0.0809, + "learning_rate": 1.6980169441280637e-05, + "loss": 0.1263, "step": 140080 }, { "epoch": 6.54, - "learning_rate": 6.959354929445409e-06, - "loss": 0.202, + "learning_rate": 1.697970137144462e-05, + "loss": 0.3102, "step": 140085 }, { "epoch": 6.54, - "learning_rate": 6.958886128170269e-06, - "loss": 0.1936, + "learning_rate": 1.69792333016086e-05, + "loss": 0.1851, "step": 140090 }, { "epoch": 6.54, - "learning_rate": 6.95841732689513e-06, - "loss": 0.0108, + "learning_rate": 1.697876523177258e-05, + "loss": 0.0661, "step": 140095 }, { "epoch": 6.54, - "learning_rate": 6.9579485256199905e-06, - "loss": 0.0418, + "learning_rate": 1.697829716193656e-05, + "loss": 0.0108, "step": 140100 }, { "epoch": 6.54, - "learning_rate": 6.957479724344851e-06, - "loss": 0.0356, + "learning_rate": 1.6977829092100543e-05, + "loss": 0.0247, "step": 140105 }, { "epoch": 6.54, - "learning_rate": 6.957010923069711e-06, - "loss": 0.026, + "learning_rate": 1.6977361022264523e-05, + "loss": 0.0189, "step": 140110 }, { "epoch": 6.54, - "learning_rate": 6.956542121794572e-06, - "loss": 0.0327, + "learning_rate": 1.6976892952428503e-05, + "loss": 0.0476, "step": 140115 }, { "epoch": 6.54, - "learning_rate": 6.956073320519432e-06, - "loss": 0.1002, + "learning_rate": 1.6976424882592483e-05, + "loss": 0.0478, "step": 140120 }, { "epoch": 6.54, - "learning_rate": 6.9556045192442935e-06, - "loss": 0.1014, + "learning_rate": 1.6975956812756466e-05, + "loss": 0.0652, "step": 140125 }, { "epoch": 6.54, - "learning_rate": 6.9551357179691535e-06, - "loss": 0.1442, + "learning_rate": 1.6975488742920446e-05, + "loss": 0.1141, "step": 140130 }, { "epoch": 6.54, - "learning_rate": 6.954666916694014e-06, - "loss": 0.1524, + "learning_rate": 1.6975020673084422e-05, + "loss": 0.139, "step": 140135 }, { "epoch": 6.54, - "learning_rate": 6.954198115418874e-06, - "loss": 0.1921, + "learning_rate": 1.6974552603248405e-05, + "loss": 0.1701, "step": 140140 }, { "epoch": 6.54, - "learning_rate": 6.953729314143734e-06, - "loss": 0.0124, + "learning_rate": 1.6974084533412385e-05, + "loss": 0.0027, "step": 140145 }, { "epoch": 6.54, - "learning_rate": 6.953260512868596e-06, - "loss": 0.0165, + "learning_rate": 1.6973616463576365e-05, + "loss": 0.0259, "step": 140150 }, { "epoch": 6.54, - "learning_rate": 6.9527917115934565e-06, - "loss": 0.0252, + "learning_rate": 1.6973148393740345e-05, + "loss": 0.0169, "step": 140155 }, { "epoch": 6.54, - "learning_rate": 6.9523229103183165e-06, - "loss": 0.0294, + "learning_rate": 1.6972680323904328e-05, + "loss": 0.0274, "step": 140160 }, { "epoch": 6.54, - "learning_rate": 6.951854109043176e-06, - "loss": 0.0302, + "learning_rate": 1.6972212254068308e-05, + "loss": 0.0464, "step": 140165 }, { "epoch": 6.54, - "learning_rate": 6.951385307768038e-06, - "loss": 0.0294, + "learning_rate": 1.6971744184232288e-05, + "loss": 0.0435, "step": 140170 }, { "epoch": 6.54, - "learning_rate": 6.950916506492899e-06, - "loss": 0.1042, + "learning_rate": 1.6971276114396267e-05, + "loss": 0.0531, "step": 140175 }, { "epoch": 6.54, - "learning_rate": 6.950447705217759e-06, - "loss": 0.0744, + "learning_rate": 1.697080804456025e-05, + "loss": 0.1635, "step": 140180 }, { "epoch": 6.54, - "learning_rate": 6.949978903942619e-06, - "loss": 0.2287, + "learning_rate": 1.697033997472423e-05, + "loss": 0.1892, "step": 140185 }, { "epoch": 6.54, - "learning_rate": 6.9495101026674795e-06, - "loss": 0.1557, + "learning_rate": 1.696987190488821e-05, + "loss": 0.3352, "step": 140190 }, { "epoch": 6.54, - "learning_rate": 6.949041301392341e-06, - "loss": 0.021, + "learning_rate": 1.6969403835052193e-05, + "loss": 0.006, "step": 140195 }, { "epoch": 6.54, - "learning_rate": 6.948572500117201e-06, - "loss": 0.0337, + "learning_rate": 1.696893576521617e-05, + "loss": 0.0161, "step": 140200 }, { "epoch": 6.54, - "learning_rate": 6.948103698842061e-06, - "loss": 0.0473, + "learning_rate": 1.696846769538015e-05, + "loss": 0.0231, "step": 140205 }, { "epoch": 6.54, - "learning_rate": 6.947634897566922e-06, - "loss": 0.0579, + "learning_rate": 1.696799962554413e-05, + "loss": 0.0152, "step": 140210 }, { "epoch": 6.54, - "learning_rate": 6.947166096291782e-06, - "loss": 0.0619, + "learning_rate": 1.6967531555708113e-05, + "loss": 0.0712, "step": 140215 }, { "epoch": 6.54, - "learning_rate": 6.946697295016643e-06, - "loss": 0.0289, + "learning_rate": 1.6967063485872093e-05, + "loss": 0.0102, "step": 140220 }, { "epoch": 6.54, - "learning_rate": 6.946228493741503e-06, - "loss": 0.1032, + "learning_rate": 1.6966595416036072e-05, + "loss": 0.0493, "step": 140225 }, { "epoch": 6.54, - "learning_rate": 6.945759692466364e-06, - "loss": 0.1398, + "learning_rate": 1.6966127346200052e-05, + "loss": 0.1159, "step": 140230 }, { "epoch": 6.54, - "learning_rate": 6.945290891191224e-06, - "loss": 0.1475, + "learning_rate": 1.6965659276364035e-05, + "loss": 0.2619, "step": 140235 }, { "epoch": 6.54, - "learning_rate": 6.9448220899160856e-06, - "loss": 0.1634, + "learning_rate": 1.6965191206528015e-05, + "loss": 0.1317, "step": 140240 }, { "epoch": 6.54, - "learning_rate": 6.944353288640946e-06, - "loss": 0.0176, + "learning_rate": 1.6964723136691995e-05, + "loss": 0.0134, "step": 140245 }, { "epoch": 6.54, - "learning_rate": 6.943884487365806e-06, - "loss": 0.0012, + "learning_rate": 1.6964255066855975e-05, + "loss": 0.0142, "step": 140250 }, { "epoch": 6.54, - "learning_rate": 6.943415686090666e-06, - "loss": 0.0236, + "learning_rate": 1.6963786997019958e-05, + "loss": 0.0154, "step": 140255 }, { "epoch": 6.54, - "learning_rate": 6.942946884815527e-06, - "loss": 0.0331, + "learning_rate": 1.6963318927183934e-05, + "loss": 0.0621, "step": 140260 }, { "epoch": 6.54, - "learning_rate": 6.942478083540389e-06, - "loss": 0.0681, + "learning_rate": 1.6962850857347914e-05, + "loss": 0.0225, "step": 140265 }, { "epoch": 6.55, - "learning_rate": 6.9420092822652486e-06, - "loss": 0.0233, + "learning_rate": 1.6962382787511898e-05, + "loss": 0.0755, "step": 140270 }, { "epoch": 6.55, - "learning_rate": 6.9415404809901085e-06, - "loss": 0.0746, + "learning_rate": 1.6961914717675877e-05, + "loss": 0.0417, "step": 140275 }, { "epoch": 6.55, - "learning_rate": 6.941071679714969e-06, - "loss": 0.0781, + "learning_rate": 1.6961446647839857e-05, + "loss": 0.0876, "step": 140280 }, { "epoch": 6.55, - "learning_rate": 6.940602878439829e-06, - "loss": 0.1642, + "learning_rate": 1.6960978578003837e-05, + "loss": 0.1837, "step": 140285 }, { "epoch": 6.55, - "learning_rate": 6.940134077164691e-06, - "loss": 0.1668, + "learning_rate": 1.696051050816782e-05, + "loss": 0.0935, "step": 140290 }, { "epoch": 6.55, - "learning_rate": 6.939665275889551e-06, - "loss": 0.0236, + "learning_rate": 1.69600424383318e-05, + "loss": 0.026, "step": 140295 }, { "epoch": 6.55, - "learning_rate": 6.9391964746144116e-06, - "loss": 0.0163, + "learning_rate": 1.695957436849578e-05, + "loss": 0.014, "step": 140300 }, { "epoch": 6.55, - "learning_rate": 6.9387276733392715e-06, - "loss": 0.0266, + "learning_rate": 1.695910629865976e-05, + "loss": 0.0069, "step": 140305 }, { "epoch": 6.55, - "learning_rate": 6.938258872064133e-06, - "loss": 0.0357, + "learning_rate": 1.6958638228823743e-05, + "loss": 0.0941, "step": 140310 }, { "epoch": 6.55, - "learning_rate": 6.937790070788993e-06, - "loss": 0.0573, + "learning_rate": 1.6958170158987723e-05, + "loss": 0.0568, "step": 140315 }, { "epoch": 6.55, - "learning_rate": 6.937321269513854e-06, - "loss": 0.0594, + "learning_rate": 1.6957702089151702e-05, + "loss": 0.0418, "step": 140320 }, { "epoch": 6.55, - "learning_rate": 6.936852468238714e-06, - "loss": 0.1081, + "learning_rate": 1.6957234019315682e-05, + "loss": 0.1139, "step": 140325 }, { "epoch": 6.55, - "learning_rate": 6.9363836669635745e-06, - "loss": 0.0988, + "learning_rate": 1.6956765949479662e-05, + "loss": 0.12, "step": 140330 }, { "epoch": 6.55, - "learning_rate": 6.935914865688435e-06, - "loss": 0.19, + "learning_rate": 1.6956297879643642e-05, + "loss": 0.0616, "step": 140335 }, { "epoch": 6.55, - "learning_rate": 6.935446064413296e-06, - "loss": 0.1024, + "learning_rate": 1.6955829809807622e-05, + "loss": 0.0992, "step": 140340 }, { "epoch": 6.55, - "learning_rate": 6.934977263138156e-06, - "loss": 0.0111, + "learning_rate": 1.6955361739971605e-05, + "loss": 0.0192, "step": 140345 }, { "epoch": 6.55, - "learning_rate": 6.934508461863017e-06, - "loss": 0.0205, + "learning_rate": 1.6954893670135585e-05, + "loss": 0.0047, "step": 140350 }, { "epoch": 6.55, - "learning_rate": 6.934039660587878e-06, - "loss": 0.0583, + "learning_rate": 1.6954425600299565e-05, + "loss": 0.0391, "step": 140355 }, { "epoch": 6.55, - "learning_rate": 6.933570859312738e-06, - "loss": 0.0196, + "learning_rate": 1.6953957530463544e-05, + "loss": 0.032, "step": 140360 }, { "epoch": 6.55, - "learning_rate": 6.933102058037598e-06, - "loss": 0.0233, + "learning_rate": 1.6953489460627528e-05, + "loss": 0.0341, "step": 140365 }, { "epoch": 6.55, - "learning_rate": 6.932633256762459e-06, - "loss": 0.0327, + "learning_rate": 1.6953021390791507e-05, + "loss": 0.1394, "step": 140370 }, { "epoch": 6.55, - "learning_rate": 6.932164455487319e-06, - "loss": 0.0388, + "learning_rate": 1.6952553320955487e-05, + "loss": 0.0438, "step": 140375 }, { "epoch": 6.55, - "learning_rate": 6.931695654212181e-06, - "loss": 0.1176, + "learning_rate": 1.695208525111947e-05, + "loss": 0.1009, "step": 140380 }, { "epoch": 6.55, - "learning_rate": 6.931226852937041e-06, - "loss": 0.348, + "learning_rate": 1.695161718128345e-05, + "loss": 0.1818, "step": 140385 }, { "epoch": 6.55, - "learning_rate": 6.930758051661901e-06, - "loss": 0.1438, + "learning_rate": 1.6951149111447427e-05, + "loss": 0.1476, "step": 140390 }, { "epoch": 6.55, - "learning_rate": 6.930289250386761e-06, - "loss": 0.011, + "learning_rate": 1.6950681041611407e-05, + "loss": 0.0152, "step": 140395 }, { "epoch": 6.55, - "learning_rate": 6.929820449111621e-06, - "loss": 0.0073, + "learning_rate": 1.695021297177539e-05, + "loss": 0.0219, "step": 140400 }, { "epoch": 6.55, - "learning_rate": 6.929351647836483e-06, - "loss": 0.0292, + "learning_rate": 1.694974490193937e-05, + "loss": 0.0498, "step": 140405 }, { "epoch": 6.55, - "learning_rate": 6.928882846561344e-06, - "loss": 0.0409, + "learning_rate": 1.694927683210335e-05, + "loss": 0.0319, "step": 140410 }, { "epoch": 6.55, - "learning_rate": 6.928414045286204e-06, - "loss": 0.0515, + "learning_rate": 1.694880876226733e-05, + "loss": 0.0534, "step": 140415 }, { "epoch": 6.55, - "learning_rate": 6.927945244011064e-06, - "loss": 0.0666, + "learning_rate": 1.6948340692431312e-05, + "loss": 0.0482, "step": 140420 }, { "epoch": 6.55, - "learning_rate": 6.927476442735925e-06, - "loss": 0.096, + "learning_rate": 1.6947872622595292e-05, + "loss": 0.0776, "step": 140425 }, { "epoch": 6.55, - "learning_rate": 6.927007641460786e-06, - "loss": 0.0837, + "learning_rate": 1.6947404552759272e-05, + "loss": 0.1589, "step": 140430 }, { "epoch": 6.55, - "learning_rate": 6.926538840185646e-06, - "loss": 0.1979, + "learning_rate": 1.6946936482923252e-05, + "loss": 0.2186, "step": 140435 }, { "epoch": 6.55, - "learning_rate": 6.926070038910507e-06, - "loss": 0.1427, + "learning_rate": 1.6946468413087235e-05, + "loss": 0.165, "step": 140440 }, { "epoch": 6.55, - "learning_rate": 6.9256012376353666e-06, - "loss": 0.0016, + "learning_rate": 1.6946000343251215e-05, + "loss": 0.0268, "step": 140445 }, { "epoch": 6.55, - "learning_rate": 6.925132436360228e-06, - "loss": 0.0163, + "learning_rate": 1.694553227341519e-05, + "loss": 0.0032, "step": 140450 }, { "epoch": 6.55, - "learning_rate": 6.924663635085088e-06, - "loss": 0.0336, + "learning_rate": 1.6945064203579174e-05, + "loss": 0.0466, "step": 140455 }, { "epoch": 6.55, - "learning_rate": 6.924194833809949e-06, - "loss": 0.0428, + "learning_rate": 1.6944596133743154e-05, + "loss": 0.028, "step": 140460 }, { "epoch": 6.55, - "learning_rate": 6.923726032534809e-06, - "loss": 0.0449, + "learning_rate": 1.6944128063907134e-05, + "loss": 0.0175, "step": 140465 }, { "epoch": 6.55, - "learning_rate": 6.923257231259669e-06, - "loss": 0.0517, + "learning_rate": 1.6943659994071114e-05, + "loss": 0.1148, "step": 140470 }, { "epoch": 6.55, - "learning_rate": 6.92278842998453e-06, - "loss": 0.1327, + "learning_rate": 1.6943191924235097e-05, + "loss": 0.0501, "step": 140475 }, { "epoch": 6.55, - "learning_rate": 6.922319628709391e-06, - "loss": 0.0909, + "learning_rate": 1.6942723854399077e-05, + "loss": 0.1015, "step": 140480 }, { "epoch": 6.56, - "learning_rate": 6.921850827434251e-06, - "loss": 0.2362, + "learning_rate": 1.6942255784563057e-05, + "loss": 0.299, "step": 140485 }, { "epoch": 6.56, - "learning_rate": 6.921382026159111e-06, - "loss": 0.155, + "learning_rate": 1.6941787714727037e-05, + "loss": 0.1205, "step": 140490 }, { "epoch": 6.56, - "learning_rate": 6.920913224883973e-06, - "loss": 0.0143, + "learning_rate": 1.694131964489102e-05, + "loss": 0.0041, "step": 140495 }, { "epoch": 6.56, - "learning_rate": 6.9204444236088335e-06, - "loss": 0.04, + "learning_rate": 1.6940851575055e-05, + "loss": 0.0146, "step": 140500 }, { "epoch": 6.56, - "learning_rate": 6.919975622333693e-06, - "loss": 0.0021, + "learning_rate": 1.694038350521898e-05, + "loss": 0.1162, "step": 140505 }, { "epoch": 6.56, - "learning_rate": 6.919506821058553e-06, - "loss": 0.0236, + "learning_rate": 1.6939915435382963e-05, + "loss": 0.0183, "step": 140510 }, { "epoch": 6.56, - "learning_rate": 6.919038019783414e-06, - "loss": 0.0299, + "learning_rate": 1.693944736554694e-05, + "loss": 0.0448, "step": 140515 }, { "epoch": 6.56, - "learning_rate": 6.918569218508276e-06, - "loss": 0.0639, + "learning_rate": 1.693897929571092e-05, + "loss": 0.0749, "step": 140520 }, { "epoch": 6.56, - "learning_rate": 6.918100417233136e-06, - "loss": 0.0943, + "learning_rate": 1.69385112258749e-05, + "loss": 0.1054, "step": 140525 }, { "epoch": 6.56, - "learning_rate": 6.917631615957996e-06, - "loss": 0.0538, + "learning_rate": 1.6938043156038882e-05, + "loss": 0.0852, "step": 140530 }, { "epoch": 6.56, - "learning_rate": 6.917162814682856e-06, - "loss": 0.2364, + "learning_rate": 1.6937575086202862e-05, + "loss": 0.45, "step": 140535 }, { "epoch": 6.56, - "learning_rate": 6.916694013407716e-06, - "loss": 0.1981, + "learning_rate": 1.693710701636684e-05, + "loss": 0.198, "step": 140540 }, { "epoch": 6.56, - "learning_rate": 6.916225212132578e-06, - "loss": 0.0049, + "learning_rate": 1.693663894653082e-05, + "loss": 0.0052, "step": 140545 }, { "epoch": 6.56, - "learning_rate": 6.915756410857438e-06, - "loss": 0.0231, + "learning_rate": 1.6936170876694805e-05, + "loss": 0.0143, "step": 140550 }, { "epoch": 6.56, - "learning_rate": 6.915287609582299e-06, - "loss": 0.0363, + "learning_rate": 1.6935702806858784e-05, + "loss": 0.0394, "step": 140555 }, { "epoch": 6.56, - "learning_rate": 6.914818808307159e-06, - "loss": 0.0531, + "learning_rate": 1.6935234737022764e-05, + "loss": 0.0578, "step": 140560 }, { "epoch": 6.56, - "learning_rate": 6.91435000703202e-06, - "loss": 0.0793, + "learning_rate": 1.6934766667186747e-05, + "loss": 0.0278, "step": 140565 }, { "epoch": 6.56, - "learning_rate": 6.91388120575688e-06, - "loss": 0.0844, + "learning_rate": 1.6934298597350727e-05, + "loss": 0.0306, "step": 140570 }, { "epoch": 6.56, - "learning_rate": 6.913412404481741e-06, - "loss": 0.087, + "learning_rate": 1.6933830527514707e-05, + "loss": 0.0489, "step": 140575 }, { "epoch": 6.56, - "learning_rate": 6.912943603206601e-06, - "loss": 0.1244, + "learning_rate": 1.6933362457678683e-05, + "loss": 0.1142, "step": 140580 }, { "epoch": 6.56, - "learning_rate": 6.912474801931462e-06, - "loss": 0.1779, + "learning_rate": 1.6932894387842667e-05, + "loss": 0.1456, "step": 140585 }, { "epoch": 6.56, - "learning_rate": 6.9120060006563224e-06, - "loss": 0.1188, + "learning_rate": 1.6932426318006647e-05, + "loss": 0.1577, "step": 140590 }, { "epoch": 6.56, - "learning_rate": 6.911537199381183e-06, - "loss": 0.0062, + "learning_rate": 1.6931958248170626e-05, + "loss": 0.038, "step": 140595 }, { "epoch": 6.56, - "learning_rate": 6.911068398106043e-06, - "loss": 0.0216, + "learning_rate": 1.6931490178334606e-05, + "loss": 0.0194, "step": 140600 }, { "epoch": 6.56, - "learning_rate": 6.910599596830904e-06, - "loss": 0.0261, + "learning_rate": 1.693102210849859e-05, + "loss": 0.0316, "step": 140605 }, { "epoch": 6.56, - "learning_rate": 6.910130795555764e-06, - "loss": 0.0253, + "learning_rate": 1.693055403866257e-05, + "loss": 0.024, "step": 140610 }, { "epoch": 6.56, - "learning_rate": 6.9096619942806255e-06, - "loss": 0.0181, + "learning_rate": 1.693008596882655e-05, + "loss": 0.0686, "step": 140615 }, { "epoch": 6.56, - "learning_rate": 6.9091931930054854e-06, - "loss": 0.0467, + "learning_rate": 1.6929617898990532e-05, + "loss": 0.0766, "step": 140620 }, { "epoch": 6.56, - "learning_rate": 6.908724391730346e-06, - "loss": 0.0919, + "learning_rate": 1.6929149829154512e-05, + "loss": 0.0948, "step": 140625 }, { "epoch": 6.56, - "learning_rate": 6.908255590455206e-06, - "loss": 0.0227, + "learning_rate": 1.6928681759318492e-05, + "loss": 0.0807, "step": 140630 }, { "epoch": 6.56, - "learning_rate": 6.907786789180068e-06, - "loss": 0.1367, + "learning_rate": 1.692821368948247e-05, + "loss": 0.2538, "step": 140635 }, { "epoch": 6.56, - "learning_rate": 6.907317987904928e-06, - "loss": 0.2475, + "learning_rate": 1.692774561964645e-05, + "loss": 0.097, "step": 140640 }, { "epoch": 6.56, - "learning_rate": 6.9068491866297885e-06, - "loss": 0.0144, + "learning_rate": 1.692727754981043e-05, + "loss": 0.0189, "step": 140645 }, { "epoch": 6.56, - "learning_rate": 6.906380385354648e-06, - "loss": 0.026, + "learning_rate": 1.692680947997441e-05, + "loss": 0.0131, "step": 140650 }, { "epoch": 6.56, - "learning_rate": 6.905911584079509e-06, - "loss": 0.0407, + "learning_rate": 1.692634141013839e-05, + "loss": 0.0324, "step": 140655 }, { "epoch": 6.56, - "learning_rate": 6.90544278280437e-06, - "loss": 0.054, + "learning_rate": 1.6925873340302374e-05, + "loss": 0.0199, "step": 140660 }, { "epoch": 6.56, - "learning_rate": 6.904973981529231e-06, - "loss": 0.1056, + "learning_rate": 1.6925405270466354e-05, + "loss": 0.0297, "step": 140665 }, { "epoch": 6.56, - "learning_rate": 6.904505180254091e-06, - "loss": 0.0657, + "learning_rate": 1.6924937200630334e-05, + "loss": 0.0422, "step": 140670 }, { "epoch": 6.56, - "learning_rate": 6.9040363789789515e-06, - "loss": 0.0679, + "learning_rate": 1.6924469130794314e-05, + "loss": 0.0505, "step": 140675 }, { "epoch": 6.56, - "learning_rate": 6.903567577703812e-06, - "loss": 0.0773, + "learning_rate": 1.6924001060958297e-05, + "loss": 0.0283, "step": 140680 }, { "epoch": 6.56, - "learning_rate": 6.903098776428673e-06, - "loss": 0.1055, + "learning_rate": 1.6923532991122277e-05, + "loss": 0.1574, "step": 140685 }, { "epoch": 6.56, - "learning_rate": 6.902629975153533e-06, - "loss": 0.1081, + "learning_rate": 1.6923064921286256e-05, + "loss": 0.1154, "step": 140690 }, { "epoch": 6.57, - "learning_rate": 6.902161173878394e-06, - "loss": 0.0073, + "learning_rate": 1.692259685145024e-05, + "loss": 0.0158, "step": 140695 }, { "epoch": 6.57, - "learning_rate": 6.901692372603254e-06, - "loss": 0.0083, + "learning_rate": 1.692212878161422e-05, + "loss": 0.0068, "step": 140700 }, { "epoch": 6.57, - "learning_rate": 6.901223571328115e-06, - "loss": 0.0498, + "learning_rate": 1.6921660711778196e-05, + "loss": 0.0094, "step": 140705 }, { "epoch": 6.57, - "learning_rate": 6.900754770052975e-06, - "loss": 0.0632, + "learning_rate": 1.6921192641942176e-05, + "loss": 0.0826, "step": 140710 }, { "epoch": 6.57, - "learning_rate": 6.900285968777836e-06, - "loss": 0.0997, + "learning_rate": 1.692072457210616e-05, + "loss": 0.0608, "step": 140715 }, { "epoch": 6.57, - "learning_rate": 6.899817167502696e-06, - "loss": 0.0704, + "learning_rate": 1.692025650227014e-05, + "loss": 0.0806, "step": 140720 }, { "epoch": 6.57, - "learning_rate": 6.899348366227556e-06, - "loss": 0.101, + "learning_rate": 1.691978843243412e-05, + "loss": 0.0901, "step": 140725 }, { "epoch": 6.57, - "learning_rate": 6.8988795649524175e-06, - "loss": 0.0753, + "learning_rate": 1.69193203625981e-05, + "loss": 0.1816, "step": 140730 }, { "epoch": 6.57, - "learning_rate": 6.898410763677278e-06, - "loss": 0.142, + "learning_rate": 1.691885229276208e-05, + "loss": 0.1907, "step": 140735 }, { "epoch": 6.57, - "learning_rate": 6.897941962402138e-06, - "loss": 0.1943, + "learning_rate": 1.691838422292606e-05, + "loss": 0.2108, "step": 140740 }, { "epoch": 6.57, - "learning_rate": 6.897473161126998e-06, - "loss": 0.0067, + "learning_rate": 1.691791615309004e-05, + "loss": 0.0316, "step": 140745 }, { "epoch": 6.57, - "learning_rate": 6.89700435985186e-06, - "loss": 0.0039, + "learning_rate": 1.6917448083254024e-05, + "loss": 0.0115, "step": 140750 }, { "epoch": 6.57, - "learning_rate": 6.8965355585767206e-06, - "loss": 0.0078, + "learning_rate": 1.6916980013418004e-05, + "loss": 0.0085, "step": 140755 }, { "epoch": 6.57, - "learning_rate": 6.8960667573015805e-06, - "loss": 0.0466, + "learning_rate": 1.6916511943581984e-05, + "loss": 0.0155, "step": 140760 }, { "epoch": 6.57, - "learning_rate": 6.8955979560264404e-06, - "loss": 0.0079, + "learning_rate": 1.6916043873745964e-05, + "loss": 0.0529, "step": 140765 }, { "epoch": 6.57, - "learning_rate": 6.895129154751301e-06, - "loss": 0.0301, + "learning_rate": 1.6915575803909944e-05, + "loss": 0.051, "step": 140770 }, { "epoch": 6.57, - "learning_rate": 6.894660353476163e-06, - "loss": 0.0261, + "learning_rate": 1.6915107734073923e-05, + "loss": 0.0649, "step": 140775 }, { "epoch": 6.57, - "learning_rate": 6.894191552201023e-06, - "loss": 0.0835, + "learning_rate": 1.6914639664237903e-05, + "loss": 0.1289, "step": 140780 }, { "epoch": 6.57, - "learning_rate": 6.893722750925883e-06, - "loss": 0.1444, + "learning_rate": 1.6914171594401883e-05, + "loss": 0.1433, "step": 140785 }, { "epoch": 6.57, - "learning_rate": 6.8932539496507435e-06, - "loss": 0.1654, + "learning_rate": 1.6913703524565866e-05, + "loss": 0.1086, "step": 140790 }, { "epoch": 6.57, - "learning_rate": 6.8927851483756034e-06, - "loss": 0.0207, + "learning_rate": 1.6913235454729846e-05, + "loss": 0.0223, "step": 140795 }, { "epoch": 6.57, - "learning_rate": 6.892316347100465e-06, - "loss": 0.0495, + "learning_rate": 1.6912767384893826e-05, + "loss": 0.0036, "step": 140800 }, { "epoch": 6.57, - "learning_rate": 6.891847545825325e-06, - "loss": 0.0503, + "learning_rate": 1.691229931505781e-05, + "loss": 0.024, "step": 140805 }, { "epoch": 6.57, - "learning_rate": 6.891378744550186e-06, - "loss": 0.0352, + "learning_rate": 1.691183124522179e-05, + "loss": 0.0207, "step": 140810 }, { "epoch": 6.57, - "learning_rate": 6.890909943275046e-06, - "loss": 0.0888, + "learning_rate": 1.691136317538577e-05, + "loss": 0.049, "step": 140815 }, { "epoch": 6.57, - "learning_rate": 6.890441141999907e-06, - "loss": 0.0387, + "learning_rate": 1.691089510554975e-05, + "loss": 0.0482, "step": 140820 }, { "epoch": 6.57, - "learning_rate": 6.889972340724767e-06, - "loss": 0.1524, + "learning_rate": 1.6910427035713732e-05, + "loss": 0.047, "step": 140825 }, { "epoch": 6.57, - "learning_rate": 6.889503539449628e-06, - "loss": 0.0878, + "learning_rate": 1.6909958965877708e-05, + "loss": 0.1231, "step": 140830 }, { "epoch": 6.57, - "learning_rate": 6.889034738174488e-06, - "loss": 0.1294, + "learning_rate": 1.6909490896041688e-05, + "loss": 0.2242, "step": 140835 }, { "epoch": 6.57, - "learning_rate": 6.888565936899349e-06, - "loss": 0.1507, + "learning_rate": 1.6909022826205668e-05, + "loss": 0.2248, "step": 140840 }, { "epoch": 6.57, - "learning_rate": 6.8880971356242095e-06, - "loss": 0.0259, + "learning_rate": 1.690855475636965e-05, + "loss": 0.0197, "step": 140845 }, { "epoch": 6.57, - "learning_rate": 6.88762833434907e-06, - "loss": 0.0114, + "learning_rate": 1.690808668653363e-05, + "loss": 0.0069, "step": 140850 }, { "epoch": 6.57, - "learning_rate": 6.88715953307393e-06, - "loss": 0.0156, + "learning_rate": 1.690761861669761e-05, + "loss": 0.0148, "step": 140855 }, { "epoch": 6.57, - "learning_rate": 6.886690731798791e-06, - "loss": 0.0116, + "learning_rate": 1.690715054686159e-05, + "loss": 0.0616, "step": 140860 }, { "epoch": 6.57, - "learning_rate": 6.886221930523651e-06, - "loss": 0.0467, + "learning_rate": 1.6906682477025574e-05, + "loss": 0.0308, "step": 140865 }, { "epoch": 6.57, - "learning_rate": 6.885753129248513e-06, - "loss": 0.0955, + "learning_rate": 1.6906214407189554e-05, + "loss": 0.0282, "step": 140870 }, { "epoch": 6.57, - "learning_rate": 6.8852843279733725e-06, - "loss": 0.1188, + "learning_rate": 1.6905746337353533e-05, + "loss": 0.1108, "step": 140875 }, { "epoch": 6.57, - "learning_rate": 6.884815526698233e-06, - "loss": 0.1026, + "learning_rate": 1.6905278267517517e-05, + "loss": 0.07, "step": 140880 }, { "epoch": 6.57, - "learning_rate": 6.884346725423093e-06, - "loss": 0.1423, + "learning_rate": 1.6904810197681496e-05, + "loss": 0.2193, "step": 140885 }, { "epoch": 6.57, - "learning_rate": 6.883877924147955e-06, - "loss": 0.1798, + "learning_rate": 1.6904342127845476e-05, + "loss": 0.1228, "step": 140890 }, { "epoch": 6.57, - "learning_rate": 6.883409122872815e-06, - "loss": 0.0228, + "learning_rate": 1.6903874058009453e-05, + "loss": 0.0062, "step": 140895 }, { "epoch": 6.57, - "learning_rate": 6.882940321597676e-06, - "loss": 0.0101, + "learning_rate": 1.6903405988173436e-05, + "loss": 0.0243, "step": 140900 }, { "epoch": 6.57, - "learning_rate": 6.8824715203225355e-06, - "loss": 0.0314, + "learning_rate": 1.6902937918337416e-05, + "loss": 0.0185, "step": 140905 }, { "epoch": 6.58, - "learning_rate": 6.882002719047396e-06, - "loss": 0.0385, + "learning_rate": 1.6902469848501395e-05, + "loss": 0.0337, "step": 140910 }, { "epoch": 6.58, - "learning_rate": 6.881533917772257e-06, - "loss": 0.0382, + "learning_rate": 1.6902001778665375e-05, + "loss": 0.0527, "step": 140915 }, { "epoch": 6.58, - "learning_rate": 6.881065116497118e-06, - "loss": 0.0487, + "learning_rate": 1.690153370882936e-05, + "loss": 0.0898, "step": 140920 }, { "epoch": 6.58, - "learning_rate": 6.880596315221978e-06, - "loss": 0.1082, + "learning_rate": 1.690106563899334e-05, + "loss": 0.0741, "step": 140925 }, { "epoch": 6.58, - "learning_rate": 6.880127513946839e-06, - "loss": 0.115, + "learning_rate": 1.6900597569157318e-05, + "loss": 0.1488, "step": 140930 }, { "epoch": 6.58, - "learning_rate": 6.8796587126716985e-06, - "loss": 0.1703, + "learning_rate": 1.69001294993213e-05, + "loss": 0.1226, "step": 140935 }, { "epoch": 6.58, - "learning_rate": 6.87918991139656e-06, - "loss": 0.098, + "learning_rate": 1.689966142948528e-05, + "loss": 0.1682, "step": 140940 }, { "epoch": 6.58, - "learning_rate": 6.87872111012142e-06, - "loss": 0.0151, + "learning_rate": 1.689919335964926e-05, + "loss": 0.0215, "step": 140945 }, { "epoch": 6.58, - "learning_rate": 6.878252308846281e-06, - "loss": 0.0201, + "learning_rate": 1.689872528981324e-05, + "loss": 0.0187, "step": 140950 }, { "epoch": 6.58, - "learning_rate": 6.877783507571141e-06, - "loss": 0.024, + "learning_rate": 1.689825721997722e-05, + "loss": 0.01, "step": 140955 }, { "epoch": 6.58, - "learning_rate": 6.877314706296002e-06, - "loss": 0.0258, + "learning_rate": 1.68977891501412e-05, + "loss": 0.0498, "step": 140960 }, { "epoch": 6.58, - "learning_rate": 6.876845905020862e-06, - "loss": 0.026, + "learning_rate": 1.689732108030518e-05, + "loss": 0.0492, "step": 140965 }, { "epoch": 6.58, - "learning_rate": 6.876377103745723e-06, - "loss": 0.1006, + "learning_rate": 1.689685301046916e-05, + "loss": 0.03, "step": 140970 }, { "epoch": 6.58, - "learning_rate": 6.875908302470583e-06, - "loss": 0.0712, + "learning_rate": 1.6896384940633143e-05, + "loss": 0.0267, "step": 140975 }, { "epoch": 6.58, - "learning_rate": 6.875439501195443e-06, - "loss": 0.1041, + "learning_rate": 1.6895916870797123e-05, + "loss": 0.0916, "step": 140980 }, { "epoch": 6.58, - "learning_rate": 6.874970699920305e-06, - "loss": 0.1119, + "learning_rate": 1.6895448800961103e-05, + "loss": 0.2104, "step": 140985 }, { "epoch": 6.58, - "learning_rate": 6.874501898645165e-06, - "loss": 0.1361, + "learning_rate": 1.6894980731125086e-05, + "loss": 0.2004, "step": 140990 }, { "epoch": 6.58, - "learning_rate": 6.874033097370025e-06, - "loss": 0.007, + "learning_rate": 1.6894512661289066e-05, + "loss": 0.0022, "step": 140995 }, { "epoch": 6.58, - "learning_rate": 6.873564296094885e-06, - "loss": 0.0256, + "learning_rate": 1.6894044591453046e-05, + "loss": 0.0206, "step": 141000 }, { "epoch": 6.58, - "learning_rate": 6.873095494819746e-06, - "loss": 0.0495, + "learning_rate": 1.6893576521617026e-05, + "loss": 0.0231, "step": 141005 }, { "epoch": 6.58, - "learning_rate": 6.872626693544608e-06, - "loss": 0.0257, + "learning_rate": 1.689310845178101e-05, + "loss": 0.0476, "step": 141010 }, { "epoch": 6.58, - "learning_rate": 6.872157892269468e-06, - "loss": 0.0638, + "learning_rate": 1.689264038194499e-05, + "loss": 0.0397, "step": 141015 }, { "epoch": 6.58, - "learning_rate": 6.8716890909943275e-06, - "loss": 0.1408, + "learning_rate": 1.6892172312108965e-05, + "loss": 0.0916, "step": 141020 }, { "epoch": 6.58, - "learning_rate": 6.871220289719188e-06, - "loss": 0.0269, + "learning_rate": 1.6891704242272945e-05, + "loss": 0.0685, "step": 141025 }, { "epoch": 6.58, - "learning_rate": 6.87075148844405e-06, - "loss": 0.1583, + "learning_rate": 1.6891236172436928e-05, + "loss": 0.1603, "step": 141030 }, { "epoch": 6.58, - "learning_rate": 6.87028268716891e-06, - "loss": 0.1063, + "learning_rate": 1.6890768102600908e-05, + "loss": 0.1495, "step": 141035 }, { "epoch": 6.58, - "learning_rate": 6.86981388589377e-06, - "loss": 0.0864, + "learning_rate": 1.6890300032764888e-05, + "loss": 0.1551, "step": 141040 }, { "epoch": 6.58, - "learning_rate": 6.869345084618631e-06, - "loss": 0.0131, + "learning_rate": 1.6889831962928868e-05, + "loss": 0.0511, "step": 141045 }, { "epoch": 6.58, - "learning_rate": 6.8688762833434905e-06, - "loss": 0.004, + "learning_rate": 1.688936389309285e-05, + "loss": 0.032, "step": 141050 }, { "epoch": 6.58, - "learning_rate": 6.868407482068352e-06, - "loss": 0.017, + "learning_rate": 1.688889582325683e-05, + "loss": 0.0037, "step": 141055 }, { "epoch": 6.58, - "learning_rate": 6.867938680793212e-06, - "loss": 0.0414, + "learning_rate": 1.688842775342081e-05, + "loss": 0.0233, "step": 141060 }, { "epoch": 6.58, - "learning_rate": 6.867469879518073e-06, - "loss": 0.1521, + "learning_rate": 1.6887959683584794e-05, + "loss": 0.0206, "step": 141065 }, { "epoch": 6.58, - "learning_rate": 6.867001078242933e-06, - "loss": 0.1443, + "learning_rate": 1.6887491613748773e-05, + "loss": 0.0371, "step": 141070 }, { "epoch": 6.58, - "learning_rate": 6.8665322769677944e-06, - "loss": 0.041, + "learning_rate": 1.6887023543912753e-05, + "loss": 0.0208, "step": 141075 }, { "epoch": 6.58, - "learning_rate": 6.866063475692654e-06, - "loss": 0.0816, + "learning_rate": 1.6886555474076733e-05, + "loss": 0.1244, "step": 141080 }, { "epoch": 6.58, - "learning_rate": 6.865594674417515e-06, - "loss": 0.1016, + "learning_rate": 1.6886087404240713e-05, + "loss": 0.3212, "step": 141085 }, { "epoch": 6.58, - "learning_rate": 6.865125873142375e-06, - "loss": 0.2299, + "learning_rate": 1.6885619334404693e-05, + "loss": 0.1557, "step": 141090 }, { "epoch": 6.58, - "learning_rate": 6.864657071867236e-06, - "loss": 0.0713, + "learning_rate": 1.6885151264568672e-05, + "loss": 0.0036, "step": 141095 }, { "epoch": 6.58, - "learning_rate": 6.864188270592097e-06, - "loss": 0.0403, + "learning_rate": 1.6884683194732652e-05, + "loss": 0.033, "step": 141100 }, { "epoch": 6.58, - "learning_rate": 6.8637194693169574e-06, - "loss": 0.0389, + "learning_rate": 1.6884215124896635e-05, + "loss": 0.0151, "step": 141105 }, { "epoch": 6.58, - "learning_rate": 6.863250668041817e-06, - "loss": 0.0277, + "learning_rate": 1.6883747055060615e-05, + "loss": 0.0261, "step": 141110 }, { "epoch": 6.58, - "learning_rate": 6.862781866766678e-06, - "loss": 0.0619, + "learning_rate": 1.6883278985224595e-05, + "loss": 0.0313, "step": 141115 }, { "epoch": 6.58, - "learning_rate": 6.862313065491538e-06, - "loss": 0.1039, + "learning_rate": 1.688281091538858e-05, + "loss": 0.1316, "step": 141120 }, { "epoch": 6.59, - "learning_rate": 6.8618442642164e-06, - "loss": 0.0634, + "learning_rate": 1.6882342845552558e-05, + "loss": 0.0648, "step": 141125 }, { "epoch": 6.59, - "learning_rate": 6.86137546294126e-06, - "loss": 0.1441, + "learning_rate": 1.6881874775716538e-05, + "loss": 0.1296, "step": 141130 }, { "epoch": 6.59, - "learning_rate": 6.86090666166612e-06, - "loss": 0.2388, + "learning_rate": 1.6881406705880518e-05, + "loss": 0.2123, "step": 141135 }, { "epoch": 6.59, - "learning_rate": 6.86043786039098e-06, - "loss": 0.1488, + "learning_rate": 1.68809386360445e-05, + "loss": 0.1295, "step": 141140 }, { "epoch": 6.59, - "learning_rate": 6.859969059115842e-06, - "loss": 0.0187, + "learning_rate": 1.6880470566208477e-05, + "loss": 0.0022, "step": 141145 }, { "epoch": 6.59, - "learning_rate": 6.859500257840702e-06, - "loss": 0.0383, + "learning_rate": 1.6880002496372457e-05, + "loss": 0.0294, "step": 141150 }, { "epoch": 6.59, - "learning_rate": 6.859031456565563e-06, - "loss": 0.008, + "learning_rate": 1.6879534426536437e-05, + "loss": 0.0345, "step": 141155 }, { "epoch": 6.59, - "learning_rate": 6.858562655290423e-06, - "loss": 0.0426, + "learning_rate": 1.687906635670042e-05, + "loss": 0.0675, "step": 141160 }, { "epoch": 6.59, - "learning_rate": 6.858093854015283e-06, - "loss": 0.0613, + "learning_rate": 1.68785982868644e-05, + "loss": 0.0307, "step": 141165 }, { "epoch": 6.59, - "learning_rate": 6.857625052740144e-06, - "loss": 0.1189, + "learning_rate": 1.687813021702838e-05, + "loss": 0.0133, "step": 141170 }, { "epoch": 6.59, - "learning_rate": 6.857156251465005e-06, - "loss": 0.0841, + "learning_rate": 1.6877662147192363e-05, + "loss": 0.0541, "step": 141175 }, { "epoch": 6.59, - "learning_rate": 6.856687450189865e-06, - "loss": 0.1386, + "learning_rate": 1.6877194077356343e-05, + "loss": 0.0811, "step": 141180 }, { "epoch": 6.59, - "learning_rate": 6.856218648914726e-06, - "loss": 0.2131, + "learning_rate": 1.6876726007520323e-05, + "loss": 0.1571, "step": 141185 }, { "epoch": 6.59, - "learning_rate": 6.855749847639586e-06, - "loss": 0.1622, + "learning_rate": 1.6876257937684303e-05, + "loss": 0.1232, "step": 141190 }, { "epoch": 6.59, - "learning_rate": 6.855281046364447e-06, - "loss": 0.0237, + "learning_rate": 1.6875789867848286e-05, + "loss": 0.0097, "step": 141195 }, { "epoch": 6.59, - "learning_rate": 6.854812245089307e-06, - "loss": 0.0291, + "learning_rate": 1.6875321798012266e-05, + "loss": 0.0456, "step": 141200 }, { "epoch": 6.59, - "learning_rate": 6.854343443814168e-06, - "loss": 0.0088, + "learning_rate": 1.6874853728176245e-05, + "loss": 0.0144, "step": 141205 }, { "epoch": 6.59, - "learning_rate": 6.853874642539028e-06, - "loss": 0.069, + "learning_rate": 1.6874385658340222e-05, + "loss": 0.0252, "step": 141210 }, { "epoch": 6.59, - "learning_rate": 6.8534058412638895e-06, - "loss": 0.1078, + "learning_rate": 1.6873917588504205e-05, + "loss": 0.0391, "step": 141215 }, { "epoch": 6.59, - "learning_rate": 6.8529370399887495e-06, - "loss": 0.0551, + "learning_rate": 1.6873449518668185e-05, + "loss": 0.0515, "step": 141220 }, { "epoch": 6.59, - "learning_rate": 6.85246823871361e-06, - "loss": 0.0319, + "learning_rate": 1.6872981448832165e-05, + "loss": 0.0405, "step": 141225 }, { "epoch": 6.59, - "learning_rate": 6.85199943743847e-06, - "loss": 0.1135, + "learning_rate": 1.6872513378996144e-05, + "loss": 0.1083, "step": 141230 }, { "epoch": 6.59, - "learning_rate": 6.85153063616333e-06, - "loss": 0.2742, + "learning_rate": 1.6872045309160128e-05, + "loss": 0.153, "step": 141235 }, { "epoch": 6.59, - "learning_rate": 6.851061834888192e-06, - "loss": 0.0757, + "learning_rate": 1.6871577239324108e-05, + "loss": 0.1793, "step": 141240 }, { "epoch": 6.59, - "learning_rate": 6.8505930336130525e-06, - "loss": 0.0083, + "learning_rate": 1.6871109169488087e-05, + "loss": 0.0137, "step": 141245 }, { "epoch": 6.59, - "learning_rate": 6.8501242323379124e-06, - "loss": 0.0494, + "learning_rate": 1.687064109965207e-05, + "loss": 0.0039, "step": 141250 }, { "epoch": 6.59, - "learning_rate": 6.849655431062772e-06, - "loss": 0.0512, + "learning_rate": 1.687017302981605e-05, + "loss": 0.0325, "step": 141255 }, { "epoch": 6.59, - "learning_rate": 6.849186629787633e-06, - "loss": 0.0149, + "learning_rate": 1.686970495998003e-05, + "loss": 0.085, "step": 141260 }, { "epoch": 6.59, - "learning_rate": 6.848717828512495e-06, - "loss": 0.0207, + "learning_rate": 1.686923689014401e-05, + "loss": 0.0477, "step": 141265 }, { "epoch": 6.59, - "learning_rate": 6.848249027237355e-06, - "loss": 0.039, + "learning_rate": 1.6868768820307993e-05, + "loss": 0.0215, "step": 141270 }, { "epoch": 6.59, - "learning_rate": 6.847780225962215e-06, - "loss": 0.0534, + "learning_rate": 1.686830075047197e-05, + "loss": 0.0397, "step": 141275 }, { "epoch": 6.59, - "learning_rate": 6.8473114246870754e-06, - "loss": 0.1372, + "learning_rate": 1.686783268063595e-05, + "loss": 0.0949, "step": 141280 }, { "epoch": 6.59, - "learning_rate": 6.846842623411937e-06, - "loss": 0.1781, + "learning_rate": 1.686736461079993e-05, + "loss": 0.196, "step": 141285 }, { "epoch": 6.59, - "learning_rate": 6.846373822136797e-06, - "loss": 0.1698, + "learning_rate": 1.6866896540963912e-05, + "loss": 0.171, "step": 141290 }, { "epoch": 6.59, - "learning_rate": 6.845905020861657e-06, - "loss": 0.0161, + "learning_rate": 1.6866428471127892e-05, + "loss": 0.0003, "step": 141295 }, { "epoch": 6.59, - "learning_rate": 6.845436219586518e-06, - "loss": 0.0215, + "learning_rate": 1.6865960401291872e-05, + "loss": 0.0134, "step": 141300 }, { "epoch": 6.59, - "learning_rate": 6.844967418311378e-06, - "loss": 0.0379, + "learning_rate": 1.6865492331455855e-05, + "loss": 0.0033, "step": 141305 }, { "epoch": 6.59, - "learning_rate": 6.844498617036239e-06, - "loss": 0.0188, + "learning_rate": 1.6865024261619835e-05, + "loss": 0.0562, "step": 141310 }, { "epoch": 6.59, - "learning_rate": 6.844029815761099e-06, - "loss": 0.038, + "learning_rate": 1.6864556191783815e-05, + "loss": 0.12, "step": 141315 }, { "epoch": 6.59, - "learning_rate": 6.84356101448596e-06, - "loss": 0.0214, + "learning_rate": 1.6864088121947795e-05, + "loss": 0.0606, "step": 141320 }, { "epoch": 6.59, - "learning_rate": 6.84309221321082e-06, - "loss": 0.0483, + "learning_rate": 1.6863620052111778e-05, + "loss": 0.04, "step": 141325 }, { "epoch": 6.59, - "learning_rate": 6.842623411935681e-06, - "loss": 0.1985, + "learning_rate": 1.6863151982275758e-05, + "loss": 0.1003, "step": 141330 }, { "epoch": 6.59, - "learning_rate": 6.8421546106605415e-06, - "loss": 0.1717, + "learning_rate": 1.6862683912439734e-05, + "loss": 0.2694, "step": 141335 }, { "epoch": 6.6, - "learning_rate": 6.841685809385402e-06, - "loss": 0.1547, + "learning_rate": 1.6862215842603714e-05, + "loss": 0.19, "step": 141340 }, { "epoch": 6.6, - "learning_rate": 6.841217008110262e-06, - "loss": 0.0028, + "learning_rate": 1.6861747772767697e-05, + "loss": 0.0162, "step": 141345 }, { "epoch": 6.6, - "learning_rate": 6.840748206835123e-06, - "loss": 0.0189, + "learning_rate": 1.6861279702931677e-05, + "loss": 0.0144, "step": 141350 }, { "epoch": 6.6, - "learning_rate": 6.840279405559984e-06, - "loss": 0.0466, + "learning_rate": 1.6860811633095657e-05, + "loss": 0.0187, "step": 141355 }, { "epoch": 6.6, - "learning_rate": 6.8398106042848445e-06, - "loss": 0.0335, + "learning_rate": 1.686034356325964e-05, + "loss": 0.0103, "step": 141360 }, { "epoch": 6.6, - "learning_rate": 6.8393418030097045e-06, - "loss": 0.011, + "learning_rate": 1.685987549342362e-05, + "loss": 0.0382, "step": 141365 }, { "epoch": 6.6, - "learning_rate": 6.838873001734565e-06, - "loss": 0.1151, + "learning_rate": 1.68594074235876e-05, + "loss": 0.0232, "step": 141370 }, { "epoch": 6.6, - "learning_rate": 6.838404200459425e-06, - "loss": 0.0259, + "learning_rate": 1.685893935375158e-05, + "loss": 0.1039, "step": 141375 }, { "epoch": 6.6, - "learning_rate": 6.837935399184287e-06, - "loss": 0.1485, + "learning_rate": 1.6858471283915563e-05, + "loss": 0.1111, "step": 141380 }, { "epoch": 6.6, - "learning_rate": 6.837466597909147e-06, - "loss": 0.1155, + "learning_rate": 1.6858003214079543e-05, + "loss": 0.166, "step": 141385 }, { "epoch": 6.6, - "learning_rate": 6.8369977966340075e-06, - "loss": 0.1875, + "learning_rate": 1.6857535144243522e-05, + "loss": 0.1334, "step": 141390 }, { "epoch": 6.6, - "learning_rate": 6.8365289953588675e-06, - "loss": 0.0169, + "learning_rate": 1.6857067074407502e-05, + "loss": 0.0112, "step": 141395 }, { "epoch": 6.6, - "learning_rate": 6.836060194083729e-06, - "loss": 0.0111, + "learning_rate": 1.6856599004571482e-05, + "loss": 0.0282, "step": 141400 }, { "epoch": 6.6, - "learning_rate": 6.835591392808589e-06, - "loss": 0.0102, + "learning_rate": 1.6856130934735462e-05, + "loss": 0.0574, "step": 141405 }, { "epoch": 6.6, - "learning_rate": 6.83512259153345e-06, - "loss": 0.0439, + "learning_rate": 1.685566286489944e-05, + "loss": 0.0071, "step": 141410 }, { "epoch": 6.6, - "learning_rate": 6.83465379025831e-06, - "loss": 0.0367, + "learning_rate": 1.6855194795063425e-05, + "loss": 0.0491, "step": 141415 }, { "epoch": 6.6, - "learning_rate": 6.8341849889831705e-06, - "loss": 0.0847, + "learning_rate": 1.6854726725227405e-05, + "loss": 0.0539, "step": 141420 }, { "epoch": 6.6, - "learning_rate": 6.833716187708031e-06, - "loss": 0.0746, + "learning_rate": 1.6854258655391384e-05, + "loss": 0.0964, "step": 141425 }, { "epoch": 6.6, - "learning_rate": 6.833247386432892e-06, - "loss": 0.0994, + "learning_rate": 1.6853790585555364e-05, + "loss": 0.1014, "step": 141430 }, { "epoch": 6.6, - "learning_rate": 6.832778585157752e-06, - "loss": 0.0863, + "learning_rate": 1.6853322515719347e-05, + "loss": 0.2258, "step": 141435 }, { "epoch": 6.6, - "learning_rate": 6.832309783882613e-06, - "loss": 0.1369, + "learning_rate": 1.6852854445883327e-05, + "loss": 0.1782, "step": 141440 }, { "epoch": 6.6, - "learning_rate": 6.831840982607473e-06, - "loss": 0.0043, + "learning_rate": 1.6852386376047307e-05, + "loss": 0.0025, "step": 141445 }, { "epoch": 6.6, - "learning_rate": 6.831372181332334e-06, - "loss": 0.0193, + "learning_rate": 1.6851918306211287e-05, + "loss": 0.0213, "step": 141450 }, { "epoch": 6.6, - "learning_rate": 6.830903380057194e-06, - "loss": 0.0934, + "learning_rate": 1.685145023637527e-05, + "loss": 0.041, "step": 141455 }, { "epoch": 6.6, - "learning_rate": 6.830434578782055e-06, - "loss": 0.0481, + "learning_rate": 1.685098216653925e-05, + "loss": 0.0675, "step": 141460 }, { "epoch": 6.6, - "learning_rate": 6.829965777506915e-06, - "loss": 0.0452, + "learning_rate": 1.6850514096703226e-05, + "loss": 0.0578, "step": 141465 }, { "epoch": 6.6, - "learning_rate": 6.829496976231777e-06, - "loss": 0.04, + "learning_rate": 1.6850046026867206e-05, + "loss": 0.0341, "step": 141470 }, { "epoch": 6.6, - "learning_rate": 6.8290281749566366e-06, - "loss": 0.0775, + "learning_rate": 1.684957795703119e-05, + "loss": 0.144, "step": 141475 }, { "epoch": 6.6, - "learning_rate": 6.828559373681497e-06, - "loss": 0.0606, + "learning_rate": 1.684910988719517e-05, + "loss": 0.1261, "step": 141480 }, { "epoch": 6.6, - "learning_rate": 6.828090572406357e-06, - "loss": 0.2203, + "learning_rate": 1.684864181735915e-05, + "loss": 0.2158, "step": 141485 }, { "epoch": 6.6, - "learning_rate": 6.827621771131217e-06, - "loss": 0.1234, + "learning_rate": 1.6848173747523132e-05, + "loss": 0.1308, "step": 141490 }, { "epoch": 6.6, - "learning_rate": 6.827152969856079e-06, - "loss": 0.0227, + "learning_rate": 1.6847705677687112e-05, + "loss": 0.0073, "step": 141495 }, { "epoch": 6.6, - "learning_rate": 6.82668416858094e-06, - "loss": 0.0122, + "learning_rate": 1.6847237607851092e-05, + "loss": 0.0222, "step": 141500 }, { "epoch": 6.6, - "learning_rate": 6.8262153673057996e-06, - "loss": 0.034, + "learning_rate": 1.6846769538015072e-05, + "loss": 0.0153, "step": 141505 }, { "epoch": 6.6, - "learning_rate": 6.8257465660306595e-06, - "loss": 0.028, + "learning_rate": 1.6846301468179055e-05, + "loss": 0.0839, "step": 141510 }, { "epoch": 6.6, - "learning_rate": 6.82527776475552e-06, - "loss": 0.0254, + "learning_rate": 1.6845833398343035e-05, + "loss": 0.0436, "step": 141515 }, { "epoch": 6.6, - "learning_rate": 6.824808963480382e-06, - "loss": 0.1327, + "learning_rate": 1.6845365328507015e-05, + "loss": 0.0781, "step": 141520 }, { "epoch": 6.6, - "learning_rate": 6.824340162205242e-06, - "loss": 0.0881, + "learning_rate": 1.684489725867099e-05, + "loss": 0.0631, "step": 141525 }, { "epoch": 6.6, - "learning_rate": 6.823871360930102e-06, - "loss": 0.1132, + "learning_rate": 1.6844429188834974e-05, + "loss": 0.0972, "step": 141530 }, { "epoch": 6.6, - "learning_rate": 6.8234025596549625e-06, - "loss": 0.1427, + "learning_rate": 1.6843961118998954e-05, + "loss": 0.1924, "step": 141535 }, { "epoch": 6.6, - "learning_rate": 6.822933758379824e-06, - "loss": 0.1341, + "learning_rate": 1.6843493049162934e-05, + "loss": 0.1671, "step": 141540 }, { "epoch": 6.6, - "learning_rate": 6.822464957104684e-06, - "loss": 0.0338, + "learning_rate": 1.6843024979326917e-05, + "loss": 0.0087, "step": 141545 }, { "epoch": 6.6, - "learning_rate": 6.821996155829544e-06, - "loss": 0.0153, + "learning_rate": 1.6842556909490897e-05, + "loss": 0.0254, "step": 141550 }, { "epoch": 6.61, - "learning_rate": 6.821527354554405e-06, - "loss": 0.0175, + "learning_rate": 1.6842088839654877e-05, + "loss": 0.036, "step": 141555 }, { "epoch": 6.61, - "learning_rate": 6.821058553279265e-06, - "loss": 0.0434, + "learning_rate": 1.6841620769818856e-05, + "loss": 0.029, "step": 141560 }, { "epoch": 6.61, - "learning_rate": 6.820589752004126e-06, - "loss": 0.0299, + "learning_rate": 1.684115269998284e-05, + "loss": 0.0473, "step": 141565 }, { "epoch": 6.61, - "learning_rate": 6.820120950728986e-06, - "loss": 0.054, + "learning_rate": 1.684068463014682e-05, + "loss": 0.0401, "step": 141570 }, { "epoch": 6.61, - "learning_rate": 6.819652149453847e-06, - "loss": 0.0377, + "learning_rate": 1.68402165603108e-05, + "loss": 0.0747, "step": 141575 }, { "epoch": 6.61, - "learning_rate": 6.819183348178707e-06, - "loss": 0.102, + "learning_rate": 1.683974849047478e-05, + "loss": 0.1263, "step": 141580 }, { "epoch": 6.61, - "learning_rate": 6.818714546903568e-06, - "loss": 0.1639, + "learning_rate": 1.6839280420638762e-05, + "loss": 0.1226, "step": 141585 }, { "epoch": 6.61, - "learning_rate": 6.818245745628429e-06, - "loss": 0.0837, + "learning_rate": 1.683881235080274e-05, + "loss": 0.1055, "step": 141590 }, { "epoch": 6.61, - "learning_rate": 6.817776944353289e-06, - "loss": 0.0132, + "learning_rate": 1.683834428096672e-05, + "loss": 0.0141, "step": 141595 }, { "epoch": 6.61, - "learning_rate": 6.817308143078149e-06, - "loss": 0.0386, + "learning_rate": 1.6837876211130702e-05, + "loss": 0.007, "step": 141600 }, { "epoch": 6.61, - "learning_rate": 6.81683934180301e-06, - "loss": 0.0177, + "learning_rate": 1.683740814129468e-05, + "loss": 0.0131, "step": 141605 }, { "epoch": 6.61, - "learning_rate": 6.816370540527871e-06, - "loss": 0.0141, + "learning_rate": 1.683694007145866e-05, + "loss": 0.0528, "step": 141610 }, { "epoch": 6.61, - "learning_rate": 6.815901739252732e-06, - "loss": 0.0253, + "learning_rate": 1.683647200162264e-05, + "loss": 0.0136, "step": 141615 }, { "epoch": 6.61, - "learning_rate": 6.815432937977592e-06, - "loss": 0.022, + "learning_rate": 1.6836003931786624e-05, + "loss": 0.0479, "step": 141620 }, { "epoch": 6.61, - "learning_rate": 6.814964136702452e-06, - "loss": 0.0833, + "learning_rate": 1.6835535861950604e-05, + "loss": 0.0862, "step": 141625 }, { "epoch": 6.61, - "learning_rate": 6.814495335427312e-06, - "loss": 0.1816, + "learning_rate": 1.6835067792114584e-05, + "loss": 0.1611, "step": 141630 }, { "epoch": 6.61, - "learning_rate": 6.814026534152174e-06, - "loss": 0.2375, + "learning_rate": 1.6834599722278564e-05, + "loss": 0.2656, "step": 141635 }, { "epoch": 6.61, - "learning_rate": 6.813557732877034e-06, - "loss": 0.1512, + "learning_rate": 1.6834131652442547e-05, + "loss": 0.0708, "step": 141640 }, { "epoch": 6.61, - "learning_rate": 6.813088931601895e-06, - "loss": 0.0136, + "learning_rate": 1.6833663582606527e-05, + "loss": 0.0516, "step": 141645 }, { "epoch": 6.61, - "learning_rate": 6.8126201303267546e-06, - "loss": 0.0112, + "learning_rate": 1.6833195512770503e-05, + "loss": 0.0065, "step": 141650 }, { "epoch": 6.61, - "learning_rate": 6.812151329051615e-06, - "loss": 0.0029, + "learning_rate": 1.6832727442934483e-05, + "loss": 0.0096, "step": 141655 }, { "epoch": 6.61, - "learning_rate": 6.811682527776476e-06, - "loss": 0.026, + "learning_rate": 1.6832259373098466e-05, + "loss": 0.0796, "step": 141660 }, { "epoch": 6.61, - "learning_rate": 6.811213726501337e-06, - "loss": 0.0254, + "learning_rate": 1.6831791303262446e-05, + "loss": 0.0304, "step": 141665 }, { "epoch": 6.61, - "learning_rate": 6.810744925226197e-06, - "loss": 0.0129, + "learning_rate": 1.6831323233426426e-05, + "loss": 0.0261, "step": 141670 }, { "epoch": 6.61, - "learning_rate": 6.810276123951058e-06, - "loss": 0.0788, + "learning_rate": 1.683085516359041e-05, + "loss": 0.0771, "step": 141675 }, { "epoch": 6.61, - "learning_rate": 6.809807322675918e-06, - "loss": 0.2217, + "learning_rate": 1.683038709375439e-05, + "loss": 0.0923, "step": 141680 }, { "epoch": 6.61, - "learning_rate": 6.809338521400779e-06, - "loss": 0.158, + "learning_rate": 1.682991902391837e-05, + "loss": 0.1942, "step": 141685 }, { "epoch": 6.61, - "learning_rate": 6.808869720125639e-06, - "loss": 0.1536, + "learning_rate": 1.682945095408235e-05, + "loss": 0.359, "step": 141690 }, { "epoch": 6.61, - "learning_rate": 6.8084009188505e-06, - "loss": 0.0373, + "learning_rate": 1.6828982884246332e-05, + "loss": 0.0003, "step": 141695 }, { "epoch": 6.61, - "learning_rate": 6.80793211757536e-06, - "loss": 0.0171, + "learning_rate": 1.6828514814410312e-05, + "loss": 0.0197, "step": 141700 }, { "epoch": 6.61, - "learning_rate": 6.8074633163002215e-06, - "loss": 0.0223, + "learning_rate": 1.682804674457429e-05, + "loss": 0.0406, "step": 141705 }, { "epoch": 6.61, - "learning_rate": 6.806994515025081e-06, - "loss": 0.0449, + "learning_rate": 1.682757867473827e-05, + "loss": 0.0226, "step": 141710 }, { "epoch": 6.61, - "learning_rate": 6.806525713749942e-06, - "loss": 0.074, + "learning_rate": 1.682711060490225e-05, + "loss": 0.0436, "step": 141715 }, { "epoch": 6.61, - "learning_rate": 6.806056912474802e-06, - "loss": 0.1981, + "learning_rate": 1.682664253506623e-05, + "loss": 0.0655, "step": 141720 }, { "epoch": 6.61, - "learning_rate": 6.805588111199664e-06, - "loss": 0.0952, + "learning_rate": 1.682617446523021e-05, + "loss": 0.0593, "step": 141725 }, { "epoch": 6.61, - "learning_rate": 6.805119309924524e-06, - "loss": 0.166, + "learning_rate": 1.6825706395394194e-05, + "loss": 0.0697, "step": 141730 }, { "epoch": 6.61, - "learning_rate": 6.8046505086493845e-06, - "loss": 0.161, + "learning_rate": 1.6825238325558174e-05, + "loss": 0.1841, "step": 141735 }, { "epoch": 6.61, - "learning_rate": 6.804181707374244e-06, - "loss": 0.2472, + "learning_rate": 1.6824770255722154e-05, + "loss": 0.1349, "step": 141740 }, { "epoch": 6.61, - "learning_rate": 6.803712906099104e-06, - "loss": 0.0062, + "learning_rate": 1.6824302185886133e-05, + "loss": 0.0567, "step": 141745 }, { "epoch": 6.61, - "learning_rate": 6.803244104823966e-06, - "loss": 0.0215, + "learning_rate": 1.6823834116050117e-05, + "loss": 0.0172, "step": 141750 }, { "epoch": 6.61, - "learning_rate": 6.802775303548827e-06, - "loss": 0.0218, + "learning_rate": 1.6823366046214096e-05, + "loss": 0.0657, "step": 141755 }, { "epoch": 6.61, - "learning_rate": 6.802306502273687e-06, - "loss": 0.0217, + "learning_rate": 1.6822897976378076e-05, + "loss": 0.0294, "step": 141760 }, { "epoch": 6.61, - "learning_rate": 6.801837700998547e-06, - "loss": 0.033, + "learning_rate": 1.6822429906542056e-05, + "loss": 0.0643, "step": 141765 }, { "epoch": 6.62, - "learning_rate": 6.801368899723407e-06, - "loss": 0.1064, + "learning_rate": 1.682196183670604e-05, + "loss": 0.0575, "step": 141770 }, { "epoch": 6.62, - "learning_rate": 6.800900098448269e-06, - "loss": 0.0672, + "learning_rate": 1.682149376687002e-05, + "loss": 0.0563, "step": 141775 }, { "epoch": 6.62, - "learning_rate": 6.800431297173129e-06, - "loss": 0.101, + "learning_rate": 1.6821025697033996e-05, + "loss": 0.0985, "step": 141780 }, { "epoch": 6.62, - "learning_rate": 6.799962495897989e-06, - "loss": 0.1305, + "learning_rate": 1.682055762719798e-05, + "loss": 0.1646, "step": 141785 }, { "epoch": 6.62, - "learning_rate": 6.79949369462285e-06, - "loss": 0.1373, + "learning_rate": 1.682008955736196e-05, + "loss": 0.085, "step": 141790 }, { "epoch": 6.62, - "learning_rate": 6.799024893347711e-06, - "loss": 0.0085, + "learning_rate": 1.681962148752594e-05, + "loss": 0.0107, "step": 141795 }, { "epoch": 6.62, - "learning_rate": 6.798556092072571e-06, - "loss": 0.0193, + "learning_rate": 1.6819153417689918e-05, + "loss": 0.0517, "step": 141800 }, { "epoch": 6.62, - "learning_rate": 6.798087290797431e-06, - "loss": 0.0121, + "learning_rate": 1.68186853478539e-05, + "loss": 0.0789, "step": 141805 }, { "epoch": 6.62, - "learning_rate": 6.797618489522292e-06, - "loss": 0.027, + "learning_rate": 1.681821727801788e-05, + "loss": 0.0353, "step": 141810 }, { "epoch": 6.62, - "learning_rate": 6.797149688247152e-06, - "loss": 0.0609, + "learning_rate": 1.681774920818186e-05, + "loss": 0.0401, "step": 141815 }, { "epoch": 6.62, - "learning_rate": 6.7966808869720135e-06, - "loss": 0.0715, + "learning_rate": 1.681728113834584e-05, + "loss": 0.0653, "step": 141820 }, { "epoch": 6.62, - "learning_rate": 6.7962120856968734e-06, - "loss": 0.059, + "learning_rate": 1.6816813068509824e-05, + "loss": 0.0865, "step": 141825 }, { "epoch": 6.62, - "learning_rate": 6.795743284421734e-06, - "loss": 0.1802, + "learning_rate": 1.6816344998673804e-05, + "loss": 0.1362, "step": 141830 }, { "epoch": 6.62, - "learning_rate": 6.795274483146594e-06, - "loss": 0.1835, + "learning_rate": 1.6815876928837784e-05, + "loss": 0.1323, "step": 141835 }, { "epoch": 6.62, - "learning_rate": 6.794805681871455e-06, - "loss": 0.1656, + "learning_rate": 1.681540885900176e-05, + "loss": 0.1332, "step": 141840 }, { "epoch": 6.62, - "learning_rate": 6.7943368805963166e-06, - "loss": 0.0035, + "learning_rate": 1.6814940789165743e-05, + "loss": 0.0082, "step": 141845 }, { "epoch": 6.62, - "learning_rate": 6.7938680793211765e-06, - "loss": 0.0275, + "learning_rate": 1.6814472719329723e-05, + "loss": 0.0116, "step": 141850 }, { "epoch": 6.62, - "learning_rate": 6.793399278046036e-06, - "loss": 0.0292, + "learning_rate": 1.6814004649493703e-05, + "loss": 0.0182, "step": 141855 }, { "epoch": 6.62, - "learning_rate": 6.792930476770897e-06, - "loss": 0.0301, + "learning_rate": 1.6813536579657686e-05, + "loss": 0.0568, "step": 141860 }, { "epoch": 6.62, - "learning_rate": 6.792461675495759e-06, - "loss": 0.0594, + "learning_rate": 1.6813068509821666e-05, + "loss": 0.0204, "step": 141865 }, { "epoch": 6.62, - "learning_rate": 6.791992874220619e-06, - "loss": 0.0842, + "learning_rate": 1.6812600439985646e-05, + "loss": 0.0282, "step": 141870 }, { "epoch": 6.62, - "learning_rate": 6.791524072945479e-06, - "loss": 0.0754, + "learning_rate": 1.6812132370149626e-05, + "loss": 0.0901, "step": 141875 }, { "epoch": 6.62, - "learning_rate": 6.7910552716703395e-06, - "loss": 0.133, + "learning_rate": 1.681166430031361e-05, + "loss": 0.1201, "step": 141880 }, { "epoch": 6.62, - "learning_rate": 6.790586470395199e-06, - "loss": 0.222, + "learning_rate": 1.681119623047759e-05, + "loss": 0.1446, "step": 141885 }, { "epoch": 6.62, - "learning_rate": 6.790117669120061e-06, - "loss": 0.1512, + "learning_rate": 1.681072816064157e-05, + "loss": 0.1475, "step": 141890 }, { "epoch": 6.62, - "learning_rate": 6.789648867844921e-06, - "loss": 0.0205, + "learning_rate": 1.681026009080555e-05, + "loss": 0.0177, "step": 141895 }, { "epoch": 6.62, - "learning_rate": 6.789180066569782e-06, - "loss": 0.0191, + "learning_rate": 1.680979202096953e-05, + "loss": 0.0113, "step": 141900 }, { "epoch": 6.62, - "learning_rate": 6.788711265294642e-06, - "loss": 0.0329, + "learning_rate": 1.6809323951133508e-05, + "loss": 0.1041, "step": 141905 }, { "epoch": 6.62, - "learning_rate": 6.7882424640195025e-06, - "loss": 0.0168, + "learning_rate": 1.6808855881297488e-05, + "loss": 0.0683, "step": 141910 }, { "epoch": 6.62, - "learning_rate": 6.787773662744363e-06, - "loss": 0.0829, + "learning_rate": 1.680838781146147e-05, + "loss": 0.1047, "step": 141915 }, { "epoch": 6.62, - "learning_rate": 6.787304861469224e-06, - "loss": 0.0568, + "learning_rate": 1.680791974162545e-05, + "loss": 0.074, "step": 141920 }, { "epoch": 6.62, - "learning_rate": 6.786836060194084e-06, - "loss": 0.074, + "learning_rate": 1.680745167178943e-05, + "loss": 0.1767, "step": 141925 }, { "epoch": 6.62, - "learning_rate": 6.786367258918945e-06, - "loss": 0.0499, + "learning_rate": 1.680698360195341e-05, + "loss": 0.0517, "step": 141930 }, { "epoch": 6.62, - "learning_rate": 6.7858984576438055e-06, - "loss": 0.2201, + "learning_rate": 1.6806515532117394e-05, + "loss": 0.0957, "step": 141935 }, { "epoch": 6.62, - "learning_rate": 6.785429656368666e-06, - "loss": 0.191, + "learning_rate": 1.6806047462281373e-05, + "loss": 0.1181, "step": 141940 }, { "epoch": 6.62, - "learning_rate": 6.784960855093526e-06, - "loss": 0.0115, + "learning_rate": 1.6805579392445353e-05, + "loss": 0.0061, "step": 141945 }, { "epoch": 6.62, - "learning_rate": 6.784492053818387e-06, - "loss": 0.0231, + "learning_rate": 1.6805111322609333e-05, + "loss": 0.0337, "step": 141950 }, { "epoch": 6.62, - "learning_rate": 6.784023252543247e-06, - "loss": 0.0332, + "learning_rate": 1.6804643252773316e-05, + "loss": 0.0089, "step": 141955 }, { "epoch": 6.62, - "learning_rate": 6.783554451268109e-06, - "loss": 0.044, + "learning_rate": 1.6804175182937296e-05, + "loss": 0.052, "step": 141960 }, { "epoch": 6.62, - "learning_rate": 6.7830856499929685e-06, - "loss": 0.0638, + "learning_rate": 1.6803707113101276e-05, + "loss": 0.0951, "step": 141965 }, { "epoch": 6.62, - "learning_rate": 6.782616848717829e-06, - "loss": 0.1004, + "learning_rate": 1.6803239043265256e-05, + "loss": 0.0329, "step": 141970 }, { "epoch": 6.62, - "learning_rate": 6.782148047442689e-06, - "loss": 0.0224, + "learning_rate": 1.6802770973429236e-05, + "loss": 0.0356, "step": 141975 }, { "epoch": 6.62, - "learning_rate": 6.781679246167549e-06, - "loss": 0.1466, + "learning_rate": 1.6802302903593215e-05, + "loss": 0.1061, "step": 141980 }, { "epoch": 6.63, - "learning_rate": 6.781210444892411e-06, - "loss": 0.2023, + "learning_rate": 1.6801834833757195e-05, + "loss": 0.1529, "step": 141985 }, { "epoch": 6.63, - "learning_rate": 6.7807416436172716e-06, - "loss": 0.137, + "learning_rate": 1.680136676392118e-05, + "loss": 0.1839, "step": 141990 }, { "epoch": 6.63, - "learning_rate": 6.7802728423421315e-06, - "loss": 0.0047, + "learning_rate": 1.6800898694085158e-05, + "loss": 0.0249, "step": 141995 }, { "epoch": 6.63, - "learning_rate": 6.779804041066992e-06, - "loss": 0.0108, + "learning_rate": 1.6800430624249138e-05, + "loss": 0.0184, "step": 142000 }, { "epoch": 6.63, - "learning_rate": 6.779335239791853e-06, - "loss": 0.0143, + "learning_rate": 1.6799962554413118e-05, + "loss": 0.0185, "step": 142005 }, { "epoch": 6.63, - "learning_rate": 6.778866438516714e-06, - "loss": 0.0328, + "learning_rate": 1.67994944845771e-05, + "loss": 0.0443, "step": 142010 }, { "epoch": 6.63, - "learning_rate": 6.778397637241574e-06, - "loss": 0.027, + "learning_rate": 1.679902641474108e-05, + "loss": 0.0175, "step": 142015 }, { "epoch": 6.63, - "learning_rate": 6.7779288359664346e-06, - "loss": 0.0847, + "learning_rate": 1.679855834490506e-05, + "loss": 0.0748, "step": 142020 }, { "epoch": 6.63, - "learning_rate": 6.7774600346912945e-06, - "loss": 0.0713, + "learning_rate": 1.6798090275069044e-05, + "loss": 0.0947, "step": 142025 }, { "epoch": 6.63, - "learning_rate": 6.776991233416156e-06, - "loss": 0.1068, + "learning_rate": 1.679762220523302e-05, + "loss": 0.0769, "step": 142030 }, { "epoch": 6.63, - "learning_rate": 6.776522432141016e-06, - "loss": 0.2081, + "learning_rate": 1.6797154135397e-05, + "loss": 0.1146, "step": 142035 }, { "epoch": 6.63, - "learning_rate": 6.776053630865877e-06, - "loss": 0.174, + "learning_rate": 1.679668606556098e-05, + "loss": 0.1185, "step": 142040 }, { "epoch": 6.63, - "learning_rate": 6.775584829590737e-06, - "loss": 0.0498, + "learning_rate": 1.6796217995724963e-05, + "loss": 0.0179, "step": 142045 }, { "epoch": 6.63, - "learning_rate": 6.775116028315598e-06, - "loss": 0.0335, + "learning_rate": 1.6795749925888943e-05, + "loss": 0.0126, "step": 142050 }, { "epoch": 6.63, - "learning_rate": 6.774647227040458e-06, - "loss": 0.0207, + "learning_rate": 1.6795281856052923e-05, + "loss": 0.0319, "step": 142055 }, { "epoch": 6.63, - "learning_rate": 6.774178425765319e-06, - "loss": 0.0513, + "learning_rate": 1.6794813786216903e-05, + "loss": 0.0308, "step": 142060 }, { "epoch": 6.63, - "learning_rate": 6.773709624490179e-06, - "loss": 0.0195, + "learning_rate": 1.6794345716380886e-05, + "loss": 0.025, "step": 142065 }, { "epoch": 6.63, - "learning_rate": 6.773240823215039e-06, - "loss": 0.106, + "learning_rate": 1.6793877646544866e-05, + "loss": 0.0402, "step": 142070 }, { "epoch": 6.63, - "learning_rate": 6.772772021939901e-06, - "loss": 0.0372, + "learning_rate": 1.6793409576708845e-05, + "loss": 0.0974, "step": 142075 }, { "epoch": 6.63, - "learning_rate": 6.772303220664761e-06, - "loss": 0.0865, + "learning_rate": 1.6792941506872825e-05, + "loss": 0.1409, "step": 142080 }, { "epoch": 6.63, - "learning_rate": 6.771834419389621e-06, - "loss": 0.1488, + "learning_rate": 1.679247343703681e-05, + "loss": 0.0937, "step": 142085 }, { "epoch": 6.63, - "learning_rate": 6.771365618114481e-06, - "loss": 0.1212, + "learning_rate": 1.679200536720079e-05, + "loss": 0.1462, "step": 142090 }, { "epoch": 6.63, - "learning_rate": 6.770896816839342e-06, - "loss": 0.003, + "learning_rate": 1.6791537297364765e-05, + "loss": 0.0097, "step": 142095 }, { "epoch": 6.63, - "learning_rate": 6.770428015564204e-06, - "loss": 0.0105, + "learning_rate": 1.6791069227528748e-05, + "loss": 0.0214, "step": 142100 }, { "epoch": 6.63, - "learning_rate": 6.769959214289064e-06, - "loss": 0.0228, + "learning_rate": 1.6790601157692728e-05, + "loss": 0.0123, "step": 142105 }, { "epoch": 6.63, - "learning_rate": 6.7694904130139235e-06, - "loss": 0.0134, + "learning_rate": 1.6790133087856708e-05, + "loss": 0.0522, "step": 142110 }, { "epoch": 6.63, - "learning_rate": 6.769021611738784e-06, - "loss": 0.0275, + "learning_rate": 1.6789665018020687e-05, + "loss": 0.0593, "step": 142115 }, { "epoch": 6.63, - "learning_rate": 6.768552810463646e-06, - "loss": 0.0533, + "learning_rate": 1.678919694818467e-05, + "loss": 0.0518, "step": 142120 }, { "epoch": 6.63, - "learning_rate": 6.768084009188506e-06, - "loss": 0.0674, + "learning_rate": 1.678872887834865e-05, + "loss": 0.0743, "step": 142125 }, { "epoch": 6.63, - "learning_rate": 6.767615207913366e-06, - "loss": 0.0665, + "learning_rate": 1.678826080851263e-05, + "loss": 0.1061, "step": 142130 }, { "epoch": 6.63, - "learning_rate": 6.767146406638227e-06, - "loss": 0.1453, + "learning_rate": 1.678779273867661e-05, + "loss": 0.1715, "step": 142135 }, { "epoch": 6.63, - "learning_rate": 6.7666776053630865e-06, - "loss": 0.075, + "learning_rate": 1.6787324668840593e-05, + "loss": 0.2172, "step": 142140 }, { "epoch": 6.63, - "learning_rate": 6.766208804087948e-06, - "loss": 0.0299, + "learning_rate": 1.6786856599004573e-05, + "loss": 0.02, "step": 142145 }, { "epoch": 6.63, - "learning_rate": 6.765740002812808e-06, - "loss": 0.0689, + "learning_rate": 1.6786388529168553e-05, + "loss": 0.0049, "step": 142150 }, { "epoch": 6.63, - "learning_rate": 6.765271201537669e-06, - "loss": 0.0259, + "learning_rate": 1.6785920459332533e-05, + "loss": 0.0214, "step": 142155 }, { "epoch": 6.63, - "learning_rate": 6.764802400262529e-06, - "loss": 0.0614, + "learning_rate": 1.6785452389496513e-05, + "loss": 0.0087, "step": 142160 }, { "epoch": 6.63, - "learning_rate": 6.7643335989873896e-06, - "loss": 0.0197, + "learning_rate": 1.6784984319660492e-05, + "loss": 0.0977, "step": 142165 }, { "epoch": 6.63, - "learning_rate": 6.76386479771225e-06, - "loss": 0.0619, + "learning_rate": 1.6784516249824472e-05, + "loss": 0.0842, "step": 142170 }, { "epoch": 6.63, - "learning_rate": 6.763395996437111e-06, - "loss": 0.167, + "learning_rate": 1.6784048179988455e-05, + "loss": 0.0551, "step": 142175 }, { "epoch": 6.63, - "learning_rate": 6.762927195161971e-06, - "loss": 0.0848, + "learning_rate": 1.6783580110152435e-05, + "loss": 0.1413, "step": 142180 }, { "epoch": 6.63, - "learning_rate": 6.762458393886832e-06, - "loss": 0.159, + "learning_rate": 1.6783112040316415e-05, + "loss": 0.1724, "step": 142185 }, { "epoch": 6.63, - "learning_rate": 6.761989592611693e-06, - "loss": 0.2475, + "learning_rate": 1.6782643970480395e-05, + "loss": 0.1207, "step": 142190 }, { "epoch": 6.64, - "learning_rate": 6.761520791336553e-06, - "loss": 0.0446, + "learning_rate": 1.6782175900644378e-05, + "loss": 0.107, "step": 142195 }, { "epoch": 6.64, - "learning_rate": 6.761051990061413e-06, - "loss": 0.013, + "learning_rate": 1.6781707830808358e-05, + "loss": 0.0168, "step": 142200 }, { "epoch": 6.64, - "learning_rate": 6.760583188786274e-06, - "loss": 0.0313, + "learning_rate": 1.6781239760972338e-05, + "loss": 0.0247, "step": 142205 }, { "epoch": 6.64, - "learning_rate": 6.760114387511134e-06, - "loss": 0.0179, + "learning_rate": 1.678077169113632e-05, + "loss": 0.0146, "step": 142210 }, { "epoch": 6.64, - "learning_rate": 6.759645586235996e-06, - "loss": 0.0923, + "learning_rate": 1.67803036213003e-05, + "loss": 0.0261, "step": 142215 }, { "epoch": 6.64, - "learning_rate": 6.759176784960856e-06, - "loss": 0.0825, + "learning_rate": 1.6779835551464277e-05, + "loss": 0.0655, "step": 142220 }, { "epoch": 6.64, - "learning_rate": 6.758707983685716e-06, - "loss": 0.0683, + "learning_rate": 1.6779367481628257e-05, + "loss": 0.0709, "step": 142225 }, { "epoch": 6.64, - "learning_rate": 6.758239182410576e-06, - "loss": 0.1264, + "learning_rate": 1.677889941179224e-05, + "loss": 0.128, "step": 142230 }, { "epoch": 6.64, - "learning_rate": 6.757770381135437e-06, - "loss": 0.248, + "learning_rate": 1.677843134195622e-05, + "loss": 0.0763, "step": 142235 }, { "epoch": 6.64, - "learning_rate": 6.757301579860298e-06, - "loss": 0.1435, + "learning_rate": 1.67779632721202e-05, + "loss": 0.0819, "step": 142240 }, { "epoch": 6.64, - "learning_rate": 6.756832778585159e-06, - "loss": 0.0135, + "learning_rate": 1.677749520228418e-05, + "loss": 0.0044, "step": 142245 }, { "epoch": 6.64, - "learning_rate": 6.756363977310019e-06, - "loss": 0.0185, + "learning_rate": 1.6777027132448163e-05, + "loss": 0.0269, "step": 142250 }, { "epoch": 6.64, - "learning_rate": 6.755895176034879e-06, - "loss": 0.0122, + "learning_rate": 1.6776559062612143e-05, + "loss": 0.0029, "step": 142255 }, { "epoch": 6.64, - "learning_rate": 6.75542637475974e-06, - "loss": 0.014, + "learning_rate": 1.6776090992776122e-05, + "loss": 0.0263, "step": 142260 }, { "epoch": 6.64, - "learning_rate": 6.754957573484601e-06, - "loss": 0.0353, + "learning_rate": 1.6775622922940102e-05, + "loss": 0.0362, "step": 142265 }, { "epoch": 6.64, - "learning_rate": 6.754488772209461e-06, - "loss": 0.0326, + "learning_rate": 1.6775154853104085e-05, + "loss": 0.0599, "step": 142270 }, { "epoch": 6.64, - "learning_rate": 6.754019970934322e-06, - "loss": 0.0853, + "learning_rate": 1.6774686783268065e-05, + "loss": 0.0343, "step": 142275 }, { "epoch": 6.64, - "learning_rate": 6.753551169659182e-06, - "loss": 0.0944, + "learning_rate": 1.6774218713432045e-05, + "loss": 0.1239, "step": 142280 }, { "epoch": 6.64, - "learning_rate": 6.753082368384043e-06, - "loss": 0.1551, + "learning_rate": 1.6773750643596025e-05, + "loss": 0.2497, "step": 142285 }, { "epoch": 6.64, - "learning_rate": 6.752613567108903e-06, - "loss": 0.1826, + "learning_rate": 1.6773282573760005e-05, + "loss": 0.0891, "step": 142290 }, { "epoch": 6.64, - "learning_rate": 6.752144765833764e-06, - "loss": 0.0453, + "learning_rate": 1.6772814503923985e-05, + "loss": 0.035, "step": 142295 }, { "epoch": 6.64, - "learning_rate": 6.751675964558624e-06, - "loss": 0.0312, + "learning_rate": 1.6772346434087964e-05, + "loss": 0.0218, "step": 142300 }, { "epoch": 6.64, - "learning_rate": 6.751207163283484e-06, - "loss": 0.0254, + "learning_rate": 1.6771878364251948e-05, + "loss": 0.0013, "step": 142305 }, { "epoch": 6.64, - "learning_rate": 6.7507383620083454e-06, - "loss": 0.1198, + "learning_rate": 1.6771410294415927e-05, + "loss": 0.0572, "step": 142310 }, { "epoch": 6.64, - "learning_rate": 6.750269560733206e-06, - "loss": 0.0474, + "learning_rate": 1.6770942224579907e-05, + "loss": 0.0183, "step": 142315 }, { "epoch": 6.64, - "learning_rate": 6.749800759458066e-06, - "loss": 0.0314, + "learning_rate": 1.6770474154743887e-05, + "loss": 0.0401, "step": 142320 }, { "epoch": 6.64, - "learning_rate": 6.749331958182926e-06, - "loss": 0.0864, + "learning_rate": 1.677000608490787e-05, + "loss": 0.0511, "step": 142325 }, { "epoch": 6.64, - "learning_rate": 6.748863156907788e-06, - "loss": 0.122, + "learning_rate": 1.676953801507185e-05, + "loss": 0.1225, "step": 142330 }, { "epoch": 6.64, - "learning_rate": 6.7483943556326485e-06, - "loss": 0.1715, + "learning_rate": 1.676906994523583e-05, + "loss": 0.2452, "step": 142335 }, { "epoch": 6.64, - "learning_rate": 6.7479255543575084e-06, - "loss": 0.1665, + "learning_rate": 1.6768601875399813e-05, + "loss": 0.1955, "step": 142340 }, { "epoch": 6.64, - "learning_rate": 6.747456753082368e-06, - "loss": 0.0321, + "learning_rate": 1.676813380556379e-05, + "loss": 0.005, "step": 142345 }, { "epoch": 6.64, - "learning_rate": 6.746987951807229e-06, - "loss": 0.026, + "learning_rate": 1.676766573572777e-05, + "loss": 0.0293, "step": 142350 }, { "epoch": 6.64, - "learning_rate": 6.746519150532091e-06, - "loss": 0.0449, + "learning_rate": 1.676719766589175e-05, + "loss": 0.0088, "step": 142355 }, { "epoch": 6.64, - "learning_rate": 6.746050349256951e-06, - "loss": 0.0798, + "learning_rate": 1.6766729596055732e-05, + "loss": 0.0123, "step": 142360 }, { "epoch": 6.64, - "learning_rate": 6.745581547981811e-06, - "loss": 0.0545, + "learning_rate": 1.6766261526219712e-05, + "loss": 0.0502, "step": 142365 }, { "epoch": 6.64, - "learning_rate": 6.745112746706671e-06, - "loss": 0.0901, + "learning_rate": 1.6765793456383692e-05, + "loss": 0.0501, "step": 142370 }, { "epoch": 6.64, - "learning_rate": 6.744643945431533e-06, - "loss": 0.0245, + "learning_rate": 1.6765325386547672e-05, + "loss": 0.1816, "step": 142375 }, { "epoch": 6.64, - "learning_rate": 6.744175144156393e-06, - "loss": 0.1083, + "learning_rate": 1.6764857316711655e-05, + "loss": 0.0486, "step": 142380 }, { "epoch": 6.64, - "learning_rate": 6.743706342881253e-06, - "loss": 0.1583, + "learning_rate": 1.6764389246875635e-05, + "loss": 0.2165, "step": 142385 }, { "epoch": 6.64, - "learning_rate": 6.743237541606114e-06, - "loss": 0.1409, + "learning_rate": 1.6763921177039615e-05, + "loss": 0.154, "step": 142390 }, { "epoch": 6.64, - "learning_rate": 6.742768740330974e-06, - "loss": 0.0446, + "learning_rate": 1.6763453107203598e-05, + "loss": 0.0048, "step": 142395 }, { "epoch": 6.64, - "learning_rate": 6.742299939055835e-06, - "loss": 0.0112, + "learning_rate": 1.6762985037367578e-05, + "loss": 0.0124, "step": 142400 }, { "epoch": 6.64, - "learning_rate": 6.741831137780695e-06, - "loss": 0.0206, + "learning_rate": 1.6762516967531557e-05, + "loss": 0.0264, "step": 142405 }, { "epoch": 6.65, - "learning_rate": 6.741362336505556e-06, - "loss": 0.0458, + "learning_rate": 1.6762048897695534e-05, + "loss": 0.0179, "step": 142410 }, { "epoch": 6.65, - "learning_rate": 6.740893535230416e-06, - "loss": 0.0443, + "learning_rate": 1.6761580827859517e-05, + "loss": 0.0312, "step": 142415 }, { "epoch": 6.65, - "learning_rate": 6.740424733955277e-06, - "loss": 0.029, + "learning_rate": 1.6761112758023497e-05, + "loss": 0.036, "step": 142420 }, { "epoch": 6.65, - "learning_rate": 6.7399559326801375e-06, - "loss": 0.114, + "learning_rate": 1.6760644688187477e-05, + "loss": 0.1226, "step": 142425 }, { "epoch": 6.65, - "learning_rate": 6.739487131404998e-06, - "loss": 0.0545, + "learning_rate": 1.6760176618351457e-05, + "loss": 0.1146, "step": 142430 }, { "epoch": 6.65, - "learning_rate": 6.739018330129858e-06, - "loss": 0.1277, + "learning_rate": 1.675970854851544e-05, + "loss": 0.1676, "step": 142435 }, { "epoch": 6.65, - "learning_rate": 6.738549528854719e-06, - "loss": 0.1619, + "learning_rate": 1.675924047867942e-05, + "loss": 0.1622, "step": 142440 }, { "epoch": 6.65, - "learning_rate": 6.73808072757958e-06, - "loss": 0.0157, + "learning_rate": 1.67587724088434e-05, + "loss": 0.0374, "step": 142445 }, { "epoch": 6.65, - "learning_rate": 6.7376119263044405e-06, - "loss": 0.07, + "learning_rate": 1.675830433900738e-05, + "loss": 0.0054, "step": 142450 }, { "epoch": 6.65, - "learning_rate": 6.7371431250293005e-06, - "loss": 0.0235, + "learning_rate": 1.6757836269171362e-05, + "loss": 0.0126, "step": 142455 }, { "epoch": 6.65, - "learning_rate": 6.736674323754161e-06, - "loss": 0.0369, + "learning_rate": 1.6757368199335342e-05, + "loss": 0.0195, "step": 142460 }, { "epoch": 6.65, - "learning_rate": 6.736205522479021e-06, - "loss": 0.0824, + "learning_rate": 1.6756900129499322e-05, + "loss": 0.019, "step": 142465 }, { "epoch": 6.65, - "learning_rate": 6.735736721203883e-06, - "loss": 0.0576, + "learning_rate": 1.6756432059663305e-05, + "loss": 0.1296, "step": 142470 }, { "epoch": 6.65, - "learning_rate": 6.735267919928743e-06, - "loss": 0.0497, + "learning_rate": 1.6755963989827282e-05, + "loss": 0.1829, "step": 142475 }, { "epoch": 6.65, - "learning_rate": 6.7347991186536035e-06, - "loss": 0.1528, + "learning_rate": 1.675549591999126e-05, + "loss": 0.0577, "step": 142480 }, { "epoch": 6.65, - "learning_rate": 6.7343303173784634e-06, - "loss": 0.1678, + "learning_rate": 1.675502785015524e-05, + "loss": 0.1085, "step": 142485 }, { "epoch": 6.65, - "learning_rate": 6.733861516103324e-06, - "loss": 0.1219, + "learning_rate": 1.6754559780319225e-05, + "loss": 0.1276, "step": 142490 }, { "epoch": 6.65, - "learning_rate": 6.733392714828185e-06, - "loss": 0.0053, + "learning_rate": 1.6754091710483204e-05, + "loss": 0.0322, "step": 142495 }, { "epoch": 6.65, - "learning_rate": 6.732923913553046e-06, - "loss": 0.0219, + "learning_rate": 1.6753623640647184e-05, + "loss": 0.0221, "step": 142500 }, { "epoch": 6.65, - "learning_rate": 6.732455112277906e-06, - "loss": 0.0351, + "learning_rate": 1.6753155570811164e-05, + "loss": 0.0175, "step": 142505 }, { "epoch": 6.65, - "learning_rate": 6.7319863110027665e-06, - "loss": 0.0118, + "learning_rate": 1.6752687500975147e-05, + "loss": 0.0412, "step": 142510 }, { "epoch": 6.65, - "learning_rate": 6.731517509727627e-06, - "loss": 0.0643, + "learning_rate": 1.6752219431139127e-05, + "loss": 0.0736, "step": 142515 }, { "epoch": 6.65, - "learning_rate": 6.731048708452488e-06, - "loss": 0.1161, + "learning_rate": 1.6751751361303107e-05, + "loss": 0.0281, "step": 142520 }, { "epoch": 6.65, - "learning_rate": 6.730579907177348e-06, - "loss": 0.0935, + "learning_rate": 1.675128329146709e-05, + "loss": 0.0889, "step": 142525 }, { "epoch": 6.65, - "learning_rate": 6.730111105902209e-06, - "loss": 0.1157, + "learning_rate": 1.675081522163107e-05, + "loss": 0.074, "step": 142530 }, { "epoch": 6.65, - "learning_rate": 6.729642304627069e-06, - "loss": 0.2496, + "learning_rate": 1.6750347151795046e-05, + "loss": 0.1052, "step": 142535 }, { "epoch": 6.65, - "learning_rate": 6.72917350335193e-06, - "loss": 0.125, + "learning_rate": 1.6749879081959026e-05, + "loss": 0.0676, "step": 142540 }, { "epoch": 6.65, - "learning_rate": 6.72870470207679e-06, - "loss": 0.0603, + "learning_rate": 1.674941101212301e-05, + "loss": 0.0212, "step": 142545 }, { "epoch": 6.65, - "learning_rate": 6.728235900801651e-06, - "loss": 0.0733, + "learning_rate": 1.674894294228699e-05, + "loss": 0.0139, "step": 142550 }, { "epoch": 6.65, - "learning_rate": 6.727767099526511e-06, - "loss": 0.0558, + "learning_rate": 1.674847487245097e-05, + "loss": 0.0575, "step": 142555 }, { "epoch": 6.65, - "learning_rate": 6.727298298251371e-06, - "loss": 0.0181, + "learning_rate": 1.674800680261495e-05, + "loss": 0.0526, "step": 142560 }, { "epoch": 6.65, - "learning_rate": 6.7268294969762325e-06, - "loss": 0.016, + "learning_rate": 1.6747538732778932e-05, + "loss": 0.0078, "step": 142565 }, { "epoch": 6.65, - "learning_rate": 6.726360695701093e-06, - "loss": 0.0392, + "learning_rate": 1.6747070662942912e-05, + "loss": 0.0698, "step": 142570 }, { "epoch": 6.65, - "learning_rate": 6.725891894425953e-06, - "loss": 0.1423, + "learning_rate": 1.674660259310689e-05, + "loss": 0.0422, "step": 142575 }, { "epoch": 6.65, - "learning_rate": 6.725423093150813e-06, - "loss": 0.0573, + "learning_rate": 1.6746134523270875e-05, + "loss": 0.107, "step": 142580 }, { "epoch": 6.65, - "learning_rate": 6.724954291875675e-06, - "loss": 0.2021, + "learning_rate": 1.6745666453434855e-05, + "loss": 0.2193, "step": 142585 }, { "epoch": 6.65, - "learning_rate": 6.724485490600536e-06, - "loss": 0.126, + "learning_rate": 1.6745198383598834e-05, + "loss": 0.1235, "step": 142590 }, { "epoch": 6.65, - "learning_rate": 6.7240166893253955e-06, - "loss": 0.0208, + "learning_rate": 1.6744730313762814e-05, + "loss": 0.0294, "step": 142595 }, { "epoch": 6.65, - "learning_rate": 6.7235478880502555e-06, - "loss": 0.0066, + "learning_rate": 1.6744262243926794e-05, + "loss": 0.0204, "step": 142600 }, { "epoch": 6.65, - "learning_rate": 6.723079086775116e-06, - "loss": 0.065, + "learning_rate": 1.6743794174090774e-05, + "loss": 0.0057, "step": 142605 }, { "epoch": 6.65, - "learning_rate": 6.722610285499978e-06, - "loss": 0.0326, + "learning_rate": 1.6743326104254754e-05, + "loss": 0.049, "step": 142610 }, { "epoch": 6.65, - "learning_rate": 6.722141484224838e-06, - "loss": 0.0097, + "learning_rate": 1.6742858034418734e-05, + "loss": 0.0289, "step": 142615 }, { "epoch": 6.65, - "learning_rate": 6.721672682949698e-06, - "loss": 0.0609, + "learning_rate": 1.6742389964582717e-05, + "loss": 0.0225, "step": 142620 }, { "epoch": 6.66, - "learning_rate": 6.7212038816745585e-06, - "loss": 0.1101, + "learning_rate": 1.6741921894746697e-05, + "loss": 0.0601, "step": 142625 }, { "epoch": 6.66, - "learning_rate": 6.7207350803994185e-06, - "loss": 0.053, + "learning_rate": 1.6741453824910676e-05, + "loss": 0.1438, "step": 142630 }, { "epoch": 6.66, - "learning_rate": 6.72026627912428e-06, - "loss": 0.1988, + "learning_rate": 1.674098575507466e-05, + "loss": 0.1448, "step": 142635 }, { "epoch": 6.66, - "learning_rate": 6.71979747784914e-06, - "loss": 0.1118, + "learning_rate": 1.674051768523864e-05, + "loss": 0.1561, "step": 142640 }, { "epoch": 6.66, - "learning_rate": 6.719328676574001e-06, - "loss": 0.0347, + "learning_rate": 1.674004961540262e-05, + "loss": 0.0199, "step": 142645 }, { "epoch": 6.66, - "learning_rate": 6.718859875298861e-06, - "loss": 0.0455, + "learning_rate": 1.67395815455666e-05, + "loss": 0.0081, "step": 142650 }, { "epoch": 6.66, - "learning_rate": 6.718391074023722e-06, - "loss": 0.0189, + "learning_rate": 1.6739113475730582e-05, + "loss": 0.0425, "step": 142655 }, { "epoch": 6.66, - "learning_rate": 6.717922272748582e-06, - "loss": 0.0164, + "learning_rate": 1.6738645405894562e-05, + "loss": 0.0239, "step": 142660 }, { "epoch": 6.66, - "learning_rate": 6.717453471473443e-06, - "loss": 0.0479, + "learning_rate": 1.673817733605854e-05, + "loss": 0.0558, "step": 142665 }, { "epoch": 6.66, - "learning_rate": 6.716984670198303e-06, - "loss": 0.077, + "learning_rate": 1.673770926622252e-05, + "loss": 0.0567, "step": 142670 }, { "epoch": 6.66, - "learning_rate": 6.716515868923164e-06, - "loss": 0.0868, + "learning_rate": 1.67372411963865e-05, + "loss": 0.0594, "step": 142675 }, { "epoch": 6.66, - "learning_rate": 6.7160470676480246e-06, - "loss": 0.1904, + "learning_rate": 1.673677312655048e-05, + "loss": 0.1549, "step": 142680 }, { "epoch": 6.66, - "learning_rate": 6.715578266372885e-06, - "loss": 0.1354, + "learning_rate": 1.673630505671446e-05, + "loss": 0.1299, "step": 142685 }, { "epoch": 6.66, - "learning_rate": 6.715109465097745e-06, - "loss": 0.1988, + "learning_rate": 1.673583698687844e-05, + "loss": 0.1512, "step": 142690 }, { "epoch": 6.66, - "learning_rate": 6.714640663822606e-06, - "loss": 0.0275, + "learning_rate": 1.6735368917042424e-05, + "loss": 0.0442, "step": 142695 }, { "epoch": 6.66, - "learning_rate": 6.714171862547467e-06, - "loss": 0.0344, + "learning_rate": 1.6734900847206404e-05, + "loss": 0.0059, "step": 142700 }, { "epoch": 6.66, - "learning_rate": 6.713703061272328e-06, - "loss": 0.0237, + "learning_rate": 1.6734432777370384e-05, + "loss": 0.0082, "step": 142705 }, { "epoch": 6.66, - "learning_rate": 6.7132342599971876e-06, - "loss": 0.0679, + "learning_rate": 1.6733964707534367e-05, + "loss": 0.0186, "step": 142710 }, { "epoch": 6.66, - "learning_rate": 6.712765458722048e-06, - "loss": 0.013, + "learning_rate": 1.6733496637698347e-05, + "loss": 0.0829, "step": 142715 }, { "epoch": 6.66, - "learning_rate": 6.712296657446908e-06, - "loss": 0.0445, + "learning_rate": 1.6733028567862327e-05, + "loss": 0.0519, "step": 142720 }, { "epoch": 6.66, - "learning_rate": 6.71182785617177e-06, - "loss": 0.0985, + "learning_rate": 1.6732560498026303e-05, + "loss": 0.0689, "step": 142725 }, { "epoch": 6.66, - "learning_rate": 6.71135905489663e-06, - "loss": 0.14, + "learning_rate": 1.6732092428190286e-05, + "loss": 0.1195, "step": 142730 }, { "epoch": 6.66, - "learning_rate": 6.710890253621491e-06, - "loss": 0.2257, + "learning_rate": 1.6731624358354266e-05, + "loss": 0.1214, "step": 142735 }, { "epoch": 6.66, - "learning_rate": 6.7104214523463506e-06, - "loss": 0.1283, + "learning_rate": 1.6731156288518246e-05, + "loss": 0.1316, "step": 142740 }, { "epoch": 6.66, - "learning_rate": 6.709952651071211e-06, - "loss": 0.0258, + "learning_rate": 1.6730688218682226e-05, + "loss": 0.0133, "step": 142745 }, { "epoch": 6.66, - "learning_rate": 6.709483849796072e-06, - "loss": 0.0361, + "learning_rate": 1.673022014884621e-05, + "loss": 0.0167, "step": 142750 }, { "epoch": 6.66, - "learning_rate": 6.709015048520933e-06, - "loss": 0.0404, + "learning_rate": 1.672975207901019e-05, + "loss": 0.0454, "step": 142755 }, { "epoch": 6.66, - "learning_rate": 6.708546247245793e-06, - "loss": 0.0405, + "learning_rate": 1.672928400917417e-05, + "loss": 0.0394, "step": 142760 }, { "epoch": 6.66, - "learning_rate": 6.708077445970654e-06, - "loss": 0.0479, + "learning_rate": 1.6728815939338152e-05, + "loss": 0.0738, "step": 142765 }, { "epoch": 6.66, - "learning_rate": 6.707608644695514e-06, - "loss": 0.0393, + "learning_rate": 1.672834786950213e-05, + "loss": 0.0463, "step": 142770 }, { "epoch": 6.66, - "learning_rate": 6.707139843420375e-06, - "loss": 0.065, + "learning_rate": 1.672787979966611e-05, + "loss": 0.0391, "step": 142775 }, { "epoch": 6.66, - "learning_rate": 6.706671042145235e-06, - "loss": 0.0861, + "learning_rate": 1.672741172983009e-05, + "loss": 0.1153, "step": 142780 }, { "epoch": 6.66, - "learning_rate": 6.706202240870096e-06, - "loss": 0.1497, + "learning_rate": 1.6726943659994074e-05, + "loss": 0.1698, "step": 142785 }, { "epoch": 6.66, - "learning_rate": 6.705733439594956e-06, - "loss": 0.1133, + "learning_rate": 1.672647559015805e-05, + "loss": 0.1092, "step": 142790 }, { "epoch": 6.66, - "learning_rate": 6.7052646383198174e-06, - "loss": 0.0245, + "learning_rate": 1.672600752032203e-05, + "loss": 0.0005, "step": 142795 }, { "epoch": 6.66, - "learning_rate": 6.704795837044677e-06, - "loss": 0.0112, + "learning_rate": 1.672553945048601e-05, + "loss": 0.0327, "step": 142800 }, { "epoch": 6.66, - "learning_rate": 6.704327035769538e-06, - "loss": 0.0108, + "learning_rate": 1.6725071380649994e-05, + "loss": 0.0608, "step": 142805 }, { "epoch": 6.66, - "learning_rate": 6.703858234494398e-06, - "loss": 0.0217, + "learning_rate": 1.6724603310813974e-05, + "loss": 0.0359, "step": 142810 }, { "epoch": 6.66, - "learning_rate": 6.703389433219258e-06, - "loss": 0.0464, + "learning_rate": 1.6724135240977953e-05, + "loss": 0.023, "step": 142815 }, { "epoch": 6.66, - "learning_rate": 6.70292063194412e-06, - "loss": 0.0866, + "learning_rate": 1.6723667171141937e-05, + "loss": 0.059, "step": 142820 }, { "epoch": 6.66, - "learning_rate": 6.7024518306689804e-06, - "loss": 0.0505, + "learning_rate": 1.6723199101305916e-05, + "loss": 0.0869, "step": 142825 }, { "epoch": 6.66, - "learning_rate": 6.70198302939384e-06, - "loss": 0.1213, + "learning_rate": 1.6722731031469896e-05, + "loss": 0.3753, "step": 142830 }, { "epoch": 6.66, - "learning_rate": 6.7015142281187e-06, - "loss": 0.2048, + "learning_rate": 1.6722262961633876e-05, + "loss": 0.1909, "step": 142835 }, { "epoch": 6.67, - "learning_rate": 6.701045426843562e-06, - "loss": 0.0908, + "learning_rate": 1.672179489179786e-05, + "loss": 0.1209, "step": 142840 }, { "epoch": 6.67, - "learning_rate": 6.700576625568423e-06, - "loss": 0.0329, + "learning_rate": 1.672132682196184e-05, + "loss": 0.0051, "step": 142845 }, { "epoch": 6.67, - "learning_rate": 6.700107824293283e-06, - "loss": 0.0146, + "learning_rate": 1.6720858752125815e-05, + "loss": 0.0227, "step": 142850 }, { "epoch": 6.67, - "learning_rate": 6.699639023018143e-06, - "loss": 0.0365, + "learning_rate": 1.6720390682289795e-05, + "loss": 0.0209, "step": 142855 }, { "epoch": 6.67, - "learning_rate": 6.699170221743003e-06, - "loss": 0.0365, + "learning_rate": 1.671992261245378e-05, + "loss": 0.0678, "step": 142860 }, { "epoch": 6.67, - "learning_rate": 6.698701420467865e-06, - "loss": 0.0282, + "learning_rate": 1.671945454261776e-05, + "loss": 0.0367, "step": 142865 }, { "epoch": 6.67, - "learning_rate": 6.698232619192725e-06, - "loss": 0.1131, + "learning_rate": 1.6718986472781738e-05, + "loss": 0.1052, "step": 142870 }, { "epoch": 6.67, - "learning_rate": 6.697763817917585e-06, - "loss": 0.0357, + "learning_rate": 1.6718518402945718e-05, + "loss": 0.0403, "step": 142875 }, { "epoch": 6.67, - "learning_rate": 6.697295016642446e-06, - "loss": 0.1394, + "learning_rate": 1.67180503331097e-05, + "loss": 0.0497, "step": 142880 }, { "epoch": 6.67, - "learning_rate": 6.6968262153673056e-06, - "loss": 0.2771, + "learning_rate": 1.671758226327368e-05, + "loss": 0.1545, "step": 142885 }, { "epoch": 6.67, - "learning_rate": 6.696357414092167e-06, - "loss": 0.1706, + "learning_rate": 1.671711419343766e-05, + "loss": 0.1134, "step": 142890 }, { "epoch": 6.67, - "learning_rate": 6.695888612817027e-06, - "loss": 0.0049, + "learning_rate": 1.6716646123601644e-05, + "loss": 0.0015, "step": 142895 }, { "epoch": 6.67, - "learning_rate": 6.695419811541888e-06, - "loss": 0.0324, + "learning_rate": 1.6716178053765624e-05, + "loss": 0.0167, "step": 142900 }, { "epoch": 6.67, - "learning_rate": 6.694951010266748e-06, - "loss": 0.045, + "learning_rate": 1.6715709983929604e-05, + "loss": 0.0222, "step": 142905 }, { "epoch": 6.67, - "learning_rate": 6.6944822089916095e-06, - "loss": 0.0409, + "learning_rate": 1.6715241914093583e-05, + "loss": 0.0048, "step": 142910 }, { "epoch": 6.67, - "learning_rate": 6.694013407716469e-06, - "loss": 0.039, + "learning_rate": 1.6714773844257563e-05, + "loss": 0.0463, "step": 142915 }, { "epoch": 6.67, - "learning_rate": 6.69354460644133e-06, - "loss": 0.1247, + "learning_rate": 1.6714305774421543e-05, + "loss": 0.0212, "step": 142920 }, { "epoch": 6.67, - "learning_rate": 6.69307580516619e-06, - "loss": 0.0768, + "learning_rate": 1.6713837704585523e-05, + "loss": 0.0665, "step": 142925 }, { "epoch": 6.67, - "learning_rate": 6.692607003891051e-06, - "loss": 0.1035, + "learning_rate": 1.6713369634749503e-05, + "loss": 0.0714, "step": 142930 }, { "epoch": 6.67, - "learning_rate": 6.692138202615912e-06, - "loss": 0.1236, + "learning_rate": 1.6712901564913486e-05, + "loss": 0.1536, "step": 142935 }, { "epoch": 6.67, - "learning_rate": 6.6916694013407725e-06, - "loss": 0.1187, + "learning_rate": 1.6712433495077466e-05, + "loss": 0.1315, "step": 142940 }, { "epoch": 6.67, - "learning_rate": 6.691200600065632e-06, - "loss": 0.0139, + "learning_rate": 1.6711965425241446e-05, + "loss": 0.0183, "step": 142945 }, { "epoch": 6.67, - "learning_rate": 6.690731798790493e-06, - "loss": 0.0313, + "learning_rate": 1.671149735540543e-05, + "loss": 0.1045, "step": 142950 }, { "epoch": 6.67, - "learning_rate": 6.690262997515353e-06, - "loss": 0.0405, + "learning_rate": 1.671102928556941e-05, + "loss": 0.0095, "step": 142955 }, { "epoch": 6.67, - "learning_rate": 6.689794196240215e-06, - "loss": 0.0186, + "learning_rate": 1.671056121573339e-05, + "loss": 0.0389, "step": 142960 }, { "epoch": 6.67, - "learning_rate": 6.689325394965075e-06, - "loss": 0.035, + "learning_rate": 1.6710093145897368e-05, + "loss": 0.0317, "step": 142965 }, { "epoch": 6.67, - "learning_rate": 6.6888565936899355e-06, - "loss": 0.0412, + "learning_rate": 1.670962507606135e-05, + "loss": 0.0554, "step": 142970 }, { "epoch": 6.67, - "learning_rate": 6.688387792414795e-06, - "loss": 0.0415, + "learning_rate": 1.670915700622533e-05, + "loss": 0.0635, "step": 142975 }, { "epoch": 6.67, - "learning_rate": 6.687918991139657e-06, - "loss": 0.1342, + "learning_rate": 1.6708688936389308e-05, + "loss": 0.102, "step": 142980 }, { "epoch": 6.67, - "learning_rate": 6.687450189864517e-06, - "loss": 0.0657, + "learning_rate": 1.6708220866553287e-05, + "loss": 0.1501, "step": 142985 }, { "epoch": 6.67, - "learning_rate": 6.686981388589378e-06, - "loss": 0.1502, + "learning_rate": 1.670775279671727e-05, + "loss": 0.1964, "step": 142990 }, { "epoch": 6.67, - "learning_rate": 6.686512587314238e-06, - "loss": 0.0466, + "learning_rate": 1.670728472688125e-05, + "loss": 0.0119, "step": 142995 }, { "epoch": 6.67, - "learning_rate": 6.6860437860390984e-06, - "loss": 0.0026, + "learning_rate": 1.670681665704523e-05, + "loss": 0.0139, "step": 143000 }, { "epoch": 6.67, - "learning_rate": 6.685574984763959e-06, - "loss": 0.0025, + "learning_rate": 1.6706348587209214e-05, + "loss": 0.0273, "step": 143005 }, { "epoch": 6.67, - "learning_rate": 6.68510618348882e-06, - "loss": 0.0687, + "learning_rate": 1.6705880517373193e-05, + "loss": 0.0315, "step": 143010 }, { "epoch": 6.67, - "learning_rate": 6.68463738221368e-06, - "loss": 0.0647, + "learning_rate": 1.6705412447537173e-05, + "loss": 0.0611, "step": 143015 }, { "epoch": 6.67, - "learning_rate": 6.684168580938541e-06, - "loss": 0.072, + "learning_rate": 1.6704944377701153e-05, + "loss": 0.0462, "step": 143020 }, { "epoch": 6.67, - "learning_rate": 6.6836997796634015e-06, - "loss": 0.0739, + "learning_rate": 1.6704476307865136e-05, + "loss": 0.0795, "step": 143025 }, { "epoch": 6.67, - "learning_rate": 6.683230978388262e-06, - "loss": 0.1183, + "learning_rate": 1.6704008238029116e-05, + "loss": 0.097, "step": 143030 }, { "epoch": 6.67, - "learning_rate": 6.682762177113122e-06, - "loss": 0.3529, + "learning_rate": 1.6703540168193096e-05, + "loss": 0.1355, "step": 143035 }, { "epoch": 6.67, - "learning_rate": 6.682293375837983e-06, - "loss": 0.1289, + "learning_rate": 1.6703072098357072e-05, + "loss": 0.131, "step": 143040 }, { "epoch": 6.67, - "learning_rate": 6.681824574562843e-06, - "loss": 0.0019, + "learning_rate": 1.6702604028521055e-05, + "loss": 0.0201, "step": 143045 }, { "epoch": 6.67, - "learning_rate": 6.6813557732877046e-06, - "loss": 0.0297, + "learning_rate": 1.6702135958685035e-05, + "loss": 0.0468, "step": 143050 }, { "epoch": 6.68, - "learning_rate": 6.6808869720125645e-06, - "loss": 0.0174, + "learning_rate": 1.6701667888849015e-05, + "loss": 0.0148, "step": 143055 }, { "epoch": 6.68, - "learning_rate": 6.680418170737425e-06, - "loss": 0.0398, + "learning_rate": 1.6701199819012995e-05, + "loss": 0.0252, "step": 143060 }, { "epoch": 6.68, - "learning_rate": 6.679949369462285e-06, - "loss": 0.056, + "learning_rate": 1.6700731749176978e-05, + "loss": 0.0487, "step": 143065 }, { "epoch": 6.68, - "learning_rate": 6.679480568187145e-06, - "loss": 0.0598, + "learning_rate": 1.6700263679340958e-05, + "loss": 0.0706, "step": 143070 }, { "epoch": 6.68, - "learning_rate": 6.679011766912007e-06, - "loss": 0.1807, + "learning_rate": 1.6699795609504938e-05, + "loss": 0.0657, "step": 143075 }, { "epoch": 6.68, - "learning_rate": 6.6785429656368675e-06, - "loss": 0.1053, + "learning_rate": 1.669932753966892e-05, + "loss": 0.1667, "step": 143080 }, { "epoch": 6.68, - "learning_rate": 6.6780741643617275e-06, - "loss": 0.2885, + "learning_rate": 1.66988594698329e-05, + "loss": 0.156, "step": 143085 }, { "epoch": 6.68, - "learning_rate": 6.677605363086587e-06, - "loss": 0.2455, + "learning_rate": 1.669839139999688e-05, + "loss": 0.1768, "step": 143090 }, { "epoch": 6.68, - "learning_rate": 6.677136561811449e-06, - "loss": 0.0242, + "learning_rate": 1.669792333016086e-05, + "loss": 0.0019, "step": 143095 }, { "epoch": 6.68, - "learning_rate": 6.67666776053631e-06, - "loss": 0.0107, + "learning_rate": 1.6697455260324844e-05, + "loss": 0.0061, "step": 143100 }, { "epoch": 6.68, - "learning_rate": 6.67619895926117e-06, - "loss": 0.0312, + "learning_rate": 1.669698719048882e-05, + "loss": 0.0665, "step": 143105 }, { "epoch": 6.68, - "learning_rate": 6.67573015798603e-06, - "loss": 0.0206, + "learning_rate": 1.66965191206528e-05, + "loss": 0.0365, "step": 143110 }, { "epoch": 6.68, - "learning_rate": 6.6752613567108905e-06, - "loss": 0.0429, + "learning_rate": 1.669605105081678e-05, + "loss": 0.0492, "step": 143115 }, { "epoch": 6.68, - "learning_rate": 6.674792555435752e-06, - "loss": 0.0233, + "learning_rate": 1.6695582980980763e-05, + "loss": 0.0484, "step": 143120 }, { "epoch": 6.68, - "learning_rate": 6.674323754160612e-06, - "loss": 0.0434, + "learning_rate": 1.6695114911144743e-05, + "loss": 0.1118, "step": 143125 }, { "epoch": 6.68, - "learning_rate": 6.673854952885472e-06, - "loss": 0.1059, + "learning_rate": 1.6694646841308723e-05, + "loss": 0.0833, "step": 143130 }, { "epoch": 6.68, - "learning_rate": 6.673386151610333e-06, - "loss": 0.185, + "learning_rate": 1.6694178771472706e-05, + "loss": 0.1641, "step": 143135 }, { "epoch": 6.68, - "learning_rate": 6.672917350335193e-06, - "loss": 0.1576, + "learning_rate": 1.6693710701636686e-05, + "loss": 0.2435, "step": 143140 }, { "epoch": 6.68, - "learning_rate": 6.672448549060054e-06, - "loss": 0.0437, + "learning_rate": 1.6693242631800665e-05, + "loss": 0.0355, "step": 143145 }, { "epoch": 6.68, - "learning_rate": 6.671979747784914e-06, - "loss": 0.0277, + "learning_rate": 1.6692774561964645e-05, + "loss": 0.0687, "step": 143150 }, { "epoch": 6.68, - "learning_rate": 6.671510946509775e-06, - "loss": 0.0238, + "learning_rate": 1.669230649212863e-05, + "loss": 0.0191, "step": 143155 }, { "epoch": 6.68, - "learning_rate": 6.671042145234635e-06, - "loss": 0.0373, + "learning_rate": 1.6691838422292608e-05, + "loss": 0.0552, "step": 143160 }, { "epoch": 6.68, - "learning_rate": 6.670573343959497e-06, - "loss": 0.0315, + "learning_rate": 1.6691370352456588e-05, + "loss": 0.0382, "step": 143165 }, { "epoch": 6.68, - "learning_rate": 6.6701045426843565e-06, - "loss": 0.0915, + "learning_rate": 1.6690902282620564e-05, + "loss": 0.0293, "step": 143170 }, { "epoch": 6.68, - "learning_rate": 6.669635741409217e-06, - "loss": 0.043, + "learning_rate": 1.6690434212784548e-05, + "loss": 0.0768, "step": 143175 }, { "epoch": 6.68, - "learning_rate": 6.669166940134077e-06, - "loss": 0.0695, + "learning_rate": 1.6689966142948527e-05, + "loss": 0.0646, "step": 143180 }, { "epoch": 6.68, - "learning_rate": 6.668698138858938e-06, - "loss": 0.2204, + "learning_rate": 1.6689498073112507e-05, + "loss": 0.1695, "step": 143185 }, { "epoch": 6.68, - "learning_rate": 6.668229337583799e-06, - "loss": 0.1928, + "learning_rate": 1.668903000327649e-05, + "loss": 0.1749, "step": 143190 }, { "epoch": 6.68, - "learning_rate": 6.6677605363086596e-06, - "loss": 0.0237, + "learning_rate": 1.668856193344047e-05, + "loss": 0.022, "step": 143195 }, { "epoch": 6.68, - "learning_rate": 6.6672917350335195e-06, - "loss": 0.0237, + "learning_rate": 1.668809386360445e-05, + "loss": 0.0033, "step": 143200 }, { "epoch": 6.68, - "learning_rate": 6.66682293375838e-06, - "loss": 0.0161, + "learning_rate": 1.668762579376843e-05, + "loss": 0.0376, "step": 143205 }, { "epoch": 6.68, - "learning_rate": 6.66635413248324e-06, - "loss": 0.0887, + "learning_rate": 1.6687157723932413e-05, + "loss": 0.022, "step": 143210 }, { "epoch": 6.68, - "learning_rate": 6.665885331208102e-06, - "loss": 0.0411, + "learning_rate": 1.6686689654096393e-05, + "loss": 0.0393, "step": 143215 }, { "epoch": 6.68, - "learning_rate": 6.665416529932962e-06, - "loss": 0.0629, + "learning_rate": 1.6686221584260373e-05, + "loss": 0.0518, "step": 143220 }, { "epoch": 6.68, - "learning_rate": 6.6649477286578226e-06, - "loss": 0.0375, + "learning_rate": 1.6685753514424353e-05, + "loss": 0.0661, "step": 143225 }, { "epoch": 6.68, - "learning_rate": 6.6644789273826825e-06, - "loss": 0.1744, + "learning_rate": 1.6685285444588332e-05, + "loss": 0.1036, "step": 143230 }, { "epoch": 6.68, - "learning_rate": 6.664010126107544e-06, - "loss": 0.1058, + "learning_rate": 1.6684817374752312e-05, + "loss": 0.1732, "step": 143235 }, { "epoch": 6.68, - "learning_rate": 6.663541324832404e-06, - "loss": 0.1501, + "learning_rate": 1.6684349304916292e-05, + "loss": 0.2184, "step": 143240 }, { "epoch": 6.68, - "learning_rate": 6.663072523557265e-06, - "loss": 0.0124, + "learning_rate": 1.6683881235080272e-05, + "loss": 0.0157, "step": 143245 }, { "epoch": 6.68, - "learning_rate": 6.662603722282125e-06, - "loss": 0.0299, + "learning_rate": 1.6683413165244255e-05, + "loss": 0.0272, "step": 143250 }, { "epoch": 6.68, - "learning_rate": 6.6621349210069856e-06, - "loss": 0.0375, + "learning_rate": 1.6682945095408235e-05, + "loss": 0.0486, "step": 143255 }, { "epoch": 6.68, - "learning_rate": 6.661666119731846e-06, - "loss": 0.041, + "learning_rate": 1.6682477025572215e-05, + "loss": 0.0573, "step": 143260 }, { "epoch": 6.68, - "learning_rate": 6.661197318456707e-06, - "loss": 0.018, + "learning_rate": 1.6682008955736198e-05, + "loss": 0.0779, "step": 143265 }, { "epoch": 6.69, - "learning_rate": 6.660728517181567e-06, - "loss": 0.0388, + "learning_rate": 1.6681540885900178e-05, + "loss": 0.0199, "step": 143270 }, { "epoch": 6.69, - "learning_rate": 6.660259715906428e-06, - "loss": 0.1152, + "learning_rate": 1.6681072816064158e-05, + "loss": 0.0694, "step": 143275 }, { "epoch": 6.69, - "learning_rate": 6.659790914631288e-06, - "loss": 0.1067, + "learning_rate": 1.6680604746228137e-05, + "loss": 0.0682, "step": 143280 }, { "epoch": 6.69, - "learning_rate": 6.659322113356149e-06, - "loss": 0.1313, + "learning_rate": 1.668013667639212e-05, + "loss": 0.2373, "step": 143285 }, { "epoch": 6.69, - "learning_rate": 6.658853312081009e-06, - "loss": 0.1633, + "learning_rate": 1.66796686065561e-05, + "loss": 0.2124, "step": 143290 }, { "epoch": 6.69, - "learning_rate": 6.65838451080587e-06, - "loss": 0.0093, + "learning_rate": 1.6679200536720077e-05, + "loss": 0.0003, "step": 143295 }, { "epoch": 6.69, - "learning_rate": 6.65791570953073e-06, - "loss": 0.0396, + "learning_rate": 1.6678732466884057e-05, + "loss": 0.0214, "step": 143300 }, { "epoch": 6.69, - "learning_rate": 6.657446908255592e-06, - "loss": 0.0075, + "learning_rate": 1.667826439704804e-05, + "loss": 0.022, "step": 143305 }, { "epoch": 6.69, - "learning_rate": 6.656978106980452e-06, - "loss": 0.047, + "learning_rate": 1.667779632721202e-05, + "loss": 0.0242, "step": 143310 }, { "epoch": 6.69, - "learning_rate": 6.656509305705312e-06, - "loss": 0.0422, + "learning_rate": 1.6677328257376e-05, + "loss": 0.0065, "step": 143315 }, { "epoch": 6.69, - "learning_rate": 6.656040504430172e-06, - "loss": 0.058, + "learning_rate": 1.6676860187539983e-05, + "loss": 0.072, "step": 143320 }, { "epoch": 6.69, - "learning_rate": 6.655571703155032e-06, - "loss": 0.0078, + "learning_rate": 1.6676392117703963e-05, + "loss": 0.0395, "step": 143325 }, { "epoch": 6.69, - "learning_rate": 6.655102901879894e-06, - "loss": 0.1769, + "learning_rate": 1.6675924047867942e-05, + "loss": 0.1122, "step": 143330 }, { "epoch": 6.69, - "learning_rate": 6.654634100604755e-06, - "loss": 0.2019, + "learning_rate": 1.6675455978031922e-05, + "loss": 0.2097, "step": 143335 }, { "epoch": 6.69, - "learning_rate": 6.654165299329615e-06, - "loss": 0.1558, + "learning_rate": 1.6674987908195905e-05, + "loss": 0.1598, "step": 143340 }, { "epoch": 6.69, - "learning_rate": 6.6536964980544745e-06, - "loss": 0.0287, + "learning_rate": 1.6674519838359885e-05, + "loss": 0.0099, "step": 143345 }, { "epoch": 6.69, - "learning_rate": 6.653227696779336e-06, - "loss": 0.0169, + "learning_rate": 1.6674051768523865e-05, + "loss": 0.0495, "step": 143350 }, { "epoch": 6.69, - "learning_rate": 6.652758895504197e-06, - "loss": 0.0286, + "learning_rate": 1.6673583698687845e-05, + "loss": 0.025, "step": 143355 }, { "epoch": 6.69, - "learning_rate": 6.652290094229057e-06, - "loss": 0.0177, + "learning_rate": 1.6673115628851825e-05, + "loss": 0.0217, "step": 143360 }, { "epoch": 6.69, - "learning_rate": 6.651821292953917e-06, - "loss": 0.0914, + "learning_rate": 1.6672647559015804e-05, + "loss": 0.118, "step": 143365 }, { "epoch": 6.69, - "learning_rate": 6.651352491678778e-06, - "loss": 0.0865, + "learning_rate": 1.6672179489179784e-05, + "loss": 0.0876, "step": 143370 }, { "epoch": 6.69, - "learning_rate": 6.650883690403639e-06, - "loss": 0.0987, + "learning_rate": 1.6671711419343767e-05, + "loss": 0.0321, "step": 143375 }, { "epoch": 6.69, - "learning_rate": 6.650414889128499e-06, - "loss": 0.0963, + "learning_rate": 1.6671243349507747e-05, + "loss": 0.0669, "step": 143380 }, { "epoch": 6.69, - "learning_rate": 6.649946087853359e-06, - "loss": 0.1337, + "learning_rate": 1.6670775279671727e-05, + "loss": 0.235, "step": 143385 }, { "epoch": 6.69, - "learning_rate": 6.64947728657822e-06, - "loss": 0.1437, + "learning_rate": 1.6670307209835707e-05, + "loss": 0.1854, "step": 143390 }, { "epoch": 6.69, - "learning_rate": 6.64900848530308e-06, - "loss": 0.0157, + "learning_rate": 1.666983913999969e-05, + "loss": 0.017, "step": 143395 }, { "epoch": 6.69, - "learning_rate": 6.648539684027941e-06, - "loss": 0.0475, + "learning_rate": 1.666937107016367e-05, + "loss": 0.0226, "step": 143400 }, { "epoch": 6.69, - "learning_rate": 6.648070882752801e-06, - "loss": 0.0514, + "learning_rate": 1.666890300032765e-05, + "loss": 0.0371, "step": 143405 }, { "epoch": 6.69, - "learning_rate": 6.647602081477662e-06, - "loss": 0.0374, + "learning_rate": 1.666843493049163e-05, + "loss": 0.0302, "step": 143410 }, { "epoch": 6.69, - "learning_rate": 6.647133280202522e-06, - "loss": 0.0712, + "learning_rate": 1.6667966860655613e-05, + "loss": 0.0318, "step": 143415 }, { "epoch": 6.69, - "learning_rate": 6.646664478927384e-06, - "loss": 0.0582, + "learning_rate": 1.666749879081959e-05, + "loss": 0.0547, "step": 143420 }, { "epoch": 6.69, - "learning_rate": 6.6461956776522445e-06, - "loss": 0.0646, + "learning_rate": 1.666703072098357e-05, + "loss": 0.1263, "step": 143425 }, { "epoch": 6.69, - "learning_rate": 6.645726876377104e-06, - "loss": 0.0631, + "learning_rate": 1.6666562651147552e-05, + "loss": 0.1848, "step": 143430 }, { "epoch": 6.69, - "learning_rate": 6.645258075101964e-06, - "loss": 0.2166, + "learning_rate": 1.6666094581311532e-05, + "loss": 0.1887, "step": 143435 }, { "epoch": 6.69, - "learning_rate": 6.644789273826825e-06, - "loss": 0.1651, + "learning_rate": 1.6665626511475512e-05, + "loss": 0.0796, "step": 143440 }, { "epoch": 6.69, - "learning_rate": 6.644320472551687e-06, - "loss": 0.0093, + "learning_rate": 1.6665158441639492e-05, + "loss": 0.0058, "step": 143445 }, { "epoch": 6.69, - "learning_rate": 6.643851671276547e-06, - "loss": 0.0206, + "learning_rate": 1.6664690371803475e-05, + "loss": 0.0152, "step": 143450 }, { "epoch": 6.69, - "learning_rate": 6.643382870001407e-06, - "loss": 0.013, + "learning_rate": 1.6664222301967455e-05, + "loss": 0.0159, "step": 143455 }, { "epoch": 6.69, - "learning_rate": 6.642914068726267e-06, - "loss": 0.0156, + "learning_rate": 1.6663754232131435e-05, + "loss": 0.0352, "step": 143460 }, { "epoch": 6.69, - "learning_rate": 6.642445267451127e-06, - "loss": 0.0707, + "learning_rate": 1.6663286162295414e-05, + "loss": 0.0623, "step": 143465 }, { "epoch": 6.69, - "learning_rate": 6.641976466175989e-06, - "loss": 0.0896, + "learning_rate": 1.6662818092459398e-05, + "loss": 0.0267, "step": 143470 }, { "epoch": 6.69, - "learning_rate": 6.641507664900849e-06, - "loss": 0.0925, + "learning_rate": 1.6662350022623377e-05, + "loss": 0.1153, "step": 143475 }, { "epoch": 6.69, - "learning_rate": 6.64103886362571e-06, - "loss": 0.0613, + "learning_rate": 1.6661881952787357e-05, + "loss": 0.0557, "step": 143480 }, { "epoch": 6.7, - "learning_rate": 6.64057006235057e-06, - "loss": 0.1486, + "learning_rate": 1.6661413882951334e-05, + "loss": 0.1702, "step": 143485 }, { "epoch": 6.7, - "learning_rate": 6.640101261075431e-06, - "loss": 0.112, + "learning_rate": 1.6660945813115317e-05, + "loss": 0.1014, "step": 143490 }, { "epoch": 6.7, - "learning_rate": 6.639632459800291e-06, - "loss": 0.0215, + "learning_rate": 1.6660477743279297e-05, + "loss": 0.0252, "step": 143495 }, { "epoch": 6.7, - "learning_rate": 6.639163658525152e-06, - "loss": 0.0449, + "learning_rate": 1.6660009673443276e-05, + "loss": 0.0015, "step": 143500 }, { "epoch": 6.7, - "learning_rate": 6.638694857250012e-06, - "loss": 0.0432, + "learning_rate": 1.665954160360726e-05, + "loss": 0.0391, "step": 143505 }, { "epoch": 6.7, - "learning_rate": 6.638226055974873e-06, - "loss": 0.0683, + "learning_rate": 1.665907353377124e-05, + "loss": 0.0528, "step": 143510 }, { "epoch": 6.7, - "learning_rate": 6.6377572546997334e-06, - "loss": 0.0413, + "learning_rate": 1.665860546393522e-05, + "loss": 0.0294, "step": 143515 }, { "epoch": 6.7, - "learning_rate": 6.637288453424594e-06, - "loss": 0.082, + "learning_rate": 1.66581373940992e-05, + "loss": 0.0588, "step": 143520 }, { "epoch": 6.7, - "learning_rate": 6.636819652149454e-06, - "loss": 0.0899, + "learning_rate": 1.6657669324263182e-05, + "loss": 0.0306, "step": 143525 }, { "epoch": 6.7, - "learning_rate": 6.636350850874315e-06, - "loss": 0.1147, + "learning_rate": 1.6657201254427162e-05, + "loss": 0.0192, "step": 143530 }, { "epoch": 6.7, - "learning_rate": 6.635882049599175e-06, - "loss": 0.179, + "learning_rate": 1.6656733184591142e-05, + "loss": 0.1017, "step": 143535 }, { "epoch": 6.7, - "learning_rate": 6.6354132483240365e-06, - "loss": 0.1846, + "learning_rate": 1.6656265114755122e-05, + "loss": 0.1601, "step": 143540 }, { "epoch": 6.7, - "learning_rate": 6.6349444470488964e-06, - "loss": 0.0104, + "learning_rate": 1.66557970449191e-05, + "loss": 0.0414, "step": 143545 }, { "epoch": 6.7, - "learning_rate": 6.634475645773757e-06, - "loss": 0.0196, + "learning_rate": 1.665532897508308e-05, + "loss": 0.0316, "step": 143550 }, { "epoch": 6.7, - "learning_rate": 6.634006844498617e-06, - "loss": 0.0069, + "learning_rate": 1.665486090524706e-05, + "loss": 0.0174, "step": 143555 }, { "epoch": 6.7, - "learning_rate": 6.633538043223479e-06, - "loss": 0.0299, + "learning_rate": 1.6654392835411044e-05, + "loss": 0.0278, "step": 143560 }, { "epoch": 6.7, - "learning_rate": 6.633069241948339e-06, - "loss": 0.0354, + "learning_rate": 1.6653924765575024e-05, + "loss": 0.0316, "step": 143565 }, { "epoch": 6.7, - "learning_rate": 6.6326004406731995e-06, - "loss": 0.0324, + "learning_rate": 1.6653456695739004e-05, + "loss": 0.0179, "step": 143570 }, { "epoch": 6.7, - "learning_rate": 6.632131639398059e-06, - "loss": 0.1025, + "learning_rate": 1.6652988625902984e-05, + "loss": 0.0571, "step": 143575 }, { "epoch": 6.7, - "learning_rate": 6.631662838122919e-06, - "loss": 0.1543, + "learning_rate": 1.6652520556066967e-05, + "loss": 0.0728, "step": 143580 }, { "epoch": 6.7, - "learning_rate": 6.631194036847781e-06, - "loss": 0.107, + "learning_rate": 1.6652052486230947e-05, + "loss": 0.1983, "step": 143585 }, { "epoch": 6.7, - "learning_rate": 6.630725235572642e-06, - "loss": 0.2029, + "learning_rate": 1.6651584416394927e-05, + "loss": 0.2035, "step": 143590 }, { "epoch": 6.7, - "learning_rate": 6.630256434297502e-06, - "loss": 0.0008, + "learning_rate": 1.6651116346558907e-05, + "loss": 0.0105, "step": 143595 }, { "epoch": 6.7, - "learning_rate": 6.6297876330223625e-06, - "loss": 0.016, + "learning_rate": 1.665064827672289e-05, + "loss": 0.0524, "step": 143600 }, { "epoch": 6.7, - "learning_rate": 6.629318831747222e-06, - "loss": 0.0248, + "learning_rate": 1.665018020688687e-05, + "loss": 0.0162, "step": 143605 }, { "epoch": 6.7, - "learning_rate": 6.628850030472084e-06, - "loss": 0.0068, + "learning_rate": 1.6649712137050846e-05, + "loss": 0.0298, "step": 143610 }, { "epoch": 6.7, - "learning_rate": 6.628381229196944e-06, - "loss": 0.0281, + "learning_rate": 1.664924406721483e-05, + "loss": 0.0722, "step": 143615 }, { "epoch": 6.7, - "learning_rate": 6.627912427921805e-06, - "loss": 0.0497, + "learning_rate": 1.664877599737881e-05, + "loss": 0.0565, "step": 143620 }, { "epoch": 6.7, - "learning_rate": 6.627443626646665e-06, - "loss": 0.0856, + "learning_rate": 1.664830792754279e-05, + "loss": 0.0518, "step": 143625 }, { "epoch": 6.7, - "learning_rate": 6.626974825371526e-06, - "loss": 0.0349, + "learning_rate": 1.664783985770677e-05, + "loss": 0.1344, "step": 143630 }, { "epoch": 6.7, - "learning_rate": 6.626506024096386e-06, - "loss": 0.1995, + "learning_rate": 1.6647371787870752e-05, + "loss": 0.1562, "step": 143635 }, { "epoch": 6.7, - "learning_rate": 6.626037222821247e-06, - "loss": 0.1415, + "learning_rate": 1.664690371803473e-05, + "loss": 0.1359, "step": 143640 }, { "epoch": 6.7, - "learning_rate": 6.625568421546107e-06, - "loss": 0.0004, + "learning_rate": 1.664643564819871e-05, + "loss": 0.0091, "step": 143645 }, { "epoch": 6.7, - "learning_rate": 6.625099620270967e-06, - "loss": 0.0164, + "learning_rate": 1.664596757836269e-05, + "loss": 0.0247, "step": 143650 }, { "epoch": 6.7, - "learning_rate": 6.6246308189958285e-06, - "loss": 0.0274, + "learning_rate": 1.6645499508526675e-05, + "loss": 0.0186, "step": 143655 }, { "epoch": 6.7, - "learning_rate": 6.624162017720689e-06, - "loss": 0.0417, + "learning_rate": 1.6645031438690654e-05, + "loss": 0.02, "step": 143660 }, { "epoch": 6.7, - "learning_rate": 6.623693216445549e-06, - "loss": 0.02, + "learning_rate": 1.6644563368854634e-05, + "loss": 0.0484, "step": 143665 }, { "epoch": 6.7, - "learning_rate": 6.623224415170409e-06, - "loss": 0.0786, + "learning_rate": 1.6644095299018614e-05, + "loss": 0.0672, "step": 143670 }, { "epoch": 6.7, - "learning_rate": 6.622755613895271e-06, - "loss": 0.0554, + "learning_rate": 1.6643627229182594e-05, + "loss": 0.0627, "step": 143675 }, { "epoch": 6.7, - "learning_rate": 6.622286812620132e-06, - "loss": 0.2297, + "learning_rate": 1.6643159159346574e-05, + "loss": 0.0899, "step": 143680 }, { "epoch": 6.7, - "learning_rate": 6.6218180113449915e-06, - "loss": 0.1804, + "learning_rate": 1.6642691089510553e-05, + "loss": 0.1493, "step": 143685 }, { "epoch": 6.7, - "learning_rate": 6.6213492100698514e-06, - "loss": 0.1023, + "learning_rate": 1.6642223019674537e-05, + "loss": 0.17, "step": 143690 }, { "epoch": 6.71, - "learning_rate": 6.620880408794712e-06, - "loss": 0.003, + "learning_rate": 1.6641754949838516e-05, + "loss": 0.0114, "step": 143695 }, { "epoch": 6.71, - "learning_rate": 6.620411607519574e-06, - "loss": 0.0589, + "learning_rate": 1.6641286880002496e-05, + "loss": 0.0303, "step": 143700 }, { "epoch": 6.71, - "learning_rate": 6.619942806244434e-06, - "loss": 0.0765, + "learning_rate": 1.6640818810166476e-05, + "loss": 0.0253, "step": 143705 }, { "epoch": 6.71, - "learning_rate": 6.619474004969294e-06, - "loss": 0.0465, + "learning_rate": 1.664035074033046e-05, + "loss": 0.0266, "step": 143710 }, { "epoch": 6.71, - "learning_rate": 6.6190052036941545e-06, - "loss": 0.0405, + "learning_rate": 1.663988267049444e-05, + "loss": 0.0815, "step": 143715 }, { "epoch": 6.71, - "learning_rate": 6.6185364024190144e-06, - "loss": 0.1367, + "learning_rate": 1.663941460065842e-05, + "loss": 0.0454, "step": 143720 }, { "epoch": 6.71, - "learning_rate": 6.618067601143876e-06, - "loss": 0.0887, + "learning_rate": 1.66389465308224e-05, + "loss": 0.1249, "step": 143725 }, { "epoch": 6.71, - "learning_rate": 6.617598799868736e-06, - "loss": 0.077, + "learning_rate": 1.6638478460986382e-05, + "loss": 0.0869, "step": 143730 }, { "epoch": 6.71, - "learning_rate": 6.617129998593597e-06, - "loss": 0.2018, + "learning_rate": 1.663801039115036e-05, + "loss": 0.2546, "step": 143735 }, { "epoch": 6.71, - "learning_rate": 6.616661197318457e-06, - "loss": 0.228, + "learning_rate": 1.6637542321314338e-05, + "loss": 0.1391, "step": 143740 }, { "epoch": 6.71, - "learning_rate": 6.616192396043318e-06, - "loss": 0.008, + "learning_rate": 1.663707425147832e-05, + "loss": 0.0208, "step": 143745 }, { "epoch": 6.71, - "learning_rate": 6.615723594768178e-06, - "loss": 0.0179, + "learning_rate": 1.66366061816423e-05, + "loss": 0.0119, "step": 143750 }, { "epoch": 6.71, - "learning_rate": 6.615254793493039e-06, - "loss": 0.0327, + "learning_rate": 1.663613811180628e-05, + "loss": 0.0176, "step": 143755 }, { "epoch": 6.71, - "learning_rate": 6.614785992217899e-06, - "loss": 0.0316, + "learning_rate": 1.663567004197026e-05, + "loss": 0.0583, "step": 143760 }, { "epoch": 6.71, - "learning_rate": 6.61431719094276e-06, - "loss": 0.0384, + "learning_rate": 1.6635201972134244e-05, + "loss": 0.0508, "step": 143765 }, { "epoch": 6.71, - "learning_rate": 6.6138483896676205e-06, - "loss": 0.0609, + "learning_rate": 1.6634733902298224e-05, + "loss": 0.0214, "step": 143770 }, { "epoch": 6.71, - "learning_rate": 6.613379588392481e-06, - "loss": 0.0828, + "learning_rate": 1.6634265832462204e-05, + "loss": 0.0716, "step": 143775 }, { "epoch": 6.71, - "learning_rate": 6.612910787117341e-06, - "loss": 0.1346, + "learning_rate": 1.6633797762626184e-05, + "loss": 0.1411, "step": 143780 }, { "epoch": 6.71, - "learning_rate": 6.612441985842202e-06, - "loss": 0.2565, + "learning_rate": 1.6633329692790167e-05, + "loss": 0.1363, "step": 143785 }, { "epoch": 6.71, - "learning_rate": 6.611973184567062e-06, - "loss": 0.1943, + "learning_rate": 1.6632861622954147e-05, + "loss": 0.118, "step": 143790 }, { "epoch": 6.71, - "learning_rate": 6.611504383291924e-06, - "loss": 0.0483, + "learning_rate": 1.6632393553118126e-05, + "loss": 0.0232, "step": 143795 }, { "epoch": 6.71, - "learning_rate": 6.6110355820167835e-06, - "loss": 0.0017, + "learning_rate": 1.6631925483282106e-05, + "loss": 0.0215, "step": 143800 }, { "epoch": 6.71, - "learning_rate": 6.610566780741644e-06, - "loss": 0.0178, + "learning_rate": 1.6631457413446086e-05, + "loss": 0.0181, "step": 143805 }, { "epoch": 6.71, - "learning_rate": 6.610097979466504e-06, - "loss": 0.0361, + "learning_rate": 1.6630989343610066e-05, + "loss": 0.0251, "step": 143810 }, { "epoch": 6.71, - "learning_rate": 6.609629178191366e-06, - "loss": 0.1006, + "learning_rate": 1.6630521273774046e-05, + "loss": 0.0399, "step": 143815 }, { "epoch": 6.71, - "learning_rate": 6.609160376916226e-06, - "loss": 0.0967, + "learning_rate": 1.663005320393803e-05, + "loss": 0.0263, "step": 143820 }, { "epoch": 6.71, - "learning_rate": 6.608691575641087e-06, - "loss": 0.1177, + "learning_rate": 1.662958513410201e-05, + "loss": 0.1073, "step": 143825 }, { "epoch": 6.71, - "learning_rate": 6.6082227743659465e-06, - "loss": 0.0614, + "learning_rate": 1.662911706426599e-05, + "loss": 0.0649, "step": 143830 }, { "epoch": 6.71, - "learning_rate": 6.607753973090807e-06, - "loss": 0.1622, + "learning_rate": 1.6628648994429968e-05, + "loss": 0.2438, "step": 143835 }, { "epoch": 6.71, - "learning_rate": 6.607285171815668e-06, - "loss": 0.1453, + "learning_rate": 1.662818092459395e-05, + "loss": 0.1577, "step": 143840 }, { "epoch": 6.71, - "learning_rate": 6.606816370540529e-06, - "loss": 0.0366, + "learning_rate": 1.662771285475793e-05, + "loss": 0.0149, "step": 143845 }, { "epoch": 6.71, - "learning_rate": 6.606347569265389e-06, - "loss": 0.0292, + "learning_rate": 1.662724478492191e-05, + "loss": 0.0053, "step": 143850 }, { "epoch": 6.71, - "learning_rate": 6.60587876799025e-06, - "loss": 0.0097, + "learning_rate": 1.662677671508589e-05, + "loss": 0.0427, "step": 143855 }, { "epoch": 6.71, - "learning_rate": 6.6054099667151095e-06, - "loss": 0.0585, + "learning_rate": 1.6626308645249874e-05, + "loss": 0.0114, "step": 143860 }, { "epoch": 6.71, - "learning_rate": 6.604941165439971e-06, - "loss": 0.0157, + "learning_rate": 1.662584057541385e-05, + "loss": 0.0799, "step": 143865 }, { "epoch": 6.71, - "learning_rate": 6.604472364164831e-06, - "loss": 0.1298, + "learning_rate": 1.662537250557783e-05, + "loss": 0.075, "step": 143870 }, { "epoch": 6.71, - "learning_rate": 6.604003562889692e-06, - "loss": 0.0509, + "learning_rate": 1.6624904435741814e-05, + "loss": 0.0324, "step": 143875 }, { "epoch": 6.71, - "learning_rate": 6.603534761614552e-06, - "loss": 0.0513, + "learning_rate": 1.6624436365905793e-05, + "loss": 0.1312, "step": 143880 }, { "epoch": 6.71, - "learning_rate": 6.603065960339413e-06, - "loss": 0.2577, + "learning_rate": 1.6623968296069773e-05, + "loss": 0.1579, "step": 143885 }, { "epoch": 6.71, - "learning_rate": 6.602597159064273e-06, - "loss": 0.1955, + "learning_rate": 1.6623500226233753e-05, + "loss": 0.0606, "step": 143890 }, { "epoch": 6.71, - "learning_rate": 6.602128357789134e-06, - "loss": 0.0382, + "learning_rate": 1.6623032156397736e-05, + "loss": 0.0091, "step": 143895 }, { "epoch": 6.71, - "learning_rate": 6.601659556513994e-06, - "loss": 0.0126, + "learning_rate": 1.6622564086561716e-05, + "loss": 0.044, "step": 143900 }, { "epoch": 6.71, - "learning_rate": 6.601190755238854e-06, - "loss": 0.0641, + "learning_rate": 1.6622096016725696e-05, + "loss": 0.0728, "step": 143905 }, { "epoch": 6.72, - "learning_rate": 6.600721953963716e-06, - "loss": 0.0329, + "learning_rate": 1.6621627946889676e-05, + "loss": 0.0375, "step": 143910 }, { "epoch": 6.72, - "learning_rate": 6.600253152688576e-06, - "loss": 0.0767, + "learning_rate": 1.662115987705366e-05, + "loss": 0.0545, "step": 143915 }, { "epoch": 6.72, - "learning_rate": 6.599784351413436e-06, - "loss": 0.0886, + "learning_rate": 1.662069180721764e-05, + "loss": 0.0431, "step": 143920 }, { "epoch": 6.72, - "learning_rate": 6.599315550138296e-06, - "loss": 0.0657, + "learning_rate": 1.6620223737381615e-05, + "loss": 0.0455, "step": 143925 }, { "epoch": 6.72, - "learning_rate": 6.598846748863157e-06, - "loss": 0.1265, + "learning_rate": 1.66197556675456e-05, + "loss": 0.1381, "step": 143930 }, { "epoch": 6.72, - "learning_rate": 6.598377947588019e-06, - "loss": 0.0837, + "learning_rate": 1.6619287597709578e-05, + "loss": 0.1652, "step": 143935 }, { "epoch": 6.72, - "learning_rate": 6.597909146312879e-06, - "loss": 0.1252, + "learning_rate": 1.6618819527873558e-05, + "loss": 0.2602, "step": 143940 }, { "epoch": 6.72, - "learning_rate": 6.5974403450377386e-06, - "loss": 0.0168, + "learning_rate": 1.6618351458037538e-05, + "loss": 0.0144, "step": 143945 }, { "epoch": 6.72, - "learning_rate": 6.596971543762599e-06, - "loss": 0.0198, + "learning_rate": 1.661788338820152e-05, + "loss": 0.0028, "step": 143950 }, { "epoch": 6.72, - "learning_rate": 6.596502742487461e-06, - "loss": 0.0113, + "learning_rate": 1.66174153183655e-05, + "loss": 0.0553, "step": 143955 }, { "epoch": 6.72, - "learning_rate": 6.596033941212321e-06, - "loss": 0.052, + "learning_rate": 1.661694724852948e-05, + "loss": 0.0089, "step": 143960 }, { "epoch": 6.72, - "learning_rate": 6.595565139937181e-06, - "loss": 0.0899, + "learning_rate": 1.661647917869346e-05, + "loss": 0.0339, "step": 143965 }, { "epoch": 6.72, - "learning_rate": 6.595096338662042e-06, - "loss": 0.0503, + "learning_rate": 1.6616011108857444e-05, + "loss": 0.1158, "step": 143970 }, { "epoch": 6.72, - "learning_rate": 6.5946275373869015e-06, - "loss": 0.0317, + "learning_rate": 1.6615543039021424e-05, + "loss": 0.0527, "step": 143975 }, { "epoch": 6.72, - "learning_rate": 6.594158736111763e-06, - "loss": 0.16, + "learning_rate": 1.6615074969185403e-05, + "loss": 0.1865, "step": 143980 }, { "epoch": 6.72, - "learning_rate": 6.593689934836623e-06, - "loss": 0.1751, + "learning_rate": 1.6614606899349387e-05, + "loss": 0.2267, "step": 143985 }, { "epoch": 6.72, - "learning_rate": 6.593221133561484e-06, - "loss": 0.145, + "learning_rate": 1.6614138829513363e-05, + "loss": 0.1777, "step": 143990 }, { "epoch": 6.72, - "learning_rate": 6.592752332286344e-06, - "loss": 0.0006, + "learning_rate": 1.6613670759677343e-05, + "loss": 0.003, "step": 143995 }, { "epoch": 6.72, - "learning_rate": 6.5922835310112054e-06, - "loss": 0.0144, + "learning_rate": 1.6613202689841323e-05, + "loss": 0.0169, "step": 144000 }, { "epoch": 6.72, - "learning_rate": 6.591814729736065e-06, - "loss": 0.0409, + "learning_rate": 1.6612734620005306e-05, + "loss": 0.052, "step": 144005 }, { "epoch": 6.72, - "learning_rate": 6.591345928460926e-06, - "loss": 0.0083, + "learning_rate": 1.6612266550169286e-05, + "loss": 0.0396, "step": 144010 }, { "epoch": 6.72, - "learning_rate": 6.590877127185786e-06, - "loss": 0.0678, + "learning_rate": 1.6611798480333265e-05, + "loss": 0.0219, "step": 144015 }, { "epoch": 6.72, - "learning_rate": 6.590408325910647e-06, - "loss": 0.0379, + "learning_rate": 1.6611330410497245e-05, + "loss": 0.0776, "step": 144020 }, { "epoch": 6.72, - "learning_rate": 6.589939524635508e-06, - "loss": 0.1232, + "learning_rate": 1.661086234066123e-05, + "loss": 0.1187, "step": 144025 }, { "epoch": 6.72, - "learning_rate": 6.5894707233603684e-06, - "loss": 0.0637, + "learning_rate": 1.6610394270825208e-05, + "loss": 0.073, "step": 144030 }, { "epoch": 6.72, - "learning_rate": 6.589001922085228e-06, - "loss": 0.1972, + "learning_rate": 1.6609926200989188e-05, + "loss": 0.122, "step": 144035 }, { "epoch": 6.72, - "learning_rate": 6.588533120810089e-06, - "loss": 0.1002, + "learning_rate": 1.660945813115317e-05, + "loss": 0.1727, "step": 144040 }, { "epoch": 6.72, - "learning_rate": 6.588064319534949e-06, - "loss": 0.0113, + "learning_rate": 1.660899006131715e-05, + "loss": 0.0159, "step": 144045 }, { "epoch": 6.72, - "learning_rate": 6.587595518259811e-06, - "loss": 0.0129, + "learning_rate": 1.660852199148113e-05, + "loss": 0.0241, "step": 144050 }, { "epoch": 6.72, - "learning_rate": 6.587126716984671e-06, - "loss": 0.028, + "learning_rate": 1.6608053921645107e-05, + "loss": 0.0452, "step": 144055 }, { "epoch": 6.72, - "learning_rate": 6.5866579157095314e-06, - "loss": 0.0982, + "learning_rate": 1.660758585180909e-05, + "loss": 0.092, "step": 144060 }, { "epoch": 6.72, - "learning_rate": 6.586189114434391e-06, - "loss": 0.0474, + "learning_rate": 1.660711778197307e-05, + "loss": 0.0297, "step": 144065 }, { "epoch": 6.72, - "learning_rate": 6.585720313159253e-06, - "loss": 0.0329, + "learning_rate": 1.660664971213705e-05, + "loss": 0.0256, "step": 144070 }, { "epoch": 6.72, - "learning_rate": 6.585251511884113e-06, - "loss": 0.0945, + "learning_rate": 1.660618164230103e-05, + "loss": 0.1963, "step": 144075 }, { "epoch": 6.72, - "learning_rate": 6.584782710608974e-06, - "loss": 0.1417, + "learning_rate": 1.6605713572465013e-05, + "loss": 0.0953, "step": 144080 }, { "epoch": 6.72, - "learning_rate": 6.584313909333834e-06, - "loss": 0.166, + "learning_rate": 1.6605245502628993e-05, + "loss": 0.2133, "step": 144085 }, { "epoch": 6.72, - "learning_rate": 6.583845108058694e-06, - "loss": 0.2201, + "learning_rate": 1.6604777432792973e-05, + "loss": 0.1363, "step": 144090 }, { "epoch": 6.72, - "learning_rate": 6.583376306783555e-06, - "loss": 0.039, + "learning_rate": 1.6604309362956953e-05, + "loss": 0.0566, "step": 144095 }, { "epoch": 6.72, - "learning_rate": 6.582907505508416e-06, - "loss": 0.0478, + "learning_rate": 1.6603841293120936e-05, + "loss": 0.0688, "step": 144100 }, { "epoch": 6.72, - "learning_rate": 6.582438704233276e-06, - "loss": 0.0179, + "learning_rate": 1.6603373223284916e-05, + "loss": 0.138, "step": 144105 }, { "epoch": 6.72, - "learning_rate": 6.581969902958137e-06, - "loss": 0.0314, + "learning_rate": 1.6602905153448896e-05, + "loss": 0.0178, "step": 144110 }, { "epoch": 6.72, - "learning_rate": 6.581501101682997e-06, - "loss": 0.0445, + "learning_rate": 1.6602437083612875e-05, + "loss": 0.024, "step": 144115 }, { "epoch": 6.72, - "learning_rate": 6.581032300407858e-06, - "loss": 0.0732, + "learning_rate": 1.6601969013776855e-05, + "loss": 0.0573, "step": 144120 }, { "epoch": 6.73, - "learning_rate": 6.580563499132718e-06, - "loss": 0.1289, + "learning_rate": 1.6601500943940835e-05, + "loss": 0.038, "step": 144125 }, { "epoch": 6.73, - "learning_rate": 6.580094697857579e-06, - "loss": 0.1481, + "learning_rate": 1.6601032874104815e-05, + "loss": 0.2073, "step": 144130 }, { "epoch": 6.73, - "learning_rate": 6.579625896582439e-06, - "loss": 0.2753, + "learning_rate": 1.6600564804268798e-05, + "loss": 0.1253, "step": 144135 }, { "epoch": 6.73, - "learning_rate": 6.5791570953073005e-06, - "loss": 0.1902, + "learning_rate": 1.6600096734432778e-05, + "loss": 0.1774, "step": 144140 }, { "epoch": 6.73, - "learning_rate": 6.5786882940321605e-06, - "loss": 0.0023, + "learning_rate": 1.6599628664596758e-05, + "loss": 0.0131, "step": 144145 }, { "epoch": 6.73, - "learning_rate": 6.578219492757021e-06, - "loss": 0.0152, + "learning_rate": 1.6599160594760737e-05, + "loss": 0.0187, "step": 144150 }, { "epoch": 6.73, - "learning_rate": 6.577750691481881e-06, - "loss": 0.0335, + "learning_rate": 1.659869252492472e-05, + "loss": 0.0267, "step": 144155 }, { "epoch": 6.73, - "learning_rate": 6.577281890206741e-06, - "loss": 0.0645, + "learning_rate": 1.65982244550887e-05, + "loss": 0.0618, "step": 144160 }, { "epoch": 6.73, - "learning_rate": 6.576813088931603e-06, - "loss": 0.0774, + "learning_rate": 1.659775638525268e-05, + "loss": 0.0736, "step": 144165 }, { "epoch": 6.73, - "learning_rate": 6.5763442876564635e-06, - "loss": 0.0363, + "learning_rate": 1.6597288315416664e-05, + "loss": 0.0251, "step": 144170 }, { "epoch": 6.73, - "learning_rate": 6.5758754863813235e-06, - "loss": 0.1048, + "learning_rate": 1.6596820245580643e-05, + "loss": 0.1548, "step": 144175 }, { "epoch": 6.73, - "learning_rate": 6.575406685106183e-06, - "loss": 0.0769, + "learning_rate": 1.659635217574462e-05, + "loss": 0.097, "step": 144180 }, { "epoch": 6.73, - "learning_rate": 6.574937883831044e-06, - "loss": 0.1942, + "learning_rate": 1.65958841059086e-05, + "loss": 0.1036, "step": 144185 }, { "epoch": 6.73, - "learning_rate": 6.574469082555906e-06, - "loss": 0.0819, + "learning_rate": 1.6595416036072583e-05, + "loss": 0.1585, "step": 144190 }, { "epoch": 6.73, - "learning_rate": 6.574000281280766e-06, - "loss": 0.0165, + "learning_rate": 1.6594947966236563e-05, + "loss": 0.0012, "step": 144195 }, { "epoch": 6.73, - "learning_rate": 6.573531480005626e-06, - "loss": 0.0088, + "learning_rate": 1.6594479896400542e-05, + "loss": 0.0254, "step": 144200 }, { "epoch": 6.73, - "learning_rate": 6.5730626787304864e-06, - "loss": 0.0626, + "learning_rate": 1.6594011826564522e-05, + "loss": 0.0424, "step": 144205 }, { "epoch": 6.73, - "learning_rate": 6.572593877455348e-06, - "loss": 0.0654, + "learning_rate": 1.6593543756728505e-05, + "loss": 0.0483, "step": 144210 }, { "epoch": 6.73, - "learning_rate": 6.572125076180208e-06, - "loss": 0.0306, + "learning_rate": 1.6593075686892485e-05, + "loss": 0.029, "step": 144215 }, { "epoch": 6.73, - "learning_rate": 6.571656274905068e-06, - "loss": 0.0719, + "learning_rate": 1.6592607617056465e-05, + "loss": 0.0661, "step": 144220 }, { "epoch": 6.73, - "learning_rate": 6.571187473629929e-06, - "loss": 0.0465, + "learning_rate": 1.6592139547220448e-05, + "loss": 0.0382, "step": 144225 }, { "epoch": 6.73, - "learning_rate": 6.570718672354789e-06, - "loss": 0.0511, + "learning_rate": 1.6591671477384428e-05, + "loss": 0.1452, "step": 144230 }, { "epoch": 6.73, - "learning_rate": 6.57024987107965e-06, - "loss": 0.1371, + "learning_rate": 1.6591203407548408e-05, + "loss": 0.2755, "step": 144235 }, { "epoch": 6.73, - "learning_rate": 6.56978106980451e-06, - "loss": 0.2046, + "learning_rate": 1.6590735337712384e-05, + "loss": 0.1619, "step": 144240 }, { "epoch": 6.73, - "learning_rate": 6.569312268529371e-06, - "loss": 0.0107, + "learning_rate": 1.6590267267876368e-05, + "loss": 0.0443, "step": 144245 }, { "epoch": 6.73, - "learning_rate": 6.568843467254231e-06, - "loss": 0.0299, + "learning_rate": 1.6589799198040347e-05, + "loss": 0.0053, "step": 144250 }, { "epoch": 6.73, - "learning_rate": 6.568374665979092e-06, - "loss": 0.026, + "learning_rate": 1.6589331128204327e-05, + "loss": 0.0414, "step": 144255 }, { "epoch": 6.73, - "learning_rate": 6.5679058647039525e-06, - "loss": 0.0035, + "learning_rate": 1.6588863058368307e-05, + "loss": 0.043, "step": 144260 }, { "epoch": 6.73, - "learning_rate": 6.567437063428813e-06, - "loss": 0.1011, + "learning_rate": 1.658839498853229e-05, + "loss": 0.0499, "step": 144265 }, { "epoch": 6.73, - "learning_rate": 6.566968262153673e-06, - "loss": 0.0219, + "learning_rate": 1.658792691869627e-05, + "loss": 0.0804, "step": 144270 }, { "epoch": 6.73, - "learning_rate": 6.566499460878534e-06, - "loss": 0.0327, + "learning_rate": 1.658745884886025e-05, + "loss": 0.0532, "step": 144275 }, { "epoch": 6.73, - "learning_rate": 6.566030659603395e-06, - "loss": 0.0781, + "learning_rate": 1.658699077902423e-05, + "loss": 0.0934, "step": 144280 }, { "epoch": 6.73, - "learning_rate": 6.5655618583282555e-06, - "loss": 0.1826, + "learning_rate": 1.6586522709188213e-05, + "loss": 0.2839, "step": 144285 }, { "epoch": 6.73, - "learning_rate": 6.5650930570531155e-06, - "loss": 0.1866, + "learning_rate": 1.6586054639352193e-05, + "loss": 0.0818, "step": 144290 }, { "epoch": 6.73, - "learning_rate": 6.564624255777976e-06, - "loss": 0.0158, + "learning_rate": 1.6585586569516173e-05, + "loss": 0.0445, "step": 144295 }, { "epoch": 6.73, - "learning_rate": 6.564155454502836e-06, - "loss": 0.0093, + "learning_rate": 1.6585118499680156e-05, + "loss": 0.0066, "step": 144300 }, { "epoch": 6.73, - "learning_rate": 6.563686653227698e-06, - "loss": 0.0389, + "learning_rate": 1.6584650429844132e-05, + "loss": 0.0194, "step": 144305 }, { "epoch": 6.73, - "learning_rate": 6.563217851952558e-06, - "loss": 0.0299, + "learning_rate": 1.6584182360008112e-05, + "loss": 0.0495, "step": 144310 }, { "epoch": 6.73, - "learning_rate": 6.5627490506774185e-06, - "loss": 0.0249, + "learning_rate": 1.6583714290172092e-05, + "loss": 0.0626, "step": 144315 }, { "epoch": 6.73, - "learning_rate": 6.5622802494022785e-06, - "loss": 0.0865, + "learning_rate": 1.6583246220336075e-05, + "loss": 0.0557, "step": 144320 }, { "epoch": 6.73, - "learning_rate": 6.56181144812714e-06, - "loss": 0.1069, + "learning_rate": 1.6582778150500055e-05, + "loss": 0.093, "step": 144325 }, { "epoch": 6.73, - "learning_rate": 6.561342646852e-06, - "loss": 0.0236, + "learning_rate": 1.6582310080664035e-05, + "loss": 0.1199, "step": 144330 }, { "epoch": 6.73, - "learning_rate": 6.560873845576861e-06, - "loss": 0.2582, + "learning_rate": 1.6581842010828014e-05, + "loss": 0.182, "step": 144335 }, { "epoch": 6.74, - "learning_rate": 6.560405044301721e-06, - "loss": 0.104, + "learning_rate": 1.6581373940991998e-05, + "loss": 0.1111, "step": 144340 }, { "epoch": 6.74, - "learning_rate": 6.5599362430265815e-06, - "loss": 0.0315, + "learning_rate": 1.6580905871155977e-05, + "loss": 0.0152, "step": 144345 }, { "epoch": 6.74, - "learning_rate": 6.559467441751442e-06, - "loss": 0.0688, + "learning_rate": 1.6580437801319957e-05, + "loss": 0.0129, "step": 144350 }, { "epoch": 6.74, - "learning_rate": 6.558998640476303e-06, - "loss": 0.0232, + "learning_rate": 1.657996973148394e-05, + "loss": 0.0149, "step": 144355 }, { "epoch": 6.74, - "learning_rate": 6.558529839201163e-06, - "loss": 0.0228, + "learning_rate": 1.657950166164792e-05, + "loss": 0.0967, "step": 144360 }, { "epoch": 6.74, - "learning_rate": 6.558061037926024e-06, - "loss": 0.1332, + "learning_rate": 1.65790335918119e-05, + "loss": 0.024, "step": 144365 }, { "epoch": 6.74, - "learning_rate": 6.557592236650884e-06, - "loss": 0.0954, + "learning_rate": 1.6578565521975877e-05, + "loss": 0.0175, "step": 144370 }, { "epoch": 6.74, - "learning_rate": 6.557123435375745e-06, - "loss": 0.0417, + "learning_rate": 1.657809745213986e-05, + "loss": 0.0881, "step": 144375 }, { "epoch": 6.74, - "learning_rate": 6.556654634100605e-06, - "loss": 0.1506, + "learning_rate": 1.657762938230384e-05, + "loss": 0.0851, "step": 144380 }, { "epoch": 6.74, - "learning_rate": 6.556185832825466e-06, - "loss": 0.1585, + "learning_rate": 1.657716131246782e-05, + "loss": 0.162, "step": 144385 }, { "epoch": 6.74, - "learning_rate": 6.555717031550326e-06, - "loss": 0.1669, + "learning_rate": 1.65766932426318e-05, + "loss": 0.1232, "step": 144390 }, { "epoch": 6.74, - "learning_rate": 6.555248230275188e-06, - "loss": 0.0267, + "learning_rate": 1.6576225172795782e-05, + "loss": 0.0343, "step": 144395 }, { "epoch": 6.74, - "learning_rate": 6.5547794290000476e-06, - "loss": 0.0145, + "learning_rate": 1.6575757102959762e-05, + "loss": 0.0125, "step": 144400 }, { "epoch": 6.74, - "learning_rate": 6.554310627724908e-06, - "loss": 0.0449, + "learning_rate": 1.6575289033123742e-05, + "loss": 0.023, "step": 144405 }, { "epoch": 6.74, - "learning_rate": 6.553841826449768e-06, - "loss": 0.0394, + "learning_rate": 1.6574820963287725e-05, + "loss": 0.0466, "step": 144410 }, { "epoch": 6.74, - "learning_rate": 6.553373025174628e-06, - "loss": 0.0307, + "learning_rate": 1.6574352893451705e-05, + "loss": 0.0687, "step": 144415 }, { "epoch": 6.74, - "learning_rate": 6.55290422389949e-06, - "loss": 0.0316, + "learning_rate": 1.6573884823615685e-05, + "loss": 0.0439, "step": 144420 }, { "epoch": 6.74, - "learning_rate": 6.552435422624351e-06, - "loss": 0.0735, + "learning_rate": 1.6573416753779665e-05, + "loss": 0.062, "step": 144425 }, { "epoch": 6.74, - "learning_rate": 6.5519666213492106e-06, - "loss": 0.0884, + "learning_rate": 1.6572948683943645e-05, + "loss": 0.0495, "step": 144430 }, { "epoch": 6.74, - "learning_rate": 6.5514978200740705e-06, - "loss": 0.2308, + "learning_rate": 1.6572480614107624e-05, + "loss": 0.3168, "step": 144435 }, { "epoch": 6.74, - "learning_rate": 6.551029018798931e-06, - "loss": 0.3005, + "learning_rate": 1.6572012544271604e-05, + "loss": 0.1953, "step": 144440 }, { "epoch": 6.74, - "learning_rate": 6.550560217523793e-06, - "loss": 0.0158, + "learning_rate": 1.6571544474435584e-05, + "loss": 0.007, "step": 144445 }, { "epoch": 6.74, - "learning_rate": 6.550091416248653e-06, - "loss": 0.0233, + "learning_rate": 1.6571076404599567e-05, + "loss": 0.0272, "step": 144450 }, { "epoch": 6.74, - "learning_rate": 6.549622614973513e-06, - "loss": 0.0534, + "learning_rate": 1.6570608334763547e-05, + "loss": 0.0351, "step": 144455 }, { "epoch": 6.74, - "learning_rate": 6.5491538136983736e-06, - "loss": 0.0322, + "learning_rate": 1.6570140264927527e-05, + "loss": 0.0216, "step": 144460 }, { "epoch": 6.74, - "learning_rate": 6.548685012423235e-06, - "loss": 0.0597, + "learning_rate": 1.6569672195091507e-05, + "loss": 0.0333, "step": 144465 }, { "epoch": 6.74, - "learning_rate": 6.548216211148095e-06, - "loss": 0.0854, + "learning_rate": 1.656920412525549e-05, + "loss": 0.0633, "step": 144470 }, { "epoch": 6.74, - "learning_rate": 6.547747409872955e-06, - "loss": 0.0306, + "learning_rate": 1.656873605541947e-05, + "loss": 0.0571, "step": 144475 }, { "epoch": 6.74, - "learning_rate": 6.547278608597816e-06, - "loss": 0.1717, + "learning_rate": 1.656826798558345e-05, + "loss": 0.0819, "step": 144480 }, { "epoch": 6.74, - "learning_rate": 6.546809807322676e-06, - "loss": 0.1618, + "learning_rate": 1.6567799915747433e-05, + "loss": 0.1118, "step": 144485 }, { "epoch": 6.74, - "learning_rate": 6.546341006047537e-06, - "loss": 0.1972, + "learning_rate": 1.6567331845911413e-05, + "loss": 0.0819, "step": 144490 }, { "epoch": 6.74, - "learning_rate": 6.545872204772397e-06, - "loss": 0.0412, + "learning_rate": 1.656686377607539e-05, + "loss": 0.0151, "step": 144495 }, { "epoch": 6.74, - "learning_rate": 6.545403403497258e-06, - "loss": 0.0094, + "learning_rate": 1.656639570623937e-05, + "loss": 0.0138, "step": 144500 }, { "epoch": 6.74, - "learning_rate": 6.544934602222118e-06, - "loss": 0.0173, + "learning_rate": 1.6565927636403352e-05, + "loss": 0.0097, "step": 144505 }, { "epoch": 6.74, - "learning_rate": 6.544465800946979e-06, - "loss": 0.0543, + "learning_rate": 1.6565459566567332e-05, + "loss": 0.0225, "step": 144510 }, { "epoch": 6.74, - "learning_rate": 6.54399699967184e-06, - "loss": 0.0575, + "learning_rate": 1.656499149673131e-05, + "loss": 0.0701, "step": 144515 }, { "epoch": 6.74, - "learning_rate": 6.5435281983967e-06, - "loss": 0.0753, + "learning_rate": 1.656452342689529e-05, + "loss": 0.0323, "step": 144520 }, { "epoch": 6.74, - "learning_rate": 6.54305939712156e-06, - "loss": 0.0845, + "learning_rate": 1.6564055357059275e-05, + "loss": 0.0447, "step": 144525 }, { "epoch": 6.74, - "learning_rate": 6.542590595846421e-06, - "loss": 0.1082, + "learning_rate": 1.6563587287223254e-05, + "loss": 0.1135, "step": 144530 }, { "epoch": 6.74, - "learning_rate": 6.542121794571282e-06, - "loss": 0.2149, + "learning_rate": 1.6563119217387234e-05, + "loss": 0.2964, "step": 144535 }, { "epoch": 6.74, - "learning_rate": 6.541652993296143e-06, - "loss": 0.2578, + "learning_rate": 1.6562651147551217e-05, + "loss": 0.1364, "step": 144540 }, { "epoch": 6.74, - "learning_rate": 6.541184192021003e-06, - "loss": 0.0031, + "learning_rate": 1.6562183077715197e-05, + "loss": 0.0052, "step": 144545 }, { "epoch": 6.74, - "learning_rate": 6.540715390745863e-06, - "loss": 0.0079, + "learning_rate": 1.6561715007879177e-05, + "loss": 0.0104, "step": 144550 }, { "epoch": 6.75, - "learning_rate": 6.540246589470723e-06, - "loss": 0.0301, + "learning_rate": 1.6561246938043157e-05, + "loss": 0.0115, "step": 144555 }, { "epoch": 6.75, - "learning_rate": 6.539777788195585e-06, - "loss": 0.0256, + "learning_rate": 1.6560778868207137e-05, + "loss": 0.0315, "step": 144560 }, { "epoch": 6.75, - "learning_rate": 6.539308986920445e-06, - "loss": 0.062, + "learning_rate": 1.6560310798371117e-05, + "loss": 0.0314, "step": 144565 }, { "epoch": 6.75, - "learning_rate": 6.538840185645306e-06, - "loss": 0.036, + "learning_rate": 1.6559842728535096e-05, + "loss": 0.0358, "step": 144570 }, { "epoch": 6.75, - "learning_rate": 6.538371384370166e-06, - "loss": 0.0748, + "learning_rate": 1.6559374658699076e-05, + "loss": 0.0581, "step": 144575 }, { "epoch": 6.75, - "learning_rate": 6.537902583095026e-06, - "loss": 0.1199, + "learning_rate": 1.655890658886306e-05, + "loss": 0.0917, "step": 144580 }, { "epoch": 6.75, - "learning_rate": 6.537433781819887e-06, - "loss": 0.1683, + "learning_rate": 1.655843851902704e-05, + "loss": 0.2698, "step": 144585 }, { "epoch": 6.75, - "learning_rate": 6.536964980544748e-06, - "loss": 0.1001, + "learning_rate": 1.655797044919102e-05, + "loss": 0.1145, "step": 144590 }, { "epoch": 6.75, - "learning_rate": 6.536496179269608e-06, - "loss": 0.0267, + "learning_rate": 1.6557502379355002e-05, + "loss": 0.0041, "step": 144595 }, { "epoch": 6.75, - "learning_rate": 6.536027377994469e-06, - "loss": 0.0245, + "learning_rate": 1.6557034309518982e-05, + "loss": 0.0191, "step": 144600 }, { "epoch": 6.75, - "learning_rate": 6.535558576719329e-06, - "loss": 0.0053, + "learning_rate": 1.6556566239682962e-05, + "loss": 0.0047, "step": 144605 }, { "epoch": 6.75, - "learning_rate": 6.53508977544419e-06, - "loss": 0.0123, + "learning_rate": 1.655609816984694e-05, + "loss": 0.0147, "step": 144610 }, { "epoch": 6.75, - "learning_rate": 6.53462097416905e-06, - "loss": 0.0401, + "learning_rate": 1.6555630100010925e-05, + "loss": 0.008, "step": 144615 }, { "epoch": 6.75, - "learning_rate": 6.534152172893911e-06, - "loss": 0.0542, + "learning_rate": 1.65551620301749e-05, + "loss": 0.048, "step": 144620 }, { "epoch": 6.75, - "learning_rate": 6.533683371618771e-06, - "loss": 0.0707, + "learning_rate": 1.655469396033888e-05, + "loss": 0.0357, "step": 144625 }, { "epoch": 6.75, - "learning_rate": 6.5332145703436325e-06, - "loss": 0.1155, + "learning_rate": 1.655422589050286e-05, + "loss": 0.1734, "step": 144630 }, { "epoch": 6.75, - "learning_rate": 6.532745769068492e-06, - "loss": 0.1869, + "learning_rate": 1.6553757820666844e-05, + "loss": 0.1897, "step": 144635 }, { "epoch": 6.75, - "learning_rate": 6.532276967793353e-06, - "loss": 0.2152, + "learning_rate": 1.6553289750830824e-05, + "loss": 0.2032, "step": 144640 }, { "epoch": 6.75, - "learning_rate": 6.531808166518213e-06, - "loss": 0.014, + "learning_rate": 1.6552821680994804e-05, + "loss": 0.0117, "step": 144645 }, { "epoch": 6.75, - "learning_rate": 6.531339365243075e-06, - "loss": 0.0372, + "learning_rate": 1.6552353611158784e-05, + "loss": 0.0102, "step": 144650 }, { "epoch": 6.75, - "learning_rate": 6.530870563967935e-06, - "loss": 0.0128, + "learning_rate": 1.6551885541322767e-05, + "loss": 0.0131, "step": 144655 }, { "epoch": 6.75, - "learning_rate": 6.5304017626927955e-06, - "loss": 0.0263, + "learning_rate": 1.6551417471486747e-05, + "loss": 0.016, "step": 144660 }, { "epoch": 6.75, - "learning_rate": 6.529932961417655e-06, - "loss": 0.0095, + "learning_rate": 1.6550949401650726e-05, + "loss": 0.0645, "step": 144665 }, { "epoch": 6.75, - "learning_rate": 6.529464160142515e-06, - "loss": 0.0523, + "learning_rate": 1.655048133181471e-05, + "loss": 0.0887, "step": 144670 }, { "epoch": 6.75, - "learning_rate": 6.528995358867377e-06, - "loss": 0.0512, + "learning_rate": 1.655001326197869e-05, + "loss": 0.0308, "step": 144675 }, { "epoch": 6.75, - "learning_rate": 6.528526557592238e-06, - "loss": 0.0592, + "learning_rate": 1.654954519214267e-05, + "loss": 0.1259, "step": 144680 }, { "epoch": 6.75, - "learning_rate": 6.528057756317098e-06, - "loss": 0.1742, + "learning_rate": 1.6549077122306646e-05, + "loss": 0.2189, "step": 144685 }, { "epoch": 6.75, - "learning_rate": 6.527588955041958e-06, - "loss": 0.3426, + "learning_rate": 1.654860905247063e-05, + "loss": 0.221, "step": 144690 }, { "epoch": 6.75, - "learning_rate": 6.527120153766818e-06, - "loss": 0.0066, + "learning_rate": 1.654814098263461e-05, + "loss": 0.0055, "step": 144695 }, { "epoch": 6.75, - "learning_rate": 6.52665135249168e-06, - "loss": 0.0586, + "learning_rate": 1.654767291279859e-05, + "loss": 0.0349, "step": 144700 }, { "epoch": 6.75, - "learning_rate": 6.52618255121654e-06, - "loss": 0.03, + "learning_rate": 1.654720484296257e-05, + "loss": 0.0182, "step": 144705 }, { "epoch": 6.75, - "learning_rate": 6.5257137499414e-06, - "loss": 0.0282, + "learning_rate": 1.654673677312655e-05, + "loss": 0.014, "step": 144710 }, { "epoch": 6.75, - "learning_rate": 6.525244948666261e-06, - "loss": 0.0578, + "learning_rate": 1.654626870329053e-05, + "loss": 0.0159, "step": 144715 }, { "epoch": 6.75, - "learning_rate": 6.524776147391122e-06, - "loss": 0.1312, + "learning_rate": 1.654580063345451e-05, + "loss": 0.072, "step": 144720 }, { "epoch": 6.75, - "learning_rate": 6.524307346115982e-06, - "loss": 0.0841, + "learning_rate": 1.6545332563618494e-05, + "loss": 0.1186, "step": 144725 }, { "epoch": 6.75, - "learning_rate": 6.523838544840842e-06, - "loss": 0.112, + "learning_rate": 1.6544864493782474e-05, + "loss": 0.0866, "step": 144730 }, { "epoch": 6.75, - "learning_rate": 6.523369743565703e-06, - "loss": 0.2308, + "learning_rate": 1.6544396423946454e-05, + "loss": 0.0921, "step": 144735 }, { "epoch": 6.75, - "learning_rate": 6.522900942290563e-06, - "loss": 0.0817, + "learning_rate": 1.6543928354110434e-05, + "loss": 0.1995, "step": 144740 }, { "epoch": 6.75, - "learning_rate": 6.5224321410154245e-06, - "loss": 0.0121, + "learning_rate": 1.6543460284274414e-05, + "loss": 0.0045, "step": 144745 }, { "epoch": 6.75, - "learning_rate": 6.5219633397402844e-06, - "loss": 0.0121, + "learning_rate": 1.6542992214438394e-05, + "loss": 0.0465, "step": 144750 }, { "epoch": 6.75, - "learning_rate": 6.521494538465145e-06, - "loss": 0.0207, + "learning_rate": 1.6542524144602373e-05, + "loss": 0.008, "step": 144755 }, { "epoch": 6.75, - "learning_rate": 6.521025737190005e-06, - "loss": 0.0303, + "learning_rate": 1.6542056074766353e-05, + "loss": 0.0474, "step": 144760 }, { "epoch": 6.75, - "learning_rate": 6.520556935914866e-06, - "loss": 0.0697, + "learning_rate": 1.6541588004930336e-05, + "loss": 0.1142, "step": 144765 }, { "epoch": 6.76, - "learning_rate": 6.520088134639727e-06, - "loss": 0.0282, + "learning_rate": 1.6541119935094316e-05, + "loss": 0.0306, "step": 144770 }, { "epoch": 6.76, - "learning_rate": 6.5196193333645875e-06, - "loss": 0.1325, + "learning_rate": 1.6540651865258296e-05, + "loss": 0.0474, "step": 144775 }, { "epoch": 6.76, - "learning_rate": 6.5191505320894474e-06, - "loss": 0.1661, + "learning_rate": 1.654018379542228e-05, + "loss": 0.0651, "step": 144780 }, { "epoch": 6.76, - "learning_rate": 6.518681730814308e-06, - "loss": 0.1371, + "learning_rate": 1.653971572558626e-05, + "loss": 0.1169, "step": 144785 }, { "epoch": 6.76, - "learning_rate": 6.518212929539169e-06, - "loss": 0.114, + "learning_rate": 1.653924765575024e-05, + "loss": 0.1561, "step": 144790 }, { "epoch": 6.76, - "learning_rate": 6.51774412826403e-06, - "loss": 0.0036, + "learning_rate": 1.653877958591422e-05, + "loss": 0.0146, "step": 144795 }, { "epoch": 6.76, - "learning_rate": 6.51727532698889e-06, - "loss": 0.0192, + "learning_rate": 1.6538311516078202e-05, + "loss": 0.0241, "step": 144800 }, { "epoch": 6.76, - "learning_rate": 6.5168065257137505e-06, - "loss": 0.0862, + "learning_rate": 1.653784344624218e-05, + "loss": 0.0445, "step": 144805 }, { "epoch": 6.76, - "learning_rate": 6.51633772443861e-06, - "loss": 0.0196, + "learning_rate": 1.6537375376406158e-05, + "loss": 0.033, "step": 144810 }, { "epoch": 6.76, - "learning_rate": 6.515868923163472e-06, - "loss": 0.0211, + "learning_rate": 1.6536907306570138e-05, + "loss": 0.0321, "step": 144815 }, { "epoch": 6.76, - "learning_rate": 6.515400121888332e-06, - "loss": 0.0154, + "learning_rate": 1.653643923673412e-05, + "loss": 0.0627, "step": 144820 }, { "epoch": 6.76, - "learning_rate": 6.514931320613193e-06, - "loss": 0.0431, + "learning_rate": 1.65359711668981e-05, + "loss": 0.0827, "step": 144825 }, { "epoch": 6.76, - "learning_rate": 6.514462519338053e-06, - "loss": 0.1521, + "learning_rate": 1.653550309706208e-05, + "loss": 0.0883, "step": 144830 }, { "epoch": 6.76, - "learning_rate": 6.5139937180629135e-06, - "loss": 0.2772, + "learning_rate": 1.6535035027226064e-05, + "loss": 0.1586, "step": 144835 }, { "epoch": 6.76, - "learning_rate": 6.513524916787774e-06, - "loss": 0.1758, + "learning_rate": 1.6534566957390044e-05, + "loss": 0.1623, "step": 144840 }, { "epoch": 6.76, - "learning_rate": 6.513056115512635e-06, - "loss": 0.0251, + "learning_rate": 1.6534098887554024e-05, + "loss": 0.0204, "step": 144845 }, { "epoch": 6.76, - "learning_rate": 6.512587314237495e-06, - "loss": 0.008, + "learning_rate": 1.6533630817718003e-05, + "loss": 0.0263, "step": 144850 }, { "epoch": 6.76, - "learning_rate": 6.512118512962356e-06, - "loss": 0.0409, + "learning_rate": 1.6533162747881987e-05, + "loss": 0.0184, "step": 144855 }, { "epoch": 6.76, - "learning_rate": 6.5116497116872165e-06, - "loss": 0.0145, + "learning_rate": 1.6532694678045966e-05, + "loss": 0.027, "step": 144860 }, { "epoch": 6.76, - "learning_rate": 6.511180910412077e-06, - "loss": 0.0531, + "learning_rate": 1.6532226608209946e-05, + "loss": 0.0209, "step": 144865 }, { "epoch": 6.76, - "learning_rate": 6.510712109136937e-06, - "loss": 0.0285, + "learning_rate": 1.6531758538373926e-05, + "loss": 0.0505, "step": 144870 }, { "epoch": 6.76, - "learning_rate": 6.510243307861798e-06, - "loss": 0.0846, + "learning_rate": 1.6531290468537906e-05, + "loss": 0.0711, "step": 144875 }, { "epoch": 6.76, - "learning_rate": 6.509774506586658e-06, - "loss": 0.1029, + "learning_rate": 1.6530822398701886e-05, + "loss": 0.0672, "step": 144880 }, { "epoch": 6.76, - "learning_rate": 6.50930570531152e-06, - "loss": 0.2064, + "learning_rate": 1.6530354328865866e-05, + "loss": 0.1629, "step": 144885 }, { "epoch": 6.76, - "learning_rate": 6.5088369040363795e-06, - "loss": 0.1629, + "learning_rate": 1.6529886259029845e-05, + "loss": 0.1032, "step": 144890 }, { "epoch": 6.76, - "learning_rate": 6.50836810276124e-06, - "loss": 0.0358, + "learning_rate": 1.652941818919383e-05, + "loss": 0.0453, "step": 144895 }, { "epoch": 6.76, - "learning_rate": 6.5078993014861e-06, - "loss": 0.0045, + "learning_rate": 1.652895011935781e-05, + "loss": 0.0011, "step": 144900 }, { "epoch": 6.76, - "learning_rate": 6.50743050021096e-06, - "loss": 0.0215, + "learning_rate": 1.6528482049521788e-05, + "loss": 0.0236, "step": 144905 }, { "epoch": 6.76, - "learning_rate": 6.506961698935822e-06, - "loss": 0.0511, + "learning_rate": 1.652801397968577e-05, + "loss": 0.0302, "step": 144910 }, { "epoch": 6.76, - "learning_rate": 6.5064928976606826e-06, - "loss": 0.049, + "learning_rate": 1.652754590984975e-05, + "loss": 0.0271, "step": 144915 }, { "epoch": 6.76, - "learning_rate": 6.5060240963855425e-06, - "loss": 0.0471, + "learning_rate": 1.652707784001373e-05, + "loss": 0.0393, "step": 144920 }, { "epoch": 6.76, - "learning_rate": 6.5055552951104024e-06, - "loss": 0.0708, + "learning_rate": 1.652660977017771e-05, + "loss": 0.091, "step": 144925 }, { "epoch": 6.76, - "learning_rate": 6.505086493835264e-06, - "loss": 0.0799, + "learning_rate": 1.6526141700341694e-05, + "loss": 0.0401, "step": 144930 }, { "epoch": 6.76, - "learning_rate": 6.504617692560125e-06, - "loss": 0.2287, + "learning_rate": 1.652567363050567e-05, + "loss": 0.1355, "step": 144935 }, { "epoch": 6.76, - "learning_rate": 6.504148891284985e-06, - "loss": 0.1377, + "learning_rate": 1.652520556066965e-05, + "loss": 0.1044, "step": 144940 }, { "epoch": 6.76, - "learning_rate": 6.503680090009845e-06, - "loss": 0.0291, + "learning_rate": 1.652473749083363e-05, + "loss": 0.0103, "step": 144945 }, { "epoch": 6.76, - "learning_rate": 6.5032112887347055e-06, - "loss": 0.029, + "learning_rate": 1.6524269420997613e-05, + "loss": 0.0467, "step": 144950 }, { "epoch": 6.76, - "learning_rate": 6.502742487459567e-06, - "loss": 0.0245, + "learning_rate": 1.6523801351161593e-05, + "loss": 0.0035, "step": 144955 }, { "epoch": 6.76, - "learning_rate": 6.502273686184427e-06, - "loss": 0.0043, + "learning_rate": 1.6523333281325573e-05, + "loss": 0.0328, "step": 144960 }, { "epoch": 6.76, - "learning_rate": 6.501804884909287e-06, - "loss": 0.0408, + "learning_rate": 1.6522865211489556e-05, + "loss": 0.0045, "step": 144965 }, { "epoch": 6.76, - "learning_rate": 6.501336083634148e-06, - "loss": 0.0461, + "learning_rate": 1.6522397141653536e-05, + "loss": 0.0312, "step": 144970 }, { "epoch": 6.76, - "learning_rate": 6.500867282359009e-06, - "loss": 0.0694, + "learning_rate": 1.6521929071817516e-05, + "loss": 0.0651, "step": 144975 }, { "epoch": 6.76, - "learning_rate": 6.500398481083869e-06, - "loss": 0.097, + "learning_rate": 1.6521461001981496e-05, + "loss": 0.0793, "step": 144980 }, { "epoch": 6.77, - "learning_rate": 6.499929679808729e-06, - "loss": 0.2049, + "learning_rate": 1.652099293214548e-05, + "loss": 0.2418, "step": 144985 }, { "epoch": 6.77, - "learning_rate": 6.49946087853359e-06, - "loss": 0.1597, + "learning_rate": 1.652052486230946e-05, + "loss": 0.1752, "step": 144990 }, { "epoch": 6.77, - "learning_rate": 6.49899207725845e-06, - "loss": 0.0258, + "learning_rate": 1.652005679247344e-05, + "loss": 0.0192, "step": 144995 }, { "epoch": 6.77, - "learning_rate": 6.498523275983312e-06, - "loss": 0.0431, + "learning_rate": 1.6519588722637415e-05, + "loss": 0.0242, "step": 145000 }, { "epoch": 6.77, - "learning_rate": 6.4980544747081715e-06, - "loss": 0.0219, + "learning_rate": 1.6519120652801398e-05, + "loss": 0.0507, "step": 145005 }, { "epoch": 6.77, - "learning_rate": 6.497585673433032e-06, - "loss": 0.0435, + "learning_rate": 1.6518652582965378e-05, + "loss": 0.0237, "step": 145010 }, { "epoch": 6.77, - "learning_rate": 6.497116872157892e-06, - "loss": 0.0334, + "learning_rate": 1.6518184513129358e-05, + "loss": 0.0052, "step": 145015 }, { "epoch": 6.77, - "learning_rate": 6.496648070882753e-06, - "loss": 0.0181, + "learning_rate": 1.651771644329334e-05, + "loss": 0.0766, "step": 145020 }, { "epoch": 6.77, - "learning_rate": 6.496179269607615e-06, - "loss": 0.0867, + "learning_rate": 1.651724837345732e-05, + "loss": 0.0781, "step": 145025 }, { "epoch": 6.77, - "learning_rate": 6.495710468332475e-06, - "loss": 0.076, + "learning_rate": 1.65167803036213e-05, + "loss": 0.0822, "step": 145030 }, { "epoch": 6.77, - "learning_rate": 6.4952416670573345e-06, - "loss": 0.2106, + "learning_rate": 1.651631223378528e-05, + "loss": 0.1711, "step": 145035 }, { "epoch": 6.77, - "learning_rate": 6.494772865782195e-06, - "loss": 0.1696, + "learning_rate": 1.6515844163949264e-05, + "loss": 0.2718, "step": 145040 }, { "epoch": 6.77, - "learning_rate": 6.494304064507057e-06, - "loss": 0.0154, + "learning_rate": 1.6515376094113243e-05, + "loss": 0.0137, "step": 145045 }, { "epoch": 6.77, - "learning_rate": 6.493835263231917e-06, - "loss": 0.0151, + "learning_rate": 1.6514908024277223e-05, + "loss": 0.0148, "step": 145050 }, { "epoch": 6.77, - "learning_rate": 6.493366461956777e-06, - "loss": 0.0066, + "learning_rate": 1.6514439954441203e-05, + "loss": 0.0411, "step": 145055 }, { "epoch": 6.77, - "learning_rate": 6.492897660681638e-06, - "loss": 0.0252, + "learning_rate": 1.6513971884605186e-05, + "loss": 0.0497, "step": 145060 }, { "epoch": 6.77, - "learning_rate": 6.4924288594064975e-06, - "loss": 0.0915, + "learning_rate": 1.6513503814769163e-05, + "loss": 0.0371, "step": 145065 }, { "epoch": 6.77, - "learning_rate": 6.491960058131359e-06, - "loss": 0.0187, + "learning_rate": 1.6513035744933143e-05, + "loss": 0.0544, "step": 145070 }, { "epoch": 6.77, - "learning_rate": 6.491491256856219e-06, - "loss": 0.0181, + "learning_rate": 1.6512567675097122e-05, + "loss": 0.0347, "step": 145075 }, { "epoch": 6.77, - "learning_rate": 6.49102245558108e-06, - "loss": 0.1585, + "learning_rate": 1.6512099605261106e-05, + "loss": 0.0514, "step": 145080 }, { "epoch": 6.77, - "learning_rate": 6.49055365430594e-06, - "loss": 0.23, + "learning_rate": 1.6511631535425085e-05, + "loss": 0.1323, "step": 145085 }, { "epoch": 6.77, - "learning_rate": 6.490084853030801e-06, - "loss": 0.2247, + "learning_rate": 1.6511163465589065e-05, + "loss": 0.0957, "step": 145090 }, { "epoch": 6.77, - "learning_rate": 6.489616051755661e-06, - "loss": 0.009, + "learning_rate": 1.651069539575305e-05, + "loss": 0.0953, "step": 145095 }, { "epoch": 6.77, - "learning_rate": 6.489147250480522e-06, - "loss": 0.0069, + "learning_rate": 1.6510227325917028e-05, + "loss": 0.0361, "step": 145100 }, { "epoch": 6.77, - "learning_rate": 6.488678449205382e-06, - "loss": 0.0198, + "learning_rate": 1.6509759256081008e-05, + "loss": 0.0043, "step": 145105 }, { "epoch": 6.77, - "learning_rate": 6.488209647930243e-06, - "loss": 0.0058, + "learning_rate": 1.6509291186244988e-05, + "loss": 0.0282, "step": 145110 }, { "epoch": 6.77, - "learning_rate": 6.487740846655104e-06, - "loss": 0.0096, + "learning_rate": 1.650882311640897e-05, + "loss": 0.0335, "step": 145115 }, { "epoch": 6.77, - "learning_rate": 6.487272045379964e-06, - "loss": 0.0469, + "learning_rate": 1.650835504657295e-05, + "loss": 0.075, "step": 145120 }, { "epoch": 6.77, - "learning_rate": 6.486803244104824e-06, - "loss": 0.0433, + "learning_rate": 1.6507886976736927e-05, + "loss": 0.0688, "step": 145125 }, { "epoch": 6.77, - "learning_rate": 6.486334442829685e-06, - "loss": 0.1099, + "learning_rate": 1.6507418906900907e-05, + "loss": 0.1861, "step": 145130 }, { "epoch": 6.77, - "learning_rate": 6.485865641554545e-06, - "loss": 0.2263, + "learning_rate": 1.650695083706489e-05, + "loss": 0.243, "step": 145135 }, { "epoch": 6.77, - "learning_rate": 6.485396840279407e-06, - "loss": 0.1494, + "learning_rate": 1.650648276722887e-05, + "loss": 0.1243, "step": 145140 }, { "epoch": 6.77, - "learning_rate": 6.484928039004267e-06, - "loss": 0.0432, + "learning_rate": 1.650601469739285e-05, + "loss": 0.0056, "step": 145145 }, { "epoch": 6.77, - "learning_rate": 6.484459237729127e-06, - "loss": 0.0119, + "learning_rate": 1.6505546627556833e-05, + "loss": 0.0175, "step": 145150 }, { "epoch": 6.77, - "learning_rate": 6.483990436453987e-06, - "loss": 0.0102, + "learning_rate": 1.6505078557720813e-05, + "loss": 0.0174, "step": 145155 }, { "epoch": 6.77, - "learning_rate": 6.483521635178847e-06, - "loss": 0.0476, + "learning_rate": 1.6504610487884793e-05, + "loss": 0.0426, "step": 145160 }, { "epoch": 6.77, - "learning_rate": 6.483052833903709e-06, - "loss": 0.1039, + "learning_rate": 1.6504142418048773e-05, + "loss": 0.0146, "step": 145165 }, { "epoch": 6.77, - "learning_rate": 6.48258403262857e-06, - "loss": 0.0477, + "learning_rate": 1.6503674348212756e-05, + "loss": 0.0203, "step": 145170 }, { "epoch": 6.77, - "learning_rate": 6.48211523135343e-06, - "loss": 0.0761, + "learning_rate": 1.6503206278376736e-05, + "loss": 0.1099, "step": 145175 }, { "epoch": 6.77, - "learning_rate": 6.48164643007829e-06, - "loss": 0.0684, + "learning_rate": 1.6502738208540715e-05, + "loss": 0.1399, "step": 145180 }, { "epoch": 6.77, - "learning_rate": 6.481177628803151e-06, - "loss": 0.245, + "learning_rate": 1.6502270138704695e-05, + "loss": 0.1845, "step": 145185 }, { "epoch": 6.77, - "learning_rate": 6.480708827528012e-06, - "loss": 0.1716, + "learning_rate": 1.6501802068868675e-05, + "loss": 0.1589, "step": 145190 }, { "epoch": 6.77, - "learning_rate": 6.480240026252872e-06, - "loss": 0.0216, + "learning_rate": 1.6501333999032655e-05, + "loss": 0.0072, "step": 145195 }, { "epoch": 6.78, - "learning_rate": 6.479771224977733e-06, - "loss": 0.0455, + "learning_rate": 1.6500865929196635e-05, + "loss": 0.0188, "step": 145200 }, { "epoch": 6.78, - "learning_rate": 6.479302423702593e-06, - "loss": 0.022, + "learning_rate": 1.6500397859360618e-05, + "loss": 0.0704, "step": 145205 }, { "epoch": 6.78, - "learning_rate": 6.478833622427454e-06, - "loss": 0.041, + "learning_rate": 1.6499929789524598e-05, + "loss": 0.0076, "step": 145210 }, { "epoch": 6.78, - "learning_rate": 6.478364821152314e-06, - "loss": 0.0605, + "learning_rate": 1.6499461719688578e-05, + "loss": 0.0809, "step": 145215 }, { "epoch": 6.78, - "learning_rate": 6.477896019877175e-06, - "loss": 0.0507, + "learning_rate": 1.6498993649852557e-05, + "loss": 0.1373, "step": 145220 }, { "epoch": 6.78, - "learning_rate": 6.477427218602035e-06, - "loss": 0.0825, + "learning_rate": 1.649852558001654e-05, + "loss": 0.0953, "step": 145225 }, { "epoch": 6.78, - "learning_rate": 6.476958417326895e-06, - "loss": 0.1313, + "learning_rate": 1.649805751018052e-05, + "loss": 0.2153, "step": 145230 }, { "epoch": 6.78, - "learning_rate": 6.4764896160517564e-06, - "loss": 0.1828, + "learning_rate": 1.64975894403445e-05, + "loss": 0.1877, "step": 145235 }, { "epoch": 6.78, - "learning_rate": 6.476020814776617e-06, - "loss": 0.259, + "learning_rate": 1.649712137050848e-05, + "loss": 0.1373, "step": 145240 }, { "epoch": 6.78, - "learning_rate": 6.475552013501477e-06, - "loss": 0.0213, + "learning_rate": 1.6496653300672463e-05, + "loss": 0.0112, "step": 145245 }, { "epoch": 6.78, - "learning_rate": 6.475083212226337e-06, - "loss": 0.0441, + "learning_rate": 1.6496185230836443e-05, + "loss": 0.0025, "step": 145250 }, { "epoch": 6.78, - "learning_rate": 6.474614410951199e-06, - "loss": 0.0475, + "learning_rate": 1.649571716100042e-05, + "loss": 0.0169, "step": 145255 }, { "epoch": 6.78, - "learning_rate": 6.4741456096760595e-06, - "loss": 0.0742, + "learning_rate": 1.64952490911644e-05, + "loss": 0.0717, "step": 145260 }, { "epoch": 6.78, - "learning_rate": 6.4736768084009194e-06, - "loss": 0.0687, + "learning_rate": 1.6494781021328383e-05, + "loss": 0.0515, "step": 145265 }, { "epoch": 6.78, - "learning_rate": 6.473208007125779e-06, - "loss": 0.0683, + "learning_rate": 1.6494312951492362e-05, + "loss": 0.0444, "step": 145270 }, { "epoch": 6.78, - "learning_rate": 6.47273920585064e-06, - "loss": 0.0204, + "learning_rate": 1.6493844881656342e-05, + "loss": 0.0855, "step": 145275 }, { "epoch": 6.78, - "learning_rate": 6.472270404575502e-06, - "loss": 0.0763, + "learning_rate": 1.6493376811820325e-05, + "loss": 0.1385, "step": 145280 }, { "epoch": 6.78, - "learning_rate": 6.471801603300362e-06, - "loss": 0.1686, + "learning_rate": 1.6492908741984305e-05, + "loss": 0.2399, "step": 145285 }, { "epoch": 6.78, - "learning_rate": 6.471332802025222e-06, - "loss": 0.1589, + "learning_rate": 1.6492440672148285e-05, + "loss": 0.1655, "step": 145290 }, { "epoch": 6.78, - "learning_rate": 6.470864000750082e-06, - "loss": 0.0179, + "learning_rate": 1.6491972602312265e-05, + "loss": 0.014, "step": 145295 }, { "epoch": 6.78, - "learning_rate": 6.470395199474944e-06, - "loss": 0.037, + "learning_rate": 1.6491504532476248e-05, + "loss": 0.0314, "step": 145300 }, { "epoch": 6.78, - "learning_rate": 6.469926398199804e-06, - "loss": 0.0211, + "learning_rate": 1.6491036462640228e-05, + "loss": 0.0273, "step": 145305 }, { "epoch": 6.78, - "learning_rate": 6.469457596924664e-06, - "loss": 0.0034, + "learning_rate": 1.6490568392804208e-05, + "loss": 0.0474, "step": 145310 }, { "epoch": 6.78, - "learning_rate": 6.468988795649525e-06, - "loss": 0.0096, + "learning_rate": 1.6490100322968184e-05, + "loss": 0.0519, "step": 145315 }, { "epoch": 6.78, - "learning_rate": 6.468519994374385e-06, - "loss": 0.0408, + "learning_rate": 1.6489632253132167e-05, + "loss": 0.0733, "step": 145320 }, { "epoch": 6.78, - "learning_rate": 6.468051193099246e-06, - "loss": 0.045, + "learning_rate": 1.6489164183296147e-05, + "loss": 0.0699, "step": 145325 }, { "epoch": 6.78, - "learning_rate": 6.467582391824106e-06, - "loss": 0.0759, + "learning_rate": 1.6488696113460127e-05, + "loss": 0.1434, "step": 145330 }, { "epoch": 6.78, - "learning_rate": 6.467113590548967e-06, - "loss": 0.1163, + "learning_rate": 1.648822804362411e-05, + "loss": 0.1726, "step": 145335 }, { "epoch": 6.78, - "learning_rate": 6.466644789273827e-06, - "loss": 0.1591, + "learning_rate": 1.648775997378809e-05, + "loss": 0.1252, "step": 145340 }, { "epoch": 6.78, - "learning_rate": 6.466175987998688e-06, - "loss": 0.0208, + "learning_rate": 1.648729190395207e-05, + "loss": 0.0399, "step": 145345 }, { "epoch": 6.78, - "learning_rate": 6.4657071867235485e-06, - "loss": 0.0018, + "learning_rate": 1.648682383411605e-05, + "loss": 0.0711, "step": 145350 }, { "epoch": 6.78, - "learning_rate": 6.465238385448409e-06, - "loss": 0.0468, + "learning_rate": 1.6486355764280033e-05, + "loss": 0.0053, "step": 145355 }, { "epoch": 6.78, - "learning_rate": 6.464769584173269e-06, - "loss": 0.0127, + "learning_rate": 1.6485887694444013e-05, + "loss": 0.0585, "step": 145360 }, { "epoch": 6.78, - "learning_rate": 6.46430078289813e-06, - "loss": 0.0366, + "learning_rate": 1.6485419624607992e-05, + "loss": 0.0467, "step": 145365 }, { "epoch": 6.78, - "learning_rate": 6.463831981622991e-06, - "loss": 0.0737, + "learning_rate": 1.6484951554771972e-05, + "loss": 0.0112, "step": 145370 }, { "epoch": 6.78, - "learning_rate": 6.4633631803478515e-06, - "loss": 0.1182, + "learning_rate": 1.6484483484935955e-05, + "loss": 0.0699, "step": 145375 }, { "epoch": 6.78, - "learning_rate": 6.4628943790727115e-06, - "loss": 0.0707, + "learning_rate": 1.6484015415099932e-05, + "loss": 0.1592, "step": 145380 }, { "epoch": 6.78, - "learning_rate": 6.462425577797572e-06, - "loss": 0.1816, + "learning_rate": 1.648354734526391e-05, + "loss": 0.1473, "step": 145385 }, { "epoch": 6.78, - "learning_rate": 6.461956776522432e-06, - "loss": 0.0954, + "learning_rate": 1.6483079275427895e-05, + "loss": 0.125, "step": 145390 }, { "epoch": 6.78, - "learning_rate": 6.461487975247294e-06, - "loss": 0.0011, + "learning_rate": 1.6482611205591875e-05, + "loss": 0.0037, "step": 145395 }, { "epoch": 6.78, - "learning_rate": 6.461019173972154e-06, - "loss": 0.0292, + "learning_rate": 1.6482143135755855e-05, + "loss": 0.0044, "step": 145400 }, { "epoch": 6.78, - "learning_rate": 6.4605503726970145e-06, - "loss": 0.0141, + "learning_rate": 1.6481675065919834e-05, + "loss": 0.0221, "step": 145405 }, { "epoch": 6.79, - "learning_rate": 6.4600815714218744e-06, - "loss": 0.0063, + "learning_rate": 1.6481206996083818e-05, + "loss": 0.087, "step": 145410 }, { "epoch": 6.79, - "learning_rate": 6.459612770146735e-06, - "loss": 0.0669, + "learning_rate": 1.6480738926247797e-05, + "loss": 0.0573, "step": 145415 }, { "epoch": 6.79, - "learning_rate": 6.459143968871596e-06, - "loss": 0.0902, + "learning_rate": 1.6480270856411777e-05, + "loss": 0.0531, "step": 145420 }, { "epoch": 6.79, - "learning_rate": 6.458675167596457e-06, - "loss": 0.0454, + "learning_rate": 1.6479802786575757e-05, + "loss": 0.0389, "step": 145425 }, { "epoch": 6.79, - "learning_rate": 6.458206366321317e-06, - "loss": 0.1729, + "learning_rate": 1.647933471673974e-05, + "loss": 0.1571, "step": 145430 }, { "epoch": 6.79, - "learning_rate": 6.4577375650461775e-06, - "loss": 0.2549, + "learning_rate": 1.647886664690372e-05, + "loss": 0.1404, "step": 145435 }, { "epoch": 6.79, - "learning_rate": 6.457268763771038e-06, - "loss": 0.2758, + "learning_rate": 1.64783985770677e-05, + "loss": 0.1323, "step": 145440 }, { "epoch": 6.79, - "learning_rate": 6.456799962495899e-06, - "loss": 0.024, + "learning_rate": 1.647793050723168e-05, + "loss": 0.0229, "step": 145445 }, { "epoch": 6.79, - "learning_rate": 6.456331161220759e-06, - "loss": 0.0006, + "learning_rate": 1.647746243739566e-05, + "loss": 0.0113, "step": 145450 }, { "epoch": 6.79, - "learning_rate": 6.45586235994562e-06, - "loss": 0.0186, + "learning_rate": 1.647699436755964e-05, + "loss": 0.0259, "step": 145455 }, { "epoch": 6.79, - "learning_rate": 6.45539355867048e-06, - "loss": 0.0094, + "learning_rate": 1.647652629772362e-05, + "loss": 0.0209, "step": 145460 }, { "epoch": 6.79, - "learning_rate": 6.454924757395341e-06, - "loss": 0.0192, + "learning_rate": 1.6476058227887602e-05, + "loss": 0.0345, "step": 145465 }, { "epoch": 6.79, - "learning_rate": 6.454455956120201e-06, - "loss": 0.0578, + "learning_rate": 1.6475590158051582e-05, + "loss": 0.037, "step": 145470 }, { "epoch": 6.79, - "learning_rate": 6.453987154845062e-06, - "loss": 0.1594, + "learning_rate": 1.6475122088215562e-05, + "loss": 0.0824, "step": 145475 }, { "epoch": 6.79, - "learning_rate": 6.453518353569922e-06, - "loss": 0.0572, + "learning_rate": 1.6474654018379542e-05, + "loss": 0.119, "step": 145480 }, { "epoch": 6.79, - "learning_rate": 6.453049552294782e-06, - "loss": 0.2041, + "learning_rate": 1.6474185948543525e-05, + "loss": 0.1035, "step": 145485 }, { "epoch": 6.79, - "learning_rate": 6.4525807510196436e-06, - "loss": 0.1651, + "learning_rate": 1.6473717878707505e-05, + "loss": 0.2971, "step": 145490 }, { "epoch": 6.79, - "learning_rate": 6.452111949744504e-06, - "loss": 0.052, + "learning_rate": 1.6473249808871485e-05, + "loss": 0.003, "step": 145495 }, { "epoch": 6.79, - "learning_rate": 6.451643148469364e-06, - "loss": 0.0114, + "learning_rate": 1.6472781739035464e-05, + "loss": 0.047, "step": 145500 }, { "epoch": 6.79, - "learning_rate": 6.451174347194224e-06, - "loss": 0.0359, + "learning_rate": 1.6472313669199444e-05, + "loss": 0.0364, "step": 145505 }, { "epoch": 6.79, - "learning_rate": 6.450705545919086e-06, - "loss": 0.0509, + "learning_rate": 1.6471845599363424e-05, + "loss": 0.0404, "step": 145510 }, { "epoch": 6.79, - "learning_rate": 6.450236744643947e-06, - "loss": 0.077, + "learning_rate": 1.6471377529527404e-05, + "loss": 0.034, "step": 145515 }, { "epoch": 6.79, - "learning_rate": 6.4497679433688065e-06, - "loss": 0.1017, + "learning_rate": 1.6470909459691387e-05, + "loss": 0.0816, "step": 145520 }, { "epoch": 6.79, - "learning_rate": 6.4492991420936665e-06, - "loss": 0.0937, + "learning_rate": 1.6470441389855367e-05, + "loss": 0.0737, "step": 145525 }, { "epoch": 6.79, - "learning_rate": 6.448830340818527e-06, - "loss": 0.0705, + "learning_rate": 1.6469973320019347e-05, + "loss": 0.0966, "step": 145530 }, { "epoch": 6.79, - "learning_rate": 6.448361539543389e-06, - "loss": 0.1884, + "learning_rate": 1.6469505250183327e-05, + "loss": 0.1322, "step": 145535 }, { "epoch": 6.79, - "learning_rate": 6.447892738268249e-06, - "loss": 0.1125, + "learning_rate": 1.646903718034731e-05, + "loss": 0.1469, "step": 145540 }, { "epoch": 6.79, - "learning_rate": 6.447423936993109e-06, - "loss": 0.0141, + "learning_rate": 1.646856911051129e-05, + "loss": 0.0228, "step": 145545 }, { "epoch": 6.79, - "learning_rate": 6.4469551357179695e-06, - "loss": 0.0302, + "learning_rate": 1.646810104067527e-05, + "loss": 0.0285, "step": 145550 }, { "epoch": 6.79, - "learning_rate": 6.4464863344428295e-06, - "loss": 0.0434, + "learning_rate": 1.646763297083925e-05, + "loss": 0.0378, "step": 145555 }, { "epoch": 6.79, - "learning_rate": 6.446017533167691e-06, - "loss": 0.0035, + "learning_rate": 1.6467164901003232e-05, + "loss": 0.0619, "step": 145560 }, { "epoch": 6.79, - "learning_rate": 6.445548731892551e-06, - "loss": 0.0512, + "learning_rate": 1.6466696831167212e-05, + "loss": 0.0167, "step": 145565 }, { "epoch": 6.79, - "learning_rate": 6.445079930617412e-06, - "loss": 0.0219, + "learning_rate": 1.646622876133119e-05, + "loss": 0.0188, "step": 145570 }, { "epoch": 6.79, - "learning_rate": 6.444611129342272e-06, - "loss": 0.1239, + "learning_rate": 1.6465760691495172e-05, + "loss": 0.118, "step": 145575 }, { "epoch": 6.79, - "learning_rate": 6.444142328067133e-06, - "loss": 0.0698, + "learning_rate": 1.646529262165915e-05, + "loss": 0.033, "step": 145580 }, { "epoch": 6.79, - "learning_rate": 6.443673526791993e-06, - "loss": 0.1638, + "learning_rate": 1.646482455182313e-05, + "loss": 0.2372, "step": 145585 }, { "epoch": 6.79, - "learning_rate": 6.443204725516854e-06, - "loss": 0.1498, + "learning_rate": 1.646435648198711e-05, + "loss": 0.2158, "step": 145590 }, { "epoch": 6.79, - "learning_rate": 6.442735924241714e-06, - "loss": 0.0032, + "learning_rate": 1.6463888412151095e-05, + "loss": 0.0198, "step": 145595 }, { "epoch": 6.79, - "learning_rate": 6.442267122966575e-06, - "loss": 0.0091, + "learning_rate": 1.6463420342315074e-05, + "loss": 0.0293, "step": 145600 }, { "epoch": 6.79, - "learning_rate": 6.441798321691436e-06, - "loss": 0.039, + "learning_rate": 1.6462952272479054e-05, + "loss": 0.0663, "step": 145605 }, { "epoch": 6.79, - "learning_rate": 6.441329520416296e-06, - "loss": 0.0775, + "learning_rate": 1.6462484202643034e-05, + "loss": 0.0398, "step": 145610 }, { "epoch": 6.79, - "learning_rate": 6.440860719141156e-06, - "loss": 0.0265, + "learning_rate": 1.6462016132807017e-05, + "loss": 0.1352, "step": 145615 }, { "epoch": 6.79, - "learning_rate": 6.440391917866017e-06, - "loss": 0.0455, + "learning_rate": 1.6461548062970997e-05, + "loss": 0.0517, "step": 145620 }, { "epoch": 6.8, - "learning_rate": 6.439923116590877e-06, - "loss": 0.1101, + "learning_rate": 1.6461079993134977e-05, + "loss": 0.0273, "step": 145625 }, { "epoch": 6.8, - "learning_rate": 6.439454315315739e-06, - "loss": 0.0693, + "learning_rate": 1.6460611923298957e-05, + "loss": 0.0883, "step": 145630 }, { "epoch": 6.8, - "learning_rate": 6.4389855140405986e-06, - "loss": 0.162, + "learning_rate": 1.6460143853462936e-05, + "loss": 0.2058, "step": 145635 }, { "epoch": 6.8, - "learning_rate": 6.438516712765459e-06, - "loss": 0.1066, + "learning_rate": 1.6459675783626916e-05, + "loss": 0.1351, "step": 145640 }, { "epoch": 6.8, - "learning_rate": 6.438047911490319e-06, - "loss": 0.0328, + "learning_rate": 1.6459207713790896e-05, + "loss": 0.0105, "step": 145645 }, { "epoch": 6.8, - "learning_rate": 6.437579110215181e-06, - "loss": 0.0257, + "learning_rate": 1.645873964395488e-05, + "loss": 0.04, "step": 145650 }, { "epoch": 6.8, - "learning_rate": 6.437110308940041e-06, - "loss": 0.0162, + "learning_rate": 1.645827157411886e-05, + "loss": 0.0571, "step": 145655 }, { "epoch": 6.8, - "learning_rate": 6.436641507664902e-06, - "loss": 0.0298, + "learning_rate": 1.645780350428284e-05, + "loss": 0.039, "step": 145660 }, { "epoch": 6.8, - "learning_rate": 6.4361727063897616e-06, - "loss": 0.0468, + "learning_rate": 1.645733543444682e-05, + "loss": 0.0484, "step": 145665 }, { "epoch": 6.8, - "learning_rate": 6.435703905114622e-06, - "loss": 0.008, + "learning_rate": 1.6456867364610802e-05, + "loss": 0.078, "step": 145670 }, { "epoch": 6.8, - "learning_rate": 6.435235103839483e-06, - "loss": 0.0362, + "learning_rate": 1.6456399294774782e-05, + "loss": 0.0614, "step": 145675 }, { "epoch": 6.8, - "learning_rate": 6.434766302564344e-06, - "loss": 0.0524, + "learning_rate": 1.645593122493876e-05, + "loss": 0.0543, "step": 145680 }, { "epoch": 6.8, - "learning_rate": 6.434297501289204e-06, - "loss": 0.2088, + "learning_rate": 1.645546315510274e-05, + "loss": 0.2446, "step": 145685 }, { "epoch": 6.8, - "learning_rate": 6.433828700014065e-06, - "loss": 0.0728, + "learning_rate": 1.6454995085266725e-05, + "loss": 0.1361, "step": 145690 }, { "epoch": 6.8, - "learning_rate": 6.433359898738925e-06, - "loss": 0.0092, + "learning_rate": 1.64545270154307e-05, + "loss": 0.0112, "step": 145695 }, { "epoch": 6.8, - "learning_rate": 6.432891097463786e-06, - "loss": 0.0215, + "learning_rate": 1.645405894559468e-05, + "loss": 0.0081, "step": 145700 }, { "epoch": 6.8, - "learning_rate": 6.432422296188646e-06, - "loss": 0.0376, + "learning_rate": 1.6453590875758664e-05, + "loss": 0.0469, "step": 145705 }, { "epoch": 6.8, - "learning_rate": 6.431953494913507e-06, - "loss": 0.0384, + "learning_rate": 1.6453122805922644e-05, + "loss": 0.0208, "step": 145710 }, { "epoch": 6.8, - "learning_rate": 6.431484693638367e-06, - "loss": 0.044, + "learning_rate": 1.6452654736086624e-05, + "loss": 0.0364, "step": 145715 }, { "epoch": 6.8, - "learning_rate": 6.4310158923632285e-06, - "loss": 0.0607, + "learning_rate": 1.6452186666250604e-05, + "loss": 0.0674, "step": 145720 }, { "epoch": 6.8, - "learning_rate": 6.430547091088088e-06, - "loss": 0.0862, + "learning_rate": 1.6451718596414587e-05, + "loss": 0.094, "step": 145725 }, { "epoch": 6.8, - "learning_rate": 6.430078289812949e-06, - "loss": 0.1126, + "learning_rate": 1.6451250526578567e-05, + "loss": 0.0928, "step": 145730 }, { "epoch": 6.8, - "learning_rate": 6.429609488537809e-06, - "loss": 0.0438, + "learning_rate": 1.6450782456742546e-05, + "loss": 0.1371, "step": 145735 }, { "epoch": 6.8, - "learning_rate": 6.429140687262669e-06, - "loss": 0.1197, + "learning_rate": 1.6450314386906526e-05, + "loss": 0.1174, "step": 145740 }, { "epoch": 6.8, - "learning_rate": 6.428671885987531e-06, - "loss": 0.0141, + "learning_rate": 1.644984631707051e-05, + "loss": 0.0106, "step": 145745 }, { "epoch": 6.8, - "learning_rate": 6.4282030847123914e-06, - "loss": 0.022, + "learning_rate": 1.644937824723449e-05, + "loss": 0.0112, "step": 145750 }, { "epoch": 6.8, - "learning_rate": 6.427734283437251e-06, - "loss": 0.0329, + "learning_rate": 1.644891017739847e-05, + "loss": 0.0854, "step": 145755 }, { "epoch": 6.8, - "learning_rate": 6.427265482162111e-06, - "loss": 0.0314, + "learning_rate": 1.644844210756245e-05, + "loss": 0.0397, "step": 145760 }, { "epoch": 6.8, - "learning_rate": 6.426796680886973e-06, - "loss": 0.016, + "learning_rate": 1.644797403772643e-05, + "loss": 0.0377, "step": 145765 }, { "epoch": 6.8, - "learning_rate": 6.426327879611834e-06, - "loss": 0.0181, + "learning_rate": 1.644750596789041e-05, + "loss": 0.0427, "step": 145770 }, { "epoch": 6.8, - "learning_rate": 6.425859078336694e-06, - "loss": 0.0849, + "learning_rate": 1.6447037898054388e-05, + "loss": 0.0533, "step": 145775 }, { "epoch": 6.8, - "learning_rate": 6.425390277061554e-06, - "loss": 0.1025, + "learning_rate": 1.644656982821837e-05, + "loss": 0.1061, "step": 145780 }, { "epoch": 6.8, - "learning_rate": 6.424921475786414e-06, - "loss": 0.1916, + "learning_rate": 1.644610175838235e-05, + "loss": 0.1053, "step": 145785 }, { "epoch": 6.8, - "learning_rate": 6.424452674511276e-06, - "loss": 0.2026, + "learning_rate": 1.644563368854633e-05, + "loss": 0.194, "step": 145790 }, { "epoch": 6.8, - "learning_rate": 6.423983873236136e-06, - "loss": 0.0013, + "learning_rate": 1.644516561871031e-05, + "loss": 0.017, "step": 145795 }, { "epoch": 6.8, - "learning_rate": 6.423515071960996e-06, - "loss": 0.0342, + "learning_rate": 1.6444697548874294e-05, + "loss": 0.0393, "step": 145800 }, { "epoch": 6.8, - "learning_rate": 6.423046270685857e-06, - "loss": 0.0189, + "learning_rate": 1.6444229479038274e-05, + "loss": 0.0104, "step": 145805 }, { "epoch": 6.8, - "learning_rate": 6.4225774694107166e-06, - "loss": 0.0481, + "learning_rate": 1.6443761409202254e-05, + "loss": 0.057, "step": 145810 }, { "epoch": 6.8, - "learning_rate": 6.422108668135578e-06, - "loss": 0.1266, + "learning_rate": 1.6443293339366237e-05, + "loss": 0.0608, "step": 145815 }, { "epoch": 6.8, - "learning_rate": 6.421639866860438e-06, - "loss": 0.0763, + "learning_rate": 1.6442825269530213e-05, + "loss": 0.0642, "step": 145820 }, { "epoch": 6.8, - "learning_rate": 6.421171065585299e-06, - "loss": 0.1168, + "learning_rate": 1.6442357199694193e-05, + "loss": 0.0812, "step": 145825 }, { "epoch": 6.8, - "learning_rate": 6.420702264310159e-06, - "loss": 0.0545, + "learning_rate": 1.6441889129858173e-05, + "loss": 0.0497, "step": 145830 }, { "epoch": 6.8, - "learning_rate": 6.4202334630350205e-06, - "loss": 0.2177, + "learning_rate": 1.6441421060022156e-05, + "loss": 0.1637, "step": 145835 }, { "epoch": 6.81, - "learning_rate": 6.41976466175988e-06, - "loss": 0.164, + "learning_rate": 1.6440952990186136e-05, + "loss": 0.1448, "step": 145840 }, { "epoch": 6.81, - "learning_rate": 6.419295860484741e-06, - "loss": 0.0146, + "learning_rate": 1.6440484920350116e-05, + "loss": 0.0013, "step": 145845 }, { "epoch": 6.81, - "learning_rate": 6.418827059209601e-06, - "loss": 0.0082, + "learning_rate": 1.6440016850514096e-05, + "loss": 0.0255, "step": 145850 }, { "epoch": 6.81, - "learning_rate": 6.418358257934462e-06, - "loss": 0.0461, + "learning_rate": 1.643954878067808e-05, + "loss": 0.0227, "step": 145855 }, { "epoch": 6.81, - "learning_rate": 6.417889456659323e-06, - "loss": 0.0322, + "learning_rate": 1.643908071084206e-05, + "loss": 0.0392, "step": 145860 }, { "epoch": 6.81, - "learning_rate": 6.4174206553841835e-06, - "loss": 0.0473, + "learning_rate": 1.643861264100604e-05, + "loss": 0.0265, "step": 145865 }, { "epoch": 6.81, - "learning_rate": 6.416951854109043e-06, - "loss": 0.0756, + "learning_rate": 1.643814457117002e-05, + "loss": 0.0774, "step": 145870 }, { "epoch": 6.81, - "learning_rate": 6.416483052833904e-06, - "loss": 0.1041, + "learning_rate": 1.6437676501334e-05, + "loss": 0.0863, "step": 145875 }, { "epoch": 6.81, - "learning_rate": 6.416014251558764e-06, - "loss": 0.1273, + "learning_rate": 1.643720843149798e-05, + "loss": 0.0879, "step": 145880 }, { "epoch": 6.81, - "learning_rate": 6.415545450283626e-06, - "loss": 0.0987, + "learning_rate": 1.6436740361661958e-05, + "loss": 0.182, "step": 145885 }, { "epoch": 6.81, - "learning_rate": 6.415076649008486e-06, - "loss": 0.1511, + "learning_rate": 1.643627229182594e-05, + "loss": 0.1394, "step": 145890 }, { "epoch": 6.81, - "learning_rate": 6.4146078477333465e-06, - "loss": 0.0173, + "learning_rate": 1.643580422198992e-05, + "loss": 0.0078, "step": 145895 }, { "epoch": 6.81, - "learning_rate": 6.414139046458206e-06, - "loss": 0.0152, + "learning_rate": 1.64353361521539e-05, + "loss": 0.0083, "step": 145900 }, { "epoch": 6.81, - "learning_rate": 6.413670245183068e-06, - "loss": 0.0334, + "learning_rate": 1.643486808231788e-05, + "loss": 0.0067, "step": 145905 }, { "epoch": 6.81, - "learning_rate": 6.413201443907928e-06, - "loss": 0.0766, + "learning_rate": 1.6434400012481864e-05, + "loss": 0.0406, "step": 145910 }, { "epoch": 6.81, - "learning_rate": 6.412732642632789e-06, - "loss": 0.0416, + "learning_rate": 1.6433931942645844e-05, + "loss": 0.0261, "step": 145915 }, { "epoch": 6.81, - "learning_rate": 6.412263841357649e-06, - "loss": 0.0601, + "learning_rate": 1.6433463872809823e-05, + "loss": 0.0439, "step": 145920 }, { "epoch": 6.81, - "learning_rate": 6.4117950400825094e-06, - "loss": 0.1052, + "learning_rate": 1.6432995802973803e-05, + "loss": 0.0708, "step": 145925 }, { "epoch": 6.81, - "learning_rate": 6.41132623880737e-06, - "loss": 0.1178, + "learning_rate": 1.6432527733137786e-05, + "loss": 0.0277, "step": 145930 }, { "epoch": 6.81, - "learning_rate": 6.410857437532231e-06, - "loss": 0.0971, + "learning_rate": 1.6432059663301766e-05, + "loss": 0.1156, "step": 145935 }, { "epoch": 6.81, - "learning_rate": 6.410388636257091e-06, - "loss": 0.1712, + "learning_rate": 1.6431591593465746e-05, + "loss": 0.1943, "step": 145940 }, { "epoch": 6.81, - "learning_rate": 6.409919834981952e-06, - "loss": 0.0228, + "learning_rate": 1.6431123523629726e-05, + "loss": 0.0392, "step": 145945 }, { "epoch": 6.81, - "learning_rate": 6.409451033706812e-06, - "loss": 0.0068, + "learning_rate": 1.6430655453793706e-05, + "loss": 0.054, "step": 145950 }, { "epoch": 6.81, - "learning_rate": 6.408982232431673e-06, - "loss": 0.0134, + "learning_rate": 1.6430187383957685e-05, + "loss": 0.0271, "step": 145955 }, { "epoch": 6.81, - "learning_rate": 6.408513431156533e-06, - "loss": 0.0179, + "learning_rate": 1.6429719314121665e-05, + "loss": 0.0701, "step": 145960 }, { "epoch": 6.81, - "learning_rate": 6.408044629881394e-06, - "loss": 0.0542, + "learning_rate": 1.642925124428565e-05, + "loss": 0.0245, "step": 145965 }, { "epoch": 6.81, - "learning_rate": 6.407575828606254e-06, - "loss": 0.0777, + "learning_rate": 1.6428783174449628e-05, + "loss": 0.0609, "step": 145970 }, { "epoch": 6.81, - "learning_rate": 6.4071070273311156e-06, - "loss": 0.0752, + "learning_rate": 1.6428315104613608e-05, + "loss": 0.0964, "step": 145975 }, { "epoch": 6.81, - "learning_rate": 6.4066382260559755e-06, - "loss": 0.0864, + "learning_rate": 1.6427847034777588e-05, + "loss": 0.1607, "step": 145980 }, { "epoch": 6.81, - "learning_rate": 6.406169424780836e-06, - "loss": 0.2542, + "learning_rate": 1.642737896494157e-05, + "loss": 0.1268, "step": 145985 }, { "epoch": 6.81, - "learning_rate": 6.405700623505696e-06, - "loss": 0.1705, + "learning_rate": 1.642691089510555e-05, + "loss": 0.1029, "step": 145990 }, { "epoch": 6.81, - "learning_rate": 6.405231822230556e-06, - "loss": 0.0024, + "learning_rate": 1.642644282526953e-05, + "loss": 0.0139, "step": 145995 }, { "epoch": 6.81, - "learning_rate": 6.404763020955418e-06, - "loss": 0.0397, + "learning_rate": 1.6425974755433514e-05, + "loss": 0.0031, "step": 146000 }, { "epoch": 6.81, - "learning_rate": 6.4042942196802786e-06, - "loss": 0.0235, + "learning_rate": 1.6425506685597494e-05, + "loss": 0.037, "step": 146005 }, { "epoch": 6.81, - "learning_rate": 6.4038254184051385e-06, - "loss": 0.0395, + "learning_rate": 1.642503861576147e-05, + "loss": 0.0163, "step": 146010 }, { "epoch": 6.81, - "learning_rate": 6.403356617129998e-06, - "loss": 0.0434, + "learning_rate": 1.642457054592545e-05, + "loss": 0.1352, "step": 146015 }, { "epoch": 6.81, - "learning_rate": 6.40288781585486e-06, - "loss": 0.0438, + "learning_rate": 1.6424102476089433e-05, + "loss": 0.0342, "step": 146020 }, { "epoch": 6.81, - "learning_rate": 6.402419014579721e-06, - "loss": 0.1086, + "learning_rate": 1.6423634406253413e-05, + "loss": 0.0733, "step": 146025 }, { "epoch": 6.81, - "learning_rate": 6.401950213304581e-06, - "loss": 0.1365, + "learning_rate": 1.6423166336417393e-05, + "loss": 0.0946, "step": 146030 }, { "epoch": 6.81, - "learning_rate": 6.401481412029441e-06, - "loss": 0.1623, + "learning_rate": 1.6422698266581373e-05, + "loss": 0.2944, "step": 146035 }, { "epoch": 6.81, - "learning_rate": 6.4010126107543015e-06, - "loss": 0.0637, + "learning_rate": 1.6422230196745356e-05, + "loss": 0.2211, "step": 146040 }, { "epoch": 6.81, - "learning_rate": 6.400543809479163e-06, - "loss": 0.0268, + "learning_rate": 1.6421762126909336e-05, + "loss": 0.0093, "step": 146045 }, { "epoch": 6.81, - "learning_rate": 6.400075008204023e-06, - "loss": 0.0171, + "learning_rate": 1.6421294057073316e-05, + "loss": 0.0091, "step": 146050 }, { "epoch": 6.82, - "learning_rate": 6.399606206928883e-06, - "loss": 0.0237, + "learning_rate": 1.6420825987237295e-05, + "loss": 0.0871, "step": 146055 }, { "epoch": 6.82, - "learning_rate": 6.399137405653744e-06, - "loss": 0.0249, + "learning_rate": 1.642035791740128e-05, + "loss": 0.0759, "step": 146060 }, { "epoch": 6.82, - "learning_rate": 6.398668604378604e-06, - "loss": 0.0077, + "learning_rate": 1.641988984756526e-05, + "loss": 0.0542, "step": 146065 }, { "epoch": 6.82, - "learning_rate": 6.398199803103465e-06, - "loss": 0.0457, + "learning_rate": 1.6419421777729238e-05, + "loss": 0.0434, "step": 146070 }, { "epoch": 6.82, - "learning_rate": 6.397731001828325e-06, - "loss": 0.0485, + "learning_rate": 1.6418953707893218e-05, + "loss": 0.0281, "step": 146075 }, { "epoch": 6.82, - "learning_rate": 6.397262200553186e-06, - "loss": 0.0876, + "learning_rate": 1.6418485638057198e-05, + "loss": 0.0914, "step": 146080 }, { "epoch": 6.82, - "learning_rate": 6.396793399278046e-06, - "loss": 0.1191, + "learning_rate": 1.6418017568221178e-05, + "loss": 0.1061, "step": 146085 }, { "epoch": 6.82, - "learning_rate": 6.396324598002908e-06, - "loss": 0.08, + "learning_rate": 1.6417549498385157e-05, + "loss": 0.182, "step": 146090 }, { "epoch": 6.82, - "learning_rate": 6.3958557967277675e-06, - "loss": 0.0169, + "learning_rate": 1.641708142854914e-05, + "loss": 0.0167, "step": 146095 }, { "epoch": 6.82, - "learning_rate": 6.395386995452628e-06, - "loss": 0.0057, + "learning_rate": 1.641661335871312e-05, + "loss": 0.0146, "step": 146100 }, { "epoch": 6.82, - "learning_rate": 6.394918194177488e-06, - "loss": 0.033, + "learning_rate": 1.64161452888771e-05, + "loss": 0.0086, "step": 146105 }, { "epoch": 6.82, - "learning_rate": 6.394449392902349e-06, - "loss": 0.0479, + "learning_rate": 1.641567721904108e-05, + "loss": 0.0442, "step": 146110 }, { "epoch": 6.82, - "learning_rate": 6.39398059162721e-06, - "loss": 0.0291, + "learning_rate": 1.6415209149205063e-05, + "loss": 0.0135, "step": 146115 }, { "epoch": 6.82, - "learning_rate": 6.393511790352071e-06, - "loss": 0.0348, + "learning_rate": 1.6414741079369043e-05, + "loss": 0.0279, "step": 146120 }, { "epoch": 6.82, - "learning_rate": 6.3930429890769305e-06, - "loss": 0.0704, + "learning_rate": 1.6414273009533023e-05, + "loss": 0.0478, "step": 146125 }, { "epoch": 6.82, - "learning_rate": 6.392574187801791e-06, - "loss": 0.0708, + "learning_rate": 1.6413804939697006e-05, + "loss": 0.2049, "step": 146130 }, { "epoch": 6.82, - "learning_rate": 6.392105386526651e-06, - "loss": 0.1612, + "learning_rate": 1.6413336869860983e-05, + "loss": 0.1048, "step": 146135 }, { "epoch": 6.82, - "learning_rate": 6.391636585251513e-06, - "loss": 0.1234, + "learning_rate": 1.6412868800024962e-05, + "loss": 0.2002, "step": 146140 }, { "epoch": 6.82, - "learning_rate": 6.391167783976373e-06, - "loss": 0.0009, + "learning_rate": 1.6412400730188942e-05, + "loss": 0.0138, "step": 146145 }, { "epoch": 6.82, - "learning_rate": 6.3906989827012336e-06, - "loss": 0.0162, + "learning_rate": 1.6411932660352925e-05, + "loss": 0.0377, "step": 146150 }, { "epoch": 6.82, - "learning_rate": 6.3902301814260935e-06, - "loss": 0.0438, + "learning_rate": 1.6411464590516905e-05, + "loss": 0.0131, "step": 146155 }, { "epoch": 6.82, - "learning_rate": 6.389761380150955e-06, - "loss": 0.0348, + "learning_rate": 1.6410996520680885e-05, + "loss": 0.0171, "step": 146160 }, { "epoch": 6.82, - "learning_rate": 6.389292578875815e-06, - "loss": 0.0592, + "learning_rate": 1.6410528450844865e-05, + "loss": 0.03, "step": 146165 }, { "epoch": 6.82, - "learning_rate": 6.388823777600676e-06, - "loss": 0.0931, + "learning_rate": 1.6410060381008848e-05, + "loss": 0.0218, "step": 146170 }, { "epoch": 6.82, - "learning_rate": 6.388354976325536e-06, - "loss": 0.0611, + "learning_rate": 1.6409592311172828e-05, + "loss": 0.208, "step": 146175 }, { "epoch": 6.82, - "learning_rate": 6.3878861750503966e-06, - "loss": 0.1186, + "learning_rate": 1.6409124241336808e-05, + "loss": 0.0667, "step": 146180 }, { "epoch": 6.82, - "learning_rate": 6.387417373775257e-06, - "loss": 0.0982, + "learning_rate": 1.640865617150079e-05, + "loss": 0.1879, "step": 146185 }, { "epoch": 6.82, - "learning_rate": 6.386948572500118e-06, - "loss": 0.1554, + "learning_rate": 1.640818810166477e-05, + "loss": 0.1981, "step": 146190 }, { "epoch": 6.82, - "learning_rate": 6.386479771224978e-06, - "loss": 0.0062, + "learning_rate": 1.640772003182875e-05, + "loss": 0.0022, "step": 146195 }, { "epoch": 6.82, - "learning_rate": 6.386010969949839e-06, - "loss": 0.0262, + "learning_rate": 1.6407251961992727e-05, + "loss": 0.014, "step": 146200 }, { "epoch": 6.82, - "learning_rate": 6.385542168674699e-06, - "loss": 0.0172, + "learning_rate": 1.640678389215671e-05, + "loss": 0.0216, "step": 146205 }, { "epoch": 6.82, - "learning_rate": 6.38507336739956e-06, - "loss": 0.0206, + "learning_rate": 1.640631582232069e-05, + "loss": 0.033, "step": 146210 }, { "epoch": 6.82, - "learning_rate": 6.38460456612442e-06, - "loss": 0.0512, + "learning_rate": 1.640584775248467e-05, + "loss": 0.0333, "step": 146215 }, { "epoch": 6.82, - "learning_rate": 6.384135764849281e-06, - "loss": 0.0341, + "learning_rate": 1.640537968264865e-05, + "loss": 0.0437, "step": 146220 }, { "epoch": 6.82, - "learning_rate": 6.383666963574141e-06, - "loss": 0.0923, + "learning_rate": 1.6404911612812633e-05, + "loss": 0.0338, "step": 146225 }, { "epoch": 6.82, - "learning_rate": 6.383198162299003e-06, - "loss": 0.1349, + "learning_rate": 1.6404443542976613e-05, + "loss": 0.0876, "step": 146230 }, { "epoch": 6.82, - "learning_rate": 6.382729361023863e-06, - "loss": 0.1491, + "learning_rate": 1.6403975473140592e-05, + "loss": 0.177, "step": 146235 }, { "epoch": 6.82, - "learning_rate": 6.382260559748723e-06, - "loss": 0.1557, + "learning_rate": 1.6403507403304576e-05, + "loss": 0.1012, "step": 146240 }, { "epoch": 6.82, - "learning_rate": 6.381791758473583e-06, - "loss": 0.0176, + "learning_rate": 1.6403039333468556e-05, + "loss": 0.0356, "step": 146245 }, { "epoch": 6.82, - "learning_rate": 6.381322957198443e-06, - "loss": 0.0116, + "learning_rate": 1.6402571263632535e-05, + "loss": 0.042, "step": 146250 }, { "epoch": 6.82, - "learning_rate": 6.380854155923305e-06, - "loss": 0.027, + "learning_rate": 1.6402103193796515e-05, + "loss": 0.0072, "step": 146255 }, { "epoch": 6.82, - "learning_rate": 6.380385354648166e-06, - "loss": 0.0531, + "learning_rate": 1.64016351239605e-05, + "loss": 0.0198, "step": 146260 }, { "epoch": 6.82, - "learning_rate": 6.379916553373026e-06, - "loss": 0.0749, + "learning_rate": 1.6401167054124475e-05, + "loss": 0.0471, "step": 146265 }, { "epoch": 6.83, - "learning_rate": 6.3794477520978855e-06, - "loss": 0.0701, + "learning_rate": 1.6400698984288455e-05, + "loss": 0.047, "step": 146270 }, { "epoch": 6.83, - "learning_rate": 6.378978950822746e-06, - "loss": 0.0691, + "learning_rate": 1.6400230914452434e-05, + "loss": 0.0195, "step": 146275 }, { "epoch": 6.83, - "learning_rate": 6.378510149547608e-06, - "loss": 0.0687, + "learning_rate": 1.6399762844616418e-05, + "loss": 0.1157, "step": 146280 }, { "epoch": 6.83, - "learning_rate": 6.378041348272468e-06, - "loss": 0.1561, + "learning_rate": 1.6399294774780397e-05, + "loss": 0.1141, "step": 146285 }, { "epoch": 6.83, - "learning_rate": 6.377572546997328e-06, - "loss": 0.1303, + "learning_rate": 1.6398826704944377e-05, + "loss": 0.1895, "step": 146290 }, { "epoch": 6.83, - "learning_rate": 6.377103745722189e-06, - "loss": 0.0022, + "learning_rate": 1.6398358635108357e-05, + "loss": 0.0155, "step": 146295 }, { "epoch": 6.83, - "learning_rate": 6.37663494444705e-06, - "loss": 0.0032, + "learning_rate": 1.639789056527234e-05, + "loss": 0.0165, "step": 146300 }, { "epoch": 6.83, - "learning_rate": 6.37616614317191e-06, - "loss": 0.0296, + "learning_rate": 1.639742249543632e-05, + "loss": 0.0228, "step": 146305 }, { "epoch": 6.83, - "learning_rate": 6.37569734189677e-06, - "loss": 0.055, + "learning_rate": 1.63969544256003e-05, + "loss": 0.0411, "step": 146310 }, { "epoch": 6.83, - "learning_rate": 6.375228540621631e-06, - "loss": 0.016, + "learning_rate": 1.6396486355764283e-05, + "loss": 0.0523, "step": 146315 }, { "epoch": 6.83, - "learning_rate": 6.374759739346491e-06, - "loss": 0.0627, + "learning_rate": 1.6396018285928263e-05, + "loss": 0.0744, "step": 146320 }, { "epoch": 6.83, - "learning_rate": 6.374290938071352e-06, - "loss": 0.0541, + "learning_rate": 1.639555021609224e-05, + "loss": 0.1387, "step": 146325 }, { "epoch": 6.83, - "learning_rate": 6.373822136796212e-06, - "loss": 0.0975, + "learning_rate": 1.639508214625622e-05, + "loss": 0.0578, "step": 146330 }, { "epoch": 6.83, - "learning_rate": 6.373353335521073e-06, - "loss": 0.2503, + "learning_rate": 1.6394614076420202e-05, + "loss": 0.1721, "step": 146335 }, { "epoch": 6.83, - "learning_rate": 6.372884534245933e-06, - "loss": 0.1277, + "learning_rate": 1.6394146006584182e-05, + "loss": 0.1006, "step": 146340 }, { "epoch": 6.83, - "learning_rate": 6.372415732970795e-06, - "loss": 0.0154, + "learning_rate": 1.6393677936748162e-05, + "loss": 0.0089, "step": 146345 }, { "epoch": 6.83, - "learning_rate": 6.371946931695655e-06, - "loss": 0.009, + "learning_rate": 1.6393209866912142e-05, + "loss": 0.0608, "step": 146350 }, { "epoch": 6.83, - "learning_rate": 6.371478130420515e-06, - "loss": 0.0315, + "learning_rate": 1.6392741797076125e-05, + "loss": 0.0433, "step": 146355 }, { "epoch": 6.83, - "learning_rate": 6.371009329145375e-06, - "loss": 0.0662, + "learning_rate": 1.6392273727240105e-05, + "loss": 0.0194, "step": 146360 }, { "epoch": 6.83, - "learning_rate": 6.370540527870236e-06, - "loss": 0.038, + "learning_rate": 1.6391805657404085e-05, + "loss": 0.0273, "step": 146365 }, { "epoch": 6.83, - "learning_rate": 6.370071726595097e-06, - "loss": 0.0817, + "learning_rate": 1.6391337587568068e-05, + "loss": 0.0463, "step": 146370 }, { "epoch": 6.83, - "learning_rate": 6.369602925319958e-06, - "loss": 0.0708, + "learning_rate": 1.6390869517732048e-05, + "loss": 0.0729, "step": 146375 }, { "epoch": 6.83, - "learning_rate": 6.369134124044818e-06, - "loss": 0.0729, + "learning_rate": 1.6390401447896028e-05, + "loss": 0.1148, "step": 146380 }, { "epoch": 6.83, - "learning_rate": 6.368665322769678e-06, - "loss": 0.1611, + "learning_rate": 1.6389933378060007e-05, + "loss": 0.157, "step": 146385 }, { "epoch": 6.83, - "learning_rate": 6.368196521494538e-06, - "loss": 0.0984, + "learning_rate": 1.6389465308223987e-05, + "loss": 0.1486, "step": 146390 }, { "epoch": 6.83, - "learning_rate": 6.3677277202194e-06, - "loss": 0.0141, + "learning_rate": 1.6388997238387967e-05, + "loss": 0.025, "step": 146395 }, { "epoch": 6.83, - "learning_rate": 6.36725891894426e-06, - "loss": 0.0104, + "learning_rate": 1.6388529168551947e-05, + "loss": 0.0048, "step": 146400 }, { "epoch": 6.83, - "learning_rate": 6.366790117669121e-06, - "loss": 0.037, + "learning_rate": 1.6388061098715927e-05, + "loss": 0.0046, "step": 146405 }, { "epoch": 6.83, - "learning_rate": 6.366321316393981e-06, - "loss": 0.0283, + "learning_rate": 1.638759302887991e-05, + "loss": 0.0429, "step": 146410 }, { "epoch": 6.83, - "learning_rate": 6.365852515118842e-06, - "loss": 0.0417, + "learning_rate": 1.638712495904389e-05, + "loss": 0.0036, "step": 146415 }, { "epoch": 6.83, - "learning_rate": 6.365383713843702e-06, - "loss": 0.0196, + "learning_rate": 1.638665688920787e-05, + "loss": 0.0864, "step": 146420 }, { "epoch": 6.83, - "learning_rate": 6.364914912568563e-06, - "loss": 0.0913, + "learning_rate": 1.6386188819371853e-05, + "loss": 0.0943, "step": 146425 }, { "epoch": 6.83, - "learning_rate": 6.364446111293423e-06, - "loss": 0.166, + "learning_rate": 1.6385720749535832e-05, + "loss": 0.0441, "step": 146430 }, { "epoch": 6.83, - "learning_rate": 6.363977310018284e-06, - "loss": 0.1493, + "learning_rate": 1.6385252679699812e-05, + "loss": 0.1651, "step": 146435 }, { "epoch": 6.83, - "learning_rate": 6.3635085087431444e-06, - "loss": 0.1588, + "learning_rate": 1.6384784609863792e-05, + "loss": 0.1815, "step": 146440 }, { "epoch": 6.83, - "learning_rate": 6.363039707468005e-06, - "loss": 0.0184, + "learning_rate": 1.6384316540027775e-05, + "loss": 0.0037, "step": 146445 }, { "epoch": 6.83, - "learning_rate": 6.362570906192865e-06, - "loss": 0.0128, + "learning_rate": 1.6383848470191755e-05, + "loss": 0.0039, "step": 146450 }, { "epoch": 6.83, - "learning_rate": 6.362102104917726e-06, - "loss": 0.0708, + "learning_rate": 1.638338040035573e-05, + "loss": 0.0582, "step": 146455 }, { "epoch": 6.83, - "learning_rate": 6.361633303642586e-06, - "loss": 0.0432, + "learning_rate": 1.638291233051971e-05, + "loss": 0.0529, "step": 146460 }, { "epoch": 6.83, - "learning_rate": 6.3611645023674475e-06, - "loss": 0.0533, + "learning_rate": 1.6382444260683695e-05, + "loss": 0.0235, "step": 146465 }, { "epoch": 6.83, - "learning_rate": 6.3606957010923074e-06, - "loss": 0.0767, + "learning_rate": 1.6381976190847674e-05, + "loss": 0.1134, "step": 146470 }, { "epoch": 6.83, - "learning_rate": 6.360226899817168e-06, - "loss": 0.0399, + "learning_rate": 1.6381508121011654e-05, + "loss": 0.0474, "step": 146475 }, { "epoch": 6.83, - "learning_rate": 6.359758098542028e-06, - "loss": 0.1679, + "learning_rate": 1.6381040051175634e-05, + "loss": 0.1506, "step": 146480 }, { "epoch": 6.84, - "learning_rate": 6.35928929726689e-06, - "loss": 0.1994, + "learning_rate": 1.6380571981339617e-05, + "loss": 0.1325, "step": 146485 }, { "epoch": 6.84, - "learning_rate": 6.35882049599175e-06, - "loss": 0.1339, + "learning_rate": 1.6380103911503597e-05, + "loss": 0.1281, "step": 146490 }, { "epoch": 6.84, - "learning_rate": 6.3583516947166105e-06, - "loss": 0.0153, + "learning_rate": 1.6379635841667577e-05, + "loss": 0.0096, "step": 146495 }, { "epoch": 6.84, - "learning_rate": 6.3578828934414704e-06, - "loss": 0.0431, + "learning_rate": 1.637916777183156e-05, + "loss": 0.0224, "step": 146500 }, { "epoch": 6.84, - "learning_rate": 6.35741409216633e-06, - "loss": 0.0353, + "learning_rate": 1.637869970199554e-05, + "loss": 0.0276, "step": 146505 }, { "epoch": 6.84, - "learning_rate": 6.356945290891192e-06, - "loss": 0.043, + "learning_rate": 1.637823163215952e-05, + "loss": 0.0156, "step": 146510 }, { "epoch": 6.84, - "learning_rate": 6.356476489616053e-06, - "loss": 0.0379, + "learning_rate": 1.6377763562323496e-05, + "loss": 0.0224, "step": 146515 }, { "epoch": 6.84, - "learning_rate": 6.356007688340913e-06, - "loss": 0.0602, + "learning_rate": 1.637729549248748e-05, + "loss": 0.0675, "step": 146520 }, { "epoch": 6.84, - "learning_rate": 6.355538887065773e-06, - "loss": 0.065, + "learning_rate": 1.637682742265146e-05, + "loss": 0.1414, "step": 146525 }, { "epoch": 6.84, - "learning_rate": 6.355070085790633e-06, - "loss": 0.0813, + "learning_rate": 1.637635935281544e-05, + "loss": 0.056, "step": 146530 }, { "epoch": 6.84, - "learning_rate": 6.354601284515495e-06, - "loss": 0.247, + "learning_rate": 1.637589128297942e-05, + "loss": 0.1608, "step": 146535 }, { "epoch": 6.84, - "learning_rate": 6.354132483240355e-06, - "loss": 0.1586, + "learning_rate": 1.6375423213143402e-05, + "loss": 0.2086, "step": 146540 }, { "epoch": 6.84, - "learning_rate": 6.353663681965215e-06, - "loss": 0.0009, + "learning_rate": 1.6374955143307382e-05, + "loss": 0.0119, "step": 146545 }, { "epoch": 6.84, - "learning_rate": 6.353194880690076e-06, - "loss": 0.0489, + "learning_rate": 1.637448707347136e-05, + "loss": 0.0019, "step": 146550 }, { "epoch": 6.84, - "learning_rate": 6.352726079414937e-06, - "loss": 0.0147, + "learning_rate": 1.6374019003635345e-05, + "loss": 0.0242, "step": 146555 }, { "epoch": 6.84, - "learning_rate": 6.352257278139797e-06, - "loss": 0.0598, + "learning_rate": 1.6373550933799325e-05, + "loss": 0.0022, "step": 146560 }, { "epoch": 6.84, - "learning_rate": 6.351788476864657e-06, - "loss": 0.0346, + "learning_rate": 1.6373082863963305e-05, + "loss": 0.0119, "step": 146565 }, { "epoch": 6.84, - "learning_rate": 6.351319675589518e-06, - "loss": 0.0183, + "learning_rate": 1.6372614794127284e-05, + "loss": 0.0647, "step": 146570 }, { "epoch": 6.84, - "learning_rate": 6.350850874314378e-06, - "loss": 0.0461, + "learning_rate": 1.6372146724291268e-05, + "loss": 0.0774, "step": 146575 }, { "epoch": 6.84, - "learning_rate": 6.3503820730392395e-06, - "loss": 0.1011, + "learning_rate": 1.6371678654455244e-05, + "loss": 0.0931, "step": 146580 }, { "epoch": 6.84, - "learning_rate": 6.3499132717640995e-06, - "loss": 0.1059, + "learning_rate": 1.6371210584619224e-05, + "loss": 0.1069, "step": 146585 }, { "epoch": 6.84, - "learning_rate": 6.34944447048896e-06, - "loss": 0.1649, + "learning_rate": 1.6370742514783204e-05, + "loss": 0.0787, "step": 146590 }, { "epoch": 6.84, - "learning_rate": 6.34897566921382e-06, - "loss": 0.0073, + "learning_rate": 1.6370274444947187e-05, + "loss": 0.0152, "step": 146595 }, { "epoch": 6.84, - "learning_rate": 6.348506867938681e-06, - "loss": 0.0036, + "learning_rate": 1.6369806375111167e-05, + "loss": 0.0266, "step": 146600 }, { "epoch": 6.84, - "learning_rate": 6.348038066663543e-06, - "loss": 0.0108, + "learning_rate": 1.6369338305275146e-05, + "loss": 0.0374, "step": 146605 }, { "epoch": 6.84, - "learning_rate": 6.3475692653884025e-06, - "loss": 0.0806, + "learning_rate": 1.636887023543913e-05, + "loss": 0.1208, "step": 146610 }, { "epoch": 6.84, - "learning_rate": 6.3471004641132625e-06, - "loss": 0.0367, + "learning_rate": 1.636840216560311e-05, + "loss": 0.0453, "step": 146615 }, { "epoch": 6.84, - "learning_rate": 6.346631662838123e-06, - "loss": 0.0435, + "learning_rate": 1.636793409576709e-05, + "loss": 0.0621, "step": 146620 }, { "epoch": 6.84, - "learning_rate": 6.346162861562985e-06, - "loss": 0.0724, + "learning_rate": 1.636746602593107e-05, + "loss": 0.1141, "step": 146625 }, { "epoch": 6.84, - "learning_rate": 6.345694060287845e-06, - "loss": 0.1123, + "learning_rate": 1.6366997956095052e-05, + "loss": 0.1627, "step": 146630 }, { "epoch": 6.84, - "learning_rate": 6.345225259012705e-06, - "loss": 0.1018, + "learning_rate": 1.6366529886259032e-05, + "loss": 0.1119, "step": 146635 }, { "epoch": 6.84, - "learning_rate": 6.3447564577375655e-06, - "loss": 0.1011, + "learning_rate": 1.6366061816423012e-05, + "loss": 0.0624, "step": 146640 }, { "epoch": 6.84, - "learning_rate": 6.3442876564624254e-06, - "loss": 0.0208, + "learning_rate": 1.636559374658699e-05, + "loss": 0.0173, "step": 146645 }, { "epoch": 6.84, - "learning_rate": 6.343818855187287e-06, - "loss": 0.0184, + "learning_rate": 1.636512567675097e-05, + "loss": 0.0038, "step": 146650 }, { "epoch": 6.84, - "learning_rate": 6.343350053912147e-06, - "loss": 0.0341, + "learning_rate": 1.636465760691495e-05, + "loss": 0.0061, "step": 146655 }, { "epoch": 6.84, - "learning_rate": 6.342881252637008e-06, - "loss": 0.0151, + "learning_rate": 1.636418953707893e-05, + "loss": 0.038, "step": 146660 }, { "epoch": 6.84, - "learning_rate": 6.342412451361868e-06, - "loss": 0.0543, + "learning_rate": 1.636372146724291e-05, + "loss": 0.0371, "step": 146665 }, { "epoch": 6.84, - "learning_rate": 6.341943650086729e-06, - "loss": 0.0226, + "learning_rate": 1.6363253397406894e-05, + "loss": 0.0236, "step": 146670 }, { "epoch": 6.84, - "learning_rate": 6.341474848811589e-06, - "loss": 0.1109, + "learning_rate": 1.6362785327570874e-05, + "loss": 0.0659, "step": 146675 }, { "epoch": 6.84, - "learning_rate": 6.34100604753645e-06, - "loss": 0.1066, + "learning_rate": 1.6362317257734854e-05, + "loss": 0.0973, "step": 146680 }, { "epoch": 6.84, - "learning_rate": 6.34053724626131e-06, - "loss": 0.1292, + "learning_rate": 1.6361849187898837e-05, + "loss": 0.1627, "step": 146685 }, { "epoch": 6.84, - "learning_rate": 6.340068444986171e-06, - "loss": 0.2527, + "learning_rate": 1.6361381118062817e-05, + "loss": 0.1347, "step": 146690 }, { "epoch": 6.84, - "learning_rate": 6.3395996437110316e-06, - "loss": 0.0157, + "learning_rate": 1.6360913048226797e-05, + "loss": 0.0185, "step": 146695 }, { "epoch": 6.85, - "learning_rate": 6.339130842435892e-06, - "loss": 0.0411, + "learning_rate": 1.6360444978390777e-05, + "loss": 0.0519, "step": 146700 }, { "epoch": 6.85, - "learning_rate": 6.338662041160752e-06, - "loss": 0.0009, + "learning_rate": 1.6359976908554756e-05, + "loss": 0.0075, "step": 146705 }, { "epoch": 6.85, - "learning_rate": 6.338193239885613e-06, - "loss": 0.0269, + "learning_rate": 1.6359508838718736e-05, + "loss": 0.0109, "step": 146710 }, { "epoch": 6.85, - "learning_rate": 6.337724438610473e-06, - "loss": 0.0522, + "learning_rate": 1.6359040768882716e-05, + "loss": 0.0578, "step": 146715 }, { "epoch": 6.85, - "learning_rate": 6.337255637335335e-06, - "loss": 0.069, + "learning_rate": 1.6358572699046696e-05, + "loss": 0.0434, "step": 146720 }, { "epoch": 6.85, - "learning_rate": 6.3367868360601945e-06, - "loss": 0.0852, + "learning_rate": 1.635810462921068e-05, + "loss": 0.0981, "step": 146725 }, { "epoch": 6.85, - "learning_rate": 6.336318034785055e-06, - "loss": 0.0827, + "learning_rate": 1.635763655937466e-05, + "loss": 0.1141, "step": 146730 }, { "epoch": 6.85, - "learning_rate": 6.335849233509915e-06, - "loss": 0.2268, + "learning_rate": 1.635716848953864e-05, + "loss": 0.1001, "step": 146735 }, { "epoch": 6.85, - "learning_rate": 6.335380432234777e-06, - "loss": 0.1906, + "learning_rate": 1.6356700419702622e-05, + "loss": 0.1395, "step": 146740 }, { "epoch": 6.85, - "learning_rate": 6.334911630959637e-06, - "loss": 0.0153, + "learning_rate": 1.63562323498666e-05, + "loss": 0.014, "step": 146745 }, { "epoch": 6.85, - "learning_rate": 6.334442829684498e-06, - "loss": 0.0206, + "learning_rate": 1.635576428003058e-05, + "loss": 0.0241, "step": 146750 }, { "epoch": 6.85, - "learning_rate": 6.3339740284093575e-06, - "loss": 0.0217, + "learning_rate": 1.635529621019456e-05, + "loss": 0.0412, "step": 146755 }, { "epoch": 6.85, - "learning_rate": 6.3335052271342175e-06, - "loss": 0.0358, + "learning_rate": 1.6354828140358544e-05, + "loss": 0.0292, "step": 146760 }, { "epoch": 6.85, - "learning_rate": 6.333036425859079e-06, - "loss": 0.0598, + "learning_rate": 1.6354360070522524e-05, + "loss": 0.0166, "step": 146765 }, { "epoch": 6.85, - "learning_rate": 6.33256762458394e-06, - "loss": 0.0601, + "learning_rate": 1.63538920006865e-05, + "loss": 0.048, "step": 146770 }, { "epoch": 6.85, - "learning_rate": 6.3320988233088e-06, - "loss": 0.0778, + "learning_rate": 1.635342393085048e-05, + "loss": 0.0424, "step": 146775 }, { "epoch": 6.85, - "learning_rate": 6.331630022033661e-06, - "loss": 0.1204, + "learning_rate": 1.6352955861014464e-05, + "loss": 0.1731, "step": 146780 }, { "epoch": 6.85, - "learning_rate": 6.3311612207585205e-06, - "loss": 0.1424, + "learning_rate": 1.6352487791178444e-05, + "loss": 0.2141, "step": 146785 }, { "epoch": 6.85, - "learning_rate": 6.330692419483382e-06, - "loss": 0.2382, + "learning_rate": 1.6352019721342423e-05, + "loss": 0.1101, "step": 146790 }, { "epoch": 6.85, - "learning_rate": 6.330223618208242e-06, - "loss": 0.066, + "learning_rate": 1.6351551651506407e-05, + "loss": 0.0119, "step": 146795 }, { "epoch": 6.85, - "learning_rate": 6.329754816933103e-06, - "loss": 0.0024, + "learning_rate": 1.6351083581670386e-05, + "loss": 0.0355, "step": 146800 }, { "epoch": 6.85, - "learning_rate": 6.329286015657963e-06, - "loss": 0.0523, + "learning_rate": 1.6350615511834366e-05, + "loss": 0.0495, "step": 146805 }, { "epoch": 6.85, - "learning_rate": 6.3288172143828244e-06, - "loss": 0.0444, + "learning_rate": 1.6350147441998346e-05, + "loss": 0.0182, "step": 146810 }, { "epoch": 6.85, - "learning_rate": 6.328348413107684e-06, - "loss": 0.0551, + "learning_rate": 1.634967937216233e-05, + "loss": 0.0284, "step": 146815 }, { "epoch": 6.85, - "learning_rate": 6.327879611832545e-06, - "loss": 0.0351, + "learning_rate": 1.634921130232631e-05, + "loss": 0.0181, "step": 146820 }, { "epoch": 6.85, - "learning_rate": 6.327410810557405e-06, - "loss": 0.1405, + "learning_rate": 1.634874323249029e-05, + "loss": 0.11, "step": 146825 }, { "epoch": 6.85, - "learning_rate": 6.326942009282265e-06, - "loss": 0.0559, + "learning_rate": 1.634827516265427e-05, + "loss": 0.0873, "step": 146830 }, { "epoch": 6.85, - "learning_rate": 6.326473208007127e-06, - "loss": 0.0892, + "learning_rate": 1.634780709281825e-05, + "loss": 0.1927, "step": 146835 }, { "epoch": 6.85, - "learning_rate": 6.326004406731987e-06, - "loss": 0.0811, + "learning_rate": 1.634733902298223e-05, + "loss": 0.1491, "step": 146840 }, { "epoch": 6.85, - "learning_rate": 6.325535605456847e-06, - "loss": 0.0399, + "learning_rate": 1.6346870953146208e-05, + "loss": 0.0019, "step": 146845 }, { "epoch": 6.85, - "learning_rate": 6.325066804181707e-06, - "loss": 0.0509, + "learning_rate": 1.634640288331019e-05, + "loss": 0.012, "step": 146850 }, { "epoch": 6.85, - "learning_rate": 6.324598002906568e-06, - "loss": 0.0493, + "learning_rate": 1.634593481347417e-05, + "loss": 0.0749, "step": 146855 }, { "epoch": 6.85, - "learning_rate": 6.32412920163143e-06, - "loss": 0.1221, + "learning_rate": 1.634546674363815e-05, + "loss": 0.0151, "step": 146860 }, { "epoch": 6.85, - "learning_rate": 6.32366040035629e-06, - "loss": 0.016, + "learning_rate": 1.634499867380213e-05, + "loss": 0.038, "step": 146865 }, { "epoch": 6.85, - "learning_rate": 6.3231915990811496e-06, - "loss": 0.071, + "learning_rate": 1.6344530603966114e-05, + "loss": 0.0384, "step": 146870 }, { "epoch": 6.85, - "learning_rate": 6.32272279780601e-06, - "loss": 0.1593, + "learning_rate": 1.6344062534130094e-05, + "loss": 0.0617, "step": 146875 }, { "epoch": 6.85, - "learning_rate": 6.322253996530872e-06, - "loss": 0.0522, + "learning_rate": 1.6343594464294074e-05, + "loss": 0.1018, "step": 146880 }, { "epoch": 6.85, - "learning_rate": 6.321785195255732e-06, - "loss": 0.1992, + "learning_rate": 1.6343126394458053e-05, + "loss": 0.3066, "step": 146885 }, { "epoch": 6.85, - "learning_rate": 6.321316393980592e-06, - "loss": 0.1828, + "learning_rate": 1.6342658324622037e-05, + "loss": 0.0928, "step": 146890 }, { "epoch": 6.85, - "learning_rate": 6.320847592705453e-06, - "loss": 0.0097, + "learning_rate": 1.6342190254786013e-05, + "loss": 0.01, "step": 146895 }, { "epoch": 6.85, - "learning_rate": 6.3203787914303126e-06, - "loss": 0.055, + "learning_rate": 1.6341722184949993e-05, + "loss": 0.0054, "step": 146900 }, { "epoch": 6.85, - "learning_rate": 6.319909990155174e-06, - "loss": 0.009, + "learning_rate": 1.6341254115113973e-05, + "loss": 0.0226, "step": 146905 }, { "epoch": 6.86, - "learning_rate": 6.319441188880034e-06, - "loss": 0.0556, + "learning_rate": 1.6340786045277956e-05, + "loss": 0.0273, "step": 146910 }, { "epoch": 6.86, - "learning_rate": 6.318972387604895e-06, + "learning_rate": 1.6340317975441936e-05, "loss": 0.03, "step": 146915 }, { "epoch": 6.86, - "learning_rate": 6.318503586329755e-06, - "loss": 0.0699, + "learning_rate": 1.6339849905605916e-05, + "loss": 0.0179, "step": 146920 }, { "epoch": 6.86, - "learning_rate": 6.318034785054616e-06, - "loss": 0.0652, + "learning_rate": 1.63393818357699e-05, + "loss": 0.0876, "step": 146925 }, { "epoch": 6.86, - "learning_rate": 6.317565983779476e-06, - "loss": 0.0639, + "learning_rate": 1.633891376593388e-05, + "loss": 0.0471, "step": 146930 }, { "epoch": 6.86, - "learning_rate": 6.317097182504337e-06, - "loss": 0.1236, + "learning_rate": 1.633844569609786e-05, + "loss": 0.1869, "step": 146935 }, { "epoch": 6.86, - "learning_rate": 6.316628381229197e-06, - "loss": 0.2059, + "learning_rate": 1.6337977626261838e-05, + "loss": 0.0428, "step": 146940 }, { "epoch": 6.86, - "learning_rate": 6.316159579954058e-06, - "loss": 0.0292, + "learning_rate": 1.633750955642582e-05, + "loss": 0.0008, "step": 146945 }, { "epoch": 6.86, - "learning_rate": 6.315690778678919e-06, - "loss": 0.0304, + "learning_rate": 1.63370414865898e-05, + "loss": 0.0274, "step": 146950 }, { "epoch": 6.86, - "learning_rate": 6.3152219774037794e-06, - "loss": 0.0536, + "learning_rate": 1.633657341675378e-05, + "loss": 0.0302, "step": 146955 }, { "epoch": 6.86, - "learning_rate": 6.314753176128639e-06, - "loss": 0.0263, + "learning_rate": 1.6336105346917758e-05, + "loss": 0.0193, "step": 146960 }, { "epoch": 6.86, - "learning_rate": 6.3142843748535e-06, - "loss": 0.0158, + "learning_rate": 1.633563727708174e-05, + "loss": 0.019, "step": 146965 }, { "epoch": 6.86, - "learning_rate": 6.31381557357836e-06, - "loss": 0.0725, + "learning_rate": 1.633516920724572e-05, + "loss": 0.0645, "step": 146970 }, { "epoch": 6.86, - "learning_rate": 6.313346772303222e-06, - "loss": 0.0313, + "learning_rate": 1.63347011374097e-05, + "loss": 0.0535, "step": 146975 }, { "epoch": 6.86, - "learning_rate": 6.312877971028082e-06, - "loss": 0.0526, + "learning_rate": 1.6334233067573684e-05, + "loss": 0.0496, "step": 146980 }, { "epoch": 6.86, - "learning_rate": 6.3124091697529424e-06, - "loss": 0.1258, + "learning_rate": 1.6333764997737663e-05, + "loss": 0.1079, "step": 146985 }, { "epoch": 6.86, - "learning_rate": 6.311940368477802e-06, - "loss": 0.2065, + "learning_rate": 1.6333296927901643e-05, + "loss": 0.1051, "step": 146990 }, { "epoch": 6.86, - "learning_rate": 6.311471567202664e-06, - "loss": 0.014, + "learning_rate": 1.6332828858065623e-05, + "loss": 0.0105, "step": 146995 }, { "epoch": 6.86, - "learning_rate": 6.311002765927524e-06, - "loss": 0.0299, + "learning_rate": 1.6332360788229606e-05, + "loss": 0.0158, "step": 147000 }, { "epoch": 6.86, - "learning_rate": 6.310533964652385e-06, - "loss": 0.0192, + "learning_rate": 1.6331892718393586e-05, + "loss": 0.0102, "step": 147005 }, { "epoch": 6.86, - "learning_rate": 6.310065163377245e-06, - "loss": 0.0203, + "learning_rate": 1.6331424648557566e-05, + "loss": 0.009, "step": 147010 }, { "epoch": 6.86, - "learning_rate": 6.3095963621021054e-06, - "loss": 0.0427, + "learning_rate": 1.6330956578721546e-05, + "loss": 0.0337, "step": 147015 }, { "epoch": 6.86, - "learning_rate": 6.309127560826966e-06, - "loss": 0.0266, + "learning_rate": 1.6330488508885526e-05, + "loss": 0.1166, "step": 147020 }, { "epoch": 6.86, - "learning_rate": 6.308658759551827e-06, - "loss": 0.0617, + "learning_rate": 1.6330020439049505e-05, + "loss": 0.0501, "step": 147025 }, { "epoch": 6.86, - "learning_rate": 6.308189958276687e-06, - "loss": 0.1118, + "learning_rate": 1.6329552369213485e-05, + "loss": 0.0595, "step": 147030 }, { "epoch": 6.86, - "learning_rate": 6.307721157001548e-06, - "loss": 0.1729, + "learning_rate": 1.632908429937747e-05, + "loss": 0.2332, "step": 147035 }, { "epoch": 6.86, - "learning_rate": 6.307252355726408e-06, - "loss": 0.1668, + "learning_rate": 1.6328616229541448e-05, + "loss": 0.1403, "step": 147040 }, { "epoch": 6.86, - "learning_rate": 6.306783554451269e-06, - "loss": 0.011, + "learning_rate": 1.6328148159705428e-05, + "loss": 0.0004, "step": 147045 }, { "epoch": 6.86, - "learning_rate": 6.306314753176129e-06, - "loss": 0.0231, + "learning_rate": 1.6327680089869408e-05, + "loss": 0.0371, "step": 147050 }, { "epoch": 6.86, - "learning_rate": 6.30584595190099e-06, - "loss": 0.0148, + "learning_rate": 1.632721202003339e-05, + "loss": 0.0039, "step": 147055 }, { "epoch": 6.86, - "learning_rate": 6.30537715062585e-06, - "loss": 0.0312, + "learning_rate": 1.632674395019737e-05, + "loss": 0.024, "step": 147060 }, { "epoch": 6.86, - "learning_rate": 6.3049083493507115e-06, - "loss": 0.0256, + "learning_rate": 1.632627588036135e-05, + "loss": 0.0261, "step": 147065 }, { "epoch": 6.86, - "learning_rate": 6.3044395480755715e-06, - "loss": 0.0491, + "learning_rate": 1.632580781052533e-05, + "loss": 0.0462, "step": 147070 }, { "epoch": 6.86, - "learning_rate": 6.303970746800432e-06, - "loss": 0.0918, + "learning_rate": 1.6325339740689314e-05, + "loss": 0.1396, "step": 147075 }, { "epoch": 6.86, - "learning_rate": 6.303501945525292e-06, - "loss": 0.1033, + "learning_rate": 1.6324871670853293e-05, + "loss": 0.1162, "step": 147080 }, { "epoch": 6.86, - "learning_rate": 6.303033144250152e-06, - "loss": 0.1487, + "learning_rate": 1.632440360101727e-05, + "loss": 0.1555, "step": 147085 }, { "epoch": 6.86, - "learning_rate": 6.302564342975014e-06, - "loss": 0.0823, + "learning_rate": 1.632393553118125e-05, + "loss": 0.2962, "step": 147090 }, { "epoch": 6.86, - "learning_rate": 6.3020955416998745e-06, - "loss": 0.0086, + "learning_rate": 1.6323467461345233e-05, + "loss": 0.0038, "step": 147095 }, { "epoch": 6.86, - "learning_rate": 6.3016267404247345e-06, - "loss": 0.0496, + "learning_rate": 1.6322999391509213e-05, + "loss": 0.0096, "step": 147100 }, { "epoch": 6.86, - "learning_rate": 6.301157939149594e-06, - "loss": 0.0338, + "learning_rate": 1.6322531321673193e-05, + "loss": 0.0785, "step": 147105 }, { "epoch": 6.86, - "learning_rate": 6.300689137874455e-06, - "loss": 0.0376, + "learning_rate": 1.6322063251837176e-05, + "loss": 0.0162, "step": 147110 }, { "epoch": 6.86, - "learning_rate": 6.300220336599317e-06, - "loss": 0.0293, + "learning_rate": 1.6321595182001156e-05, + "loss": 0.0344, "step": 147115 }, { "epoch": 6.86, - "learning_rate": 6.299751535324177e-06, - "loss": 0.0597, + "learning_rate": 1.6321127112165135e-05, + "loss": 0.0659, "step": 147120 }, { "epoch": 6.87, - "learning_rate": 6.299282734049037e-06, - "loss": 0.1717, + "learning_rate": 1.6320659042329115e-05, + "loss": 0.0848, "step": 147125 }, { "epoch": 6.87, - "learning_rate": 6.2988139327738975e-06, - "loss": 0.1423, + "learning_rate": 1.63201909724931e-05, + "loss": 0.117, "step": 147130 }, { "epoch": 6.87, - "learning_rate": 6.298345131498759e-06, - "loss": 0.16, + "learning_rate": 1.6319722902657078e-05, + "loss": 0.2139, "step": 147135 }, { "epoch": 6.87, - "learning_rate": 6.297876330223619e-06, - "loss": 0.1708, + "learning_rate": 1.6319254832821058e-05, + "loss": 0.2821, "step": 147140 }, { "epoch": 6.87, - "learning_rate": 6.297407528948479e-06, - "loss": 0.027, + "learning_rate": 1.6318786762985038e-05, + "loss": 0.0202, "step": 147145 }, { "epoch": 6.87, - "learning_rate": 6.29693872767334e-06, - "loss": 0.0223, + "learning_rate": 1.6318318693149018e-05, + "loss": 0.0115, "step": 147150 }, { "epoch": 6.87, - "learning_rate": 6.2964699263982e-06, - "loss": 0.042, + "learning_rate": 1.6317850623312998e-05, + "loss": 0.0504, "step": 147155 }, { "epoch": 6.87, - "learning_rate": 6.296001125123061e-06, - "loss": 0.0411, + "learning_rate": 1.6317382553476977e-05, + "loss": 0.0261, "step": 147160 }, { "epoch": 6.87, - "learning_rate": 6.295532323847921e-06, - "loss": 0.0212, + "learning_rate": 1.631691448364096e-05, + "loss": 0.057, "step": 147165 }, { "epoch": 6.87, - "learning_rate": 6.295063522572782e-06, - "loss": 0.0345, + "learning_rate": 1.631644641380494e-05, + "loss": 0.0296, "step": 147170 }, { "epoch": 6.87, - "learning_rate": 6.294594721297642e-06, - "loss": 0.4906, + "learning_rate": 1.631597834396892e-05, + "loss": 0.0766, "step": 147175 }, { "epoch": 6.87, - "learning_rate": 6.294125920022503e-06, - "loss": 0.0659, + "learning_rate": 1.63155102741329e-05, + "loss": 0.2472, "step": 147180 }, { "epoch": 6.87, - "learning_rate": 6.2936571187473635e-06, - "loss": 0.1937, + "learning_rate": 1.6315042204296883e-05, + "loss": 0.1649, "step": 147185 }, { "epoch": 6.87, - "learning_rate": 6.293188317472224e-06, - "loss": 0.2038, + "learning_rate": 1.6314574134460863e-05, + "loss": 0.1292, "step": 147190 }, { "epoch": 6.87, - "learning_rate": 6.292719516197084e-06, - "loss": 0.0609, + "learning_rate": 1.6314106064624843e-05, + "loss": 0.0143, "step": 147195 }, { "epoch": 6.87, - "learning_rate": 6.292250714921945e-06, - "loss": 0.0254, + "learning_rate": 1.6313637994788823e-05, + "loss": 0.0379, "step": 147200 }, { "epoch": 6.87, - "learning_rate": 6.291781913646806e-06, - "loss": 0.0299, + "learning_rate": 1.6313169924952806e-05, + "loss": 0.022, "step": 147205 }, { "epoch": 6.87, - "learning_rate": 6.2913131123716666e-06, - "loss": 0.0124, + "learning_rate": 1.6312701855116782e-05, + "loss": 0.0147, "step": 147210 }, { "epoch": 6.87, - "learning_rate": 6.2908443110965265e-06, - "loss": 0.0288, + "learning_rate": 1.6312233785280762e-05, + "loss": 0.0292, "step": 147215 }, { "epoch": 6.87, - "learning_rate": 6.290375509821387e-06, - "loss": 0.0495, + "learning_rate": 1.6311765715444745e-05, + "loss": 0.044, "step": 147220 }, { "epoch": 6.87, - "learning_rate": 6.289906708546247e-06, - "loss": 0.0727, + "learning_rate": 1.6311297645608725e-05, + "loss": 0.0456, "step": 147225 }, { "epoch": 6.87, - "learning_rate": 6.289437907271109e-06, - "loss": 0.186, + "learning_rate": 1.6310829575772705e-05, + "loss": 0.0692, "step": 147230 }, { "epoch": 6.87, - "learning_rate": 6.288969105995969e-06, - "loss": 0.2323, + "learning_rate": 1.6310361505936685e-05, + "loss": 0.1252, "step": 147235 }, { "epoch": 6.87, - "learning_rate": 6.2885003047208295e-06, - "loss": 0.2028, + "learning_rate": 1.6309893436100668e-05, + "loss": 0.1256, "step": 147240 }, { "epoch": 6.87, - "learning_rate": 6.2880315034456895e-06, - "loss": 0.0081, + "learning_rate": 1.6309425366264648e-05, + "loss": 0.0063, "step": 147245 }, { "epoch": 6.87, - "learning_rate": 6.28756270217055e-06, - "loss": 0.0714, + "learning_rate": 1.6308957296428628e-05, + "loss": 0.0021, "step": 147250 }, { "epoch": 6.87, - "learning_rate": 6.287093900895411e-06, - "loss": 0.0057, + "learning_rate": 1.6308489226592607e-05, + "loss": 0.0088, "step": 147255 }, { "epoch": 6.87, - "learning_rate": 6.286625099620272e-06, - "loss": 0.0575, + "learning_rate": 1.630802115675659e-05, + "loss": 0.0708, "step": 147260 }, { "epoch": 6.87, - "learning_rate": 6.286156298345132e-06, - "loss": 0.0194, + "learning_rate": 1.630755308692057e-05, + "loss": 0.0587, "step": 147265 }, { "epoch": 6.87, - "learning_rate": 6.2856874970699925e-06, - "loss": 0.1041, + "learning_rate": 1.630708501708455e-05, + "loss": 0.0456, "step": 147270 }, { "epoch": 6.87, - "learning_rate": 6.285218695794853e-06, - "loss": 0.0667, + "learning_rate": 1.6306616947248527e-05, + "loss": 0.0479, "step": 147275 }, { "epoch": 6.87, - "learning_rate": 6.284749894519714e-06, - "loss": 0.1894, + "learning_rate": 1.630614887741251e-05, + "loss": 0.0989, "step": 147280 }, { "epoch": 6.87, - "learning_rate": 6.284281093244574e-06, - "loss": 0.1795, + "learning_rate": 1.630568080757649e-05, + "loss": 0.1405, "step": 147285 }, { "epoch": 6.87, - "learning_rate": 6.283812291969435e-06, - "loss": 0.1263, + "learning_rate": 1.630521273774047e-05, + "loss": 0.1038, "step": 147290 }, { "epoch": 6.87, - "learning_rate": 6.283343490694295e-06, - "loss": 0.0159, + "learning_rate": 1.6304744667904453e-05, + "loss": 0.0517, "step": 147295 }, { "epoch": 6.87, - "learning_rate": 6.282874689419156e-06, - "loss": 0.0285, + "learning_rate": 1.6304276598068433e-05, + "loss": 0.0423, "step": 147300 }, { "epoch": 6.87, - "learning_rate": 6.282405888144016e-06, - "loss": 0.0269, + "learning_rate": 1.6303808528232412e-05, + "loss": 0.0235, "step": 147305 }, { "epoch": 6.87, - "learning_rate": 6.281937086868877e-06, - "loss": 0.0539, + "learning_rate": 1.6303340458396392e-05, + "loss": 0.0563, "step": 147310 }, { "epoch": 6.87, - "learning_rate": 6.281468285593737e-06, - "loss": 0.0279, + "learning_rate": 1.6302872388560375e-05, + "loss": 0.0475, "step": 147315 }, { "epoch": 6.87, - "learning_rate": 6.280999484318599e-06, - "loss": 0.0603, + "learning_rate": 1.6302404318724355e-05, + "loss": 0.0893, "step": 147320 }, { "epoch": 6.87, - "learning_rate": 6.280530683043459e-06, - "loss": 0.086, + "learning_rate": 1.6301936248888335e-05, + "loss": 0.1138, "step": 147325 }, { "epoch": 6.87, - "learning_rate": 6.280061881768319e-06, - "loss": 0.0612, + "learning_rate": 1.6301468179052315e-05, + "loss": 0.0582, "step": 147330 }, { "epoch": 6.87, - "learning_rate": 6.279593080493179e-06, - "loss": 0.1483, + "learning_rate": 1.6301000109216295e-05, + "loss": 0.2204, "step": 147335 }, { "epoch": 6.88, - "learning_rate": 6.279124279218039e-06, - "loss": 0.2093, + "learning_rate": 1.6300532039380274e-05, + "loss": 0.1096, "step": 147340 }, { "epoch": 6.88, - "learning_rate": 6.278655477942901e-06, - "loss": 0.0262, + "learning_rate": 1.6300063969544254e-05, + "loss": 0.0343, "step": 147345 }, { "epoch": 6.88, - "learning_rate": 6.278186676667762e-06, - "loss": 0.042, + "learning_rate": 1.6299595899708238e-05, + "loss": 0.0084, "step": 147350 }, { "epoch": 6.88, - "learning_rate": 6.2777178753926216e-06, - "loss": 0.0153, + "learning_rate": 1.6299127829872217e-05, + "loss": 0.0281, "step": 147355 }, { "epoch": 6.88, - "learning_rate": 6.2772490741174815e-06, - "loss": 0.0351, + "learning_rate": 1.6298659760036197e-05, + "loss": 0.0417, "step": 147360 }, { "epoch": 6.88, - "learning_rate": 6.276780272842342e-06, - "loss": 0.0618, + "learning_rate": 1.6298191690200177e-05, + "loss": 0.1219, "step": 147365 }, { "epoch": 6.88, - "learning_rate": 6.276311471567204e-06, - "loss": 0.0358, + "learning_rate": 1.629772362036416e-05, + "loss": 0.0927, "step": 147370 }, { "epoch": 6.88, - "learning_rate": 6.275842670292064e-06, - "loss": 0.0659, + "learning_rate": 1.629725555052814e-05, + "loss": 0.0755, "step": 147375 }, { "epoch": 6.88, - "learning_rate": 6.275373869016924e-06, - "loss": 0.1395, + "learning_rate": 1.629678748069212e-05, + "loss": 0.109, "step": 147380 }, { "epoch": 6.88, - "learning_rate": 6.2749050677417846e-06, - "loss": 0.1181, + "learning_rate": 1.62963194108561e-05, + "loss": 0.1618, "step": 147385 }, { "epoch": 6.88, - "learning_rate": 6.274436266466646e-06, - "loss": 0.128, + "learning_rate": 1.6295851341020083e-05, + "loss": 0.1117, "step": 147390 }, { "epoch": 6.88, - "learning_rate": 6.273967465191506e-06, - "loss": 0.0185, + "learning_rate": 1.6295383271184063e-05, + "loss": 0.0071, "step": 147395 }, { "epoch": 6.88, - "learning_rate": 6.273498663916366e-06, - "loss": 0.0087, + "learning_rate": 1.629491520134804e-05, + "loss": 0.0112, "step": 147400 }, { "epoch": 6.88, - "learning_rate": 6.273029862641227e-06, - "loss": 0.0308, + "learning_rate": 1.6294447131512022e-05, + "loss": 0.0549, "step": 147405 }, { "epoch": 6.88, - "learning_rate": 6.272561061366087e-06, - "loss": 0.0602, + "learning_rate": 1.6293979061676002e-05, + "loss": 0.0438, "step": 147410 }, { "epoch": 6.88, - "learning_rate": 6.272092260090948e-06, - "loss": 0.0582, + "learning_rate": 1.6293510991839982e-05, + "loss": 0.0591, "step": 147415 }, { "epoch": 6.88, - "learning_rate": 6.271623458815808e-06, - "loss": 0.0761, + "learning_rate": 1.6293042922003962e-05, + "loss": 0.0894, "step": 147420 }, { "epoch": 6.88, - "learning_rate": 6.271154657540669e-06, - "loss": 0.0442, + "learning_rate": 1.6292574852167945e-05, + "loss": 0.0474, "step": 147425 }, { "epoch": 6.88, - "learning_rate": 6.270685856265529e-06, - "loss": 0.058, + "learning_rate": 1.6292106782331925e-05, + "loss": 0.1433, "step": 147430 }, { "epoch": 6.88, - "learning_rate": 6.27021705499039e-06, - "loss": 0.1475, + "learning_rate": 1.6291638712495905e-05, + "loss": 0.0531, "step": 147435 }, { "epoch": 6.88, - "learning_rate": 6.269748253715251e-06, - "loss": 0.1716, + "learning_rate": 1.6291170642659884e-05, + "loss": 0.1453, "step": 147440 }, { "epoch": 6.88, - "learning_rate": 6.269279452440111e-06, - "loss": 0.0238, + "learning_rate": 1.6290702572823868e-05, + "loss": 0.0332, "step": 147445 }, { "epoch": 6.88, - "learning_rate": 6.268810651164971e-06, - "loss": 0.0352, + "learning_rate": 1.6290234502987847e-05, + "loss": 0.0045, "step": 147450 }, { "epoch": 6.88, - "learning_rate": 6.268341849889832e-06, - "loss": 0.0188, + "learning_rate": 1.6289766433151827e-05, + "loss": 0.0563, "step": 147455 }, { "epoch": 6.88, - "learning_rate": 6.267873048614693e-06, - "loss": 0.0878, + "learning_rate": 1.628929836331581e-05, + "loss": 0.077, "step": 147460 }, { "epoch": 6.88, - "learning_rate": 6.267404247339554e-06, - "loss": 0.0296, + "learning_rate": 1.6288830293479787e-05, + "loss": 0.0885, "step": 147465 }, { "epoch": 6.88, - "learning_rate": 6.266935446064414e-06, - "loss": 0.0852, + "learning_rate": 1.6288362223643767e-05, + "loss": 0.1112, "step": 147470 }, { "epoch": 6.88, - "learning_rate": 6.266466644789274e-06, - "loss": 0.1481, + "learning_rate": 1.6287894153807747e-05, + "loss": 0.0622, "step": 147475 }, { "epoch": 6.88, - "learning_rate": 6.265997843514134e-06, - "loss": 0.0898, + "learning_rate": 1.628742608397173e-05, + "loss": 0.0766, "step": 147480 }, { "epoch": 6.88, - "learning_rate": 6.265529042238996e-06, - "loss": 0.1879, + "learning_rate": 1.628695801413571e-05, + "loss": 0.2294, "step": 147485 }, { "epoch": 6.88, - "learning_rate": 6.265060240963856e-06, - "loss": 0.2205, + "learning_rate": 1.628648994429969e-05, + "loss": 0.1337, "step": 147490 }, { "epoch": 6.88, - "learning_rate": 6.264591439688717e-06, - "loss": 0.0184, + "learning_rate": 1.628602187446367e-05, + "loss": 0.011, "step": 147495 }, { "epoch": 6.88, - "learning_rate": 6.264122638413577e-06, - "loss": 0.0077, + "learning_rate": 1.6285553804627652e-05, + "loss": 0.0563, "step": 147500 }, { "epoch": 6.88, - "learning_rate": 6.263653837138437e-06, - "loss": 0.0281, + "learning_rate": 1.6285085734791632e-05, + "loss": 0.0311, "step": 147505 }, { "epoch": 6.88, - "learning_rate": 6.263185035863298e-06, - "loss": 0.0158, + "learning_rate": 1.6284617664955612e-05, + "loss": 0.0569, "step": 147510 }, { "epoch": 6.88, - "learning_rate": 6.262716234588159e-06, - "loss": 0.0283, + "learning_rate": 1.6284149595119592e-05, + "loss": 0.057, "step": 147515 }, { "epoch": 6.88, - "learning_rate": 6.262247433313019e-06, - "loss": 0.0501, + "learning_rate": 1.6283681525283575e-05, + "loss": 0.0399, "step": 147520 }, { "epoch": 6.88, - "learning_rate": 6.26177863203788e-06, - "loss": 0.1073, + "learning_rate": 1.628321345544755e-05, + "loss": 0.0102, "step": 147525 }, { "epoch": 6.88, - "learning_rate": 6.2613098307627404e-06, - "loss": 0.0494, + "learning_rate": 1.628274538561153e-05, + "loss": 0.1092, "step": 147530 }, { "epoch": 6.88, - "learning_rate": 6.260841029487601e-06, - "loss": 0.1875, + "learning_rate": 1.6282277315775514e-05, + "loss": 0.1743, "step": 147535 }, { "epoch": 6.88, - "learning_rate": 6.260372228212461e-06, - "loss": 0.1695, + "learning_rate": 1.6281809245939494e-05, + "loss": 0.1763, "step": 147540 }, { "epoch": 6.88, - "learning_rate": 6.259903426937322e-06, - "loss": 0.0619, + "learning_rate": 1.6281341176103474e-05, + "loss": 0.0151, "step": 147545 }, { "epoch": 6.88, - "learning_rate": 6.259434625662182e-06, - "loss": 0.0104, + "learning_rate": 1.6280873106267454e-05, + "loss": 0.01, "step": 147550 }, { "epoch": 6.89, - "learning_rate": 6.2589658243870435e-06, - "loss": 0.0226, + "learning_rate": 1.6280405036431437e-05, + "loss": 0.0274, "step": 147555 }, { "epoch": 6.89, - "learning_rate": 6.258497023111903e-06, - "loss": 0.0507, + "learning_rate": 1.6279936966595417e-05, + "loss": 0.0434, "step": 147560 }, { "epoch": 6.89, - "learning_rate": 6.258028221836764e-06, - "loss": 0.0539, + "learning_rate": 1.6279468896759397e-05, + "loss": 0.045, "step": 147565 }, { "epoch": 6.89, - "learning_rate": 6.257559420561624e-06, - "loss": 0.0212, + "learning_rate": 1.6279000826923377e-05, + "loss": 0.0794, "step": 147570 }, { "epoch": 6.89, - "learning_rate": 6.257090619286484e-06, - "loss": 0.1109, + "learning_rate": 1.627853275708736e-05, + "loss": 0.0475, "step": 147575 }, { "epoch": 6.89, - "learning_rate": 6.256621818011346e-06, - "loss": 0.1167, + "learning_rate": 1.627806468725134e-05, + "loss": 0.0823, "step": 147580 }, { "epoch": 6.89, - "learning_rate": 6.2561530167362065e-06, - "loss": 0.1459, + "learning_rate": 1.627759661741532e-05, + "loss": 0.1832, "step": 147585 }, { "epoch": 6.89, - "learning_rate": 6.255684215461066e-06, - "loss": 0.1968, + "learning_rate": 1.62771285475793e-05, + "loss": 0.1286, "step": 147590 }, { "epoch": 6.89, - "learning_rate": 6.255215414185926e-06, - "loss": 0.0066, + "learning_rate": 1.627666047774328e-05, + "loss": 0.039, "step": 147595 }, { "epoch": 6.89, - "learning_rate": 6.254746612910788e-06, - "loss": 0.0163, + "learning_rate": 1.627619240790726e-05, + "loss": 0.044, "step": 147600 }, { "epoch": 6.89, - "learning_rate": 6.254277811635649e-06, - "loss": 0.0567, + "learning_rate": 1.627572433807124e-05, + "loss": 0.0387, "step": 147605 }, { "epoch": 6.89, - "learning_rate": 6.253809010360509e-06, - "loss": 0.0628, + "learning_rate": 1.6275256268235222e-05, + "loss": 0.0622, "step": 147610 }, { "epoch": 6.89, - "learning_rate": 6.253340209085369e-06, - "loss": 0.0383, + "learning_rate": 1.6274788198399202e-05, + "loss": 0.0306, "step": 147615 }, { "epoch": 6.89, - "learning_rate": 6.252871407810229e-06, - "loss": 0.1143, + "learning_rate": 1.627432012856318e-05, + "loss": 0.1027, "step": 147620 }, { "epoch": 6.89, - "learning_rate": 6.252402606535091e-06, - "loss": 0.1042, + "learning_rate": 1.627385205872716e-05, + "loss": 0.0468, "step": 147625 }, { "epoch": 6.89, - "learning_rate": 6.251933805259951e-06, - "loss": 0.0849, + "learning_rate": 1.6273383988891145e-05, + "loss": 0.1552, "step": 147630 }, { "epoch": 6.89, - "learning_rate": 6.251465003984811e-06, - "loss": 0.0911, + "learning_rate": 1.6272915919055124e-05, + "loss": 0.2793, "step": 147635 }, { "epoch": 6.89, - "learning_rate": 6.250996202709672e-06, - "loss": 0.1758, + "learning_rate": 1.6272447849219104e-05, + "loss": 0.144, "step": 147640 }, { "epoch": 6.89, - "learning_rate": 6.250527401434533e-06, - "loss": 0.0072, + "learning_rate": 1.6271979779383087e-05, + "loss": 0.0349, "step": 147645 }, { "epoch": 6.89, - "learning_rate": 6.250058600159393e-06, - "loss": 0.0107, + "learning_rate": 1.6271511709547067e-05, + "loss": 0.0935, "step": 147650 }, { "epoch": 6.89, - "learning_rate": 6.249589798884253e-06, - "loss": 0.0296, + "learning_rate": 1.6271043639711044e-05, + "loss": 0.0172, "step": 147655 }, { "epoch": 6.89, - "learning_rate": 6.249120997609114e-06, - "loss": 0.0164, + "learning_rate": 1.6270575569875023e-05, + "loss": 0.0128, "step": 147660 }, { "epoch": 6.89, - "learning_rate": 6.248652196333974e-06, - "loss": 0.1111, + "learning_rate": 1.6270107500039007e-05, + "loss": 0.0619, "step": 147665 }, { "epoch": 6.89, - "learning_rate": 6.2481833950588355e-06, - "loss": 0.0194, + "learning_rate": 1.6269639430202987e-05, + "loss": 0.0186, "step": 147670 }, { "epoch": 6.89, - "learning_rate": 6.2477145937836954e-06, - "loss": 0.0457, + "learning_rate": 1.6269171360366966e-05, + "loss": 0.1035, "step": 147675 }, { "epoch": 6.89, - "learning_rate": 6.247245792508556e-06, - "loss": 0.0876, + "learning_rate": 1.6268703290530946e-05, + "loss": 0.0912, "step": 147680 }, { "epoch": 6.89, - "learning_rate": 6.246776991233416e-06, - "loss": 0.1917, + "learning_rate": 1.626823522069493e-05, + "loss": 0.1171, "step": 147685 }, { "epoch": 6.89, - "learning_rate": 6.246308189958277e-06, - "loss": 0.183, + "learning_rate": 1.626776715085891e-05, + "loss": 0.1226, "step": 147690 }, { "epoch": 6.89, - "learning_rate": 6.245839388683138e-06, - "loss": 0.0116, + "learning_rate": 1.626729908102289e-05, + "loss": 0.0247, "step": 147695 }, { "epoch": 6.89, - "learning_rate": 6.2453705874079985e-06, - "loss": 0.0295, + "learning_rate": 1.626683101118687e-05, + "loss": 0.0411, "step": 147700 }, { "epoch": 6.89, - "learning_rate": 6.2449017861328584e-06, - "loss": 0.0354, + "learning_rate": 1.6266362941350852e-05, + "loss": 0.0149, "step": 147705 }, { "epoch": 6.89, - "learning_rate": 6.244432984857719e-06, - "loss": 0.0538, + "learning_rate": 1.6265894871514832e-05, + "loss": 0.0436, "step": 147710 }, { "epoch": 6.89, - "learning_rate": 6.24396418358258e-06, - "loss": 0.0769, + "learning_rate": 1.6265426801678808e-05, + "loss": 0.048, "step": 147715 }, { "epoch": 6.89, - "learning_rate": 6.243495382307441e-06, - "loss": 0.0821, + "learning_rate": 1.626495873184279e-05, + "loss": 0.0501, "step": 147720 }, { "epoch": 6.89, - "learning_rate": 6.243026581032301e-06, - "loss": 0.0768, + "learning_rate": 1.626449066200677e-05, + "loss": 0.0644, "step": 147725 }, { "epoch": 6.89, - "learning_rate": 6.2425577797571615e-06, - "loss": 0.1316, + "learning_rate": 1.626402259217075e-05, + "loss": 0.1099, "step": 147730 }, { "epoch": 6.89, - "learning_rate": 6.242088978482021e-06, - "loss": 0.1533, + "learning_rate": 1.626355452233473e-05, + "loss": 0.1651, "step": 147735 }, { "epoch": 6.89, - "learning_rate": 6.241620177206883e-06, - "loss": 0.1693, + "learning_rate": 1.6263086452498714e-05, + "loss": 0.169, "step": 147740 }, { "epoch": 6.89, - "learning_rate": 6.241151375931743e-06, - "loss": 0.0115, + "learning_rate": 1.6262618382662694e-05, + "loss": 0.008, "step": 147745 }, { "epoch": 6.89, - "learning_rate": 6.240682574656604e-06, - "loss": 0.0229, + "learning_rate": 1.6262150312826674e-05, + "loss": 0.0214, "step": 147750 }, { "epoch": 6.89, - "learning_rate": 6.240213773381464e-06, - "loss": 0.0463, + "learning_rate": 1.6261682242990654e-05, + "loss": 0.0135, "step": 147755 }, { "epoch": 6.89, - "learning_rate": 6.2397449721063245e-06, - "loss": 0.0809, + "learning_rate": 1.6261214173154637e-05, + "loss": 0.013, "step": 147760 }, { "epoch": 6.89, - "learning_rate": 6.239276170831185e-06, - "loss": 0.055, + "learning_rate": 1.6260746103318617e-05, + "loss": 0.0892, "step": 147765 }, { "epoch": 6.9, - "learning_rate": 6.238807369556046e-06, - "loss": 0.0666, + "learning_rate": 1.6260278033482596e-05, + "loss": 0.055, "step": 147770 }, { "epoch": 6.9, - "learning_rate": 6.238338568280906e-06, - "loss": 0.0251, + "learning_rate": 1.625980996364658e-05, + "loss": 0.0861, "step": 147775 }, { "epoch": 6.9, - "learning_rate": 6.237869767005767e-06, - "loss": 0.1081, + "learning_rate": 1.6259341893810556e-05, + "loss": 0.1411, "step": 147780 }, { "epoch": 6.9, - "learning_rate": 6.2374009657306275e-06, - "loss": 0.1356, + "learning_rate": 1.6258873823974536e-05, + "loss": 0.1349, "step": 147785 }, { "epoch": 6.9, - "learning_rate": 6.236932164455488e-06, - "loss": 0.1611, + "learning_rate": 1.6258405754138516e-05, + "loss": 0.124, "step": 147790 }, { "epoch": 6.9, - "learning_rate": 6.236463363180348e-06, - "loss": 0.0068, + "learning_rate": 1.62579376843025e-05, + "loss": 0.0237, "step": 147795 }, { "epoch": 6.9, - "learning_rate": 6.235994561905209e-06, - "loss": 0.017, + "learning_rate": 1.625746961446648e-05, + "loss": 0.0233, "step": 147800 }, { "epoch": 6.9, - "learning_rate": 6.235525760630069e-06, - "loss": 0.0626, + "learning_rate": 1.625700154463046e-05, + "loss": 0.0096, "step": 147805 }, { "epoch": 6.9, - "learning_rate": 6.235056959354931e-06, - "loss": 0.0483, + "learning_rate": 1.625653347479444e-05, + "loss": 0.0469, "step": 147810 }, { "epoch": 6.9, - "learning_rate": 6.2345881580797905e-06, - "loss": 0.0266, + "learning_rate": 1.625606540495842e-05, + "loss": 0.0641, "step": 147815 }, { "epoch": 6.9, - "learning_rate": 6.234119356804651e-06, - "loss": 0.0319, + "learning_rate": 1.62555973351224e-05, + "loss": 0.0572, "step": 147820 }, { "epoch": 6.9, - "learning_rate": 6.233650555529511e-06, - "loss": 0.0482, + "learning_rate": 1.625512926528638e-05, + "loss": 0.0805, "step": 147825 }, { "epoch": 6.9, - "learning_rate": 6.233181754254371e-06, - "loss": 0.1028, + "learning_rate": 1.6254661195450364e-05, + "loss": 0.0857, "step": 147830 }, { "epoch": 6.9, - "learning_rate": 6.232712952979233e-06, - "loss": 0.141, + "learning_rate": 1.6254193125614344e-05, + "loss": 0.0984, "step": 147835 }, { "epoch": 6.9, - "learning_rate": 6.232244151704094e-06, - "loss": 0.1495, + "learning_rate": 1.6253725055778324e-05, + "loss": 0.1485, "step": 147840 }, { "epoch": 6.9, - "learning_rate": 6.2317753504289535e-06, - "loss": 0.0518, + "learning_rate": 1.62532569859423e-05, + "loss": 0.0041, "step": 147845 }, { "epoch": 6.9, - "learning_rate": 6.2313065491538134e-06, - "loss": 0.0089, + "learning_rate": 1.6252788916106284e-05, + "loss": 0.0391, "step": 147850 }, { "epoch": 6.9, - "learning_rate": 6.230837747878675e-06, - "loss": 0.0488, + "learning_rate": 1.6252320846270263e-05, + "loss": 0.0131, "step": 147855 }, { "epoch": 6.9, - "learning_rate": 6.230368946603536e-06, - "loss": 0.0677, + "learning_rate": 1.6251852776434243e-05, + "loss": 0.0344, "step": 147860 }, { "epoch": 6.9, - "learning_rate": 6.229900145328396e-06, - "loss": 0.0469, + "learning_rate": 1.6251384706598223e-05, + "loss": 0.0588, "step": 147865 }, { "epoch": 6.9, - "learning_rate": 6.229431344053256e-06, - "loss": 0.0841, + "learning_rate": 1.6250916636762206e-05, + "loss": 0.0327, "step": 147870 }, { "epoch": 6.9, - "learning_rate": 6.2289625427781165e-06, - "loss": 0.0919, + "learning_rate": 1.6250448566926186e-05, + "loss": 0.0895, "step": 147875 }, { "epoch": 6.9, - "learning_rate": 6.228493741502978e-06, - "loss": 0.1152, + "learning_rate": 1.6249980497090166e-05, + "loss": 0.1405, "step": 147880 }, { "epoch": 6.9, - "learning_rate": 6.228024940227838e-06, - "loss": 0.1718, + "learning_rate": 1.6249512427254146e-05, + "loss": 0.0953, "step": 147885 }, { "epoch": 6.9, - "learning_rate": 6.227556138952698e-06, - "loss": 0.2949, + "learning_rate": 1.624904435741813e-05, + "loss": 0.1316, "step": 147890 }, { "epoch": 6.9, - "learning_rate": 6.227087337677559e-06, - "loss": 0.0021, + "learning_rate": 1.624857628758211e-05, + "loss": 0.013, "step": 147895 }, { "epoch": 6.9, - "learning_rate": 6.226618536402419e-06, - "loss": 0.0467, + "learning_rate": 1.624810821774609e-05, + "loss": 0.0068, "step": 147900 }, { "epoch": 6.9, - "learning_rate": 6.22614973512728e-06, - "loss": 0.0044, + "learning_rate": 1.624764014791007e-05, + "loss": 0.0092, "step": 147905 }, { "epoch": 6.9, - "learning_rate": 6.22568093385214e-06, - "loss": 0.0085, + "learning_rate": 1.6247172078074048e-05, + "loss": 0.0157, "step": 147910 }, { "epoch": 6.9, - "learning_rate": 6.225212132577001e-06, - "loss": 0.0452, + "learning_rate": 1.6246704008238028e-05, + "loss": 0.0166, "step": 147915 }, { "epoch": 6.9, - "learning_rate": 6.224743331301861e-06, - "loss": 0.056, + "learning_rate": 1.6246235938402008e-05, + "loss": 0.0586, "step": 147920 }, { "epoch": 6.9, - "learning_rate": 6.224274530026723e-06, - "loss": 0.0681, + "learning_rate": 1.624576786856599e-05, + "loss": 0.0734, "step": 147925 }, { "epoch": 6.9, - "learning_rate": 6.2238057287515825e-06, - "loss": 0.1454, + "learning_rate": 1.624529979872997e-05, + "loss": 0.1255, "step": 147930 }, { "epoch": 6.9, - "learning_rate": 6.223336927476443e-06, - "loss": 0.1652, + "learning_rate": 1.624483172889395e-05, + "loss": 0.1248, "step": 147935 }, { "epoch": 6.9, - "learning_rate": 6.222868126201303e-06, - "loss": 0.1068, + "learning_rate": 1.624436365905793e-05, + "loss": 0.1008, "step": 147940 }, { "epoch": 6.9, - "learning_rate": 6.222399324926164e-06, - "loss": 0.0161, + "learning_rate": 1.6243895589221914e-05, + "loss": 0.0153, "step": 147945 }, { "epoch": 6.9, - "learning_rate": 6.221930523651025e-06, - "loss": 0.0706, + "learning_rate": 1.6243427519385894e-05, + "loss": 0.0231, "step": 147950 }, { "epoch": 6.9, - "learning_rate": 6.221461722375886e-06, - "loss": 0.0113, + "learning_rate": 1.6242959449549873e-05, + "loss": 0.0199, "step": 147955 }, { "epoch": 6.9, - "learning_rate": 6.2209929211007455e-06, - "loss": 0.0185, + "learning_rate": 1.6242491379713857e-05, + "loss": 0.0173, "step": 147960 }, { "epoch": 6.9, - "learning_rate": 6.220524119825606e-06, - "loss": 0.0324, + "learning_rate": 1.6242023309877836e-05, + "loss": 0.0397, "step": 147965 }, { "epoch": 6.9, - "learning_rate": 6.220055318550467e-06, - "loss": 0.107, + "learning_rate": 1.6241555240041813e-05, + "loss": 0.0321, "step": 147970 }, { "epoch": 6.9, - "learning_rate": 6.219586517275328e-06, - "loss": 0.0792, + "learning_rate": 1.6241087170205793e-05, + "loss": 0.0523, "step": 147975 }, { "epoch": 6.9, - "learning_rate": 6.219117716000188e-06, - "loss": 0.0739, + "learning_rate": 1.6240619100369776e-05, + "loss": 0.0802, "step": 147980 }, { "epoch": 6.91, - "learning_rate": 6.218648914725049e-06, - "loss": 0.1501, + "learning_rate": 1.6240151030533756e-05, + "loss": 0.1307, "step": 147985 }, { "epoch": 6.91, - "learning_rate": 6.2181801134499085e-06, - "loss": 0.1277, + "learning_rate": 1.6239682960697735e-05, + "loss": 0.2453, "step": 147990 }, { "epoch": 6.91, - "learning_rate": 6.21771131217477e-06, - "loss": 0.0128, + "learning_rate": 1.6239214890861715e-05, + "loss": 0.0109, "step": 147995 }, { "epoch": 6.91, - "learning_rate": 6.21724251089963e-06, - "loss": 0.0266, + "learning_rate": 1.62387468210257e-05, + "loss": 0.0125, "step": 148000 }, { "epoch": 6.91, - "learning_rate": 6.216773709624491e-06, - "loss": 0.0197, + "learning_rate": 1.623827875118968e-05, + "loss": 0.0257, "step": 148005 }, { "epoch": 6.91, - "learning_rate": 6.216304908349351e-06, - "loss": 0.4825, + "learning_rate": 1.6237810681353658e-05, + "loss": 0.068, "step": 148010 }, { "epoch": 6.91, - "learning_rate": 6.215836107074212e-06, - "loss": 0.0359, + "learning_rate": 1.623734261151764e-05, + "loss": 0.0371, "step": 148015 }, { "epoch": 6.91, - "learning_rate": 6.215367305799072e-06, - "loss": 0.0182, + "learning_rate": 1.623687454168162e-05, + "loss": 0.0243, "step": 148020 }, { "epoch": 6.91, - "learning_rate": 6.214898504523933e-06, - "loss": 0.072, + "learning_rate": 1.62364064718456e-05, + "loss": 0.085, "step": 148025 }, { "epoch": 6.91, - "learning_rate": 6.214429703248793e-06, - "loss": 0.0958, + "learning_rate": 1.623593840200958e-05, + "loss": 0.1741, "step": 148030 }, { "epoch": 6.91, - "learning_rate": 6.213960901973654e-06, - "loss": 0.1575, + "learning_rate": 1.623547033217356e-05, + "loss": 0.1877, "step": 148035 }, { "epoch": 6.91, - "learning_rate": 6.213492100698515e-06, - "loss": 0.1346, + "learning_rate": 1.623500226233754e-05, + "loss": 0.0829, "step": 148040 }, { "epoch": 6.91, - "learning_rate": 6.213023299423375e-06, - "loss": 0.0071, + "learning_rate": 1.623453419250152e-05, + "loss": 0.0046, "step": 148045 }, { "epoch": 6.91, - "learning_rate": 6.212554498148235e-06, - "loss": 0.0207, + "learning_rate": 1.62340661226655e-05, + "loss": 0.0233, "step": 148050 }, { "epoch": 6.91, - "learning_rate": 6.212085696873096e-06, - "loss": 0.0296, + "learning_rate": 1.6233598052829483e-05, + "loss": 0.0435, "step": 148055 }, { "epoch": 6.91, - "learning_rate": 6.211616895597956e-06, - "loss": 0.0492, + "learning_rate": 1.6233129982993463e-05, + "loss": 0.0522, "step": 148060 }, { "epoch": 6.91, - "learning_rate": 6.211148094322818e-06, - "loss": 0.0231, + "learning_rate": 1.6232661913157443e-05, + "loss": 0.0422, "step": 148065 }, { "epoch": 6.91, - "learning_rate": 6.210679293047678e-06, - "loss": 0.112, + "learning_rate": 1.6232193843321423e-05, + "loss": 0.0454, "step": 148070 }, { "epoch": 6.91, - "learning_rate": 6.210210491772538e-06, - "loss": 0.1759, + "learning_rate": 1.6231725773485406e-05, + "loss": 0.1139, "step": 148075 }, { "epoch": 6.91, - "learning_rate": 6.209741690497398e-06, - "loss": 0.145, + "learning_rate": 1.6231257703649386e-05, + "loss": 0.0902, "step": 148080 }, { "epoch": 6.91, - "learning_rate": 6.209272889222258e-06, - "loss": 0.2303, + "learning_rate": 1.6230789633813366e-05, + "loss": 0.2412, "step": 148085 }, { "epoch": 6.91, - "learning_rate": 6.20880408794712e-06, - "loss": 0.1982, + "learning_rate": 1.623032156397735e-05, + "loss": 0.2077, "step": 148090 }, { "epoch": 6.91, - "learning_rate": 6.208335286671981e-06, - "loss": 0.0157, + "learning_rate": 1.6229853494141325e-05, + "loss": 0.0317, "step": 148095 }, { "epoch": 6.91, - "learning_rate": 6.207866485396841e-06, - "loss": 0.0211, + "learning_rate": 1.6229385424305305e-05, + "loss": 0.0107, "step": 148100 }, { "epoch": 6.91, - "learning_rate": 6.2073976841217006e-06, - "loss": 0.0434, + "learning_rate": 1.6228917354469285e-05, + "loss": 0.0022, "step": 148105 }, { "epoch": 6.91, - "learning_rate": 6.206928882846562e-06, - "loss": 0.036, + "learning_rate": 1.6228449284633268e-05, + "loss": 0.0294, "step": 148110 }, { "epoch": 6.91, - "learning_rate": 6.206460081571423e-06, - "loss": 0.0372, + "learning_rate": 1.6227981214797248e-05, + "loss": 0.0526, "step": 148115 }, { "epoch": 6.91, - "learning_rate": 6.205991280296283e-06, - "loss": 0.0443, + "learning_rate": 1.6227513144961228e-05, + "loss": 0.0624, "step": 148120 }, { "epoch": 6.91, - "learning_rate": 6.205522479021143e-06, - "loss": 0.0703, + "learning_rate": 1.6227045075125208e-05, + "loss": 0.0567, "step": 148125 }, { "epoch": 6.91, - "learning_rate": 6.205053677746004e-06, - "loss": 0.0761, + "learning_rate": 1.622657700528919e-05, + "loss": 0.0422, "step": 148130 }, { "epoch": 6.91, - "learning_rate": 6.204584876470865e-06, - "loss": 0.1529, + "learning_rate": 1.622610893545317e-05, + "loss": 0.4208, "step": 148135 }, { "epoch": 6.91, - "learning_rate": 6.204116075195725e-06, - "loss": 0.2219, + "learning_rate": 1.622564086561715e-05, + "loss": 0.1783, "step": 148140 }, { "epoch": 6.91, - "learning_rate": 6.203647273920585e-06, - "loss": 0.0427, + "learning_rate": 1.6225172795781134e-05, + "loss": 0.005, "step": 148145 }, { "epoch": 6.91, - "learning_rate": 6.203178472645446e-06, - "loss": 0.0206, + "learning_rate": 1.6224704725945113e-05, + "loss": 0.0058, "step": 148150 }, { "epoch": 6.91, - "learning_rate": 6.202709671370306e-06, - "loss": 0.02, + "learning_rate": 1.6224236656109093e-05, + "loss": 0.0195, "step": 148155 }, { "epoch": 6.91, - "learning_rate": 6.2022408700951674e-06, - "loss": 0.0349, + "learning_rate": 1.622376858627307e-05, + "loss": 0.0249, "step": 148160 }, { "epoch": 6.91, - "learning_rate": 6.201772068820027e-06, - "loss": 0.0266, + "learning_rate": 1.6223300516437053e-05, + "loss": 0.0314, "step": 148165 }, { "epoch": 6.91, - "learning_rate": 6.201303267544888e-06, - "loss": 0.0299, + "learning_rate": 1.6222832446601033e-05, + "loss": 0.068, "step": 148170 }, { "epoch": 6.91, - "learning_rate": 6.200834466269748e-06, - "loss": 0.1539, + "learning_rate": 1.6222364376765012e-05, + "loss": 0.0793, "step": 148175 }, { "epoch": 6.91, - "learning_rate": 6.20036566499461e-06, - "loss": 0.0896, + "learning_rate": 1.6221896306928992e-05, + "loss": 0.199, "step": 148180 }, { "epoch": 6.91, - "learning_rate": 6.1998968637194705e-06, - "loss": 0.1884, + "learning_rate": 1.6221428237092975e-05, + "loss": 0.0802, "step": 148185 }, { "epoch": 6.91, - "learning_rate": 6.1994280624443304e-06, - "loss": 0.1504, + "learning_rate": 1.6220960167256955e-05, + "loss": 0.0945, "step": 148190 }, { "epoch": 6.91, - "learning_rate": 6.19895926116919e-06, - "loss": 0.0177, + "learning_rate": 1.6220492097420935e-05, + "loss": 0.0186, "step": 148195 }, { "epoch": 6.92, - "learning_rate": 6.198490459894051e-06, - "loss": 0.0155, + "learning_rate": 1.622002402758492e-05, + "loss": 0.037, "step": 148200 }, { "epoch": 6.92, - "learning_rate": 6.198021658618913e-06, - "loss": 0.076, + "learning_rate": 1.6219555957748898e-05, + "loss": 0.0125, "step": 148205 }, { "epoch": 6.92, - "learning_rate": 6.197552857343773e-06, - "loss": 0.0455, + "learning_rate": 1.6219087887912878e-05, + "loss": 0.0179, "step": 148210 }, { "epoch": 6.92, - "learning_rate": 6.197084056068633e-06, - "loss": 0.0327, + "learning_rate": 1.6218619818076858e-05, + "loss": 0.0764, "step": 148215 }, { "epoch": 6.92, - "learning_rate": 6.1966152547934934e-06, - "loss": 0.0796, + "learning_rate": 1.6218151748240838e-05, + "loss": 0.0692, "step": 148220 }, { "epoch": 6.92, - "learning_rate": 6.196146453518353e-06, - "loss": 0.048, + "learning_rate": 1.6217683678404817e-05, + "loss": 0.0338, "step": 148225 }, { "epoch": 6.92, - "learning_rate": 6.195677652243215e-06, - "loss": 0.0888, + "learning_rate": 1.6217215608568797e-05, + "loss": 0.0372, "step": 148230 }, { "epoch": 6.92, - "learning_rate": 6.195208850968075e-06, - "loss": 0.1763, + "learning_rate": 1.6216747538732777e-05, + "loss": 0.166, "step": 148235 }, { "epoch": 6.92, - "learning_rate": 6.194740049692936e-06, - "loss": 0.1574, + "learning_rate": 1.621627946889676e-05, + "loss": 0.1035, "step": 148240 }, { "epoch": 6.92, - "learning_rate": 6.194271248417796e-06, - "loss": 0.0524, + "learning_rate": 1.621581139906074e-05, + "loss": 0.0232, "step": 148245 }, { "epoch": 6.92, - "learning_rate": 6.193802447142657e-06, - "loss": 0.0132, + "learning_rate": 1.621534332922472e-05, + "loss": 0.0109, "step": 148250 }, { "epoch": 6.92, - "learning_rate": 6.193333645867517e-06, - "loss": 0.0325, + "learning_rate": 1.6214875259388703e-05, + "loss": 0.0267, "step": 148255 }, { "epoch": 6.92, - "learning_rate": 6.192864844592378e-06, - "loss": 0.0315, + "learning_rate": 1.6214407189552683e-05, + "loss": 0.0206, "step": 148260 }, { "epoch": 6.92, - "learning_rate": 6.192396043317238e-06, - "loss": 0.0466, + "learning_rate": 1.6213939119716663e-05, + "loss": 0.0531, "step": 148265 }, { "epoch": 6.92, - "learning_rate": 6.191927242042099e-06, - "loss": 0.0569, + "learning_rate": 1.6213471049880643e-05, + "loss": 0.0388, "step": 148270 }, { "epoch": 6.92, - "learning_rate": 6.1914584407669595e-06, - "loss": 0.0343, + "learning_rate": 1.6213002980044626e-05, + "loss": 0.0513, "step": 148275 }, { "epoch": 6.92, - "learning_rate": 6.19098963949182e-06, - "loss": 0.0853, + "learning_rate": 1.6212534910208606e-05, + "loss": 0.0884, "step": 148280 }, { "epoch": 6.92, - "learning_rate": 6.19052083821668e-06, - "loss": 0.1652, + "learning_rate": 1.6212066840372582e-05, + "loss": 0.0952, "step": 148285 }, { "epoch": 6.92, - "learning_rate": 6.190052036941541e-06, - "loss": 0.1759, + "learning_rate": 1.6211598770536562e-05, + "loss": 0.133, "step": 148290 }, { "epoch": 6.92, - "learning_rate": 6.189583235666402e-06, - "loss": 0.0055, + "learning_rate": 1.6211130700700545e-05, + "loss": 0.0071, "step": 148295 }, { "epoch": 6.92, - "learning_rate": 6.1891144343912625e-06, - "loss": 0.0124, + "learning_rate": 1.6210662630864525e-05, + "loss": 0.0159, "step": 148300 }, { "epoch": 6.92, - "learning_rate": 6.1886456331161225e-06, - "loss": 0.0602, + "learning_rate": 1.6210194561028505e-05, + "loss": 0.0069, "step": 148305 }, { "epoch": 6.92, - "learning_rate": 6.188176831840983e-06, - "loss": 0.0237, + "learning_rate": 1.6209726491192484e-05, + "loss": 0.0118, "step": 148310 }, { "epoch": 6.92, - "learning_rate": 6.187708030565843e-06, - "loss": 0.0435, + "learning_rate": 1.6209258421356468e-05, + "loss": 0.0509, "step": 148315 }, { "epoch": 6.92, - "learning_rate": 6.187239229290705e-06, - "loss": 0.0463, + "learning_rate": 1.6208790351520448e-05, + "loss": 0.0497, "step": 148320 }, { "epoch": 6.92, - "learning_rate": 6.186770428015565e-06, - "loss": 0.0387, + "learning_rate": 1.6208322281684427e-05, + "loss": 0.0906, "step": 148325 }, { "epoch": 6.92, - "learning_rate": 6.1863016267404255e-06, - "loss": 0.0572, + "learning_rate": 1.620785421184841e-05, + "loss": 0.1339, "step": 148330 }, { "epoch": 6.92, - "learning_rate": 6.1858328254652855e-06, - "loss": 0.1353, + "learning_rate": 1.620738614201239e-05, + "loss": 0.2173, "step": 148335 }, { "epoch": 6.92, - "learning_rate": 6.185364024190145e-06, - "loss": 0.1354, + "learning_rate": 1.620691807217637e-05, + "loss": 0.1186, "step": 148340 }, { "epoch": 6.92, - "learning_rate": 6.184895222915007e-06, - "loss": 0.0364, + "learning_rate": 1.620645000234035e-05, + "loss": 0.0276, "step": 148345 }, { "epoch": 6.92, - "learning_rate": 6.184426421639868e-06, - "loss": 0.0394, + "learning_rate": 1.620598193250433e-05, + "loss": 0.0203, "step": 148350 }, { "epoch": 6.92, - "learning_rate": 6.183957620364728e-06, - "loss": 0.0186, + "learning_rate": 1.620551386266831e-05, + "loss": 0.004, "step": 148355 }, { "epoch": 6.92, - "learning_rate": 6.1834888190895885e-06, - "loss": 0.0331, + "learning_rate": 1.620504579283229e-05, + "loss": 0.0472, "step": 148360 }, { "epoch": 6.92, - "learning_rate": 6.183020017814449e-06, - "loss": 0.0138, + "learning_rate": 1.620457772299627e-05, + "loss": 0.0286, "step": 148365 }, { "epoch": 6.92, - "learning_rate": 6.18255121653931e-06, - "loss": 0.0363, + "learning_rate": 1.6204109653160252e-05, + "loss": 0.0417, "step": 148370 }, { "epoch": 6.92, - "learning_rate": 6.18208241526417e-06, - "loss": 0.0393, + "learning_rate": 1.6203641583324232e-05, + "loss": 0.0905, "step": 148375 }, { "epoch": 6.92, - "learning_rate": 6.181613613989031e-06, - "loss": 0.128, + "learning_rate": 1.6203173513488212e-05, + "loss": 0.1371, "step": 148380 }, { "epoch": 6.92, - "learning_rate": 6.181144812713891e-06, - "loss": 0.2416, + "learning_rate": 1.6202705443652195e-05, + "loss": 0.1553, "step": 148385 }, { "epoch": 6.92, - "learning_rate": 6.180676011438752e-06, - "loss": 0.1379, + "learning_rate": 1.6202237373816175e-05, + "loss": 0.1623, "step": 148390 }, { "epoch": 6.92, - "learning_rate": 6.180207210163612e-06, - "loss": 0.037, + "learning_rate": 1.6201769303980155e-05, + "loss": 0.0309, "step": 148395 }, { "epoch": 6.92, - "learning_rate": 6.179738408888473e-06, - "loss": 0.0478, + "learning_rate": 1.6201301234144135e-05, + "loss": 0.0097, "step": 148400 }, { "epoch": 6.92, - "learning_rate": 6.179269607613333e-06, - "loss": 0.0094, + "learning_rate": 1.6200833164308118e-05, + "loss": 0.0579, "step": 148405 }, { "epoch": 6.93, - "learning_rate": 6.178800806338193e-06, - "loss": 0.1025, + "learning_rate": 1.6200365094472094e-05, + "loss": 0.0937, "step": 148410 }, { "epoch": 6.93, - "learning_rate": 6.1783320050630546e-06, - "loss": 0.0654, + "learning_rate": 1.6199897024636074e-05, + "loss": 0.0069, "step": 148415 }, { "epoch": 6.93, - "learning_rate": 6.177863203787915e-06, - "loss": 0.0852, + "learning_rate": 1.6199428954800054e-05, + "loss": 0.0786, "step": 148420 }, { "epoch": 6.93, - "learning_rate": 6.177394402512775e-06, - "loss": 0.0527, + "learning_rate": 1.6198960884964037e-05, + "loss": 0.1695, "step": 148425 }, { "epoch": 6.93, - "learning_rate": 6.176925601237635e-06, - "loss": 0.1656, + "learning_rate": 1.6198492815128017e-05, + "loss": 0.0473, "step": 148430 }, { "epoch": 6.93, - "learning_rate": 6.176456799962497e-06, - "loss": 0.1352, + "learning_rate": 1.6198024745291997e-05, + "loss": 0.0751, "step": 148435 }, { "epoch": 6.93, - "learning_rate": 6.175987998687358e-06, - "loss": 0.1364, + "learning_rate": 1.619755667545598e-05, + "loss": 0.1089, "step": 148440 }, { "epoch": 6.93, - "learning_rate": 6.1755191974122175e-06, - "loss": 0.018, + "learning_rate": 1.619708860561996e-05, + "loss": 0.015, "step": 148445 }, { "epoch": 6.93, - "learning_rate": 6.1750503961370775e-06, - "loss": 0.0469, + "learning_rate": 1.619662053578394e-05, + "loss": 0.0392, "step": 148450 }, { "epoch": 6.93, - "learning_rate": 6.174581594861938e-06, - "loss": 0.01, + "learning_rate": 1.619615246594792e-05, + "loss": 0.0179, "step": 148455 }, { "epoch": 6.93, - "learning_rate": 6.1741127935868e-06, - "loss": 0.0401, + "learning_rate": 1.6195684396111903e-05, + "loss": 0.0194, "step": 148460 }, { "epoch": 6.93, - "learning_rate": 6.17364399231166e-06, - "loss": 0.1035, + "learning_rate": 1.6195216326275883e-05, + "loss": 0.0757, "step": 148465 }, { "epoch": 6.93, - "learning_rate": 6.17317519103652e-06, - "loss": 0.0725, + "learning_rate": 1.6194748256439862e-05, + "loss": 0.073, "step": 148470 }, { "epoch": 6.93, - "learning_rate": 6.1727063897613805e-06, - "loss": 0.1064, + "learning_rate": 1.619428018660384e-05, + "loss": 0.1125, "step": 148475 }, { "epoch": 6.93, - "learning_rate": 6.1722375884862405e-06, - "loss": 0.2124, + "learning_rate": 1.6193812116767822e-05, + "loss": 0.1395, "step": 148480 }, { "epoch": 6.93, - "learning_rate": 6.171768787211102e-06, - "loss": 0.2988, + "learning_rate": 1.6193344046931802e-05, + "loss": 0.1987, "step": 148485 }, { "epoch": 6.93, - "learning_rate": 6.171299985935962e-06, - "loss": 0.1539, + "learning_rate": 1.619287597709578e-05, + "loss": 0.1504, "step": 148490 }, { "epoch": 6.93, - "learning_rate": 6.170831184660823e-06, - "loss": 0.0219, + "learning_rate": 1.619240790725976e-05, + "loss": 0.0002, "step": 148495 }, { "epoch": 6.93, - "learning_rate": 6.170362383385683e-06, - "loss": 0.0364, + "learning_rate": 1.6191939837423745e-05, + "loss": 0.0546, "step": 148500 }, { "epoch": 6.93, - "learning_rate": 6.169893582110544e-06, - "loss": 0.0254, + "learning_rate": 1.6191471767587724e-05, + "loss": 0.0277, "step": 148505 }, { "epoch": 6.93, - "learning_rate": 6.169424780835404e-06, - "loss": 0.0156, + "learning_rate": 1.6191003697751704e-05, + "loss": 0.0187, "step": 148510 }, { "epoch": 6.93, - "learning_rate": 6.168955979560265e-06, - "loss": 0.0412, + "learning_rate": 1.6190535627915688e-05, + "loss": 0.0713, "step": 148515 }, { "epoch": 6.93, - "learning_rate": 6.168487178285125e-06, - "loss": 0.0531, + "learning_rate": 1.6190067558079667e-05, + "loss": 0.1281, "step": 148520 }, { "epoch": 6.93, - "learning_rate": 6.168018377009986e-06, - "loss": 0.0755, + "learning_rate": 1.6189599488243647e-05, + "loss": 0.0516, "step": 148525 }, { "epoch": 6.93, - "learning_rate": 6.167549575734847e-06, - "loss": 0.0884, + "learning_rate": 1.6189131418407627e-05, + "loss": 0.1529, "step": 148530 }, { "epoch": 6.93, - "learning_rate": 6.167080774459707e-06, - "loss": 0.2133, + "learning_rate": 1.6188663348571607e-05, + "loss": 0.1509, "step": 148535 }, { "epoch": 6.93, - "learning_rate": 6.166611973184567e-06, - "loss": 0.195, + "learning_rate": 1.6188195278735587e-05, + "loss": 0.2326, "step": 148540 }, { "epoch": 6.93, - "learning_rate": 6.166143171909428e-06, - "loss": 0.0064, + "learning_rate": 1.6187727208899566e-05, + "loss": 0.0121, "step": 148545 }, { "epoch": 6.93, - "learning_rate": 6.165674370634288e-06, - "loss": 0.0117, + "learning_rate": 1.6187259139063546e-05, + "loss": 0.0233, "step": 148550 }, { "epoch": 6.93, - "learning_rate": 6.16520556935915e-06, - "loss": 0.0081, + "learning_rate": 1.618679106922753e-05, + "loss": 0.0098, "step": 148555 }, { "epoch": 6.93, - "learning_rate": 6.1647367680840096e-06, - "loss": 0.0502, + "learning_rate": 1.618632299939151e-05, + "loss": 0.0148, "step": 148560 }, { "epoch": 6.93, - "learning_rate": 6.16426796680887e-06, - "loss": 0.0046, + "learning_rate": 1.618585492955549e-05, + "loss": 0.0663, "step": 148565 }, { "epoch": 6.93, - "learning_rate": 6.16379916553373e-06, - "loss": 0.0598, + "learning_rate": 1.6185386859719472e-05, + "loss": 0.0306, "step": 148570 }, { "epoch": 6.93, - "learning_rate": 6.163330364258592e-06, - "loss": 0.0724, + "learning_rate": 1.6184918789883452e-05, + "loss": 0.082, "step": 148575 }, { "epoch": 6.93, - "learning_rate": 6.162861562983452e-06, - "loss": 0.0499, + "learning_rate": 1.6184450720047432e-05, + "loss": 0.0746, "step": 148580 }, { "epoch": 6.93, - "learning_rate": 6.162392761708313e-06, - "loss": 0.1184, + "learning_rate": 1.6183982650211412e-05, + "loss": 0.1918, "step": 148585 }, { "epoch": 6.93, - "learning_rate": 6.1619239604331726e-06, - "loss": 0.0539, + "learning_rate": 1.6183514580375395e-05, + "loss": 0.1871, "step": 148590 }, { "epoch": 6.93, - "learning_rate": 6.161455159158033e-06, - "loss": 0.0211, + "learning_rate": 1.6183046510539375e-05, + "loss": 0.0332, "step": 148595 }, { "epoch": 6.93, - "learning_rate": 6.160986357882894e-06, - "loss": 0.0166, + "learning_rate": 1.618257844070335e-05, + "loss": 0.0037, "step": 148600 }, { "epoch": 6.93, - "learning_rate": 6.160517556607755e-06, - "loss": 0.0095, + "learning_rate": 1.618211037086733e-05, + "loss": 0.0259, "step": 148605 }, { "epoch": 6.93, - "learning_rate": 6.160048755332615e-06, - "loss": 0.0377, + "learning_rate": 1.6181642301031314e-05, + "loss": 0.0317, "step": 148610 }, { "epoch": 6.93, - "learning_rate": 6.159579954057476e-06, - "loss": 0.0166, + "learning_rate": 1.6181174231195294e-05, + "loss": 0.0593, "step": 148615 }, { "epoch": 6.93, - "learning_rate": 6.159111152782336e-06, - "loss": 0.0857, + "learning_rate": 1.6180706161359274e-05, + "loss": 0.0176, "step": 148620 }, { "epoch": 6.94, - "learning_rate": 6.158642351507197e-06, - "loss": 0.0531, + "learning_rate": 1.6180238091523257e-05, + "loss": 0.0663, "step": 148625 }, { "epoch": 6.94, - "learning_rate": 6.158173550232057e-06, - "loss": 0.1494, + "learning_rate": 1.6179770021687237e-05, + "loss": 0.0737, "step": 148630 }, { "epoch": 6.94, - "learning_rate": 6.157704748956918e-06, - "loss": 0.174, + "learning_rate": 1.6179301951851217e-05, + "loss": 0.0646, "step": 148635 }, { "epoch": 6.94, - "learning_rate": 6.157235947681778e-06, - "loss": 0.1638, + "learning_rate": 1.6178833882015196e-05, + "loss": 0.1261, "step": 148640 }, { "epoch": 6.94, - "learning_rate": 6.1567671464066395e-06, - "loss": 0.0133, + "learning_rate": 1.617836581217918e-05, + "loss": 0.015, "step": 148645 }, { "epoch": 6.94, - "learning_rate": 6.156298345131499e-06, - "loss": 0.0101, + "learning_rate": 1.617789774234316e-05, + "loss": 0.0207, "step": 148650 }, { "epoch": 6.94, - "learning_rate": 6.15582954385636e-06, - "loss": 0.0568, + "learning_rate": 1.617742967250714e-05, + "loss": 0.028, "step": 148655 }, { "epoch": 6.94, - "learning_rate": 6.15536074258122e-06, - "loss": 0.0089, + "learning_rate": 1.617696160267112e-05, + "loss": 0.0115, "step": 148660 }, { "epoch": 6.94, - "learning_rate": 6.15489194130608e-06, - "loss": 0.0588, + "learning_rate": 1.61764935328351e-05, + "loss": 0.0505, "step": 148665 }, { "epoch": 6.94, - "learning_rate": 6.154423140030942e-06, - "loss": 0.0532, + "learning_rate": 1.617602546299908e-05, + "loss": 0.0164, "step": 148670 }, { "epoch": 6.94, - "learning_rate": 6.1539543387558024e-06, - "loss": 0.0796, + "learning_rate": 1.617555739316306e-05, + "loss": 0.09, "step": 148675 }, { "epoch": 6.94, - "learning_rate": 6.153485537480662e-06, - "loss": 0.1643, + "learning_rate": 1.617508932332704e-05, + "loss": 0.1603, "step": 148680 }, { "epoch": 6.94, - "learning_rate": 6.153016736205522e-06, - "loss": 0.1813, + "learning_rate": 1.617462125349102e-05, + "loss": 0.1096, "step": 148685 }, { "epoch": 6.94, - "learning_rate": 6.152547934930384e-06, - "loss": 0.1563, + "learning_rate": 1.6174153183655e-05, + "loss": 0.1177, "step": 148690 }, { "epoch": 6.94, - "learning_rate": 6.152079133655245e-06, - "loss": 0.0095, + "learning_rate": 1.617368511381898e-05, + "loss": 0.0145, "step": 148695 }, { "epoch": 6.94, - "learning_rate": 6.151610332380105e-06, - "loss": 0.016, + "learning_rate": 1.6173217043982964e-05, + "loss": 0.0173, "step": 148700 }, { "epoch": 6.94, - "learning_rate": 6.151141531104965e-06, - "loss": 0.005, + "learning_rate": 1.6172748974146944e-05, + "loss": 0.0243, "step": 148705 }, { "epoch": 6.94, - "learning_rate": 6.150672729829825e-06, - "loss": 0.0171, + "learning_rate": 1.6172280904310924e-05, + "loss": 0.0061, "step": 148710 }, { "epoch": 6.94, - "learning_rate": 6.150203928554687e-06, - "loss": 0.0535, + "learning_rate": 1.6171812834474904e-05, + "loss": 0.065, "step": 148715 }, { "epoch": 6.94, - "learning_rate": 6.149735127279547e-06, - "loss": 0.0304, + "learning_rate": 1.6171344764638887e-05, + "loss": 0.0157, "step": 148720 }, { "epoch": 6.94, - "learning_rate": 6.149266326004407e-06, - "loss": 0.0832, + "learning_rate": 1.6170876694802864e-05, + "loss": 0.0121, "step": 148725 }, { "epoch": 6.94, - "learning_rate": 6.148797524729268e-06, - "loss": 0.1522, + "learning_rate": 1.6170408624966843e-05, + "loss": 0.1488, "step": 148730 }, { "epoch": 6.94, - "learning_rate": 6.148328723454128e-06, - "loss": 0.0713, + "learning_rate": 1.6169940555130823e-05, + "loss": 0.1884, "step": 148735 }, { "epoch": 6.94, - "learning_rate": 6.147859922178989e-06, - "loss": 0.1535, + "learning_rate": 1.6169472485294806e-05, + "loss": 0.1334, "step": 148740 }, { "epoch": 6.94, - "learning_rate": 6.147391120903849e-06, - "loss": 0.0112, + "learning_rate": 1.6169004415458786e-05, + "loss": 0.0177, "step": 148745 }, { "epoch": 6.94, - "learning_rate": 6.14692231962871e-06, - "loss": 0.0207, + "learning_rate": 1.6168536345622766e-05, + "loss": 0.0089, "step": 148750 }, { "epoch": 6.94, - "learning_rate": 6.14645351835357e-06, - "loss": 0.0548, + "learning_rate": 1.616806827578675e-05, + "loss": 0.0059, "step": 148755 }, { "epoch": 6.94, - "learning_rate": 6.1459847170784315e-06, - "loss": 0.0145, + "learning_rate": 1.616760020595073e-05, + "loss": 0.0643, "step": 148760 }, { "epoch": 6.94, - "learning_rate": 6.145515915803291e-06, - "loss": 0.0457, + "learning_rate": 1.616713213611471e-05, + "loss": 0.0317, "step": 148765 }, { "epoch": 6.94, - "learning_rate": 6.145047114528152e-06, - "loss": 0.0381, + "learning_rate": 1.616666406627869e-05, + "loss": 0.0233, "step": 148770 }, { "epoch": 6.94, - "learning_rate": 6.144578313253012e-06, - "loss": 0.108, + "learning_rate": 1.6166195996442672e-05, + "loss": 0.1418, "step": 148775 }, { "epoch": 6.94, - "learning_rate": 6.144109511977873e-06, - "loss": 0.0575, + "learning_rate": 1.6165727926606652e-05, + "loss": 0.0897, "step": 148780 }, { "epoch": 6.94, - "learning_rate": 6.143640710702734e-06, - "loss": 0.2101, + "learning_rate": 1.616525985677063e-05, + "loss": 0.1656, "step": 148785 }, { "epoch": 6.94, - "learning_rate": 6.1431719094275945e-06, - "loss": 0.1477, + "learning_rate": 1.6164791786934608e-05, + "loss": 0.1312, "step": 148790 }, { "epoch": 6.94, - "learning_rate": 6.142703108152454e-06, - "loss": 0.011, + "learning_rate": 1.616432371709859e-05, + "loss": 0.0347, "step": 148795 }, { "epoch": 6.94, - "learning_rate": 6.142234306877315e-06, - "loss": 0.0111, + "learning_rate": 1.616385564726257e-05, + "loss": 0.0014, "step": 148800 }, { "epoch": 6.94, - "learning_rate": 6.141765505602175e-06, - "loss": 0.0236, + "learning_rate": 1.616338757742655e-05, + "loss": 0.008, "step": 148805 }, { "epoch": 6.94, - "learning_rate": 6.141296704327037e-06, - "loss": 0.061, + "learning_rate": 1.6162919507590534e-05, + "loss": 0.0201, "step": 148810 }, { "epoch": 6.94, - "learning_rate": 6.140827903051897e-06, - "loss": 0.0862, + "learning_rate": 1.6162451437754514e-05, + "loss": 0.0776, "step": 148815 }, { "epoch": 6.94, - "learning_rate": 6.1403591017767575e-06, - "loss": 0.0367, + "learning_rate": 1.6161983367918494e-05, + "loss": 0.0672, "step": 148820 }, { "epoch": 6.94, - "learning_rate": 6.139890300501617e-06, - "loss": 0.1452, + "learning_rate": 1.6161515298082473e-05, + "loss": 0.0678, "step": 148825 }, { "epoch": 6.94, - "learning_rate": 6.139421499226479e-06, - "loss": 0.1635, + "learning_rate": 1.6161047228246457e-05, + "loss": 0.0705, "step": 148830 }, { "epoch": 6.94, - "learning_rate": 6.138952697951339e-06, - "loss": 0.2093, + "learning_rate": 1.6160579158410436e-05, + "loss": 0.0962, "step": 148835 }, { "epoch": 6.95, - "learning_rate": 6.1384838966762e-06, - "loss": 0.1353, + "learning_rate": 1.6160111088574416e-05, + "loss": 0.0913, "step": 148840 }, { "epoch": 6.95, - "learning_rate": 6.13801509540106e-06, - "loss": 0.0078, + "learning_rate": 1.6159643018738396e-05, + "loss": 0.0269, "step": 148845 }, { "epoch": 6.95, - "learning_rate": 6.1375462941259205e-06, - "loss": 0.0224, + "learning_rate": 1.615917494890238e-05, + "loss": 0.0161, "step": 148850 }, { "epoch": 6.95, - "learning_rate": 6.137077492850781e-06, - "loss": 0.0146, + "learning_rate": 1.6158706879066356e-05, + "loss": 0.0137, "step": 148855 }, { "epoch": 6.95, - "learning_rate": 6.136608691575642e-06, - "loss": 0.0125, + "learning_rate": 1.6158238809230336e-05, + "loss": 0.0352, "step": 148860 }, { "epoch": 6.95, - "learning_rate": 6.136139890300502e-06, - "loss": 0.0087, + "learning_rate": 1.615777073939432e-05, + "loss": 0.0308, "step": 148865 }, { "epoch": 6.95, - "learning_rate": 6.135671089025363e-06, - "loss": 0.0473, + "learning_rate": 1.61573026695583e-05, + "loss": 0.0298, "step": 148870 }, { "epoch": 6.95, - "learning_rate": 6.135202287750223e-06, - "loss": 0.0428, + "learning_rate": 1.615683459972228e-05, + "loss": 0.0785, "step": 148875 }, { "epoch": 6.95, - "learning_rate": 6.134733486475084e-06, - "loss": 0.0937, + "learning_rate": 1.6156366529886258e-05, + "loss": 0.0448, "step": 148880 }, { "epoch": 6.95, - "learning_rate": 6.134264685199944e-06, - "loss": 0.1978, + "learning_rate": 1.615589846005024e-05, + "loss": 0.1241, "step": 148885 }, { "epoch": 6.95, - "learning_rate": 6.133795883924805e-06, - "loss": 0.1708, + "learning_rate": 1.615543039021422e-05, + "loss": 0.1387, "step": 148890 }, { "epoch": 6.95, - "learning_rate": 6.133327082649665e-06, - "loss": 0.0149, + "learning_rate": 1.61549623203782e-05, + "loss": 0.0217, "step": 148895 }, { "epoch": 6.95, - "learning_rate": 6.1328582813745266e-06, - "loss": 0.0316, + "learning_rate": 1.615449425054218e-05, + "loss": 0.0112, "step": 148900 }, { "epoch": 6.95, - "learning_rate": 6.1323894800993865e-06, - "loss": 0.0186, + "learning_rate": 1.6154026180706164e-05, + "loss": 0.0086, "step": 148905 }, { "epoch": 6.95, - "learning_rate": 6.131920678824247e-06, - "loss": 0.0288, + "learning_rate": 1.6153558110870144e-05, + "loss": 0.0612, "step": 148910 }, { "epoch": 6.95, - "learning_rate": 6.131451877549107e-06, - "loss": 0.0302, + "learning_rate": 1.615309004103412e-05, + "loss": 0.0687, "step": 148915 }, { "epoch": 6.95, - "learning_rate": 6.130983076273967e-06, - "loss": 0.0455, + "learning_rate": 1.61526219711981e-05, + "loss": 0.0407, "step": 148920 }, { "epoch": 6.95, - "learning_rate": 6.130514274998829e-06, - "loss": 0.088, + "learning_rate": 1.6152153901362083e-05, + "loss": 0.0554, "step": 148925 }, { "epoch": 6.95, - "learning_rate": 6.1300454737236896e-06, - "loss": 0.1135, + "learning_rate": 1.6151685831526063e-05, + "loss": 0.1085, "step": 148930 }, { "epoch": 6.95, - "learning_rate": 6.1295766724485495e-06, - "loss": 0.1232, + "learning_rate": 1.6151217761690043e-05, + "loss": 0.2726, "step": 148935 }, { "epoch": 6.95, - "learning_rate": 6.1291078711734094e-06, - "loss": 0.1448, + "learning_rate": 1.6150749691854026e-05, + "loss": 0.0954, "step": 148940 }, { "epoch": 6.95, - "learning_rate": 6.128639069898271e-06, - "loss": 0.0067, + "learning_rate": 1.6150281622018006e-05, + "loss": 0.0209, "step": 148945 }, { "epoch": 6.95, - "learning_rate": 6.128170268623132e-06, - "loss": 0.0024, + "learning_rate": 1.6149813552181986e-05, + "loss": 0.0252, "step": 148950 }, { "epoch": 6.95, - "learning_rate": 6.127701467347992e-06, - "loss": 0.0131, + "learning_rate": 1.6149345482345966e-05, + "loss": 0.0021, "step": 148955 }, { "epoch": 6.95, - "learning_rate": 6.127232666072852e-06, - "loss": 0.0085, + "learning_rate": 1.614887741250995e-05, + "loss": 0.0119, "step": 148960 }, { "epoch": 6.95, - "learning_rate": 6.1267638647977125e-06, - "loss": 0.0803, + "learning_rate": 1.614840934267393e-05, + "loss": 0.033, "step": 148965 }, { "epoch": 6.95, - "learning_rate": 6.126295063522574e-06, - "loss": 0.0559, + "learning_rate": 1.614794127283791e-05, + "loss": 0.1266, "step": 148970 }, { "epoch": 6.95, - "learning_rate": 6.125826262247434e-06, - "loss": 0.022, + "learning_rate": 1.614747320300189e-05, + "loss": 0.075, "step": 148975 }, { "epoch": 6.95, - "learning_rate": 6.125357460972294e-06, - "loss": 0.0967, + "learning_rate": 1.6147005133165868e-05, + "loss": 0.0545, "step": 148980 }, { "epoch": 6.95, - "learning_rate": 6.124888659697155e-06, - "loss": 0.1812, + "learning_rate": 1.6146537063329848e-05, + "loss": 0.1025, "step": 148985 }, { "epoch": 6.95, - "learning_rate": 6.124419858422015e-06, - "loss": 0.1728, + "learning_rate": 1.6146068993493828e-05, + "loss": 0.1233, "step": 148990 }, { "epoch": 6.95, - "learning_rate": 6.123951057146876e-06, - "loss": 0.0338, + "learning_rate": 1.614560092365781e-05, + "loss": 0.0054, "step": 148995 }, { "epoch": 6.95, - "learning_rate": 6.123482255871736e-06, - "loss": 0.0132, + "learning_rate": 1.614513285382179e-05, + "loss": 0.0363, "step": 149000 }, { "epoch": 6.95, - "learning_rate": 6.123013454596597e-06, - "loss": 0.0417, + "learning_rate": 1.614466478398577e-05, + "loss": 0.027, "step": 149005 }, { "epoch": 6.95, - "learning_rate": 6.122544653321457e-06, - "loss": 0.0364, + "learning_rate": 1.614419671414975e-05, + "loss": 0.0198, "step": 149010 }, { "epoch": 6.95, - "learning_rate": 6.122075852046319e-06, - "loss": 0.0691, + "learning_rate": 1.6143728644313734e-05, + "loss": 0.0184, "step": 149015 }, { "epoch": 6.95, - "learning_rate": 6.1216070507711785e-06, - "loss": 0.0396, + "learning_rate": 1.6143260574477713e-05, + "loss": 0.0803, "step": 149020 }, { "epoch": 6.95, - "learning_rate": 6.121138249496039e-06, - "loss": 0.0898, + "learning_rate": 1.6142792504641693e-05, + "loss": 0.0813, "step": 149025 }, { "epoch": 6.95, - "learning_rate": 6.120669448220899e-06, - "loss": 0.1344, + "learning_rate": 1.6142324434805673e-05, + "loss": 0.1742, "step": 149030 }, { "epoch": 6.95, - "learning_rate": 6.12020064694576e-06, - "loss": 0.0914, + "learning_rate": 1.6141856364969656e-05, + "loss": 0.072, "step": 149035 }, { "epoch": 6.95, - "learning_rate": 6.119731845670621e-06, - "loss": 0.1137, + "learning_rate": 1.6141388295133636e-05, + "loss": 0.1641, "step": 149040 }, { "epoch": 6.95, - "learning_rate": 6.119263044395482e-06, - "loss": 0.0273, + "learning_rate": 1.6140920225297613e-05, + "loss": 0.0072, "step": 149045 }, { "epoch": 6.95, - "learning_rate": 6.1187942431203415e-06, - "loss": 0.0216, + "learning_rate": 1.6140452155461596e-05, + "loss": 0.0122, "step": 149050 }, { "epoch": 6.96, - "learning_rate": 6.118325441845202e-06, - "loss": 0.0355, + "learning_rate": 1.6139984085625576e-05, + "loss": 0.0035, "step": 149055 }, { "epoch": 6.96, - "learning_rate": 6.117856640570062e-06, - "loss": 0.0167, + "learning_rate": 1.6139516015789555e-05, + "loss": 0.016, "step": 149060 }, { "epoch": 6.96, - "learning_rate": 6.117387839294924e-06, - "loss": 0.0653, + "learning_rate": 1.6139047945953535e-05, + "loss": 0.0725, "step": 149065 }, { "epoch": 6.96, - "learning_rate": 6.116919038019784e-06, - "loss": 0.1733, + "learning_rate": 1.613857987611752e-05, + "loss": 0.0349, "step": 149070 }, { "epoch": 6.96, - "learning_rate": 6.1164502367446446e-06, - "loss": 0.0949, + "learning_rate": 1.6138111806281498e-05, + "loss": 0.0953, "step": 149075 }, { "epoch": 6.96, - "learning_rate": 6.1159814354695045e-06, - "loss": 0.1266, + "learning_rate": 1.6137643736445478e-05, + "loss": 0.0851, "step": 149080 }, { "epoch": 6.96, - "learning_rate": 6.115512634194366e-06, - "loss": 0.2809, + "learning_rate": 1.6137175666609458e-05, + "loss": 0.1929, "step": 149085 }, { "epoch": 6.96, - "learning_rate": 6.115043832919226e-06, - "loss": 0.1446, + "learning_rate": 1.613670759677344e-05, + "loss": 0.1516, "step": 149090 }, { "epoch": 6.96, - "learning_rate": 6.114575031644087e-06, - "loss": 0.0246, + "learning_rate": 1.613623952693742e-05, + "loss": 0.0178, "step": 149095 }, { "epoch": 6.96, - "learning_rate": 6.114106230368947e-06, - "loss": 0.0198, + "learning_rate": 1.61357714571014e-05, + "loss": 0.0111, "step": 149100 }, { "epoch": 6.96, - "learning_rate": 6.1136374290938076e-06, - "loss": 0.0122, + "learning_rate": 1.6135303387265377e-05, + "loss": 0.0249, "step": 149105 }, { "epoch": 6.96, - "learning_rate": 6.113168627818668e-06, - "loss": 0.0248, + "learning_rate": 1.613483531742936e-05, + "loss": 0.0186, "step": 149110 }, { "epoch": 6.96, - "learning_rate": 6.112699826543529e-06, - "loss": 0.0458, + "learning_rate": 1.613436724759334e-05, + "loss": 0.0215, "step": 149115 }, { "epoch": 6.96, - "learning_rate": 6.112231025268389e-06, - "loss": 0.0681, + "learning_rate": 1.613389917775732e-05, + "loss": 0.054, "step": 149120 }, { "epoch": 6.96, - "learning_rate": 6.11176222399325e-06, - "loss": 0.0586, + "learning_rate": 1.6133431107921303e-05, + "loss": 0.0309, "step": 149125 }, { "epoch": 6.96, - "learning_rate": 6.11129342271811e-06, - "loss": 0.0734, + "learning_rate": 1.6132963038085283e-05, + "loss": 0.1315, "step": 149130 }, { "epoch": 6.96, - "learning_rate": 6.110824621442971e-06, - "loss": 0.1642, + "learning_rate": 1.6132494968249263e-05, + "loss": 0.2742, "step": 149135 }, { "epoch": 6.96, - "learning_rate": 6.110355820167831e-06, - "loss": 0.1367, + "learning_rate": 1.6132026898413243e-05, + "loss": 0.1306, "step": 149140 }, { "epoch": 6.96, - "learning_rate": 6.109887018892692e-06, - "loss": 0.0092, + "learning_rate": 1.6131558828577226e-05, + "loss": 0.0531, "step": 149145 }, { "epoch": 6.96, - "learning_rate": 6.109418217617552e-06, - "loss": 0.0091, + "learning_rate": 1.6131090758741206e-05, + "loss": 0.0561, "step": 149150 }, { "epoch": 6.96, - "learning_rate": 6.108949416342414e-06, - "loss": 0.019, + "learning_rate": 1.6130622688905185e-05, + "loss": 0.0177, "step": 149155 }, { "epoch": 6.96, - "learning_rate": 6.108480615067274e-06, - "loss": 0.0279, + "learning_rate": 1.6130154619069165e-05, + "loss": 0.0509, "step": 149160 }, { "epoch": 6.96, - "learning_rate": 6.108011813792134e-06, - "loss": 0.0599, + "learning_rate": 1.612968654923315e-05, + "loss": 0.0589, "step": 149165 }, { "epoch": 6.96, - "learning_rate": 6.107543012516994e-06, - "loss": 0.023, + "learning_rate": 1.6129218479397125e-05, + "loss": 0.0211, "step": 149170 }, { "epoch": 6.96, - "learning_rate": 6.107074211241854e-06, - "loss": 0.0461, + "learning_rate": 1.6128750409561105e-05, + "loss": 0.0714, "step": 149175 }, { "epoch": 6.96, - "learning_rate": 6.106605409966716e-06, - "loss": 0.1082, + "learning_rate": 1.6128282339725088e-05, + "loss": 0.0823, "step": 149180 }, { "epoch": 6.96, - "learning_rate": 6.106136608691577e-06, - "loss": 0.0929, + "learning_rate": 1.6127814269889068e-05, + "loss": 0.0764, "step": 149185 }, { "epoch": 6.96, - "learning_rate": 6.105667807416437e-06, - "loss": 0.1218, + "learning_rate": 1.6127346200053048e-05, + "loss": 0.0961, "step": 149190 }, { "epoch": 6.96, - "learning_rate": 6.1051990061412965e-06, - "loss": 0.0141, + "learning_rate": 1.6126878130217027e-05, + "loss": 0.0106, "step": 149195 }, { "epoch": 6.96, - "learning_rate": 6.104730204866157e-06, - "loss": 0.0152, + "learning_rate": 1.612641006038101e-05, + "loss": 0.0199, "step": 149200 }, { "epoch": 6.96, - "learning_rate": 6.104261403591019e-06, - "loss": 0.0217, + "learning_rate": 1.612594199054499e-05, + "loss": 0.0103, "step": 149205 }, { "epoch": 6.96, - "learning_rate": 6.103792602315879e-06, - "loss": 0.0242, + "learning_rate": 1.612547392070897e-05, + "loss": 0.0152, "step": 149210 }, { "epoch": 6.96, - "learning_rate": 6.103323801040739e-06, - "loss": 0.109, + "learning_rate": 1.612500585087295e-05, + "loss": 0.0412, "step": 149215 }, { "epoch": 6.96, - "learning_rate": 6.1028549997656e-06, - "loss": 0.0804, + "learning_rate": 1.6124537781036933e-05, + "loss": 0.0293, "step": 149220 }, { "epoch": 6.96, - "learning_rate": 6.102386198490461e-06, - "loss": 0.1069, + "learning_rate": 1.6124069711200913e-05, + "loss": 0.076, "step": 149225 }, { "epoch": 6.96, - "learning_rate": 6.101917397215321e-06, - "loss": 0.1669, + "learning_rate": 1.6123601641364893e-05, + "loss": 0.0879, "step": 149230 }, { "epoch": 6.96, - "learning_rate": 6.101448595940181e-06, - "loss": 0.1498, + "learning_rate": 1.6123133571528873e-05, + "loss": 0.1971, "step": 149235 }, { "epoch": 6.96, - "learning_rate": 6.100979794665042e-06, - "loss": 0.1537, + "learning_rate": 1.6122665501692853e-05, + "loss": 0.2249, "step": 149240 }, { "epoch": 6.96, - "learning_rate": 6.100510993389902e-06, - "loss": 0.0243, + "learning_rate": 1.6122197431856832e-05, + "loss": 0.0482, "step": 149245 }, { "epoch": 6.96, - "learning_rate": 6.1000421921147634e-06, - "loss": 0.0386, + "learning_rate": 1.6121729362020812e-05, + "loss": 0.038, "step": 149250 }, { "epoch": 6.96, - "learning_rate": 6.099573390839623e-06, - "loss": 0.0422, + "learning_rate": 1.6121261292184795e-05, + "loss": 0.0301, "step": 149255 }, { "epoch": 6.96, - "learning_rate": 6.099104589564484e-06, - "loss": 0.0555, + "learning_rate": 1.6120793222348775e-05, + "loss": 0.0334, "step": 149260 }, { "epoch": 6.96, - "learning_rate": 6.098635788289344e-06, - "loss": 0.0364, + "learning_rate": 1.6120325152512755e-05, + "loss": 0.0305, "step": 149265 }, { "epoch": 6.97, - "learning_rate": 6.098166987014206e-06, - "loss": 0.044, + "learning_rate": 1.6119857082676735e-05, + "loss": 0.0402, "step": 149270 }, { "epoch": 6.97, - "learning_rate": 6.097698185739066e-06, - "loss": 0.063, + "learning_rate": 1.6119389012840718e-05, + "loss": 0.0326, "step": 149275 }, { "epoch": 6.97, - "learning_rate": 6.097229384463926e-06, - "loss": 0.0639, + "learning_rate": 1.6118920943004698e-05, + "loss": 0.1025, "step": 149280 }, { "epoch": 6.97, - "learning_rate": 6.096760583188786e-06, - "loss": 0.1964, + "learning_rate": 1.6118452873168678e-05, + "loss": 0.1745, "step": 149285 }, { "epoch": 6.97, - "learning_rate": 6.096291781913647e-06, - "loss": 0.089, + "learning_rate": 1.6117984803332657e-05, + "loss": 0.1061, "step": 149290 }, { "epoch": 6.97, - "learning_rate": 6.095822980638508e-06, - "loss": 0.0262, + "learning_rate": 1.6117516733496637e-05, + "loss": 0.009, "step": 149295 }, { "epoch": 6.97, - "learning_rate": 6.095354179363369e-06, - "loss": 0.0338, + "learning_rate": 1.6117048663660617e-05, + "loss": 0.0125, "step": 149300 }, { "epoch": 6.97, - "learning_rate": 6.094885378088229e-06, - "loss": 0.0527, + "learning_rate": 1.6116580593824597e-05, + "loss": 0.0079, "step": 149305 }, { "epoch": 6.97, - "learning_rate": 6.094416576813089e-06, - "loss": 0.0303, + "learning_rate": 1.611611252398858e-05, + "loss": 0.04, "step": 149310 }, { "epoch": 6.97, - "learning_rate": 6.093947775537949e-06, - "loss": 0.021, + "learning_rate": 1.611564445415256e-05, + "loss": 0.029, "step": 149315 }, { "epoch": 6.97, - "learning_rate": 6.093478974262811e-06, - "loss": 0.0436, + "learning_rate": 1.611517638431654e-05, + "loss": 0.073, "step": 149320 }, { "epoch": 6.97, - "learning_rate": 6.093010172987671e-06, - "loss": 0.0595, + "learning_rate": 1.611470831448052e-05, + "loss": 0.0643, "step": 149325 }, { "epoch": 6.97, - "learning_rate": 6.092541371712532e-06, - "loss": 0.1049, + "learning_rate": 1.6114240244644503e-05, + "loss": 0.2001, "step": 149330 }, { "epoch": 6.97, - "learning_rate": 6.092072570437392e-06, - "loss": 0.158, + "learning_rate": 1.6113772174808483e-05, + "loss": 0.0858, "step": 149335 }, { "epoch": 6.97, - "learning_rate": 6.091603769162253e-06, - "loss": 0.1062, + "learning_rate": 1.6113304104972462e-05, + "loss": 0.1353, "step": 149340 }, { "epoch": 6.97, - "learning_rate": 6.091134967887113e-06, - "loss": 0.0364, + "learning_rate": 1.6112836035136442e-05, + "loss": 0.0066, "step": 149345 }, { "epoch": 6.97, - "learning_rate": 6.090666166611974e-06, - "loss": 0.0046, + "learning_rate": 1.6112367965300425e-05, + "loss": 0.0076, "step": 149350 }, { "epoch": 6.97, - "learning_rate": 6.090197365336834e-06, - "loss": 0.0288, + "learning_rate": 1.6111899895464405e-05, + "loss": 0.0037, "step": 149355 }, { "epoch": 6.97, - "learning_rate": 6.089728564061695e-06, - "loss": 0.0397, + "learning_rate": 1.6111431825628382e-05, + "loss": 0.0628, "step": 149360 }, { "epoch": 6.97, - "learning_rate": 6.0892597627865555e-06, - "loss": 0.0398, + "learning_rate": 1.6110963755792365e-05, + "loss": 0.0371, "step": 149365 }, { "epoch": 6.97, - "learning_rate": 6.088790961511416e-06, - "loss": 0.0805, + "learning_rate": 1.6110495685956345e-05, + "loss": 0.016, "step": 149370 }, { "epoch": 6.97, - "learning_rate": 6.088322160236276e-06, - "loss": 0.0345, + "learning_rate": 1.6110027616120325e-05, + "loss": 0.0722, "step": 149375 }, { "epoch": 6.97, - "learning_rate": 6.087853358961137e-06, - "loss": 0.0839, + "learning_rate": 1.6109559546284304e-05, + "loss": 0.065, "step": 149380 }, { "epoch": 6.97, - "learning_rate": 6.087384557685997e-06, - "loss": 0.2908, + "learning_rate": 1.6109091476448288e-05, + "loss": 0.1383, "step": 149385 }, { "epoch": 6.97, - "learning_rate": 6.0869157564108585e-06, - "loss": 0.1222, + "learning_rate": 1.6108623406612267e-05, + "loss": 0.1925, "step": 149390 }, { "epoch": 6.97, - "learning_rate": 6.0864469551357184e-06, - "loss": 0.0257, + "learning_rate": 1.6108155336776247e-05, + "loss": 0.0174, "step": 149395 }, { "epoch": 6.97, - "learning_rate": 6.085978153860579e-06, - "loss": 0.0218, + "learning_rate": 1.6107687266940227e-05, + "loss": 0.0256, "step": 149400 }, { "epoch": 6.97, - "learning_rate": 6.085509352585439e-06, - "loss": 0.0506, + "learning_rate": 1.610721919710421e-05, + "loss": 0.0416, "step": 149405 }, { "epoch": 6.97, - "learning_rate": 6.085040551310301e-06, - "loss": 0.0094, + "learning_rate": 1.610675112726819e-05, + "loss": 0.013, "step": 149410 }, { "epoch": 6.97, - "learning_rate": 6.084571750035161e-06, - "loss": 0.0459, + "learning_rate": 1.610628305743217e-05, + "loss": 0.0282, "step": 149415 }, { "epoch": 6.97, - "learning_rate": 6.0841029487600215e-06, - "loss": 0.0959, + "learning_rate": 1.610581498759615e-05, + "loss": 0.0375, "step": 149420 }, { "epoch": 6.97, - "learning_rate": 6.0836341474848814e-06, - "loss": 0.1174, + "learning_rate": 1.610534691776013e-05, + "loss": 0.0952, "step": 149425 }, { "epoch": 6.97, - "learning_rate": 6.083165346209741e-06, - "loss": 0.0808, + "learning_rate": 1.610487884792411e-05, + "loss": 0.0836, "step": 149430 }, { "epoch": 6.97, - "learning_rate": 6.082696544934603e-06, - "loss": 0.2638, + "learning_rate": 1.610441077808809e-05, + "loss": 0.2011, "step": 149435 }, { "epoch": 6.97, - "learning_rate": 6.082227743659464e-06, - "loss": 0.192, + "learning_rate": 1.6103942708252072e-05, + "loss": 0.1106, "step": 149440 }, { "epoch": 6.97, - "learning_rate": 6.081758942384324e-06, - "loss": 0.007, + "learning_rate": 1.6103474638416052e-05, + "loss": 0.0571, "step": 149445 }, { "epoch": 6.97, - "learning_rate": 6.081290141109184e-06, - "loss": 0.0185, + "learning_rate": 1.6103006568580032e-05, + "loss": 0.0056, "step": 149450 }, { "epoch": 6.97, - "learning_rate": 6.080821339834044e-06, - "loss": 0.0126, + "learning_rate": 1.6102538498744012e-05, + "loss": 0.0092, "step": 149455 }, { "epoch": 6.97, - "learning_rate": 6.080352538558906e-06, - "loss": 0.07, + "learning_rate": 1.6102070428907995e-05, + "loss": 0.0887, "step": 149460 }, { "epoch": 6.97, - "learning_rate": 6.079883737283766e-06, - "loss": 0.0124, + "learning_rate": 1.6101602359071975e-05, + "loss": 0.0272, "step": 149465 }, { "epoch": 6.97, - "learning_rate": 6.079414936008626e-06, - "loss": 0.0612, + "learning_rate": 1.6101134289235955e-05, + "loss": 0.0891, "step": 149470 }, { "epoch": 6.97, - "learning_rate": 6.078946134733487e-06, - "loss": 0.1094, + "learning_rate": 1.6100666219399934e-05, + "loss": 0.0343, "step": 149475 }, { "epoch": 6.97, - "learning_rate": 6.078477333458348e-06, - "loss": 0.1196, + "learning_rate": 1.6100198149563918e-05, + "loss": 0.1412, "step": 149480 }, { "epoch": 6.98, - "learning_rate": 6.078008532183208e-06, - "loss": 0.218, + "learning_rate": 1.6099730079727894e-05, + "loss": 0.1669, "step": 149485 }, { "epoch": 6.98, - "learning_rate": 6.077539730908068e-06, - "loss": 0.1769, + "learning_rate": 1.6099262009891874e-05, + "loss": 0.1332, "step": 149490 }, { "epoch": 6.98, - "learning_rate": 6.077070929632929e-06, - "loss": 0.0058, + "learning_rate": 1.6098793940055857e-05, + "loss": 0.0081, "step": 149495 }, { "epoch": 6.98, - "learning_rate": 6.076602128357789e-06, - "loss": 0.0384, + "learning_rate": 1.6098325870219837e-05, + "loss": 0.0111, "step": 149500 }, { "epoch": 6.98, - "learning_rate": 6.0761333270826505e-06, - "loss": 0.0367, + "learning_rate": 1.6097857800383817e-05, + "loss": 0.0199, "step": 149505 }, { "epoch": 6.98, - "learning_rate": 6.0756645258075105e-06, - "loss": 0.0778, + "learning_rate": 1.6097389730547797e-05, + "loss": 0.0464, "step": 149510 }, { "epoch": 6.98, - "learning_rate": 6.075195724532371e-06, - "loss": 0.0448, + "learning_rate": 1.609692166071178e-05, + "loss": 0.0474, "step": 149515 }, { "epoch": 6.98, - "learning_rate": 6.074726923257231e-06, - "loss": 0.1016, + "learning_rate": 1.609645359087576e-05, + "loss": 0.0551, "step": 149520 }, { "epoch": 6.98, - "learning_rate": 6.074258121982092e-06, - "loss": 0.0565, + "learning_rate": 1.609598552103974e-05, + "loss": 0.099, "step": 149525 }, { "epoch": 6.98, - "learning_rate": 6.073789320706953e-06, - "loss": 0.0928, + "learning_rate": 1.609551745120372e-05, + "loss": 0.1796, "step": 149530 }, { "epoch": 6.98, - "learning_rate": 6.0733205194318135e-06, - "loss": 0.2693, + "learning_rate": 1.6095049381367702e-05, + "loss": 0.2301, "step": 149535 }, { "epoch": 6.98, - "learning_rate": 6.0728517181566735e-06, - "loss": 0.1804, + "learning_rate": 1.6094581311531682e-05, + "loss": 0.1906, "step": 149540 }, { "epoch": 6.98, - "learning_rate": 6.072382916881534e-06, - "loss": 0.0367, + "learning_rate": 1.6094113241695662e-05, + "loss": 0.0191, "step": 149545 }, { "epoch": 6.98, - "learning_rate": 6.071914115606395e-06, - "loss": 0.0246, + "learning_rate": 1.6093645171859642e-05, + "loss": 0.0315, "step": 149550 }, { "epoch": 6.98, - "learning_rate": 6.071445314331256e-06, - "loss": 0.0262, + "learning_rate": 1.6093177102023622e-05, + "loss": 0.0755, "step": 149555 }, { "epoch": 6.98, - "learning_rate": 6.070976513056116e-06, - "loss": 0.019, + "learning_rate": 1.60927090321876e-05, + "loss": 0.023, "step": 149560 }, { "epoch": 6.98, - "learning_rate": 6.0705077117809765e-06, - "loss": 0.0357, + "learning_rate": 1.609224096235158e-05, + "loss": 0.0305, "step": 149565 }, { "epoch": 6.98, - "learning_rate": 6.0700389105058364e-06, - "loss": 0.0579, + "learning_rate": 1.6091772892515565e-05, + "loss": 0.0478, "step": 149570 }, { "epoch": 6.98, - "learning_rate": 6.069570109230698e-06, - "loss": 0.0777, + "learning_rate": 1.6091304822679544e-05, + "loss": 0.0187, "step": 149575 }, { "epoch": 6.98, - "learning_rate": 6.069101307955558e-06, - "loss": 0.0541, + "learning_rate": 1.6090836752843524e-05, + "loss": 0.1106, "step": 149580 }, { "epoch": 6.98, - "learning_rate": 6.068632506680419e-06, - "loss": 0.2185, + "learning_rate": 1.6090368683007504e-05, + "loss": 0.1177, "step": 149585 }, { "epoch": 6.98, - "learning_rate": 6.068163705405279e-06, - "loss": 0.1356, + "learning_rate": 1.6089900613171487e-05, + "loss": 0.0911, "step": 149590 }, { "epoch": 6.98, - "learning_rate": 6.06769490413014e-06, - "loss": 0.0004, + "learning_rate": 1.6089432543335467e-05, + "loss": 0.0105, "step": 149595 }, { "epoch": 6.98, - "learning_rate": 6.067226102855e-06, - "loss": 0.0051, + "learning_rate": 1.6088964473499447e-05, + "loss": 0.0383, "step": 149600 }, { "epoch": 6.98, - "learning_rate": 6.066757301579861e-06, - "loss": 0.0378, + "learning_rate": 1.608849640366343e-05, + "loss": 0.0299, "step": 149605 }, { "epoch": 6.98, - "learning_rate": 6.066288500304721e-06, - "loss": 0.0488, + "learning_rate": 1.6088028333827406e-05, + "loss": 0.0351, "step": 149610 }, { "epoch": 6.98, - "learning_rate": 6.065819699029582e-06, - "loss": 0.0506, + "learning_rate": 1.6087560263991386e-05, + "loss": 0.0162, "step": 149615 }, { "epoch": 6.98, - "learning_rate": 6.0653508977544426e-06, - "loss": 0.0439, + "learning_rate": 1.6087092194155366e-05, + "loss": 0.0529, "step": 149620 }, { "epoch": 6.98, - "learning_rate": 6.064882096479303e-06, - "loss": 0.115, + "learning_rate": 1.608662412431935e-05, + "loss": 0.0726, "step": 149625 }, { "epoch": 6.98, - "learning_rate": 6.064413295204163e-06, - "loss": 0.1563, + "learning_rate": 1.608615605448333e-05, + "loss": 0.0911, "step": 149630 }, { "epoch": 6.98, - "learning_rate": 6.063944493929024e-06, - "loss": 0.1109, + "learning_rate": 1.608568798464731e-05, + "loss": 0.1496, "step": 149635 }, { "epoch": 6.98, - "learning_rate": 6.063475692653884e-06, - "loss": 0.2475, + "learning_rate": 1.608521991481129e-05, + "loss": 0.1684, "step": 149640 }, { "epoch": 6.98, - "learning_rate": 6.063006891378746e-06, - "loss": 0.0052, + "learning_rate": 1.6084751844975272e-05, + "loss": 0.0442, "step": 149645 }, { "epoch": 6.98, - "learning_rate": 6.0625380901036056e-06, - "loss": 0.0443, + "learning_rate": 1.6084283775139252e-05, + "loss": 0.021, "step": 149650 }, { "epoch": 6.98, - "learning_rate": 6.062069288828466e-06, - "loss": 0.0073, + "learning_rate": 1.608381570530323e-05, + "loss": 0.0512, "step": 149655 }, { "epoch": 6.98, - "learning_rate": 6.061600487553326e-06, - "loss": 0.0171, + "learning_rate": 1.6083347635467215e-05, + "loss": 0.0263, "step": 149660 }, { "epoch": 6.98, - "learning_rate": 6.061131686278188e-06, - "loss": 0.0619, + "learning_rate": 1.6082879565631195e-05, + "loss": 0.0797, "step": 149665 }, { "epoch": 6.98, - "learning_rate": 6.060662885003048e-06, - "loss": 0.0311, + "learning_rate": 1.6082411495795174e-05, + "loss": 0.0312, "step": 149670 }, { "epoch": 6.98, - "learning_rate": 6.060194083727909e-06, - "loss": 0.1004, + "learning_rate": 1.608194342595915e-05, + "loss": 0.0589, "step": 149675 }, { "epoch": 6.98, - "learning_rate": 6.0597252824527685e-06, - "loss": 0.1083, + "learning_rate": 1.6081475356123134e-05, + "loss": 0.0736, "step": 149680 }, { "epoch": 6.98, - "learning_rate": 6.0592564811776285e-06, - "loss": 0.0854, + "learning_rate": 1.6081007286287114e-05, + "loss": 0.1499, "step": 149685 }, { "epoch": 6.98, - "learning_rate": 6.05878767990249e-06, - "loss": 0.1293, + "learning_rate": 1.6080539216451094e-05, + "loss": 0.1002, "step": 149690 }, { "epoch": 6.98, - "learning_rate": 6.058318878627351e-06, - "loss": 0.0436, + "learning_rate": 1.6080071146615074e-05, + "loss": 0.0094, "step": 149695 }, { "epoch": 6.99, - "learning_rate": 6.057850077352211e-06, - "loss": 0.0436, + "learning_rate": 1.6079603076779057e-05, + "loss": 0.0136, "step": 149700 }, { "epoch": 6.99, - "learning_rate": 6.057381276077071e-06, - "loss": 0.0848, + "learning_rate": 1.6079135006943037e-05, + "loss": 0.0172, "step": 149705 }, { "epoch": 6.99, - "learning_rate": 6.0569124748019315e-06, - "loss": 0.0265, + "learning_rate": 1.6078666937107016e-05, + "loss": 0.0453, "step": 149710 }, { "epoch": 6.99, - "learning_rate": 6.056443673526793e-06, - "loss": 0.0527, + "learning_rate": 1.6078198867270996e-05, + "loss": 0.0479, "step": 149715 }, { "epoch": 6.99, - "learning_rate": 6.055974872251653e-06, - "loss": 0.033, + "learning_rate": 1.607773079743498e-05, + "loss": 0.1416, "step": 149720 }, { "epoch": 6.99, - "learning_rate": 6.055506070976513e-06, - "loss": 0.0396, + "learning_rate": 1.607726272759896e-05, + "loss": 0.0752, "step": 149725 }, { "epoch": 6.99, - "learning_rate": 6.055037269701374e-06, - "loss": 0.104, + "learning_rate": 1.607679465776294e-05, + "loss": 0.0538, "step": 149730 }, { "epoch": 6.99, - "learning_rate": 6.0545684684262354e-06, - "loss": 0.1802, + "learning_rate": 1.607632658792692e-05, + "loss": 0.1234, "step": 149735 }, { "epoch": 6.99, - "learning_rate": 6.054099667151095e-06, - "loss": 0.1535, + "learning_rate": 1.60758585180909e-05, + "loss": 0.1543, "step": 149740 }, { "epoch": 6.99, - "learning_rate": 6.053630865875955e-06, - "loss": 0.0183, + "learning_rate": 1.607539044825488e-05, + "loss": 0.025, "step": 149745 }, { "epoch": 6.99, - "learning_rate": 6.053162064600816e-06, - "loss": 0.0072, + "learning_rate": 1.607492237841886e-05, + "loss": 0.0343, "step": 149750 }, { "epoch": 6.99, - "learning_rate": 6.052693263325676e-06, - "loss": 0.0621, + "learning_rate": 1.607445430858284e-05, + "loss": 0.0237, "step": 149755 }, { "epoch": 6.99, - "learning_rate": 6.052224462050538e-06, - "loss": 0.0478, + "learning_rate": 1.607398623874682e-05, + "loss": 0.0133, "step": 149760 }, { "epoch": 6.99, - "learning_rate": 6.051755660775398e-06, - "loss": 0.0879, + "learning_rate": 1.60735181689108e-05, + "loss": 0.0234, "step": 149765 }, { "epoch": 6.99, - "learning_rate": 6.051286859500258e-06, - "loss": 0.0444, + "learning_rate": 1.607305009907478e-05, + "loss": 0.0521, "step": 149770 }, { "epoch": 6.99, - "learning_rate": 6.050818058225118e-06, - "loss": 0.072, + "learning_rate": 1.6072582029238764e-05, + "loss": 0.0483, "step": 149775 }, { "epoch": 6.99, - "learning_rate": 6.050349256949979e-06, - "loss": 0.1172, + "learning_rate": 1.6072113959402744e-05, + "loss": 0.0632, "step": 149780 }, { "epoch": 6.99, - "learning_rate": 6.049880455674841e-06, - "loss": 0.231, + "learning_rate": 1.6071645889566724e-05, + "loss": 0.249, "step": 149785 }, { "epoch": 6.99, - "learning_rate": 6.049411654399701e-06, - "loss": 0.1651, + "learning_rate": 1.6071177819730707e-05, + "loss": 0.2075, "step": 149790 }, { "epoch": 6.99, - "learning_rate": 6.0489428531245606e-06, - "loss": 0.0274, + "learning_rate": 1.6070709749894687e-05, + "loss": 0.0321, "step": 149795 }, { "epoch": 6.99, - "learning_rate": 6.048474051849421e-06, - "loss": 0.0119, + "learning_rate": 1.6070241680058663e-05, + "loss": 0.036, "step": 149800 }, { "epoch": 6.99, - "learning_rate": 6.048005250574283e-06, - "loss": 0.0682, + "learning_rate": 1.6069773610222643e-05, + "loss": 0.0145, "step": 149805 }, { "epoch": 6.99, - "learning_rate": 6.047536449299143e-06, - "loss": 0.0172, + "learning_rate": 1.6069305540386626e-05, + "loss": 0.022, "step": 149810 }, { "epoch": 6.99, - "learning_rate": 6.047067648024003e-06, - "loss": 0.0457, + "learning_rate": 1.6068837470550606e-05, + "loss": 0.055, "step": 149815 }, { "epoch": 6.99, - "learning_rate": 6.046598846748864e-06, - "loss": 0.0364, + "learning_rate": 1.6068369400714586e-05, + "loss": 0.0286, "step": 149820 }, { "epoch": 6.99, - "learning_rate": 6.0461300454737236e-06, - "loss": 0.0889, + "learning_rate": 1.6067901330878566e-05, + "loss": 0.0653, "step": 149825 }, { "epoch": 6.99, - "learning_rate": 6.045661244198585e-06, - "loss": 0.1634, + "learning_rate": 1.606743326104255e-05, + "loss": 0.0921, "step": 149830 }, { "epoch": 6.99, - "learning_rate": 6.045192442923445e-06, - "loss": 0.1694, + "learning_rate": 1.606696519120653e-05, + "loss": 0.1541, "step": 149835 }, { "epoch": 6.99, - "learning_rate": 6.044723641648306e-06, - "loss": 0.1495, + "learning_rate": 1.606649712137051e-05, + "loss": 0.1527, "step": 149840 }, { "epoch": 6.99, - "learning_rate": 6.044254840373166e-06, - "loss": 0.0245, + "learning_rate": 1.6066029051534492e-05, + "loss": 0.0447, "step": 149845 }, { "epoch": 6.99, - "learning_rate": 6.043786039098027e-06, - "loss": 0.0183, + "learning_rate": 1.606556098169847e-05, + "loss": 0.0107, "step": 149850 }, { "epoch": 6.99, - "learning_rate": 6.043317237822887e-06, - "loss": 0.0185, + "learning_rate": 1.606509291186245e-05, + "loss": 0.022, "step": 149855 }, { "epoch": 6.99, - "learning_rate": 6.042848436547748e-06, - "loss": 0.0172, + "learning_rate": 1.606462484202643e-05, + "loss": 0.0359, "step": 149860 }, { "epoch": 6.99, - "learning_rate": 6.042379635272608e-06, - "loss": 0.0458, + "learning_rate": 1.606415677219041e-05, + "loss": 0.0557, "step": 149865 }, { "epoch": 6.99, - "learning_rate": 6.041910833997469e-06, - "loss": 0.0379, + "learning_rate": 1.606368870235439e-05, + "loss": 0.059, "step": 149870 }, { "epoch": 6.99, - "learning_rate": 6.04144203272233e-06, - "loss": 0.0716, + "learning_rate": 1.606322063251837e-05, + "loss": 0.1079, "step": 149875 }, { "epoch": 6.99, - "learning_rate": 6.0409732314471905e-06, - "loss": 0.0717, + "learning_rate": 1.606275256268235e-05, + "loss": 0.1776, "step": 149880 }, { "epoch": 6.99, - "learning_rate": 6.04050443017205e-06, - "loss": 0.1188, + "learning_rate": 1.6062284492846334e-05, + "loss": 0.1671, "step": 149885 }, { "epoch": 6.99, - "learning_rate": 6.040035628896911e-06, - "loss": 0.1275, + "learning_rate": 1.6061816423010314e-05, + "loss": 0.1584, "step": 149890 }, { "epoch": 6.99, - "learning_rate": 6.039566827621771e-06, - "loss": 0.0266, + "learning_rate": 1.6061348353174293e-05, + "loss": 0.0191, "step": 149895 }, { "epoch": 6.99, - "learning_rate": 6.039098026346633e-06, - "loss": 0.0159, + "learning_rate": 1.6060880283338273e-05, + "loss": 0.0308, "step": 149900 }, { "epoch": 6.99, - "learning_rate": 6.038629225071493e-06, - "loss": 0.0076, + "learning_rate": 1.6060412213502256e-05, + "loss": 0.0052, "step": 149905 }, { "epoch": 7.0, - "learning_rate": 6.0381604237963534e-06, - "loss": 0.0168, + "learning_rate": 1.6059944143666236e-05, + "loss": 0.0414, "step": 149910 }, { "epoch": 7.0, - "learning_rate": 6.037691622521213e-06, - "loss": 0.0416, + "learning_rate": 1.6059476073830216e-05, + "loss": 0.0308, "step": 149915 }, { "epoch": 7.0, - "learning_rate": 6.037222821246073e-06, - "loss": 0.0975, + "learning_rate": 1.60590080039942e-05, + "loss": 0.0661, "step": 149920 }, { "epoch": 7.0, - "learning_rate": 6.036754019970935e-06, - "loss": 0.0633, + "learning_rate": 1.6058539934158176e-05, + "loss": 0.0749, "step": 149925 }, { "epoch": 7.0, - "learning_rate": 6.036285218695796e-06, - "loss": 0.1082, + "learning_rate": 1.6058071864322155e-05, + "loss": 0.0284, "step": 149930 }, { "epoch": 7.0, - "learning_rate": 6.035816417420656e-06, - "loss": 0.163, + "learning_rate": 1.6057603794486135e-05, + "loss": 0.1019, "step": 149935 }, { "epoch": 7.0, - "learning_rate": 6.035347616145516e-06, - "loss": 0.1431, + "learning_rate": 1.605713572465012e-05, + "loss": 0.1065, "step": 149940 }, { "epoch": 7.0, - "learning_rate": 6.034878814870377e-06, - "loss": 0.0251, + "learning_rate": 1.60566676548141e-05, + "loss": 0.0108, "step": 149945 }, { "epoch": 7.0, - "learning_rate": 6.034410013595238e-06, - "loss": 0.0149, + "learning_rate": 1.6056199584978078e-05, + "loss": 0.0054, "step": 149950 }, { "epoch": 7.0, - "learning_rate": 6.033941212320098e-06, - "loss": 0.0303, + "learning_rate": 1.6055731515142058e-05, + "loss": 0.0707, "step": 149955 }, { "epoch": 7.0, - "learning_rate": 6.033472411044959e-06, - "loss": 0.0135, + "learning_rate": 1.605526344530604e-05, + "loss": 0.0201, "step": 149960 }, { "epoch": 7.0, - "learning_rate": 6.033003609769819e-06, - "loss": 0.0696, + "learning_rate": 1.605479537547002e-05, + "loss": 0.0362, "step": 149965 }, { "epoch": 7.0, - "learning_rate": 6.03253480849468e-06, - "loss": 0.0424, + "learning_rate": 1.6054327305634e-05, + "loss": 0.0274, "step": 149970 }, { "epoch": 7.0, - "learning_rate": 6.03206600721954e-06, - "loss": 0.114, + "learning_rate": 1.6053859235797984e-05, + "loss": 0.0525, "step": 149975 }, { "epoch": 7.0, - "learning_rate": 6.031597205944401e-06, - "loss": 0.0742, + "learning_rate": 1.6053391165961964e-05, + "loss": 0.1449, "step": 149980 }, { "epoch": 7.0, - "learning_rate": 6.031128404669261e-06, - "loss": 0.2211, + "learning_rate": 1.6052923096125944e-05, + "loss": 0.1688, "step": 149985 }, { "epoch": 7.0, - "learning_rate": 6.0306596033941225e-06, - "loss": 0.1114, + "learning_rate": 1.605245502628992e-05, + "loss": 0.0543, "step": 149990 }, { "epoch": 7.0, - "learning_rate": 6.0301908021189825e-06, - "loss": 0.0319, + "learning_rate": 1.6051986956453903e-05, + "loss": 0.0338, "step": 149995 }, { "epoch": 7.0, - "learning_rate": 6.029722000843843e-06, - "loss": 0.0163, + "learning_rate": 1.6051518886617883e-05, + "loss": 0.0207, "step": 150000 }, { "epoch": 7.0, - "learning_rate": 6.029253199568703e-06, - "loss": 0.0441, + "learning_rate": 1.6051050816781863e-05, + "loss": 0.0486, "step": 150005 }, { "epoch": 7.0, - "learning_rate": 6.028784398293563e-06, - "loss": 0.0309, + "learning_rate": 1.6050582746945843e-05, + "loss": 0.053, "step": 150010 }, { "epoch": 7.0, - "learning_rate": 6.028315597018425e-06, - "loss": 0.1214, + "learning_rate": 1.6050114677109826e-05, + "loss": 0.1455, "step": 150015 }, { "epoch": 7.0, - "eval_cer": 0.009850527951730128, - "eval_loss": 0.03822819143533707, - "eval_runtime": 383.7473, - "eval_samples_per_second": 49.642, - "eval_steps_per_second": 12.412, - "eval_wer": 0.07735870427001926, + "eval_cer": 0.00955042662273964, + "eval_loss": 0.036567650735378265, + "eval_runtime": 396.8518, + "eval_samples_per_second": 48.003, + "eval_steps_per_second": 12.002, + "eval_wer": 0.07752946578907087, "step": 150017 }, { "epoch": 7.0, - "learning_rate": 6.0278467957432855e-06, - "loss": 0.154, + "learning_rate": 1.6049646607273806e-05, + "loss": 0.1323, "step": 150020 }, { "epoch": 7.0, - "learning_rate": 6.0273779944681455e-06, - "loss": 0.0356, + "learning_rate": 1.6049178537437786e-05, + "loss": 0.0231, "step": 150025 }, { "epoch": 7.0, - "learning_rate": 6.026909193193005e-06, - "loss": 0.0347, + "learning_rate": 1.604871046760177e-05, + "loss": 0.0038, "step": 150030 }, { "epoch": 7.0, - "learning_rate": 6.026440391917866e-06, - "loss": 0.0077, + "learning_rate": 1.604824239776575e-05, + "loss": 0.0157, "step": 150035 }, { "epoch": 7.0, - "learning_rate": 6.025971590642728e-06, - "loss": 0.0223, + "learning_rate": 1.604777432792973e-05, + "loss": 0.0813, "step": 150040 }, { "epoch": 7.0, - "learning_rate": 6.025502789367588e-06, - "loss": 0.0117, + "learning_rate": 1.6047306258093708e-05, + "loss": 0.04, "step": 150045 }, { "epoch": 7.0, - "learning_rate": 6.025033988092448e-06, - "loss": 0.003, + "learning_rate": 1.604683818825769e-05, + "loss": 0.0956, "step": 150050 }, { "epoch": 7.0, - "learning_rate": 6.0245651868173085e-06, - "loss": 0.0806, + "learning_rate": 1.6046370118421668e-05, + "loss": 0.0506, "step": 150055 }, { "epoch": 7.0, - "learning_rate": 6.02409638554217e-06, - "loss": 0.1057, + "learning_rate": 1.6045902048585648e-05, + "loss": 0.0406, "step": 150060 }, { "epoch": 7.0, - "learning_rate": 6.02362758426703e-06, - "loss": 0.1093, + "learning_rate": 1.6045433978749627e-05, + "loss": 0.3131, "step": 150065 }, { "epoch": 7.0, - "learning_rate": 6.02315878299189e-06, - "loss": 0.2196, + "learning_rate": 1.604496590891361e-05, + "loss": 0.2438, "step": 150070 }, { "epoch": 7.0, - "learning_rate": 6.022689981716751e-06, - "loss": 0.0362, + "learning_rate": 1.604449783907759e-05, + "loss": 0.0053, "step": 150075 }, { "epoch": 7.0, - "learning_rate": 6.022221180441611e-06, - "loss": 0.0389, + "learning_rate": 1.604402976924157e-05, + "loss": 0.0255, "step": 150080 }, { "epoch": 7.0, - "learning_rate": 6.021752379166472e-06, - "loss": 0.0356, + "learning_rate": 1.604356169940555e-05, + "loss": 0.0234, "step": 150085 }, { "epoch": 7.0, - "learning_rate": 6.021283577891332e-06, - "loss": 0.1472, + "learning_rate": 1.6043093629569533e-05, + "loss": 0.0307, "step": 150090 }, { "epoch": 7.0, - "learning_rate": 6.020814776616193e-06, - "loss": 0.0538, + "learning_rate": 1.6042625559733513e-05, + "loss": 0.0586, "step": 150095 }, { "epoch": 7.0, - "learning_rate": 6.020345975341053e-06, - "loss": 0.0057, + "learning_rate": 1.6042157489897493e-05, + "loss": 0.0743, "step": 150100 }, { "epoch": 7.0, - "learning_rate": 6.019877174065914e-06, - "loss": 0.0936, + "learning_rate": 1.6041689420061476e-05, + "loss": 0.0739, "step": 150105 }, { "epoch": 7.0, - "learning_rate": 6.0194083727907745e-06, - "loss": 0.0537, + "learning_rate": 1.6041221350225456e-05, + "loss": 0.0539, "step": 150110 }, { "epoch": 7.0, - "learning_rate": 6.018939571515635e-06, - "loss": 0.111, + "learning_rate": 1.6040753280389432e-05, + "loss": 0.1451, "step": 150115 }, { "epoch": 7.0, - "learning_rate": 6.018470770240495e-06, - "loss": 0.1718, + "learning_rate": 1.6040285210553412e-05, + "loss": 0.1871, "step": 150120 }, { "epoch": 7.01, - "learning_rate": 6.018001968965356e-06, - "loss": 0.0371, + "learning_rate": 1.6039817140717395e-05, + "loss": 0.0042, "step": 150125 }, { "epoch": 7.01, - "learning_rate": 6.017533167690217e-06, - "loss": 0.0486, + "learning_rate": 1.6039349070881375e-05, + "loss": 0.0006, "step": 150130 }, { "epoch": 7.01, - "learning_rate": 6.0170643664150776e-06, - "loss": 0.0399, + "learning_rate": 1.6038881001045355e-05, + "loss": 0.0117, "step": 150135 }, { "epoch": 7.01, - "learning_rate": 6.0165955651399375e-06, - "loss": 0.0265, + "learning_rate": 1.6038412931209335e-05, + "loss": 0.0126, "step": 150140 }, { "epoch": 7.01, - "learning_rate": 6.016126763864798e-06, - "loss": 0.0333, + "learning_rate": 1.6037944861373318e-05, + "loss": 0.0407, "step": 150145 }, { "epoch": 7.01, - "learning_rate": 6.015657962589658e-06, - "loss": 0.038, + "learning_rate": 1.6037476791537298e-05, + "loss": 0.0199, "step": 150150 }, { "epoch": 7.01, - "learning_rate": 6.01518916131452e-06, - "loss": 0.0393, + "learning_rate": 1.6037008721701278e-05, + "loss": 0.0368, "step": 150155 }, { "epoch": 7.01, - "learning_rate": 6.01472036003938e-06, - "loss": 0.1163, + "learning_rate": 1.603654065186526e-05, + "loss": 0.0896, "step": 150160 }, { "epoch": 7.01, - "learning_rate": 6.0142515587642406e-06, - "loss": 0.1722, + "learning_rate": 1.603607258202924e-05, + "loss": 0.4561, "step": 150165 }, { "epoch": 7.01, - "learning_rate": 6.0137827574891005e-06, - "loss": 0.1549, + "learning_rate": 1.603560451219322e-05, + "loss": 0.126, "step": 150170 }, { "epoch": 7.01, - "learning_rate": 6.013313956213961e-06, - "loss": 0.0411, + "learning_rate": 1.60351364423572e-05, + "loss": 0.0349, "step": 150175 }, { "epoch": 7.01, - "learning_rate": 6.012845154938822e-06, - "loss": 0.0197, + "learning_rate": 1.603466837252118e-05, + "loss": 0.0147, "step": 150180 }, { "epoch": 7.01, - "learning_rate": 6.012376353663683e-06, - "loss": 0.0147, + "learning_rate": 1.603420030268516e-05, + "loss": 0.0023, "step": 150185 }, { "epoch": 7.01, - "learning_rate": 6.011907552388543e-06, - "loss": 0.0272, + "learning_rate": 1.603373223284914e-05, + "loss": 0.0337, "step": 150190 }, { "epoch": 7.01, - "learning_rate": 6.0114387511134035e-06, - "loss": 0.0244, + "learning_rate": 1.603326416301312e-05, + "loss": 0.0162, "step": 150195 }, { "epoch": 7.01, - "learning_rate": 6.010969949838264e-06, - "loss": 0.1014, + "learning_rate": 1.6032796093177103e-05, + "loss": 0.0617, "step": 150200 }, { "epoch": 7.01, - "learning_rate": 6.010501148563125e-06, - "loss": 0.0595, + "learning_rate": 1.6032328023341083e-05, + "loss": 0.0544, "step": 150205 }, { "epoch": 7.01, - "learning_rate": 6.010032347287985e-06, - "loss": 0.1043, + "learning_rate": 1.6031859953505063e-05, + "loss": 0.0546, "step": 150210 }, { "epoch": 7.01, - "learning_rate": 6.009563546012846e-06, - "loss": 0.1167, + "learning_rate": 1.6031391883669046e-05, + "loss": 0.2198, "step": 150215 }, { "epoch": 7.01, - "learning_rate": 6.009094744737706e-06, - "loss": 0.1957, + "learning_rate": 1.6030923813833026e-05, + "loss": 0.2084, "step": 150220 }, { "epoch": 7.01, - "learning_rate": 6.008625943462567e-06, - "loss": 0.0178, + "learning_rate": 1.6030455743997005e-05, + "loss": 0.034, "step": 150225 }, { "epoch": 7.01, - "learning_rate": 6.008157142187427e-06, - "loss": 0.0284, + "learning_rate": 1.6029987674160985e-05, + "loss": 0.0478, "step": 150230 }, { "epoch": 7.01, - "learning_rate": 6.007688340912288e-06, - "loss": 0.0143, + "learning_rate": 1.602951960432497e-05, + "loss": 0.0209, "step": 150235 }, { "epoch": 7.01, - "learning_rate": 6.007219539637148e-06, - "loss": 0.0249, + "learning_rate": 1.6029051534488948e-05, + "loss": 0.008, "step": 150240 }, { "epoch": 7.01, - "learning_rate": 6.006750738362008e-06, - "loss": 0.0506, + "learning_rate": 1.6028583464652925e-05, + "loss": 0.064, "step": 150245 }, { "epoch": 7.01, - "learning_rate": 6.00628193708687e-06, - "loss": 0.0865, + "learning_rate": 1.6028115394816904e-05, + "loss": 0.0695, "step": 150250 }, { "epoch": 7.01, - "learning_rate": 6.00581313581173e-06, - "loss": 0.1187, + "learning_rate": 1.6027647324980888e-05, + "loss": 0.026, "step": 150255 }, { "epoch": 7.01, - "learning_rate": 6.00534433453659e-06, - "loss": 0.1985, + "learning_rate": 1.6027179255144867e-05, + "loss": 0.091, "step": 150260 }, { "epoch": 7.01, - "learning_rate": 6.00487553326145e-06, - "loss": 0.1225, + "learning_rate": 1.6026711185308847e-05, + "loss": 0.1081, "step": 150265 }, { "epoch": 7.01, - "learning_rate": 6.004406731986312e-06, - "loss": 0.1534, + "learning_rate": 1.602624311547283e-05, + "loss": 0.2133, "step": 150270 }, { "epoch": 7.01, - "learning_rate": 6.003937930711173e-06, - "loss": 0.0363, + "learning_rate": 1.602577504563681e-05, + "loss": 0.0562, "step": 150275 }, { "epoch": 7.01, - "learning_rate": 6.003469129436033e-06, - "loss": 0.0296, + "learning_rate": 1.602530697580079e-05, + "loss": 0.0048, "step": 150280 }, { "epoch": 7.01, - "learning_rate": 6.0030003281608925e-06, - "loss": 0.0839, + "learning_rate": 1.602483890596477e-05, + "loss": 0.0187, "step": 150285 }, { "epoch": 7.01, - "learning_rate": 6.002531526885753e-06, - "loss": 0.0079, + "learning_rate": 1.6024370836128753e-05, + "loss": 0.0157, "step": 150290 }, { "epoch": 7.01, - "learning_rate": 6.002062725610615e-06, - "loss": 0.0679, + "learning_rate": 1.6023902766292733e-05, + "loss": 0.0191, "step": 150295 }, { "epoch": 7.01, - "learning_rate": 6.001593924335475e-06, - "loss": 0.0408, + "learning_rate": 1.6023434696456713e-05, + "loss": 0.0608, "step": 150300 }, { "epoch": 7.01, - "learning_rate": 6.001125123060335e-06, - "loss": 0.1183, + "learning_rate": 1.602296662662069e-05, + "loss": 0.0938, "step": 150305 }, { "epoch": 7.01, - "learning_rate": 6.0006563217851956e-06, - "loss": 0.0753, + "learning_rate": 1.6022498556784672e-05, + "loss": 0.1298, "step": 150310 }, { "epoch": 7.01, - "learning_rate": 6.000187520510057e-06, - "loss": 0.0868, + "learning_rate": 1.6022030486948652e-05, + "loss": 0.2185, "step": 150315 }, { "epoch": 7.01, - "learning_rate": 5.999718719234917e-06, - "loss": 0.1478, + "learning_rate": 1.6021562417112632e-05, + "loss": 0.1843, "step": 150320 }, { "epoch": 7.01, - "learning_rate": 5.999249917959777e-06, - "loss": 0.0168, + "learning_rate": 1.6021094347276612e-05, + "loss": 0.0077, "step": 150325 }, { "epoch": 7.01, - "learning_rate": 5.998781116684638e-06, - "loss": 0.0273, + "learning_rate": 1.6020626277440595e-05, + "loss": 0.0112, "step": 150330 }, { "epoch": 7.01, - "learning_rate": 5.998312315409498e-06, - "loss": 0.0942, + "learning_rate": 1.6020158207604575e-05, + "loss": 0.0033, "step": 150335 }, { "epoch": 7.02, - "learning_rate": 5.997843514134359e-06, - "loss": 0.066, + "learning_rate": 1.6019690137768555e-05, + "loss": 0.0277, "step": 150340 }, { "epoch": 7.02, - "learning_rate": 5.997374712859219e-06, - "loss": 0.0148, + "learning_rate": 1.6019222067932538e-05, + "loss": 0.0327, "step": 150345 }, { "epoch": 7.02, - "learning_rate": 5.99690591158408e-06, - "loss": 0.0768, + "learning_rate": 1.6018753998096518e-05, + "loss": 0.1074, "step": 150350 }, { "epoch": 7.02, - "learning_rate": 5.99643711030894e-06, - "loss": 0.0672, + "learning_rate": 1.6018285928260498e-05, + "loss": 0.0417, "step": 150355 }, { "epoch": 7.02, - "learning_rate": 5.995968309033801e-06, - "loss": 0.0432, + "learning_rate": 1.6017817858424477e-05, + "loss": 0.0888, "step": 150360 }, { "epoch": 7.02, - "learning_rate": 5.995499507758662e-06, - "loss": 0.1325, + "learning_rate": 1.601734978858846e-05, + "loss": 0.121, "step": 150365 }, { "epoch": 7.02, - "learning_rate": 5.995030706483522e-06, - "loss": 0.1458, + "learning_rate": 1.6016881718752437e-05, + "loss": 0.1638, "step": 150370 }, { "epoch": 7.02, - "learning_rate": 5.994561905208382e-06, - "loss": 0.0401, + "learning_rate": 1.6016413648916417e-05, + "loss": 0.0314, "step": 150375 }, { "epoch": 7.02, - "learning_rate": 5.994093103933243e-06, - "loss": 0.0627, + "learning_rate": 1.6015945579080397e-05, + "loss": 0.0146, "step": 150380 }, { "epoch": 7.02, - "learning_rate": 5.993624302658104e-06, - "loss": 0.0059, + "learning_rate": 1.601547750924438e-05, + "loss": 0.0267, "step": 150385 }, { "epoch": 7.02, - "learning_rate": 5.993155501382965e-06, - "loss": 0.0166, + "learning_rate": 1.601500943940836e-05, + "loss": 0.0278, "step": 150390 }, { "epoch": 7.02, - "learning_rate": 5.992686700107825e-06, - "loss": 0.032, + "learning_rate": 1.601454136957234e-05, + "loss": 0.0038, "step": 150395 }, { "epoch": 7.02, - "learning_rate": 5.992217898832685e-06, - "loss": 0.1607, + "learning_rate": 1.6014073299736323e-05, + "loss": 0.0346, "step": 150400 }, { "epoch": 7.02, - "learning_rate": 5.991749097557545e-06, - "loss": 0.0979, + "learning_rate": 1.6013605229900303e-05, + "loss": 0.0923, "step": 150405 }, { "epoch": 7.02, - "learning_rate": 5.991280296282407e-06, - "loss": 0.1246, + "learning_rate": 1.6013137160064282e-05, + "loss": 0.1367, "step": 150410 }, { "epoch": 7.02, - "learning_rate": 5.990811495007267e-06, - "loss": 0.0771, + "learning_rate": 1.6012669090228262e-05, + "loss": 0.2444, "step": 150415 }, { "epoch": 7.02, - "learning_rate": 5.990342693732128e-06, - "loss": 0.1109, + "learning_rate": 1.6012201020392245e-05, + "loss": 0.2246, "step": 150420 }, { "epoch": 7.02, - "learning_rate": 5.989873892456988e-06, - "loss": 0.0177, + "learning_rate": 1.6011732950556225e-05, + "loss": 0.0084, "step": 150425 }, { "epoch": 7.02, - "learning_rate": 5.989405091181848e-06, - "loss": 0.0369, + "learning_rate": 1.6011264880720205e-05, + "loss": 0.0398, "step": 150430 }, { "epoch": 7.02, - "learning_rate": 5.988936289906709e-06, - "loss": 0.0246, + "learning_rate": 1.601079681088418e-05, + "loss": 0.0457, "step": 150435 }, { "epoch": 7.02, - "learning_rate": 5.98846748863157e-06, - "loss": 0.0698, + "learning_rate": 1.6010328741048165e-05, + "loss": 0.0275, "step": 150440 }, { "epoch": 7.02, - "learning_rate": 5.98799868735643e-06, - "loss": 0.0222, + "learning_rate": 1.6009860671212144e-05, + "loss": 0.0776, "step": 150445 }, { "epoch": 7.02, - "learning_rate": 5.987529886081291e-06, - "loss": 0.063, + "learning_rate": 1.6009392601376124e-05, + "loss": 0.0242, "step": 150450 }, { "epoch": 7.02, - "learning_rate": 5.9870610848061514e-06, - "loss": 0.0473, + "learning_rate": 1.6008924531540107e-05, + "loss": 0.0791, "step": 150455 }, { "epoch": 7.02, - "learning_rate": 5.986592283531012e-06, - "loss": 0.0649, + "learning_rate": 1.6008456461704087e-05, + "loss": 0.0348, "step": 150460 }, { "epoch": 7.02, - "learning_rate": 5.986123482255872e-06, - "loss": 0.1624, + "learning_rate": 1.6007988391868067e-05, + "loss": 0.1544, "step": 150465 }, { "epoch": 7.02, - "learning_rate": 5.985654680980733e-06, - "loss": 0.1726, + "learning_rate": 1.6007520322032047e-05, + "loss": 0.1745, "step": 150470 }, { "epoch": 7.02, - "learning_rate": 5.985185879705593e-06, - "loss": 0.0069, + "learning_rate": 1.600705225219603e-05, + "loss": 0.0095, "step": 150475 }, { "epoch": 7.02, - "learning_rate": 5.9847170784304545e-06, - "loss": 0.0181, + "learning_rate": 1.600658418236001e-05, + "loss": 0.0137, "step": 150480 }, { "epoch": 7.02, - "learning_rate": 5.984248277155314e-06, - "loss": 0.0222, + "learning_rate": 1.600611611252399e-05, + "loss": 0.0714, "step": 150485 }, { "epoch": 7.02, - "learning_rate": 5.983779475880175e-06, - "loss": 0.0677, + "learning_rate": 1.600564804268797e-05, + "loss": 0.009, "step": 150490 }, { "epoch": 7.02, - "learning_rate": 5.983310674605035e-06, - "loss": 0.1735, + "learning_rate": 1.600517997285195e-05, + "loss": 0.0667, "step": 150495 }, { "epoch": 7.02, - "learning_rate": 5.982841873329895e-06, - "loss": 0.0902, + "learning_rate": 1.600471190301593e-05, + "loss": 0.0633, "step": 150500 }, { "epoch": 7.02, - "learning_rate": 5.982373072054757e-06, - "loss": 0.0636, + "learning_rate": 1.600424383317991e-05, + "loss": 0.1497, "step": 150505 }, { "epoch": 7.02, - "learning_rate": 5.9819042707796175e-06, - "loss": 0.1322, + "learning_rate": 1.600377576334389e-05, + "loss": 0.1173, "step": 150510 }, { "epoch": 7.02, - "learning_rate": 5.981435469504477e-06, - "loss": 0.0765, + "learning_rate": 1.6003307693507872e-05, + "loss": 0.1204, "step": 150515 }, { "epoch": 7.02, - "learning_rate": 5.980966668229337e-06, - "loss": 0.1192, + "learning_rate": 1.6002839623671852e-05, + "loss": 0.1587, "step": 150520 }, { "epoch": 7.02, - "learning_rate": 5.980497866954199e-06, - "loss": 0.0109, + "learning_rate": 1.6002371553835832e-05, + "loss": 0.0181, "step": 150525 }, { "epoch": 7.02, - "learning_rate": 5.98002906567906e-06, - "loss": 0.0266, + "learning_rate": 1.6001903483999815e-05, + "loss": 0.0113, "step": 150530 }, { "epoch": 7.02, - "learning_rate": 5.97956026440392e-06, - "loss": 0.0519, + "learning_rate": 1.6001435414163795e-05, + "loss": 0.0509, "step": 150535 }, { "epoch": 7.02, - "learning_rate": 5.97909146312878e-06, - "loss": 0.0038, + "learning_rate": 1.6000967344327775e-05, + "loss": 0.0276, "step": 150540 }, { "epoch": 7.02, - "learning_rate": 5.97862266185364e-06, - "loss": 0.02, + "learning_rate": 1.6000499274491754e-05, + "loss": 0.0284, "step": 150545 }, { "epoch": 7.02, - "learning_rate": 5.978153860578502e-06, - "loss": 0.0547, + "learning_rate": 1.6000031204655738e-05, + "loss": 0.0319, "step": 150550 }, { "epoch": 7.03, - "learning_rate": 5.977685059303362e-06, - "loss": 0.0272, + "learning_rate": 1.5999563134819717e-05, + "loss": 0.0391, "step": 150555 }, { "epoch": 7.03, - "learning_rate": 5.977216258028222e-06, - "loss": 0.0387, + "learning_rate": 1.5999095064983694e-05, + "loss": 0.0651, "step": 150560 }, { "epoch": 7.03, - "learning_rate": 5.976747456753083e-06, - "loss": 0.1601, + "learning_rate": 1.5998626995147674e-05, + "loss": 0.1232, "step": 150565 }, { "epoch": 7.03, - "learning_rate": 5.976278655477943e-06, - "loss": 0.1143, + "learning_rate": 1.5998158925311657e-05, + "loss": 0.1357, "step": 150570 }, { "epoch": 7.03, - "learning_rate": 5.975809854202804e-06, - "loss": 0.0637, + "learning_rate": 1.5997690855475637e-05, + "loss": 0.0128, "step": 150575 }, { "epoch": 7.03, - "learning_rate": 5.975341052927664e-06, - "loss": 0.0123, + "learning_rate": 1.5997222785639616e-05, + "loss": 0.0062, "step": 150580 }, { "epoch": 7.03, - "learning_rate": 5.974872251652525e-06, - "loss": 0.0409, + "learning_rate": 1.59967547158036e-05, + "loss": 0.0174, "step": 150585 }, { "epoch": 7.03, - "learning_rate": 5.974403450377385e-06, - "loss": 0.045, + "learning_rate": 1.599628664596758e-05, + "loss": 0.0703, "step": 150590 }, { "epoch": 7.03, - "learning_rate": 5.9739346491022465e-06, - "loss": 0.0313, + "learning_rate": 1.599581857613156e-05, + "loss": 0.0335, "step": 150595 }, { "epoch": 7.03, - "learning_rate": 5.9734658478271064e-06, - "loss": 0.0255, + "learning_rate": 1.599535050629554e-05, + "loss": 0.0109, "step": 150600 }, { "epoch": 7.03, - "learning_rate": 5.972997046551967e-06, - "loss": 0.0425, + "learning_rate": 1.5994882436459522e-05, + "loss": 0.103, "step": 150605 }, { "epoch": 7.03, - "learning_rate": 5.972528245276827e-06, - "loss": 0.0973, + "learning_rate": 1.5994414366623502e-05, + "loss": 0.1394, "step": 150610 }, { "epoch": 7.03, - "learning_rate": 5.972059444001688e-06, - "loss": 0.0995, + "learning_rate": 1.5993946296787482e-05, + "loss": 0.1021, "step": 150615 }, { "epoch": 7.03, - "learning_rate": 5.971590642726549e-06, - "loss": 0.0957, + "learning_rate": 1.5993478226951462e-05, + "loss": 0.1389, "step": 150620 }, { "epoch": 7.03, - "learning_rate": 5.9711218414514095e-06, - "loss": 0.0228, + "learning_rate": 1.599301015711544e-05, + "loss": 0.0403, "step": 150625 }, { "epoch": 7.03, - "learning_rate": 5.9706530401762694e-06, - "loss": 0.0308, + "learning_rate": 1.599254208727942e-05, + "loss": 0.0203, "step": 150630 }, { "epoch": 7.03, - "learning_rate": 5.97018423890113e-06, - "loss": 0.0194, + "learning_rate": 1.59920740174434e-05, + "loss": 0.0206, "step": 150635 }, { "epoch": 7.03, - "learning_rate": 5.969715437625991e-06, - "loss": 0.0456, + "learning_rate": 1.5991605947607384e-05, + "loss": 0.0052, "step": 150640 }, { "epoch": 7.03, - "learning_rate": 5.969246636350852e-06, - "loss": 0.0184, + "learning_rate": 1.5991137877771364e-05, + "loss": 0.0312, "step": 150645 }, { "epoch": 7.03, - "learning_rate": 5.968777835075712e-06, - "loss": 0.0348, + "learning_rate": 1.5990669807935344e-05, + "loss": 0.0392, "step": 150650 }, { "epoch": 7.03, - "learning_rate": 5.9683090338005725e-06, - "loss": 0.0467, + "learning_rate": 1.5990201738099324e-05, + "loss": 0.0334, "step": 150655 }, { "epoch": 7.03, - "learning_rate": 5.9678402325254324e-06, - "loss": 0.1073, + "learning_rate": 1.5989733668263307e-05, + "loss": 0.1199, "step": 150660 }, { "epoch": 7.03, - "learning_rate": 5.967371431250294e-06, - "loss": 0.1889, + "learning_rate": 1.5989265598427287e-05, + "loss": 0.2627, "step": 150665 }, { "epoch": 7.03, - "learning_rate": 5.966902629975154e-06, - "loss": 0.2384, + "learning_rate": 1.5988797528591267e-05, + "loss": 0.1939, "step": 150670 }, { "epoch": 7.03, - "learning_rate": 5.966433828700015e-06, - "loss": 0.0109, + "learning_rate": 1.5988329458755247e-05, + "loss": 0.025, "step": 150675 }, { "epoch": 7.03, - "learning_rate": 5.965965027424875e-06, - "loss": 0.0249, + "learning_rate": 1.598786138891923e-05, + "loss": 0.0056, "step": 150680 }, { "epoch": 7.03, - "learning_rate": 5.9654962261497355e-06, - "loss": 0.0168, + "learning_rate": 1.5987393319083206e-05, + "loss": 0.0123, "step": 150685 }, { "epoch": 7.03, - "learning_rate": 5.965027424874596e-06, - "loss": 0.0203, + "learning_rate": 1.5986925249247186e-05, + "loss": 0.0396, "step": 150690 }, { "epoch": 7.03, - "learning_rate": 5.964558623599457e-06, - "loss": 0.0684, + "learning_rate": 1.5986457179411166e-05, + "loss": 0.0492, "step": 150695 }, { "epoch": 7.03, - "learning_rate": 5.964089822324317e-06, - "loss": 0.046, + "learning_rate": 1.598598910957515e-05, + "loss": 0.0458, "step": 150700 }, { "epoch": 7.03, - "learning_rate": 5.963621021049178e-06, - "loss": 0.1107, + "learning_rate": 1.598552103973913e-05, + "loss": 0.0893, "step": 150705 }, { "epoch": 7.03, - "learning_rate": 5.9631522197740385e-06, - "loss": 0.1908, + "learning_rate": 1.598505296990311e-05, + "loss": 0.0415, "step": 150710 }, { "epoch": 7.03, - "learning_rate": 5.962683418498899e-06, - "loss": 0.1099, + "learning_rate": 1.5984584900067092e-05, + "loss": 0.1671, "step": 150715 }, { "epoch": 7.03, - "learning_rate": 5.962214617223759e-06, - "loss": 0.2301, + "learning_rate": 1.5984116830231072e-05, + "loss": 0.1495, "step": 150720 }, { "epoch": 7.03, - "learning_rate": 5.96174581594862e-06, - "loss": 0.0143, + "learning_rate": 1.598364876039505e-05, + "loss": 0.0536, "step": 150725 }, { "epoch": 7.03, - "learning_rate": 5.96127701467348e-06, - "loss": 0.0588, + "learning_rate": 1.598318069055903e-05, + "loss": 0.0261, "step": 150730 }, { "epoch": 7.03, - "learning_rate": 5.960808213398342e-06, - "loss": 0.0176, + "learning_rate": 1.5982712620723015e-05, + "loss": 0.0109, "step": 150735 }, { "epoch": 7.03, - "learning_rate": 5.9603394121232015e-06, - "loss": 0.0365, + "learning_rate": 1.5982244550886994e-05, + "loss": 0.0588, "step": 150740 }, { "epoch": 7.03, - "learning_rate": 5.959870610848062e-06, - "loss": 0.0839, + "learning_rate": 1.5981776481050974e-05, + "loss": 0.0445, "step": 150745 }, { "epoch": 7.03, - "learning_rate": 5.959401809572922e-06, - "loss": 0.0299, + "learning_rate": 1.598130841121495e-05, + "loss": 0.0583, "step": 150750 }, { "epoch": 7.03, - "learning_rate": 5.958933008297782e-06, - "loss": 0.0772, + "learning_rate": 1.5980840341378934e-05, + "loss": 0.0555, "step": 150755 }, { "epoch": 7.03, - "learning_rate": 5.958464207022644e-06, - "loss": 0.0256, + "learning_rate": 1.5980372271542914e-05, + "loss": 0.1106, "step": 150760 }, { "epoch": 7.03, - "learning_rate": 5.957995405747505e-06, - "loss": 0.1491, + "learning_rate": 1.5979904201706893e-05, + "loss": 0.1287, "step": 150765 }, { "epoch": 7.04, - "learning_rate": 5.9575266044723645e-06, - "loss": 0.1693, + "learning_rate": 1.5979436131870877e-05, + "loss": 0.155, "step": 150770 }, { "epoch": 7.04, - "learning_rate": 5.9570578031972245e-06, - "loss": 0.0292, + "learning_rate": 1.5978968062034856e-05, + "loss": 0.0421, "step": 150775 }, { "epoch": 7.04, - "learning_rate": 5.956589001922086e-06, - "loss": 0.0076, + "learning_rate": 1.5978499992198836e-05, + "loss": 0.0164, "step": 150780 }, { "epoch": 7.04, - "learning_rate": 5.956120200646947e-06, - "loss": 0.024, + "learning_rate": 1.5978031922362816e-05, + "loss": 0.0163, "step": 150785 }, { "epoch": 7.04, - "learning_rate": 5.955651399371807e-06, - "loss": 0.0372, + "learning_rate": 1.59775638525268e-05, + "loss": 0.0331, "step": 150790 }, { "epoch": 7.04, - "learning_rate": 5.955182598096667e-06, - "loss": 0.1036, + "learning_rate": 1.597709578269078e-05, + "loss": 0.0368, "step": 150795 }, { "epoch": 7.04, - "learning_rate": 5.9547137968215275e-06, - "loss": 0.0528, + "learning_rate": 1.597662771285476e-05, + "loss": 0.0606, "step": 150800 }, { "epoch": 7.04, - "learning_rate": 5.954244995546389e-06, - "loss": 0.0452, + "learning_rate": 1.597615964301874e-05, + "loss": 0.0467, "step": 150805 }, { "epoch": 7.04, - "learning_rate": 5.953776194271249e-06, - "loss": 0.0983, + "learning_rate": 1.597569157318272e-05, + "loss": 0.0861, "step": 150810 }, { "epoch": 7.04, - "learning_rate": 5.953307392996109e-06, - "loss": 0.1493, + "learning_rate": 1.59752235033467e-05, + "loss": 0.1107, "step": 150815 }, { "epoch": 7.04, - "learning_rate": 5.95283859172097e-06, - "loss": 0.1536, + "learning_rate": 1.5974755433510678e-05, + "loss": 0.2255, "step": 150820 }, { "epoch": 7.04, - "learning_rate": 5.95236979044583e-06, - "loss": 0.0229, + "learning_rate": 1.597428736367466e-05, + "loss": 0.0136, "step": 150825 }, { "epoch": 7.04, - "learning_rate": 5.951900989170691e-06, - "loss": 0.0059, + "learning_rate": 1.597381929383864e-05, + "loss": 0.0648, "step": 150830 }, { "epoch": 7.04, - "learning_rate": 5.951432187895551e-06, - "loss": 0.0455, + "learning_rate": 1.597335122400262e-05, + "loss": 0.0264, "step": 150835 }, { "epoch": 7.04, - "learning_rate": 5.950963386620412e-06, - "loss": 0.0157, + "learning_rate": 1.59728831541666e-05, + "loss": 0.0277, "step": 150840 }, { "epoch": 7.04, - "learning_rate": 5.950494585345272e-06, - "loss": 0.0122, + "learning_rate": 1.5972415084330584e-05, + "loss": 0.0056, "step": 150845 }, { "epoch": 7.04, - "learning_rate": 5.950025784070134e-06, - "loss": 0.0914, + "learning_rate": 1.5971947014494564e-05, + "loss": 0.0697, "step": 150850 }, { "epoch": 7.04, - "learning_rate": 5.9495569827949936e-06, - "loss": 0.0451, + "learning_rate": 1.5971478944658544e-05, + "loss": 0.1402, "step": 150855 }, { "epoch": 7.04, - "learning_rate": 5.949088181519854e-06, - "loss": 0.0935, + "learning_rate": 1.5971010874822524e-05, + "loss": 0.098, "step": 150860 }, { "epoch": 7.04, - "learning_rate": 5.948619380244714e-06, - "loss": 0.1312, + "learning_rate": 1.5970542804986507e-05, + "loss": 0.0989, "step": 150865 }, { "epoch": 7.04, - "learning_rate": 5.948150578969575e-06, - "loss": 0.1737, + "learning_rate": 1.5970074735150487e-05, + "loss": 0.1241, "step": 150870 }, { "epoch": 7.04, - "learning_rate": 5.947681777694436e-06, - "loss": 0.0166, + "learning_rate": 1.5969606665314463e-05, + "loss": 0.0089, "step": 150875 }, { "epoch": 7.04, - "learning_rate": 5.947212976419297e-06, - "loss": 0.0018, + "learning_rate": 1.5969138595478443e-05, + "loss": 0.0123, "step": 150880 }, { "epoch": 7.04, - "learning_rate": 5.9467441751441565e-06, - "loss": 0.0048, + "learning_rate": 1.5968670525642426e-05, + "loss": 0.0266, "step": 150885 }, { "epoch": 7.04, - "learning_rate": 5.946275373869017e-06, - "loss": 0.0066, + "learning_rate": 1.5968202455806406e-05, + "loss": 0.05, "step": 150890 }, { "epoch": 7.04, - "learning_rate": 5.945806572593877e-06, - "loss": 0.0074, + "learning_rate": 1.5967734385970386e-05, + "loss": 0.012, "step": 150895 }, { "epoch": 7.04, - "learning_rate": 5.945337771318739e-06, - "loss": 0.0366, + "learning_rate": 1.596726631613437e-05, + "loss": 0.052, "step": 150900 }, { "epoch": 7.04, - "learning_rate": 5.944868970043599e-06, - "loss": 0.0157, + "learning_rate": 1.596679824629835e-05, + "loss": 0.05, "step": 150905 }, { "epoch": 7.04, - "learning_rate": 5.94440016876846e-06, - "loss": 0.047, + "learning_rate": 1.596633017646233e-05, + "loss": 0.1078, "step": 150910 }, { "epoch": 7.04, - "learning_rate": 5.9439313674933195e-06, - "loss": 0.1475, + "learning_rate": 1.5965862106626308e-05, + "loss": 0.2677, "step": 150915 }, { "epoch": 7.04, - "learning_rate": 5.943462566218181e-06, - "loss": 0.1361, + "learning_rate": 1.596539403679029e-05, + "loss": 0.1535, "step": 150920 }, { "epoch": 7.04, - "learning_rate": 5.942993764943041e-06, - "loss": 0.0248, + "learning_rate": 1.596492596695427e-05, + "loss": 0.0175, "step": 150925 }, { "epoch": 7.04, - "learning_rate": 5.942524963667902e-06, - "loss": 0.0232, + "learning_rate": 1.596445789711825e-05, + "loss": 0.0092, "step": 150930 }, { "epoch": 7.04, - "learning_rate": 5.942056162392762e-06, - "loss": 0.0078, + "learning_rate": 1.596398982728223e-05, + "loss": 0.0306, "step": 150935 }, { "epoch": 7.04, - "learning_rate": 5.941587361117623e-06, - "loss": 0.0465, + "learning_rate": 1.596352175744621e-05, + "loss": 0.0545, "step": 150940 }, { "epoch": 7.04, - "learning_rate": 5.941118559842483e-06, - "loss": 0.052, + "learning_rate": 1.596305368761019e-05, + "loss": 0.0521, "step": 150945 }, { "epoch": 7.04, - "learning_rate": 5.940649758567344e-06, - "loss": 0.0572, + "learning_rate": 1.596258561777417e-05, + "loss": 0.0501, "step": 150950 }, { "epoch": 7.04, - "learning_rate": 5.940180957292204e-06, - "loss": 0.0412, + "learning_rate": 1.5962117547938154e-05, + "loss": 0.1406, "step": 150955 }, { "epoch": 7.04, - "learning_rate": 5.939712156017065e-06, - "loss": 0.0807, + "learning_rate": 1.5961649478102133e-05, + "loss": 0.0657, "step": 150960 }, { "epoch": 7.04, - "learning_rate": 5.939243354741926e-06, - "loss": 0.2019, + "learning_rate": 1.5961181408266113e-05, + "loss": 0.0572, "step": 150965 }, { "epoch": 7.04, - "learning_rate": 5.9387745534667864e-06, - "loss": 0.2521, + "learning_rate": 1.5960713338430093e-05, + "loss": 0.1359, "step": 150970 }, { "epoch": 7.04, - "learning_rate": 5.938305752191646e-06, - "loss": 0.0301, + "learning_rate": 1.5960245268594076e-05, + "loss": 0.0148, "step": 150975 }, { "epoch": 7.04, - "learning_rate": 5.937836950916507e-06, - "loss": 0.022, + "learning_rate": 1.5959777198758056e-05, + "loss": 0.0163, "step": 150980 }, { "epoch": 7.05, - "learning_rate": 5.937368149641367e-06, - "loss": 0.027, + "learning_rate": 1.5959309128922036e-05, + "loss": 0.0221, "step": 150985 }, { "epoch": 7.05, - "learning_rate": 5.936899348366229e-06, - "loss": 0.0621, + "learning_rate": 1.5958841059086016e-05, + "loss": 0.0463, "step": 150990 }, { "epoch": 7.05, - "learning_rate": 5.936430547091089e-06, - "loss": 0.039, + "learning_rate": 1.595837298925e-05, + "loss": 0.0404, "step": 150995 }, { "epoch": 7.05, - "learning_rate": 5.935961745815949e-06, - "loss": 0.1083, + "learning_rate": 1.5957904919413975e-05, + "loss": 0.1183, "step": 151000 }, { "epoch": 7.05, - "learning_rate": 5.935492944540809e-06, - "loss": 0.1486, + "learning_rate": 1.5957436849577955e-05, + "loss": 0.0412, "step": 151005 }, { "epoch": 7.05, - "learning_rate": 5.935024143265669e-06, - "loss": 0.1429, + "learning_rate": 1.595696877974194e-05, + "loss": 0.0273, "step": 151010 }, { "epoch": 7.05, - "learning_rate": 5.934555341990531e-06, - "loss": 0.1972, + "learning_rate": 1.5956500709905918e-05, + "loss": 0.1176, "step": 151015 }, { "epoch": 7.05, - "learning_rate": 5.934086540715392e-06, - "loss": 0.2232, + "learning_rate": 1.5956032640069898e-05, + "loss": 0.3628, "step": 151020 }, { "epoch": 7.05, - "learning_rate": 5.933617739440252e-06, - "loss": 0.0093, + "learning_rate": 1.5955564570233878e-05, + "loss": 0.0106, "step": 151025 }, { "epoch": 7.05, - "learning_rate": 5.9331489381651116e-06, - "loss": 0.0252, + "learning_rate": 1.595509650039786e-05, + "loss": 0.019, "step": 151030 }, { "epoch": 7.05, - "learning_rate": 5.932680136889973e-06, - "loss": 0.0203, + "learning_rate": 1.595462843056184e-05, + "loss": 0.0225, "step": 151035 }, { "epoch": 7.05, - "learning_rate": 5.932211335614834e-06, - "loss": 0.0146, + "learning_rate": 1.595416036072582e-05, + "loss": 0.0165, "step": 151040 }, { "epoch": 7.05, - "learning_rate": 5.931742534339694e-06, - "loss": 0.0298, + "learning_rate": 1.59536922908898e-05, + "loss": 0.0159, "step": 151045 }, { "epoch": 7.05, - "learning_rate": 5.931273733064554e-06, - "loss": 0.132, + "learning_rate": 1.5953224221053784e-05, + "loss": 0.1024, "step": 151050 }, { "epoch": 7.05, - "learning_rate": 5.930804931789415e-06, - "loss": 0.0596, + "learning_rate": 1.5952756151217764e-05, + "loss": 0.0576, "step": 151055 }, { "epoch": 7.05, - "learning_rate": 5.930336130514276e-06, - "loss": 0.053, + "learning_rate": 1.5952288081381743e-05, + "loss": 0.0556, "step": 151060 }, { "epoch": 7.05, - "learning_rate": 5.929867329239136e-06, - "loss": 0.1096, + "learning_rate": 1.5951820011545723e-05, + "loss": 0.2095, "step": 151065 }, { "epoch": 7.05, - "learning_rate": 5.929398527963996e-06, - "loss": 0.2363, + "learning_rate": 1.5951351941709703e-05, + "loss": 0.2795, "step": 151070 }, { "epoch": 7.05, - "learning_rate": 5.928929726688857e-06, - "loss": 0.0112, + "learning_rate": 1.5950883871873683e-05, + "loss": 0.0412, "step": 151075 }, { "epoch": 7.05, - "learning_rate": 5.928460925413717e-06, - "loss": 0.0069, + "learning_rate": 1.5950415802037663e-05, + "loss": 0.0017, "step": 151080 }, { "epoch": 7.05, - "learning_rate": 5.9279921241385785e-06, - "loss": 0.0441, + "learning_rate": 1.5949947732201646e-05, + "loss": 0.0104, "step": 151085 }, { "epoch": 7.05, - "learning_rate": 5.927523322863438e-06, - "loss": 0.0326, + "learning_rate": 1.5949479662365626e-05, + "loss": 0.0382, "step": 151090 }, { "epoch": 7.05, - "learning_rate": 5.927054521588299e-06, - "loss": 0.0412, + "learning_rate": 1.5949011592529605e-05, + "loss": 0.0393, "step": 151095 }, { "epoch": 7.05, - "learning_rate": 5.926585720313159e-06, - "loss": 0.0791, + "learning_rate": 1.5948543522693585e-05, + "loss": 0.099, "step": 151100 }, { "epoch": 7.05, - "learning_rate": 5.926116919038021e-06, - "loss": 0.0755, + "learning_rate": 1.594807545285757e-05, + "loss": 0.0376, "step": 151105 }, { "epoch": 7.05, - "learning_rate": 5.925648117762881e-06, - "loss": 0.126, + "learning_rate": 1.5947607383021548e-05, + "loss": 0.0807, "step": 151110 }, { "epoch": 7.05, - "learning_rate": 5.9251793164877414e-06, - "loss": 0.1974, + "learning_rate": 1.5947139313185528e-05, + "loss": 0.1666, "step": 151115 }, { "epoch": 7.05, - "learning_rate": 5.924710515212601e-06, - "loss": 0.3141, + "learning_rate": 1.5946671243349508e-05, + "loss": 0.2758, "step": 151120 }, { "epoch": 7.05, - "learning_rate": 5.924241713937462e-06, - "loss": 0.0601, + "learning_rate": 1.5946203173513488e-05, + "loss": 0.0269, "step": 151125 }, { "epoch": 7.05, - "learning_rate": 5.923772912662323e-06, - "loss": 0.0068, + "learning_rate": 1.5945735103677468e-05, + "loss": 0.0118, "step": 151130 }, { "epoch": 7.05, - "learning_rate": 5.923304111387184e-06, - "loss": 0.0358, + "learning_rate": 1.5945267033841447e-05, + "loss": 0.0236, "step": 151135 }, { "epoch": 7.05, - "learning_rate": 5.922835310112044e-06, - "loss": 0.0104, + "learning_rate": 1.594479896400543e-05, + "loss": 0.009, "step": 151140 }, { "epoch": 7.05, - "learning_rate": 5.9223665088369044e-06, - "loss": 0.0348, + "learning_rate": 1.594433089416941e-05, + "loss": 0.0377, "step": 151145 }, { "epoch": 7.05, - "learning_rate": 5.921897707561764e-06, - "loss": 0.0483, + "learning_rate": 1.594386282433339e-05, + "loss": 0.0316, "step": 151150 }, { "epoch": 7.05, - "learning_rate": 5.921428906286626e-06, - "loss": 0.0428, + "learning_rate": 1.594339475449737e-05, + "loss": 0.0963, "step": 151155 }, { "epoch": 7.05, - "learning_rate": 5.920960105011486e-06, - "loss": 0.1031, + "learning_rate": 1.5942926684661353e-05, + "loss": 0.0897, "step": 151160 }, { "epoch": 7.05, - "learning_rate": 5.920491303736347e-06, - "loss": 0.131, + "learning_rate": 1.5942458614825333e-05, + "loss": 0.1069, "step": 151165 }, { "epoch": 7.05, - "learning_rate": 5.920022502461207e-06, - "loss": 0.1877, + "learning_rate": 1.5941990544989313e-05, + "loss": 0.3304, "step": 151170 }, { "epoch": 7.05, - "learning_rate": 5.919553701186068e-06, - "loss": 0.034, + "learning_rate": 1.5941522475153293e-05, + "loss": 0.0042, "step": 151175 }, { "epoch": 7.05, - "learning_rate": 5.919084899910928e-06, - "loss": 0.0207, + "learning_rate": 1.5941054405317276e-05, + "loss": 0.0262, "step": 151180 }, { "epoch": 7.05, - "learning_rate": 5.918616098635789e-06, - "loss": 0.036, + "learning_rate": 1.5940586335481256e-05, + "loss": 0.0129, "step": 151185 }, { "epoch": 7.05, - "learning_rate": 5.918147297360649e-06, - "loss": 0.1115, + "learning_rate": 1.5940118265645232e-05, + "loss": 0.0563, "step": 151190 }, { "epoch": 7.05, - "learning_rate": 5.91767849608551e-06, - "loss": 0.0583, + "learning_rate": 1.5939650195809215e-05, + "loss": 0.009, "step": 151195 }, { "epoch": 7.06, - "learning_rate": 5.9172096948103705e-06, - "loss": 0.0538, + "learning_rate": 1.5939182125973195e-05, + "loss": 0.0532, "step": 151200 }, { "epoch": 7.06, - "learning_rate": 5.916740893535231e-06, - "loss": 0.0551, + "learning_rate": 1.5938714056137175e-05, + "loss": 0.1066, "step": 151205 }, { "epoch": 7.06, - "learning_rate": 5.916272092260091e-06, - "loss": 0.0514, + "learning_rate": 1.5938245986301155e-05, + "loss": 0.0522, "step": 151210 }, { "epoch": 7.06, - "learning_rate": 5.915803290984952e-06, - "loss": 0.0706, + "learning_rate": 1.5937777916465138e-05, + "loss": 0.115, "step": 151215 }, { "epoch": 7.06, - "learning_rate": 5.915334489709812e-06, - "loss": 0.1764, + "learning_rate": 1.5937309846629118e-05, + "loss": 0.2712, "step": 151220 }, { "epoch": 7.06, - "learning_rate": 5.9148656884346735e-06, - "loss": 0.0216, + "learning_rate": 1.5936841776793098e-05, + "loss": 0.0222, "step": 151225 }, { "epoch": 7.06, - "learning_rate": 5.9143968871595335e-06, - "loss": 0.0127, + "learning_rate": 1.5936373706957077e-05, + "loss": 0.0083, "step": 151230 }, { "epoch": 7.06, - "learning_rate": 5.913928085884394e-06, - "loss": 0.0271, + "learning_rate": 1.593590563712106e-05, + "loss": 0.0093, "step": 151235 }, { "epoch": 7.06, - "learning_rate": 5.913459284609254e-06, - "loss": 0.0607, + "learning_rate": 1.593543756728504e-05, + "loss": 0.0457, "step": 151240 }, { "epoch": 7.06, - "learning_rate": 5.912990483334116e-06, - "loss": 0.0274, + "learning_rate": 1.593496949744902e-05, + "loss": 0.0555, "step": 151245 }, { "epoch": 7.06, - "learning_rate": 5.912521682058976e-06, - "loss": 0.0378, + "learning_rate": 1.5934501427613004e-05, + "loss": 0.1243, "step": 151250 }, { "epoch": 7.06, - "learning_rate": 5.9120528807838365e-06, - "loss": 0.0655, + "learning_rate": 1.593403335777698e-05, + "loss": 0.099, "step": 151255 }, { "epoch": 7.06, - "learning_rate": 5.9115840795086965e-06, - "loss": 0.1504, + "learning_rate": 1.593356528794096e-05, + "loss": 0.1119, "step": 151260 }, { "epoch": 7.06, - "learning_rate": 5.911115278233556e-06, - "loss": 0.1067, + "learning_rate": 1.593309721810494e-05, + "loss": 0.1174, "step": 151265 }, { "epoch": 7.06, - "learning_rate": 5.910646476958418e-06, - "loss": 0.15, + "learning_rate": 1.5932629148268923e-05, + "loss": 0.148, "step": 151270 }, { "epoch": 7.06, - "learning_rate": 5.910177675683279e-06, - "loss": 0.0475, + "learning_rate": 1.5932161078432903e-05, + "loss": 0.0334, "step": 151275 }, { "epoch": 7.06, - "learning_rate": 5.909708874408139e-06, - "loss": 0.0377, + "learning_rate": 1.5931693008596882e-05, + "loss": 0.0214, "step": 151280 }, { "epoch": 7.06, - "learning_rate": 5.909240073132999e-06, - "loss": 0.0028, + "learning_rate": 1.5931224938760862e-05, + "loss": 0.0066, "step": 151285 }, { "epoch": 7.06, - "learning_rate": 5.90877127185786e-06, - "loss": 0.0475, + "learning_rate": 1.5930756868924845e-05, + "loss": 0.0208, "step": 151290 }, { "epoch": 7.06, - "learning_rate": 5.908302470582721e-06, - "loss": 0.016, + "learning_rate": 1.5930288799088825e-05, + "loss": 0.095, "step": 151295 }, { "epoch": 7.06, - "learning_rate": 5.907833669307581e-06, - "loss": 0.068, + "learning_rate": 1.5929820729252805e-05, + "loss": 0.0266, "step": 151300 }, { "epoch": 7.06, - "learning_rate": 5.907364868032441e-06, - "loss": 0.0332, + "learning_rate": 1.5929352659416785e-05, + "loss": 0.029, "step": 151305 }, { "epoch": 7.06, - "learning_rate": 5.906896066757302e-06, - "loss": 0.0551, + "learning_rate": 1.5928884589580768e-05, + "loss": 0.0922, "step": 151310 }, { "epoch": 7.06, - "learning_rate": 5.906427265482163e-06, - "loss": 0.1079, + "learning_rate": 1.5928416519744745e-05, + "loss": 0.1711, "step": 151315 }, { "epoch": 7.06, - "learning_rate": 5.905958464207023e-06, - "loss": 0.167, + "learning_rate": 1.5927948449908724e-05, + "loss": 0.1675, "step": 151320 }, { "epoch": 7.06, - "learning_rate": 5.905489662931883e-06, - "loss": 0.0676, + "learning_rate": 1.5927480380072708e-05, + "loss": 0.0276, "step": 151325 }, { "epoch": 7.06, - "learning_rate": 5.905020861656744e-06, - "loss": 0.0151, + "learning_rate": 1.5927012310236687e-05, + "loss": 0.0297, "step": 151330 }, { "epoch": 7.06, - "learning_rate": 5.904552060381604e-06, - "loss": 0.0426, + "learning_rate": 1.5926544240400667e-05, + "loss": 0.0235, "step": 151335 }, { "epoch": 7.06, - "learning_rate": 5.9040832591064656e-06, - "loss": 0.0455, + "learning_rate": 1.5926076170564647e-05, + "loss": 0.0341, "step": 151340 }, { "epoch": 7.06, - "learning_rate": 5.9036144578313255e-06, - "loss": 0.0413, + "learning_rate": 1.592560810072863e-05, + "loss": 0.048, "step": 151345 }, { "epoch": 7.06, - "learning_rate": 5.903145656556186e-06, - "loss": 0.0329, + "learning_rate": 1.592514003089261e-05, + "loss": 0.1623, "step": 151350 }, { "epoch": 7.06, - "learning_rate": 5.902676855281046e-06, - "loss": 0.0368, + "learning_rate": 1.592467196105659e-05, + "loss": 0.0712, "step": 151355 }, { "epoch": 7.06, - "learning_rate": 5.902208054005908e-06, - "loss": 0.1257, + "learning_rate": 1.592420389122057e-05, + "loss": 0.0601, "step": 151360 }, { "epoch": 7.06, - "learning_rate": 5.901739252730769e-06, - "loss": 0.1954, + "learning_rate": 1.5923735821384553e-05, + "loss": 0.1098, "step": 151365 }, { "epoch": 7.06, - "learning_rate": 5.9012704514556286e-06, - "loss": 0.2792, + "learning_rate": 1.5923267751548533e-05, + "loss": 0.267, "step": 151370 }, { "epoch": 7.06, - "learning_rate": 5.9008016501804885e-06, - "loss": 0.012, + "learning_rate": 1.5922799681712513e-05, + "loss": 0.0141, "step": 151375 }, { "epoch": 7.06, - "learning_rate": 5.900332848905349e-06, - "loss": 0.0328, + "learning_rate": 1.5922331611876492e-05, + "loss": 0.0265, "step": 151380 }, { "epoch": 7.06, - "learning_rate": 5.899864047630211e-06, - "loss": 0.0399, + "learning_rate": 1.5921863542040472e-05, + "loss": 0.022, "step": 151385 }, { "epoch": 7.06, - "learning_rate": 5.899395246355071e-06, - "loss": 0.0148, + "learning_rate": 1.5921395472204452e-05, + "loss": 0.0314, "step": 151390 }, { "epoch": 7.06, - "learning_rate": 5.898926445079931e-06, - "loss": 0.0368, + "learning_rate": 1.5920927402368432e-05, + "loss": 0.0767, "step": 151395 }, { "epoch": 7.06, - "learning_rate": 5.8984576438047915e-06, - "loss": 0.0597, + "learning_rate": 1.5920459332532415e-05, + "loss": 0.0594, "step": 151400 }, { "epoch": 7.06, - "learning_rate": 5.8979888425296515e-06, - "loss": 0.0313, + "learning_rate": 1.5919991262696395e-05, + "loss": 0.0527, "step": 151405 }, { "epoch": 7.06, - "learning_rate": 5.897520041254513e-06, - "loss": 0.1095, + "learning_rate": 1.5919523192860375e-05, + "loss": 0.0532, "step": 151410 }, { "epoch": 7.07, - "learning_rate": 5.897051239979373e-06, - "loss": 0.1494, + "learning_rate": 1.5919055123024354e-05, + "loss": 0.1983, "step": 151415 }, { "epoch": 7.07, - "learning_rate": 5.896582438704234e-06, - "loss": 0.1948, + "learning_rate": 1.5918587053188338e-05, + "loss": 0.1963, "step": 151420 }, { "epoch": 7.07, - "learning_rate": 5.896113637429094e-06, - "loss": 0.0072, + "learning_rate": 1.5918118983352317e-05, + "loss": 0.0087, "step": 151425 }, { "epoch": 7.07, - "learning_rate": 5.895644836153955e-06, - "loss": 0.0314, + "learning_rate": 1.5917650913516297e-05, + "loss": 0.0197, "step": 151430 }, { "epoch": 7.07, - "learning_rate": 5.895176034878815e-06, - "loss": 0.0388, + "learning_rate": 1.591718284368028e-05, + "loss": 0.0128, "step": 151435 }, { "epoch": 7.07, - "learning_rate": 5.894707233603676e-06, - "loss": 0.0252, + "learning_rate": 1.591671477384426e-05, + "loss": 0.0775, "step": 151440 }, { "epoch": 7.07, - "learning_rate": 5.894238432328536e-06, - "loss": 0.0377, + "learning_rate": 1.5916246704008237e-05, + "loss": 0.028, "step": 151445 }, { "epoch": 7.07, - "learning_rate": 5.893769631053397e-06, - "loss": 0.0647, + "learning_rate": 1.5915778634172217e-05, + "loss": 0.0833, "step": 151450 }, { "epoch": 7.07, - "learning_rate": 5.893300829778258e-06, - "loss": 0.0663, + "learning_rate": 1.59153105643362e-05, + "loss": 0.0412, "step": 151455 }, { "epoch": 7.07, - "learning_rate": 5.892832028503118e-06, - "loss": 0.1472, + "learning_rate": 1.591484249450018e-05, + "loss": 0.051, "step": 151460 }, { "epoch": 7.07, - "learning_rate": 5.892363227227978e-06, - "loss": 0.147, + "learning_rate": 1.591437442466416e-05, + "loss": 0.1013, "step": 151465 }, { "epoch": 7.07, - "learning_rate": 5.891894425952839e-06, - "loss": 0.1392, + "learning_rate": 1.591390635482814e-05, + "loss": 0.091, "step": 151470 }, { "epoch": 7.07, - "learning_rate": 5.891425624677699e-06, - "loss": 0.0202, + "learning_rate": 1.5913438284992122e-05, + "loss": 0.0365, "step": 151475 }, { "epoch": 7.07, - "learning_rate": 5.890956823402561e-06, - "loss": 0.0388, + "learning_rate": 1.5912970215156102e-05, + "loss": 0.0206, "step": 151480 }, { "epoch": 7.07, - "learning_rate": 5.890488022127421e-06, - "loss": 0.0289, + "learning_rate": 1.5912502145320082e-05, + "loss": 0.0271, "step": 151485 }, { "epoch": 7.07, - "learning_rate": 5.890019220852281e-06, - "loss": 0.0071, + "learning_rate": 1.5912034075484062e-05, + "loss": 0.0345, "step": 151490 }, { "epoch": 7.07, - "learning_rate": 5.889550419577141e-06, - "loss": 0.0561, + "learning_rate": 1.5911566005648045e-05, + "loss": 0.0789, "step": 151495 }, { "epoch": 7.07, - "learning_rate": 5.889081618302003e-06, - "loss": 0.089, + "learning_rate": 1.5911097935812025e-05, + "loss": 0.0521, "step": 151500 }, { "epoch": 7.07, - "learning_rate": 5.888612817026863e-06, - "loss": 0.074, + "learning_rate": 1.5910629865976e-05, + "loss": 0.0436, "step": 151505 }, { "epoch": 7.07, - "learning_rate": 5.888144015751724e-06, - "loss": 0.0964, + "learning_rate": 1.5910161796139985e-05, + "loss": 0.1194, "step": 151510 }, { "epoch": 7.07, - "learning_rate": 5.8876752144765836e-06, - "loss": 0.2024, + "learning_rate": 1.5909693726303964e-05, + "loss": 0.1607, "step": 151515 }, { "epoch": 7.07, - "learning_rate": 5.8872064132014435e-06, - "loss": 0.1997, + "learning_rate": 1.5909225656467944e-05, + "loss": 0.1932, "step": 151520 }, { "epoch": 7.07, - "learning_rate": 5.886737611926305e-06, - "loss": 0.0108, + "learning_rate": 1.5908757586631924e-05, + "loss": 0.0288, "step": 151525 }, { "epoch": 7.07, - "learning_rate": 5.886268810651166e-06, - "loss": 0.0108, + "learning_rate": 1.5908289516795907e-05, + "loss": 0.0011, "step": 151530 }, { "epoch": 7.07, - "learning_rate": 5.885800009376026e-06, - "loss": 0.0201, + "learning_rate": 1.5907821446959887e-05, + "loss": 0.0104, "step": 151535 }, { "epoch": 7.07, - "learning_rate": 5.885331208100887e-06, - "loss": 0.0419, + "learning_rate": 1.5907353377123867e-05, + "loss": 0.0576, "step": 151540 }, { "epoch": 7.07, - "learning_rate": 5.8848624068257466e-06, - "loss": 0.0627, + "learning_rate": 1.5906885307287847e-05, + "loss": 0.0075, "step": 151545 }, { "epoch": 7.07, - "learning_rate": 5.884393605550608e-06, - "loss": 0.0472, + "learning_rate": 1.590641723745183e-05, + "loss": 0.0961, "step": 151550 }, { "epoch": 7.07, - "learning_rate": 5.883924804275468e-06, - "loss": 0.0461, + "learning_rate": 1.590594916761581e-05, + "loss": 0.0767, "step": 151555 }, { "epoch": 7.07, - "learning_rate": 5.883456003000329e-06, - "loss": 0.1021, + "learning_rate": 1.590548109777979e-05, + "loss": 0.1126, "step": 151560 }, { "epoch": 7.07, - "learning_rate": 5.882987201725189e-06, - "loss": 0.2353, + "learning_rate": 1.5905013027943773e-05, + "loss": 0.1507, "step": 151565 }, { "epoch": 7.07, - "learning_rate": 5.8825184004500505e-06, - "loss": 0.1629, + "learning_rate": 1.590454495810775e-05, + "loss": 0.1067, "step": 151570 }, { "epoch": 7.07, - "learning_rate": 5.88204959917491e-06, - "loss": 0.0118, + "learning_rate": 1.590407688827173e-05, + "loss": 0.0361, "step": 151575 }, { "epoch": 7.07, - "learning_rate": 5.881580797899771e-06, - "loss": 0.0795, + "learning_rate": 1.590360881843571e-05, + "loss": 0.0334, "step": 151580 }, { "epoch": 7.07, - "learning_rate": 5.881111996624631e-06, - "loss": 0.0642, + "learning_rate": 1.5903140748599692e-05, + "loss": 0.0286, "step": 151585 }, { "epoch": 7.07, - "learning_rate": 5.880643195349491e-06, - "loss": 0.0137, + "learning_rate": 1.5902672678763672e-05, + "loss": 0.0061, "step": 151590 }, { "epoch": 7.07, - "learning_rate": 5.880174394074353e-06, - "loss": 0.0772, + "learning_rate": 1.590220460892765e-05, + "loss": 0.0187, "step": 151595 }, { "epoch": 7.07, - "learning_rate": 5.8797055927992135e-06, - "loss": 0.0475, + "learning_rate": 1.590173653909163e-05, + "loss": 0.0308, "step": 151600 }, { "epoch": 7.07, - "learning_rate": 5.879236791524073e-06, - "loss": 0.0311, + "learning_rate": 1.5901268469255615e-05, + "loss": 0.0636, "step": 151605 }, { "epoch": 7.07, - "learning_rate": 5.878767990248933e-06, - "loss": 0.0852, + "learning_rate": 1.5900800399419594e-05, + "loss": 0.0517, "step": 151610 }, { "epoch": 7.07, - "learning_rate": 5.878299188973795e-06, - "loss": 0.1154, + "learning_rate": 1.5900332329583574e-05, + "loss": 0.1355, "step": 151615 }, { "epoch": 7.07, - "learning_rate": 5.877830387698656e-06, - "loss": 0.133, + "learning_rate": 1.5899864259747557e-05, + "loss": 0.1744, "step": 151620 }, { "epoch": 7.08, - "learning_rate": 5.877361586423516e-06, - "loss": 0.0498, + "learning_rate": 1.5899396189911537e-05, + "loss": 0.033, "step": 151625 }, { "epoch": 7.08, - "learning_rate": 5.876892785148376e-06, - "loss": 0.0159, + "learning_rate": 1.5898928120075517e-05, + "loss": 0.0125, "step": 151630 }, { "epoch": 7.08, - "learning_rate": 5.876423983873236e-06, - "loss": 0.0254, + "learning_rate": 1.5898460050239494e-05, + "loss": 0.0332, "step": 151635 }, { "epoch": 7.08, - "learning_rate": 5.875955182598098e-06, - "loss": 0.0204, + "learning_rate": 1.5897991980403477e-05, + "loss": 0.0225, "step": 151640 }, { "epoch": 7.08, - "learning_rate": 5.875486381322958e-06, - "loss": 0.0194, + "learning_rate": 1.5897523910567457e-05, + "loss": 0.0303, "step": 151645 }, { "epoch": 7.08, - "learning_rate": 5.875017580047818e-06, - "loss": 0.073, + "learning_rate": 1.5897055840731436e-05, + "loss": 0.068, "step": 151650 }, { "epoch": 7.08, - "learning_rate": 5.874548778772679e-06, - "loss": 0.1306, + "learning_rate": 1.5896587770895416e-05, + "loss": 0.0562, "step": 151655 }, { "epoch": 7.08, - "learning_rate": 5.874079977497539e-06, - "loss": 0.0361, + "learning_rate": 1.58961197010594e-05, + "loss": 0.025, "step": 151660 }, { "epoch": 7.08, - "learning_rate": 5.8736111762224e-06, - "loss": 0.1557, + "learning_rate": 1.589565163122338e-05, + "loss": 0.0744, "step": 151665 }, { "epoch": 7.08, - "learning_rate": 5.87314237494726e-06, - "loss": 0.1242, + "learning_rate": 1.589518356138736e-05, + "loss": 0.1012, "step": 151670 }, { "epoch": 7.08, - "learning_rate": 5.872673573672121e-06, - "loss": 0.0328, + "learning_rate": 1.5894715491551342e-05, + "loss": 0.0158, "step": 151675 }, { "epoch": 7.08, - "learning_rate": 5.872204772396981e-06, - "loss": 0.0285, + "learning_rate": 1.5894247421715322e-05, + "loss": 0.0502, "step": 151680 }, { "epoch": 7.08, - "learning_rate": 5.8717359711218425e-06, - "loss": 0.0075, + "learning_rate": 1.5893779351879302e-05, + "loss": 0.0138, "step": 151685 }, { "epoch": 7.08, - "learning_rate": 5.8712671698467024e-06, - "loss": 0.0365, + "learning_rate": 1.589331128204328e-05, + "loss": 0.0515, "step": 151690 }, { "epoch": 7.08, - "learning_rate": 5.870798368571563e-06, - "loss": 0.0545, + "learning_rate": 1.589284321220726e-05, + "loss": 0.0272, "step": 151695 }, { "epoch": 7.08, - "learning_rate": 5.870329567296423e-06, - "loss": 0.086, + "learning_rate": 1.589237514237124e-05, + "loss": 0.062, "step": 151700 }, { "epoch": 7.08, - "learning_rate": 5.869860766021284e-06, - "loss": 0.0916, + "learning_rate": 1.589190707253522e-05, + "loss": 0.0469, "step": 151705 }, { "epoch": 7.08, - "learning_rate": 5.869391964746145e-06, - "loss": 0.0752, + "learning_rate": 1.58914390026992e-05, + "loss": 0.0798, "step": 151710 }, { "epoch": 7.08, - "learning_rate": 5.8689231634710055e-06, - "loss": 0.0815, + "learning_rate": 1.5890970932863184e-05, + "loss": 0.1906, "step": 151715 }, { "epoch": 7.08, - "learning_rate": 5.868454362195865e-06, - "loss": 0.1756, + "learning_rate": 1.5890502863027164e-05, + "loss": 0.1372, "step": 151720 }, { "epoch": 7.08, - "learning_rate": 5.867985560920726e-06, - "loss": 0.0356, + "learning_rate": 1.5890034793191144e-05, + "loss": 0.0594, "step": 151725 }, { "epoch": 7.08, - "learning_rate": 5.867516759645586e-06, - "loss": 0.0066, + "learning_rate": 1.5889566723355124e-05, + "loss": 0.0076, "step": 151730 }, { "epoch": 7.08, - "learning_rate": 5.867047958370448e-06, - "loss": 0.029, + "learning_rate": 1.5889098653519107e-05, + "loss": 0.0299, "step": 151735 }, { "epoch": 7.08, - "learning_rate": 5.866579157095308e-06, - "loss": 0.0502, + "learning_rate": 1.5888630583683087e-05, + "loss": 0.0811, "step": 151740 }, { "epoch": 7.08, - "learning_rate": 5.8661103558201685e-06, - "loss": 0.0111, + "learning_rate": 1.5888162513847066e-05, + "loss": 0.0268, "step": 151745 }, { "epoch": 7.08, - "learning_rate": 5.865641554545028e-06, - "loss": 0.0548, + "learning_rate": 1.588769444401105e-05, + "loss": 0.0936, "step": 151750 }, { "epoch": 7.08, - "learning_rate": 5.86517275326989e-06, - "loss": 0.116, + "learning_rate": 1.588722637417503e-05, + "loss": 0.0326, "step": 151755 }, { "epoch": 7.08, - "learning_rate": 5.86470395199475e-06, - "loss": 0.0498, + "learning_rate": 1.5886758304339006e-05, + "loss": 0.1089, "step": 151760 }, { "epoch": 7.08, - "learning_rate": 5.864235150719611e-06, - "loss": 0.1622, + "learning_rate": 1.5886290234502986e-05, + "loss": 0.1467, "step": 151765 }, { "epoch": 7.08, - "learning_rate": 5.863766349444471e-06, - "loss": 0.151, + "learning_rate": 1.588582216466697e-05, + "loss": 0.1392, "step": 151770 }, { "epoch": 7.08, - "learning_rate": 5.8632975481693315e-06, - "loss": 0.0143, + "learning_rate": 1.588535409483095e-05, + "loss": 0.0324, "step": 151775 }, { "epoch": 7.08, - "learning_rate": 5.862828746894192e-06, - "loss": 0.039, + "learning_rate": 1.588488602499493e-05, + "loss": 0.0417, "step": 151780 }, { "epoch": 7.08, - "learning_rate": 5.862359945619053e-06, - "loss": 0.0382, + "learning_rate": 1.588441795515891e-05, + "loss": 0.0805, "step": 151785 }, { "epoch": 7.08, - "learning_rate": 5.861891144343913e-06, - "loss": 0.013, + "learning_rate": 1.588394988532289e-05, + "loss": 0.0251, "step": 151790 }, { "epoch": 7.08, - "learning_rate": 5.861422343068774e-06, - "loss": 0.029, + "learning_rate": 1.588348181548687e-05, + "loss": 0.0263, "step": 151795 }, { "epoch": 7.08, - "learning_rate": 5.860953541793634e-06, - "loss": 0.0384, + "learning_rate": 1.588301374565085e-05, + "loss": 0.091, "step": 151800 }, { "epoch": 7.08, - "learning_rate": 5.860484740518495e-06, - "loss": 0.0865, + "learning_rate": 1.5882545675814834e-05, + "loss": 0.0621, "step": 151805 }, { "epoch": 7.08, - "learning_rate": 5.860015939243355e-06, - "loss": 0.0838, + "learning_rate": 1.5882077605978814e-05, + "loss": 0.0707, "step": 151810 }, { "epoch": 7.08, - "learning_rate": 5.859547137968216e-06, - "loss": 0.1051, + "learning_rate": 1.5881609536142794e-05, + "loss": 0.3313, "step": 151815 }, { "epoch": 7.08, - "learning_rate": 5.859078336693076e-06, - "loss": 0.2981, + "learning_rate": 1.5881141466306774e-05, + "loss": 0.231, "step": 151820 }, { "epoch": 7.08, - "learning_rate": 5.8586095354179376e-06, - "loss": 0.0807, + "learning_rate": 1.5880673396470754e-05, + "loss": 0.0139, "step": 151825 }, { "epoch": 7.08, - "learning_rate": 5.8581407341427975e-06, - "loss": 0.0197, + "learning_rate": 1.5880205326634734e-05, + "loss": 0.0199, "step": 151830 }, { "epoch": 7.08, - "learning_rate": 5.857671932867658e-06, - "loss": 0.0431, + "learning_rate": 1.5879737256798713e-05, + "loss": 0.0419, "step": 151835 }, { "epoch": 7.09, - "learning_rate": 5.857203131592518e-06, - "loss": 0.029, + "learning_rate": 1.5879269186962693e-05, + "loss": 0.0755, "step": 151840 }, { "epoch": 7.09, - "learning_rate": 5.856734330317378e-06, - "loss": 0.0112, + "learning_rate": 1.5878801117126676e-05, + "loss": 0.0043, "step": 151845 }, { "epoch": 7.09, - "learning_rate": 5.85626552904224e-06, - "loss": 0.0896, + "learning_rate": 1.5878333047290656e-05, + "loss": 0.0155, "step": 151850 }, { "epoch": 7.09, - "learning_rate": 5.8557967277671006e-06, - "loss": 0.0644, + "learning_rate": 1.5877864977454636e-05, + "loss": 0.0971, "step": 151855 }, { "epoch": 7.09, - "learning_rate": 5.8553279264919605e-06, - "loss": 0.085, + "learning_rate": 1.587739690761862e-05, + "loss": 0.0541, "step": 151860 }, { "epoch": 7.09, - "learning_rate": 5.8548591252168204e-06, - "loss": 0.1411, + "learning_rate": 1.58769288377826e-05, + "loss": 0.1466, "step": 151865 }, { "epoch": 7.09, - "learning_rate": 5.854390323941681e-06, - "loss": 0.1721, + "learning_rate": 1.587646076794658e-05, + "loss": 0.2546, "step": 151870 }, { "epoch": 7.09, - "learning_rate": 5.853921522666543e-06, - "loss": 0.0307, + "learning_rate": 1.587599269811056e-05, + "loss": 0.0385, "step": 151875 }, { "epoch": 7.09, - "learning_rate": 5.853452721391403e-06, - "loss": 0.0301, + "learning_rate": 1.5875524628274542e-05, + "loss": 0.0128, "step": 151880 }, { "epoch": 7.09, - "learning_rate": 5.852983920116263e-06, - "loss": 0.0309, + "learning_rate": 1.5875056558438518e-05, + "loss": 0.0261, "step": 151885 }, { "epoch": 7.09, - "learning_rate": 5.8525151188411235e-06, - "loss": 0.0264, + "learning_rate": 1.5874588488602498e-05, + "loss": 0.0688, "step": 151890 }, { "epoch": 7.09, - "learning_rate": 5.852046317565985e-06, - "loss": 0.0559, + "learning_rate": 1.5874120418766478e-05, + "loss": 0.0409, "step": 151895 }, { "epoch": 7.09, - "learning_rate": 5.851577516290845e-06, - "loss": 0.0593, + "learning_rate": 1.587365234893046e-05, + "loss": 0.1016, "step": 151900 }, { "epoch": 7.09, - "learning_rate": 5.851108715015705e-06, - "loss": 0.0784, + "learning_rate": 1.587318427909444e-05, + "loss": 0.1315, "step": 151905 }, { "epoch": 7.09, - "learning_rate": 5.850639913740566e-06, - "loss": 0.1592, + "learning_rate": 1.587271620925842e-05, + "loss": 0.0758, "step": 151910 }, { "epoch": 7.09, - "learning_rate": 5.850171112465426e-06, - "loss": 0.1007, + "learning_rate": 1.58722481394224e-05, + "loss": 0.1444, "step": 151915 }, { "epoch": 7.09, - "learning_rate": 5.849702311190287e-06, - "loss": 0.126, + "learning_rate": 1.5871780069586384e-05, + "loss": 0.2396, "step": 151920 }, { "epoch": 7.09, - "learning_rate": 5.849233509915147e-06, - "loss": 0.0238, + "learning_rate": 1.5871311999750364e-05, + "loss": 0.0471, "step": 151925 }, { "epoch": 7.09, - "learning_rate": 5.848764708640008e-06, - "loss": 0.0276, + "learning_rate": 1.5870843929914343e-05, + "loss": 0.0494, "step": 151930 }, { "epoch": 7.09, - "learning_rate": 5.848295907364868e-06, - "loss": 0.014, + "learning_rate": 1.5870375860078327e-05, + "loss": 0.0589, "step": 151935 }, { "epoch": 7.09, - "learning_rate": 5.84782710608973e-06, - "loss": 0.0092, + "learning_rate": 1.5869907790242306e-05, + "loss": 0.0234, "step": 151940 }, { "epoch": 7.09, - "learning_rate": 5.8473583048145895e-06, - "loss": 0.0566, + "learning_rate": 1.5869439720406286e-05, + "loss": 0.0264, "step": 151945 }, { "epoch": 7.09, - "learning_rate": 5.84688950353945e-06, - "loss": 0.0265, + "learning_rate": 1.5868971650570263e-05, + "loss": 0.0297, "step": 151950 }, { "epoch": 7.09, - "learning_rate": 5.84642070226431e-06, - "loss": 0.0592, + "learning_rate": 1.5868503580734246e-05, + "loss": 0.1622, "step": 151955 }, { "epoch": 7.09, - "learning_rate": 5.845951900989171e-06, - "loss": 0.164, + "learning_rate": 1.5868035510898226e-05, + "loss": 0.0842, "step": 151960 }, { "epoch": 7.09, - "learning_rate": 5.845483099714032e-06, - "loss": 0.1185, + "learning_rate": 1.5867567441062206e-05, + "loss": 0.1283, "step": 151965 }, { "epoch": 7.09, - "learning_rate": 5.845014298438893e-06, - "loss": 0.1256, + "learning_rate": 1.5867099371226185e-05, + "loss": 0.1269, "step": 151970 }, { "epoch": 7.09, - "learning_rate": 5.8445454971637525e-06, - "loss": 0.0371, + "learning_rate": 1.586663130139017e-05, + "loss": 0.0602, "step": 151975 }, { "epoch": 7.09, - "learning_rate": 5.844076695888613e-06, - "loss": 0.0221, + "learning_rate": 1.586616323155415e-05, + "loss": 0.0448, "step": 151980 }, { "epoch": 7.09, - "learning_rate": 5.843607894613473e-06, - "loss": 0.0193, + "learning_rate": 1.5865695161718128e-05, + "loss": 0.0382, "step": 151985 }, { "epoch": 7.09, - "learning_rate": 5.843139093338335e-06, - "loss": 0.0216, + "learning_rate": 1.586522709188211e-05, + "loss": 0.0435, "step": 151990 }, { "epoch": 7.09, - "learning_rate": 5.842670292063195e-06, - "loss": 0.0171, + "learning_rate": 1.586475902204609e-05, + "loss": 0.0247, "step": 151995 }, { "epoch": 7.09, - "learning_rate": 5.842201490788056e-06, - "loss": 0.077, + "learning_rate": 1.586429095221007e-05, + "loss": 0.029, "step": 152000 }, { "epoch": 7.09, - "learning_rate": 5.8417326895129155e-06, - "loss": 0.0853, + "learning_rate": 1.586382288237405e-05, + "loss": 0.0808, "step": 152005 }, { "epoch": 7.09, - "learning_rate": 5.841263888237777e-06, - "loss": 0.0512, + "learning_rate": 1.586335481253803e-05, + "loss": 0.0808, "step": 152010 }, { "epoch": 7.09, - "learning_rate": 5.840795086962637e-06, - "loss": 0.1721, + "learning_rate": 1.586288674270201e-05, + "loss": 0.1623, "step": 152015 }, { "epoch": 7.09, - "learning_rate": 5.840326285687498e-06, - "loss": 0.163, + "learning_rate": 1.586241867286599e-05, + "loss": 0.1579, "step": 152020 }, { "epoch": 7.09, - "learning_rate": 5.839857484412358e-06, - "loss": 0.073, + "learning_rate": 1.586195060302997e-05, + "loss": 0.031, "step": 152025 }, { "epoch": 7.09, - "learning_rate": 5.8393886831372186e-06, - "loss": 0.001, + "learning_rate": 1.5861482533193953e-05, + "loss": 0.0273, "step": 152030 }, { "epoch": 7.09, - "learning_rate": 5.838919881862079e-06, - "loss": 0.0396, + "learning_rate": 1.5861014463357933e-05, + "loss": 0.0273, "step": 152035 }, { "epoch": 7.09, - "learning_rate": 5.83845108058694e-06, - "loss": 0.0235, + "learning_rate": 1.5860546393521913e-05, + "loss": 0.009, "step": 152040 }, { "epoch": 7.09, - "learning_rate": 5.8379822793118e-06, - "loss": 0.0192, + "learning_rate": 1.5860078323685896e-05, + "loss": 0.0233, "step": 152045 }, { "epoch": 7.09, - "learning_rate": 5.837513478036661e-06, - "loss": 0.0514, + "learning_rate": 1.5859610253849876e-05, + "loss": 0.022, "step": 152050 }, { "epoch": 7.1, - "learning_rate": 5.837044676761521e-06, - "loss": 0.0504, + "learning_rate": 1.5859142184013856e-05, + "loss": 0.0478, "step": 152055 }, { "epoch": 7.1, - "learning_rate": 5.836575875486382e-06, - "loss": 0.0403, + "learning_rate": 1.5858674114177836e-05, + "loss": 0.0569, "step": 152060 }, { "epoch": 7.1, - "learning_rate": 5.836107074211242e-06, - "loss": 0.1927, + "learning_rate": 1.585820604434182e-05, + "loss": 0.1519, "step": 152065 }, { "epoch": 7.1, - "learning_rate": 5.835638272936103e-06, - "loss": 0.2512, + "learning_rate": 1.58577379745058e-05, + "loss": 0.122, "step": 152070 }, { "epoch": 7.1, - "learning_rate": 5.835169471660963e-06, - "loss": 0.0474, + "learning_rate": 1.5857269904669775e-05, + "loss": 0.0324, "step": 152075 }, { "epoch": 7.1, - "learning_rate": 5.834700670385825e-06, - "loss": 0.0056, + "learning_rate": 1.5856801834833755e-05, + "loss": 0.0095, "step": 152080 }, { "epoch": 7.1, - "learning_rate": 5.834231869110685e-06, - "loss": 0.0163, + "learning_rate": 1.5856333764997738e-05, + "loss": 0.0299, "step": 152085 }, { "epoch": 7.1, - "learning_rate": 5.833763067835545e-06, - "loss": 0.0606, + "learning_rate": 1.5855865695161718e-05, + "loss": 0.0611, "step": 152090 }, { "epoch": 7.1, - "learning_rate": 5.833294266560405e-06, - "loss": 0.0493, + "learning_rate": 1.5855397625325698e-05, + "loss": 0.0392, "step": 152095 }, { "epoch": 7.1, - "learning_rate": 5.832825465285265e-06, - "loss": 0.0405, + "learning_rate": 1.5854929555489678e-05, + "loss": 0.0375, "step": 152100 }, { "epoch": 7.1, - "learning_rate": 5.832356664010127e-06, - "loss": 0.0417, + "learning_rate": 1.585446148565366e-05, + "loss": 0.0719, "step": 152105 }, { "epoch": 7.1, - "learning_rate": 5.831887862734988e-06, - "loss": 0.0577, + "learning_rate": 1.585399341581764e-05, + "loss": 0.059, "step": 152110 }, { "epoch": 7.1, - "learning_rate": 5.831419061459848e-06, - "loss": 0.0998, + "learning_rate": 1.585352534598162e-05, + "loss": 0.0893, "step": 152115 }, { "epoch": 7.1, - "learning_rate": 5.8309502601847075e-06, - "loss": 0.3044, + "learning_rate": 1.5853057276145604e-05, + "loss": 0.2347, "step": 152120 }, { "epoch": 7.1, - "learning_rate": 5.830481458909568e-06, - "loss": 0.0147, + "learning_rate": 1.5852589206309583e-05, + "loss": 0.0145, "step": 152125 }, { "epoch": 7.1, - "learning_rate": 5.83001265763443e-06, - "loss": 0.0156, + "learning_rate": 1.5852121136473563e-05, + "loss": 0.0151, "step": 152130 }, { "epoch": 7.1, - "learning_rate": 5.82954385635929e-06, - "loss": 0.0126, + "learning_rate": 1.5851653066637543e-05, + "loss": 0.023, "step": 152135 }, { "epoch": 7.1, - "learning_rate": 5.82907505508415e-06, - "loss": 0.0535, + "learning_rate": 1.5851184996801523e-05, + "loss": 0.0275, "step": 152140 }, { "epoch": 7.1, - "learning_rate": 5.828606253809011e-06, - "loss": 0.0226, + "learning_rate": 1.5850716926965503e-05, + "loss": 0.0193, "step": 152145 }, { "epoch": 7.1, - "learning_rate": 5.828137452533872e-06, - "loss": 0.0296, + "learning_rate": 1.5850248857129483e-05, + "loss": 0.0623, "step": 152150 }, { "epoch": 7.1, - "learning_rate": 5.827668651258732e-06, - "loss": 0.0762, + "learning_rate": 1.5849780787293462e-05, + "loss": 0.0942, "step": 152155 }, { "epoch": 7.1, - "learning_rate": 5.827199849983592e-06, - "loss": 0.1015, + "learning_rate": 1.5849312717457446e-05, + "loss": 0.0793, "step": 152160 }, { "epoch": 7.1, - "learning_rate": 5.826731048708453e-06, - "loss": 0.0991, + "learning_rate": 1.5848844647621425e-05, + "loss": 0.1046, "step": 152165 }, { "epoch": 7.1, - "learning_rate": 5.826262247433313e-06, - "loss": 0.1979, + "learning_rate": 1.5848376577785405e-05, + "loss": 0.163, "step": 152170 }, { "epoch": 7.1, - "learning_rate": 5.8257934461581744e-06, - "loss": 0.0203, + "learning_rate": 1.584790850794939e-05, + "loss": 0.0275, "step": 152175 }, { "epoch": 7.1, - "learning_rate": 5.825324644883034e-06, - "loss": 0.0315, + "learning_rate": 1.5847440438113368e-05, + "loss": 0.0169, "step": 152180 }, { "epoch": 7.1, - "learning_rate": 5.824855843607895e-06, - "loss": 0.0387, + "learning_rate": 1.5846972368277348e-05, + "loss": 0.0093, "step": 152185 }, { "epoch": 7.1, - "learning_rate": 5.824387042332755e-06, - "loss": 0.0357, + "learning_rate": 1.5846504298441328e-05, + "loss": 0.0565, "step": 152190 }, { "epoch": 7.1, - "learning_rate": 5.823918241057616e-06, - "loss": 0.0457, + "learning_rate": 1.584603622860531e-05, + "loss": 0.0919, "step": 152195 }, { "epoch": 7.1, - "learning_rate": 5.823449439782477e-06, - "loss": 0.069, + "learning_rate": 1.5845568158769287e-05, + "loss": 0.0395, "step": 152200 }, { "epoch": 7.1, - "learning_rate": 5.822980638507337e-06, - "loss": 0.0632, + "learning_rate": 1.5845100088933267e-05, + "loss": 0.0959, "step": 152205 }, { "epoch": 7.1, - "learning_rate": 5.822511837232197e-06, - "loss": 0.1296, + "learning_rate": 1.5844632019097247e-05, + "loss": 0.0219, "step": 152210 }, { "epoch": 7.1, - "learning_rate": 5.822043035957058e-06, - "loss": 0.1362, + "learning_rate": 1.584416394926123e-05, + "loss": 0.2429, "step": 152215 }, { "epoch": 7.1, - "learning_rate": 5.821574234681919e-06, - "loss": 0.1641, + "learning_rate": 1.584369587942521e-05, + "loss": 0.1893, "step": 152220 }, { "epoch": 7.1, - "learning_rate": 5.82110543340678e-06, - "loss": 0.0037, + "learning_rate": 1.584322780958919e-05, + "loss": 0.0397, "step": 152225 }, { "epoch": 7.1, - "learning_rate": 5.82063663213164e-06, - "loss": 0.0329, + "learning_rate": 1.5842759739753173e-05, + "loss": 0.0203, "step": 152230 }, { "epoch": 7.1, - "learning_rate": 5.8201678308565e-06, - "loss": 0.0104, + "learning_rate": 1.5842291669917153e-05, + "loss": 0.047, "step": 152235 }, { "epoch": 7.1, - "learning_rate": 5.81969902958136e-06, - "loss": 0.0702, + "learning_rate": 1.5841823600081133e-05, + "loss": 0.0325, "step": 152240 }, { "epoch": 7.1, - "learning_rate": 5.819230228306222e-06, - "loss": 0.0629, + "learning_rate": 1.5841355530245113e-05, + "loss": 0.0378, "step": 152245 }, { "epoch": 7.1, - "learning_rate": 5.818761427031082e-06, - "loss": 0.04, + "learning_rate": 1.5840887460409096e-05, + "loss": 0.0714, "step": 152250 }, { "epoch": 7.1, - "learning_rate": 5.818292625755943e-06, - "loss": 0.0992, + "learning_rate": 1.5840419390573076e-05, + "loss": 0.0638, "step": 152255 }, { "epoch": 7.1, - "learning_rate": 5.817823824480803e-06, - "loss": 0.0627, + "learning_rate": 1.5839951320737055e-05, + "loss": 0.062, "step": 152260 }, { "epoch": 7.1, - "learning_rate": 5.817355023205664e-06, - "loss": 0.1617, + "learning_rate": 1.5839483250901032e-05, + "loss": 0.0933, "step": 152265 }, { "epoch": 7.11, - "learning_rate": 5.816886221930524e-06, - "loss": 0.1012, + "learning_rate": 1.5839015181065015e-05, + "loss": 0.1548, "step": 152270 }, { "epoch": 7.11, - "learning_rate": 5.816417420655385e-06, - "loss": 0.0292, + "learning_rate": 1.5838547111228995e-05, + "loss": 0.0078, "step": 152275 }, { "epoch": 7.11, - "learning_rate": 5.815948619380245e-06, - "loss": 0.0289, + "learning_rate": 1.5838079041392975e-05, + "loss": 0.0121, "step": 152280 }, { "epoch": 7.11, - "learning_rate": 5.815479818105106e-06, - "loss": 0.0431, + "learning_rate": 1.5837610971556958e-05, + "loss": 0.0145, "step": 152285 }, { "epoch": 7.11, - "learning_rate": 5.8150110168299665e-06, - "loss": 0.0269, + "learning_rate": 1.5837142901720938e-05, + "loss": 0.0336, "step": 152290 }, { "epoch": 7.11, - "learning_rate": 5.814542215554827e-06, - "loss": 0.0238, + "learning_rate": 1.5836674831884918e-05, + "loss": 0.0404, "step": 152295 }, { "epoch": 7.11, - "learning_rate": 5.814073414279687e-06, - "loss": 0.0457, + "learning_rate": 1.5836206762048897e-05, + "loss": 0.0515, "step": 152300 }, { "epoch": 7.11, - "learning_rate": 5.813604613004548e-06, - "loss": 0.0316, + "learning_rate": 1.583573869221288e-05, + "loss": 0.0205, "step": 152305 }, { "epoch": 7.11, - "learning_rate": 5.813135811729408e-06, - "loss": 0.054, + "learning_rate": 1.583527062237686e-05, + "loss": 0.1492, "step": 152310 }, { "epoch": 7.11, - "learning_rate": 5.8126670104542695e-06, - "loss": 0.2579, + "learning_rate": 1.583480255254084e-05, + "loss": 0.0804, "step": 152315 }, { "epoch": 7.11, - "learning_rate": 5.8121982091791294e-06, - "loss": 0.1719, + "learning_rate": 1.583433448270482e-05, + "loss": 0.1474, "step": 152320 }, { "epoch": 7.11, - "learning_rate": 5.81172940790399e-06, - "loss": 0.028, + "learning_rate": 1.5833866412868803e-05, + "loss": 0.0236, "step": 152325 }, { "epoch": 7.11, - "learning_rate": 5.81126060662885e-06, - "loss": 0.0214, + "learning_rate": 1.583339834303278e-05, + "loss": 0.005, "step": 152330 }, { "epoch": 7.11, - "learning_rate": 5.810791805353712e-06, - "loss": 0.0229, + "learning_rate": 1.583293027319676e-05, + "loss": 0.0072, "step": 152335 }, { "epoch": 7.11, - "learning_rate": 5.810323004078572e-06, - "loss": 0.0118, + "learning_rate": 1.583246220336074e-05, + "loss": 0.0163, "step": 152340 }, { "epoch": 7.11, - "learning_rate": 5.8098542028034325e-06, - "loss": 0.0302, + "learning_rate": 1.5831994133524723e-05, + "loss": 0.0237, "step": 152345 }, { "epoch": 7.11, - "learning_rate": 5.8093854015282924e-06, - "loss": 0.0352, + "learning_rate": 1.5831526063688702e-05, + "loss": 0.0536, "step": 152350 }, { "epoch": 7.11, - "learning_rate": 5.808916600253152e-06, - "loss": 0.0763, + "learning_rate": 1.5831057993852682e-05, + "loss": 0.0402, "step": 152355 }, { "epoch": 7.11, - "learning_rate": 5.808447798978014e-06, - "loss": 0.0507, + "learning_rate": 1.5830589924016665e-05, + "loss": 0.0465, "step": 152360 }, { "epoch": 7.11, - "learning_rate": 5.807978997702875e-06, - "loss": 0.1031, + "learning_rate": 1.5830121854180645e-05, + "loss": 0.1284, "step": 152365 }, { "epoch": 7.11, - "learning_rate": 5.807510196427735e-06, - "loss": 0.143, + "learning_rate": 1.5829653784344625e-05, + "loss": 0.1471, "step": 152370 }, { "epoch": 7.11, - "learning_rate": 5.807041395152595e-06, - "loss": 0.019, + "learning_rate": 1.5829185714508605e-05, + "loss": 0.0186, "step": 152375 }, { "epoch": 7.11, - "learning_rate": 5.8065725938774554e-06, - "loss": 0.0084, + "learning_rate": 1.5828717644672588e-05, + "loss": 0.0167, "step": 152380 }, { "epoch": 7.11, - "learning_rate": 5.806103792602317e-06, - "loss": 0.0554, + "learning_rate": 1.5828249574836568e-05, + "loss": 0.0248, "step": 152385 }, { "epoch": 7.11, - "learning_rate": 5.805634991327177e-06, - "loss": 0.0744, + "learning_rate": 1.5827781505000544e-05, + "loss": 0.0438, "step": 152390 }, { "epoch": 7.11, - "learning_rate": 5.805166190052037e-06, - "loss": 0.0501, + "learning_rate": 1.5827313435164524e-05, + "loss": 0.034, "step": 152395 }, { "epoch": 7.11, - "learning_rate": 5.804697388776898e-06, - "loss": 0.0571, + "learning_rate": 1.5826845365328507e-05, + "loss": 0.052, "step": 152400 }, { "epoch": 7.11, - "learning_rate": 5.804228587501759e-06, - "loss": 0.0729, + "learning_rate": 1.5826377295492487e-05, + "loss": 0.0804, "step": 152405 }, { "epoch": 7.11, - "learning_rate": 5.803759786226619e-06, - "loss": 0.0629, + "learning_rate": 1.5825909225656467e-05, + "loss": 0.1013, "step": 152410 }, { "epoch": 7.11, - "learning_rate": 5.803290984951479e-06, - "loss": 0.1248, + "learning_rate": 1.582544115582045e-05, + "loss": 0.1868, "step": 152415 }, { "epoch": 7.11, - "learning_rate": 5.80282218367634e-06, - "loss": 0.0986, + "learning_rate": 1.582497308598443e-05, + "loss": 0.1549, "step": 152420 }, { "epoch": 7.11, - "learning_rate": 5.8023533824012e-06, - "loss": 0.0429, + "learning_rate": 1.582450501614841e-05, + "loss": 0.0039, "step": 152425 }, { "epoch": 7.11, - "learning_rate": 5.8018845811260615e-06, - "loss": 0.0015, + "learning_rate": 1.582403694631239e-05, + "loss": 0.0143, "step": 152430 }, { "epoch": 7.11, - "learning_rate": 5.8014157798509215e-06, - "loss": 0.0421, + "learning_rate": 1.5823568876476373e-05, + "loss": 0.0403, "step": 152435 }, { "epoch": 7.11, - "learning_rate": 5.800946978575782e-06, - "loss": 0.0178, + "learning_rate": 1.5823100806640353e-05, + "loss": 0.0525, "step": 152440 }, { "epoch": 7.11, - "learning_rate": 5.800478177300642e-06, - "loss": 0.0339, + "learning_rate": 1.5822632736804332e-05, + "loss": 0.0392, "step": 152445 }, { "epoch": 7.11, - "learning_rate": 5.800009376025503e-06, - "loss": 0.0327, + "learning_rate": 1.5822164666968312e-05, + "loss": 0.0664, "step": 152450 }, { "epoch": 7.11, - "learning_rate": 5.799540574750364e-06, - "loss": 0.0573, + "learning_rate": 1.5821696597132292e-05, + "loss": 0.0217, "step": 152455 }, { "epoch": 7.11, - "learning_rate": 5.7990717734752245e-06, - "loss": 0.0609, + "learning_rate": 1.5821228527296272e-05, + "loss": 0.0952, "step": 152460 }, { "epoch": 7.11, - "learning_rate": 5.7986029722000845e-06, - "loss": 0.0925, + "learning_rate": 1.582076045746025e-05, + "loss": 0.1581, "step": 152465 }, { "epoch": 7.11, - "learning_rate": 5.798134170924945e-06, - "loss": 0.1457, + "learning_rate": 1.5820292387624235e-05, + "loss": 0.2834, "step": 152470 }, { "epoch": 7.11, - "learning_rate": 5.797665369649806e-06, - "loss": 0.0193, + "learning_rate": 1.5819824317788215e-05, + "loss": 0.0273, "step": 152475 }, { "epoch": 7.11, - "learning_rate": 5.797196568374667e-06, - "loss": 0.0541, + "learning_rate": 1.5819356247952195e-05, + "loss": 0.0095, "step": 152480 }, { "epoch": 7.12, - "learning_rate": 5.796727767099527e-06, - "loss": 0.0585, + "learning_rate": 1.5818888178116174e-05, + "loss": 0.0148, "step": 152485 }, { "epoch": 7.12, - "learning_rate": 5.7962589658243875e-06, - "loss": 0.0218, + "learning_rate": 1.5818420108280158e-05, + "loss": 0.0331, "step": 152490 }, { "epoch": 7.12, - "learning_rate": 5.7957901645492475e-06, - "loss": 0.0265, + "learning_rate": 1.5817952038444137e-05, + "loss": 0.0779, "step": 152495 }, { "epoch": 7.12, - "learning_rate": 5.795321363274109e-06, - "loss": 0.0261, + "learning_rate": 1.5817483968608117e-05, + "loss": 0.1228, "step": 152500 }, { "epoch": 7.12, - "learning_rate": 5.794852561998969e-06, - "loss": 0.0392, + "learning_rate": 1.5817015898772097e-05, + "loss": 0.0439, "step": 152505 }, { "epoch": 7.12, - "learning_rate": 5.79438376072383e-06, - "loss": 0.0716, + "learning_rate": 1.581654782893608e-05, + "loss": 0.0877, "step": 152510 }, { "epoch": 7.12, - "learning_rate": 5.79391495944869e-06, - "loss": 0.1082, + "learning_rate": 1.5816079759100057e-05, + "loss": 0.0731, "step": 152515 }, { "epoch": 7.12, - "learning_rate": 5.7934461581735505e-06, - "loss": 0.2424, + "learning_rate": 1.5815611689264036e-05, + "loss": 0.1683, "step": 152520 }, { "epoch": 7.12, - "learning_rate": 5.792977356898411e-06, - "loss": 0.0376, + "learning_rate": 1.5815143619428016e-05, + "loss": 0.0261, "step": 152525 }, { "epoch": 7.12, - "learning_rate": 5.792508555623272e-06, - "loss": 0.0366, + "learning_rate": 1.5814675549592e-05, + "loss": 0.0091, "step": 152530 }, { "epoch": 7.12, - "learning_rate": 5.792039754348132e-06, - "loss": 0.0555, + "learning_rate": 1.581420747975598e-05, + "loss": 0.0554, "step": 152535 }, { "epoch": 7.12, - "learning_rate": 5.791570953072993e-06, - "loss": 0.0399, + "learning_rate": 1.581373940991996e-05, + "loss": 0.0219, "step": 152540 }, { "epoch": 7.12, - "learning_rate": 5.7911021517978536e-06, - "loss": 0.0444, + "learning_rate": 1.5813271340083942e-05, + "loss": 0.0411, "step": 152545 }, { "epoch": 7.12, - "learning_rate": 5.790633350522714e-06, - "loss": 0.0715, + "learning_rate": 1.5812803270247922e-05, + "loss": 0.0392, "step": 152550 }, { "epoch": 7.12, - "learning_rate": 5.790164549247574e-06, - "loss": 0.0568, + "learning_rate": 1.5812335200411902e-05, + "loss": 0.0156, "step": 152555 }, { "epoch": 7.12, - "learning_rate": 5.789695747972435e-06, - "loss": 0.1769, + "learning_rate": 1.5811867130575882e-05, + "loss": 0.0576, "step": 152560 }, { "epoch": 7.12, - "learning_rate": 5.789226946697295e-06, - "loss": 0.1248, + "learning_rate": 1.5811399060739865e-05, + "loss": 0.1155, "step": 152565 }, { "epoch": 7.12, - "learning_rate": 5.788758145422157e-06, - "loss": 0.3438, + "learning_rate": 1.5810930990903845e-05, + "loss": 0.1663, "step": 152570 }, { "epoch": 7.12, - "learning_rate": 5.7882893441470166e-06, - "loss": 0.0079, + "learning_rate": 1.5810462921067825e-05, + "loss": 0.019, "step": 152575 }, { "epoch": 7.12, - "learning_rate": 5.787820542871877e-06, - "loss": 0.0228, + "learning_rate": 1.58099948512318e-05, + "loss": 0.0153, "step": 152580 }, { "epoch": 7.12, - "learning_rate": 5.787351741596737e-06, - "loss": 0.0102, + "learning_rate": 1.5809526781395784e-05, + "loss": 0.0113, "step": 152585 }, { "epoch": 7.12, - "learning_rate": 5.786882940321599e-06, - "loss": 0.083, + "learning_rate": 1.5809058711559764e-05, + "loss": 0.0349, "step": 152590 }, { "epoch": 7.12, - "learning_rate": 5.786414139046459e-06, - "loss": 0.047, + "learning_rate": 1.5808590641723744e-05, + "loss": 0.0362, "step": 152595 }, { "epoch": 7.12, - "learning_rate": 5.78594533777132e-06, - "loss": 0.0481, + "learning_rate": 1.5808122571887727e-05, + "loss": 0.066, "step": 152600 }, { "epoch": 7.12, - "learning_rate": 5.7854765364961795e-06, - "loss": 0.0643, + "learning_rate": 1.5807654502051707e-05, + "loss": 0.0351, "step": 152605 }, { "epoch": 7.12, - "learning_rate": 5.7850077352210395e-06, - "loss": 0.0967, + "learning_rate": 1.5807186432215687e-05, + "loss": 0.0744, "step": 152610 }, { "epoch": 7.12, - "learning_rate": 5.784538933945901e-06, - "loss": 0.15, + "learning_rate": 1.5806718362379667e-05, + "loss": 0.1499, "step": 152615 }, { "epoch": 7.12, - "learning_rate": 5.784070132670762e-06, - "loss": 0.1662, + "learning_rate": 1.580625029254365e-05, + "loss": 0.1593, "step": 152620 }, { "epoch": 7.12, - "learning_rate": 5.783601331395622e-06, - "loss": 0.0542, + "learning_rate": 1.580578222270763e-05, + "loss": 0.0915, "step": 152625 }, { "epoch": 7.12, - "learning_rate": 5.783132530120482e-06, - "loss": 0.012, + "learning_rate": 1.580531415287161e-05, + "loss": 0.017, "step": 152630 }, { "epoch": 7.12, - "learning_rate": 5.7826637288453425e-06, - "loss": 0.0055, + "learning_rate": 1.580484608303559e-05, + "loss": 0.0139, "step": 152635 }, { "epoch": 7.12, - "learning_rate": 5.782194927570204e-06, - "loss": 0.0182, + "learning_rate": 1.5804378013199572e-05, + "loss": 0.0152, "step": 152640 }, { "epoch": 7.12, - "learning_rate": 5.781726126295064e-06, - "loss": 0.048, + "learning_rate": 1.580390994336355e-05, + "loss": 0.0288, "step": 152645 }, { "epoch": 7.12, - "learning_rate": 5.781257325019924e-06, - "loss": 0.0415, + "learning_rate": 1.580344187352753e-05, + "loss": 0.0911, "step": 152650 }, { "epoch": 7.12, - "learning_rate": 5.780788523744785e-06, - "loss": 0.0831, + "learning_rate": 1.5802973803691512e-05, + "loss": 0.0275, "step": 152655 }, { "epoch": 7.12, - "learning_rate": 5.7803197224696464e-06, - "loss": 0.16, + "learning_rate": 1.580250573385549e-05, + "loss": 0.0474, "step": 152660 }, { "epoch": 7.12, - "learning_rate": 5.779850921194506e-06, - "loss": 0.0798, + "learning_rate": 1.580203766401947e-05, + "loss": 0.1038, "step": 152665 }, { "epoch": 7.12, - "learning_rate": 5.779382119919366e-06, - "loss": 0.1407, + "learning_rate": 1.580156959418345e-05, + "loss": 0.2874, "step": 152670 }, { "epoch": 7.12, - "learning_rate": 5.778913318644227e-06, - "loss": 0.0138, + "learning_rate": 1.5801101524347435e-05, + "loss": 0.0332, "step": 152675 }, { "epoch": 7.12, - "learning_rate": 5.778444517369087e-06, - "loss": 0.01, + "learning_rate": 1.5800633454511414e-05, + "loss": 0.0412, "step": 152680 }, { "epoch": 7.12, - "learning_rate": 5.777975716093949e-06, - "loss": 0.027, + "learning_rate": 1.5800165384675394e-05, + "loss": 0.0122, "step": 152685 }, { "epoch": 7.12, - "learning_rate": 5.777506914818809e-06, - "loss": 0.0169, + "learning_rate": 1.5799697314839374e-05, + "loss": 0.0254, "step": 152690 }, { "epoch": 7.12, - "learning_rate": 5.777038113543669e-06, - "loss": 0.0346, + "learning_rate": 1.5799229245003357e-05, + "loss": 0.0465, "step": 152695 }, { "epoch": 7.13, - "learning_rate": 5.776569312268529e-06, - "loss": 0.0681, + "learning_rate": 1.5798761175167337e-05, + "loss": 0.061, "step": 152700 }, { "epoch": 7.13, - "learning_rate": 5.77610051099339e-06, - "loss": 0.0533, + "learning_rate": 1.5798293105331313e-05, + "loss": 0.0886, "step": 152705 }, { "epoch": 7.13, - "learning_rate": 5.775631709718251e-06, - "loss": 0.0942, + "learning_rate": 1.5797825035495293e-05, + "loss": 0.0892, "step": 152710 }, { "epoch": 7.13, - "learning_rate": 5.775162908443112e-06, - "loss": 0.0823, + "learning_rate": 1.5797356965659276e-05, + "loss": 0.0923, "step": 152715 }, { "epoch": 7.13, - "learning_rate": 5.7746941071679716e-06, - "loss": 0.1657, + "learning_rate": 1.5796888895823256e-05, + "loss": 0.1411, "step": 152720 }, { "epoch": 7.13, - "learning_rate": 5.774225305892832e-06, - "loss": 0.0393, + "learning_rate": 1.5796420825987236e-05, + "loss": 0.0202, "step": 152725 }, { "epoch": 7.13, - "learning_rate": 5.773756504617693e-06, - "loss": 0.0141, + "learning_rate": 1.579595275615122e-05, + "loss": 0.0013, "step": 152730 }, { "epoch": 7.13, - "learning_rate": 5.773287703342554e-06, - "loss": 0.0184, + "learning_rate": 1.57954846863152e-05, + "loss": 0.0298, "step": 152735 }, { "epoch": 7.13, - "learning_rate": 5.772818902067414e-06, - "loss": 0.085, + "learning_rate": 1.579501661647918e-05, + "loss": 0.0998, "step": 152740 }, { "epoch": 7.13, - "learning_rate": 5.772350100792275e-06, - "loss": 0.0636, + "learning_rate": 1.579454854664316e-05, + "loss": 0.0648, "step": 152745 }, { "epoch": 7.13, - "learning_rate": 5.7718812995171346e-06, - "loss": 0.0213, + "learning_rate": 1.5794080476807142e-05, + "loss": 0.0181, "step": 152750 }, { "epoch": 7.13, - "learning_rate": 5.771412498241996e-06, - "loss": 0.0798, + "learning_rate": 1.5793612406971122e-05, + "loss": 0.0734, "step": 152755 }, { "epoch": 7.13, - "learning_rate": 5.770943696966856e-06, - "loss": 0.0701, + "learning_rate": 1.57931443371351e-05, + "loss": 0.1288, "step": 152760 }, { "epoch": 7.13, - "learning_rate": 5.770474895691717e-06, - "loss": 0.1851, + "learning_rate": 1.579267626729908e-05, + "loss": 0.0948, "step": 152765 }, { "epoch": 7.13, - "learning_rate": 5.770006094416577e-06, - "loss": 0.113, + "learning_rate": 1.579220819746306e-05, + "loss": 0.1765, "step": 152770 }, { "epoch": 7.13, - "learning_rate": 5.769537293141438e-06, - "loss": 0.0224, + "learning_rate": 1.579174012762704e-05, + "loss": 0.0346, "step": 152775 }, { "epoch": 7.13, - "learning_rate": 5.769068491866298e-06, - "loss": 0.0185, + "learning_rate": 1.579127205779102e-05, + "loss": 0.0151, "step": 152780 }, { "epoch": 7.13, - "learning_rate": 5.768599690591159e-06, - "loss": 0.0364, + "learning_rate": 1.5790803987955004e-05, + "loss": 0.006, "step": 152785 }, { "epoch": 7.13, - "learning_rate": 5.768130889316019e-06, - "loss": 0.0382, + "learning_rate": 1.5790335918118984e-05, + "loss": 0.0246, "step": 152790 }, { "epoch": 7.13, - "learning_rate": 5.76766208804088e-06, - "loss": 0.0229, + "learning_rate": 1.5789867848282964e-05, + "loss": 0.1865, "step": 152795 }, { "epoch": 7.13, - "learning_rate": 5.767193286765741e-06, - "loss": 0.0187, + "learning_rate": 1.5789399778446944e-05, + "loss": 0.0591, "step": 152800 }, { "epoch": 7.13, - "learning_rate": 5.7667244854906015e-06, - "loss": 0.0853, + "learning_rate": 1.5788931708610927e-05, + "loss": 0.0469, "step": 152805 }, { "epoch": 7.13, - "learning_rate": 5.766255684215461e-06, - "loss": 0.0561, + "learning_rate": 1.5788463638774907e-05, + "loss": 0.1488, "step": 152810 }, { "epoch": 7.13, - "learning_rate": 5.765786882940322e-06, - "loss": 0.1422, + "learning_rate": 1.5787995568938886e-05, + "loss": 0.1949, "step": 152815 }, { "epoch": 7.13, - "learning_rate": 5.765318081665182e-06, - "loss": 0.152, + "learning_rate": 1.5787527499102866e-05, + "loss": 0.0944, "step": 152820 }, { "epoch": 7.13, - "learning_rate": 5.764849280390044e-06, - "loss": 0.0204, + "learning_rate": 1.578705942926685e-05, + "loss": 0.0144, "step": 152825 }, { "epoch": 7.13, - "learning_rate": 5.764380479114904e-06, - "loss": 0.0328, + "learning_rate": 1.578659135943083e-05, + "loss": 0.0208, "step": 152830 }, { "epoch": 7.13, - "learning_rate": 5.7639116778397644e-06, - "loss": 0.0184, + "learning_rate": 1.5786123289594806e-05, + "loss": 0.0192, "step": 152835 }, { "epoch": 7.13, - "learning_rate": 5.763442876564624e-06, - "loss": 0.0201, + "learning_rate": 1.578565521975879e-05, + "loss": 0.0308, "step": 152840 }, { "epoch": 7.13, - "learning_rate": 5.762974075289484e-06, - "loss": 0.0245, + "learning_rate": 1.578518714992277e-05, + "loss": 0.0507, "step": 152845 }, { "epoch": 7.13, - "learning_rate": 5.762505274014346e-06, - "loss": 0.0749, + "learning_rate": 1.578471908008675e-05, + "loss": 0.0433, "step": 152850 }, { "epoch": 7.13, - "learning_rate": 5.762036472739207e-06, - "loss": 0.0525, + "learning_rate": 1.5784251010250728e-05, + "loss": 0.1194, "step": 152855 }, { "epoch": 7.13, - "learning_rate": 5.761567671464067e-06, - "loss": 0.0421, + "learning_rate": 1.578378294041471e-05, + "loss": 0.1174, "step": 152860 }, { "epoch": 7.13, - "learning_rate": 5.761098870188927e-06, - "loss": 0.0826, + "learning_rate": 1.578331487057869e-05, + "loss": 0.1221, "step": 152865 }, { "epoch": 7.13, - "learning_rate": 5.760630068913788e-06, - "loss": 0.1215, + "learning_rate": 1.578284680074267e-05, + "loss": 0.1405, "step": 152870 }, { "epoch": 7.13, - "learning_rate": 5.760161267638649e-06, - "loss": 0.0062, + "learning_rate": 1.578237873090665e-05, + "loss": 0.0427, "step": 152875 }, { "epoch": 7.13, - "learning_rate": 5.759692466363509e-06, - "loss": 0.0266, + "learning_rate": 1.5781910661070634e-05, + "loss": 0.0107, "step": 152880 }, { "epoch": 7.13, - "learning_rate": 5.759223665088369e-06, - "loss": 0.0573, + "learning_rate": 1.5781442591234614e-05, + "loss": 0.0092, "step": 152885 }, { "epoch": 7.13, - "learning_rate": 5.75875486381323e-06, - "loss": 0.0075, + "learning_rate": 1.5780974521398594e-05, + "loss": 0.0157, "step": 152890 }, { "epoch": 7.13, - "learning_rate": 5.758286062538091e-06, - "loss": 0.0399, + "learning_rate": 1.578050645156257e-05, + "loss": 0.0594, "step": 152895 }, { "epoch": 7.13, - "learning_rate": 5.757817261262951e-06, - "loss": 0.0392, + "learning_rate": 1.5780038381726553e-05, + "loss": 0.096, "step": 152900 }, { "epoch": 7.13, - "learning_rate": 5.757348459987811e-06, - "loss": 0.1036, + "learning_rate": 1.5779570311890533e-05, + "loss": 0.1533, "step": 152905 }, { "epoch": 7.13, - "learning_rate": 5.756879658712672e-06, - "loss": 0.0342, + "learning_rate": 1.5779102242054513e-05, + "loss": 0.1149, "step": 152910 }, { "epoch": 7.14, - "learning_rate": 5.7564108574375336e-06, - "loss": 0.1486, + "learning_rate": 1.5778634172218496e-05, + "loss": 0.0599, "step": 152915 }, { "epoch": 7.14, - "learning_rate": 5.7559420561623935e-06, - "loss": 0.2082, + "learning_rate": 1.5778166102382476e-05, + "loss": 0.1412, "step": 152920 }, { "epoch": 7.14, - "learning_rate": 5.755473254887253e-06, - "loss": 0.025, + "learning_rate": 1.5777698032546456e-05, + "loss": 0.0119, "step": 152925 }, { "epoch": 7.14, - "learning_rate": 5.755004453612114e-06, - "loss": 0.0183, + "learning_rate": 1.5777229962710436e-05, + "loss": 0.0547, "step": 152930 }, { "epoch": 7.14, - "learning_rate": 5.754535652336974e-06, - "loss": 0.0243, + "learning_rate": 1.577676189287442e-05, + "loss": 0.0139, "step": 152935 }, { "epoch": 7.14, - "learning_rate": 5.754066851061836e-06, - "loss": 0.0139, + "learning_rate": 1.57762938230384e-05, + "loss": 0.0062, "step": 152940 }, { "epoch": 7.14, - "learning_rate": 5.753598049786696e-06, - "loss": 0.0884, + "learning_rate": 1.577582575320238e-05, + "loss": 0.0737, "step": 152945 }, { "epoch": 7.14, - "learning_rate": 5.7531292485115565e-06, - "loss": 0.0615, + "learning_rate": 1.577535768336636e-05, + "loss": 0.1067, "step": 152950 }, { "epoch": 7.14, - "learning_rate": 5.752660447236416e-06, - "loss": 0.0168, + "learning_rate": 1.577488961353034e-05, + "loss": 0.0382, "step": 152955 }, { "epoch": 7.14, - "learning_rate": 5.752191645961277e-06, - "loss": 0.0451, + "learning_rate": 1.5774421543694318e-05, + "loss": 0.0473, "step": 152960 }, { "epoch": 7.14, - "learning_rate": 5.751722844686139e-06, - "loss": 0.1003, + "learning_rate": 1.5773953473858298e-05, + "loss": 0.1238, "step": 152965 }, { "epoch": 7.14, - "learning_rate": 5.751254043410999e-06, - "loss": 0.1918, + "learning_rate": 1.577348540402228e-05, + "loss": 0.2118, "step": 152970 }, { "epoch": 7.14, - "learning_rate": 5.750785242135859e-06, - "loss": 0.0221, + "learning_rate": 1.577301733418626e-05, + "loss": 0.0153, "step": 152975 }, { "epoch": 7.14, - "learning_rate": 5.7503164408607195e-06, - "loss": 0.0184, + "learning_rate": 1.577254926435024e-05, + "loss": 0.0289, "step": 152980 }, { "epoch": 7.14, - "learning_rate": 5.749847639585581e-06, - "loss": 0.0157, + "learning_rate": 1.577208119451422e-05, + "loss": 0.0403, "step": 152985 }, { "epoch": 7.14, - "learning_rate": 5.749378838310441e-06, - "loss": 0.0429, + "learning_rate": 1.5771613124678204e-05, + "loss": 0.0225, "step": 152990 }, { "epoch": 7.14, - "learning_rate": 5.748910037035301e-06, - "loss": 0.0491, + "learning_rate": 1.5771145054842184e-05, + "loss": 0.0369, "step": 152995 }, { "epoch": 7.14, - "learning_rate": 5.748441235760162e-06, - "loss": 0.0285, + "learning_rate": 1.5770676985006163e-05, + "loss": 0.0611, "step": 153000 }, { "epoch": 7.14, - "learning_rate": 5.747972434485022e-06, - "loss": 0.1153, + "learning_rate": 1.5770208915170143e-05, + "loss": 0.1138, "step": 153005 }, { "epoch": 7.14, - "learning_rate": 5.747503633209883e-06, - "loss": 0.0852, + "learning_rate": 1.5769740845334126e-05, + "loss": 0.14, "step": 153010 }, { "epoch": 7.14, - "learning_rate": 5.747034831934743e-06, - "loss": 0.108, + "learning_rate": 1.5769272775498106e-05, + "loss": 0.1225, "step": 153015 }, { "epoch": 7.14, - "learning_rate": 5.746566030659604e-06, - "loss": 0.1367, + "learning_rate": 1.5768804705662086e-05, + "loss": 0.1146, "step": 153020 }, { "epoch": 7.14, - "learning_rate": 5.746097229384464e-06, - "loss": 0.0018, + "learning_rate": 1.5768336635826066e-05, + "loss": 0.0331, "step": 153025 }, { "epoch": 7.14, - "learning_rate": 5.745628428109325e-06, - "loss": 0.0107, + "learning_rate": 1.5767868565990046e-05, + "loss": 0.0244, "step": 153030 }, { "epoch": 7.14, - "learning_rate": 5.7451596268341855e-06, - "loss": 0.0329, + "learning_rate": 1.5767400496154025e-05, + "loss": 0.0367, "step": 153035 }, { "epoch": 7.14, - "learning_rate": 5.744690825559046e-06, - "loss": 0.0256, + "learning_rate": 1.5766932426318005e-05, + "loss": 0.04, "step": 153040 }, { "epoch": 7.14, - "learning_rate": 5.744222024283906e-06, - "loss": 0.0631, + "learning_rate": 1.576646435648199e-05, + "loss": 0.0433, "step": 153045 }, { "epoch": 7.14, - "learning_rate": 5.743753223008767e-06, - "loss": 0.0411, + "learning_rate": 1.5765996286645968e-05, + "loss": 0.0935, "step": 153050 }, { "epoch": 7.14, - "learning_rate": 5.743284421733628e-06, - "loss": 0.0788, + "learning_rate": 1.5765528216809948e-05, + "loss": 0.042, "step": 153055 }, { "epoch": 7.14, - "learning_rate": 5.7428156204584886e-06, - "loss": 0.1015, + "learning_rate": 1.5765060146973928e-05, + "loss": 0.1612, "step": 153060 }, { "epoch": 7.14, - "learning_rate": 5.7423468191833485e-06, - "loss": 0.1891, + "learning_rate": 1.576459207713791e-05, + "loss": 0.1446, "step": 153065 }, { "epoch": 7.14, - "learning_rate": 5.741878017908209e-06, - "loss": 0.2158, + "learning_rate": 1.576412400730189e-05, + "loss": 0.1734, "step": 153070 }, { "epoch": 7.14, - "learning_rate": 5.741409216633069e-06, - "loss": 0.0414, + "learning_rate": 1.576365593746587e-05, + "loss": 0.0191, "step": 153075 }, { "epoch": 7.14, - "learning_rate": 5.740940415357931e-06, - "loss": 0.0027, + "learning_rate": 1.5763187867629854e-05, + "loss": 0.0138, "step": 153080 }, { "epoch": 7.14, - "learning_rate": 5.740471614082791e-06, - "loss": 0.0045, + "learning_rate": 1.576271979779383e-05, + "loss": 0.0158, "step": 153085 }, { "epoch": 7.14, - "learning_rate": 5.7400028128076516e-06, - "loss": 0.0538, + "learning_rate": 1.576225172795781e-05, + "loss": 0.0199, "step": 153090 }, { "epoch": 7.14, - "learning_rate": 5.7395340115325115e-06, - "loss": 0.0187, + "learning_rate": 1.576178365812179e-05, + "loss": 0.0383, "step": 153095 }, { "epoch": 7.14, - "learning_rate": 5.7390652102573714e-06, - "loss": 0.0602, + "learning_rate": 1.5761315588285773e-05, + "loss": 0.0463, "step": 153100 }, { "epoch": 7.14, - "learning_rate": 5.738596408982233e-06, - "loss": 0.0745, + "learning_rate": 1.5760847518449753e-05, + "loss": 0.0262, "step": 153105 }, { "epoch": 7.14, - "learning_rate": 5.738127607707094e-06, - "loss": 0.0725, + "learning_rate": 1.5760379448613733e-05, + "loss": 0.0519, "step": 153110 }, { "epoch": 7.14, - "learning_rate": 5.737658806431954e-06, - "loss": 0.1355, + "learning_rate": 1.5759911378777713e-05, + "loss": 0.0657, "step": 153115 }, { "epoch": 7.14, - "learning_rate": 5.7371900051568145e-06, - "loss": 0.351, + "learning_rate": 1.5759443308941696e-05, + "loss": 0.1437, "step": 153120 }, { "epoch": 7.15, - "learning_rate": 5.736721203881675e-06, - "loss": 0.0472, + "learning_rate": 1.5758975239105676e-05, + "loss": 0.0023, "step": 153125 }, { "epoch": 7.15, - "learning_rate": 5.736252402606536e-06, - "loss": 0.0027, + "learning_rate": 1.5758507169269656e-05, + "loss": 0.0052, "step": 153130 }, { "epoch": 7.15, - "learning_rate": 5.735783601331396e-06, - "loss": 0.0308, + "learning_rate": 1.5758039099433635e-05, + "loss": 0.038, "step": 153135 }, { "epoch": 7.15, - "learning_rate": 5.735314800056257e-06, - "loss": 0.0387, + "learning_rate": 1.575757102959762e-05, + "loss": 0.0497, "step": 153140 }, { "epoch": 7.15, - "learning_rate": 5.734845998781117e-06, - "loss": 0.0761, + "learning_rate": 1.57571029597616e-05, + "loss": 0.0199, "step": 153145 }, { "epoch": 7.15, - "learning_rate": 5.734377197505978e-06, - "loss": 0.0492, + "learning_rate": 1.5756634889925575e-05, + "loss": 0.0134, "step": 153150 }, { "epoch": 7.15, - "learning_rate": 5.733908396230838e-06, - "loss": 0.0927, + "learning_rate": 1.5756166820089558e-05, + "loss": 0.0289, "step": 153155 }, { "epoch": 7.15, - "learning_rate": 5.733439594955699e-06, - "loss": 0.0579, + "learning_rate": 1.5755698750253538e-05, + "loss": 0.0854, "step": 153160 }, { "epoch": 7.15, - "learning_rate": 5.732970793680559e-06, - "loss": 0.1264, + "learning_rate": 1.5755230680417518e-05, + "loss": 0.1103, "step": 153165 }, { "epoch": 7.15, - "learning_rate": 5.732501992405419e-06, - "loss": 0.2119, + "learning_rate": 1.5754762610581497e-05, + "loss": 0.1744, "step": 153170 }, { "epoch": 7.15, - "learning_rate": 5.732033191130281e-06, - "loss": 0.0118, + "learning_rate": 1.575429454074548e-05, + "loss": 0.0469, "step": 153175 }, { "epoch": 7.15, - "learning_rate": 5.731564389855141e-06, - "loss": 0.0043, + "learning_rate": 1.575382647090946e-05, + "loss": 0.0052, "step": 153180 }, { "epoch": 7.15, - "learning_rate": 5.731095588580001e-06, - "loss": 0.0142, + "learning_rate": 1.575335840107344e-05, + "loss": 0.0177, "step": 153185 }, { "epoch": 7.15, - "learning_rate": 5.730626787304861e-06, - "loss": 0.0384, + "learning_rate": 1.575289033123742e-05, + "loss": 0.013, "step": 153190 }, { "epoch": 7.15, - "learning_rate": 5.730157986029723e-06, - "loss": 0.0272, + "learning_rate": 1.5752422261401403e-05, + "loss": 0.0135, "step": 153195 }, { "epoch": 7.15, - "learning_rate": 5.729689184754584e-06, - "loss": 0.057, + "learning_rate": 1.5751954191565383e-05, + "loss": 0.0345, "step": 153200 }, { "epoch": 7.15, - "learning_rate": 5.729220383479444e-06, - "loss": 0.1085, + "learning_rate": 1.5751486121729363e-05, + "loss": 0.0935, "step": 153205 }, { "epoch": 7.15, - "learning_rate": 5.7287515822043035e-06, - "loss": 0.0562, + "learning_rate": 1.5751018051893343e-05, + "loss": 0.1, "step": 153210 }, { "epoch": 7.15, - "learning_rate": 5.728282780929164e-06, - "loss": 0.1572, + "learning_rate": 1.5750549982057323e-05, + "loss": 0.1071, "step": 153215 }, { "epoch": 7.15, - "learning_rate": 5.727813979654026e-06, - "loss": 0.1575, + "learning_rate": 1.5750081912221302e-05, + "loss": 0.1291, "step": 153220 }, { "epoch": 7.15, - "learning_rate": 5.727345178378886e-06, - "loss": 0.0128, + "learning_rate": 1.5749613842385282e-05, + "loss": 0.0549, "step": 153225 }, { "epoch": 7.15, - "learning_rate": 5.726876377103746e-06, - "loss": 0.0179, + "learning_rate": 1.5749145772549265e-05, + "loss": 0.0063, "step": 153230 }, { "epoch": 7.15, - "learning_rate": 5.7264075758286066e-06, - "loss": 0.0167, + "learning_rate": 1.5748677702713245e-05, + "loss": 0.018, "step": 153235 }, { "epoch": 7.15, - "learning_rate": 5.725938774553468e-06, - "loss": 0.0143, + "learning_rate": 1.5748209632877225e-05, + "loss": 0.0276, "step": 153240 }, { "epoch": 7.15, - "learning_rate": 5.725469973278328e-06, - "loss": 0.0495, + "learning_rate": 1.5747741563041205e-05, + "loss": 0.137, "step": 153245 }, { "epoch": 7.15, - "learning_rate": 5.725001172003188e-06, - "loss": 0.0415, + "learning_rate": 1.5747273493205188e-05, + "loss": 0.0103, "step": 153250 }, { "epoch": 7.15, - "learning_rate": 5.724532370728049e-06, - "loss": 0.1155, + "learning_rate": 1.5746805423369168e-05, + "loss": 0.0925, "step": 153255 }, { "epoch": 7.15, - "learning_rate": 5.724063569452909e-06, - "loss": 0.1947, + "learning_rate": 1.5746337353533148e-05, + "loss": 0.0865, "step": 153260 }, { "epoch": 7.15, - "learning_rate": 5.72359476817777e-06, - "loss": 0.1008, + "learning_rate": 1.574586928369713e-05, + "loss": 0.1721, "step": 153265 }, { "epoch": 7.15, - "learning_rate": 5.72312596690263e-06, - "loss": 0.2812, + "learning_rate": 1.574540121386111e-05, + "loss": 0.1566, "step": 153270 }, { "epoch": 7.15, - "learning_rate": 5.722657165627491e-06, - "loss": 0.0402, + "learning_rate": 1.5744933144025087e-05, + "loss": 0.0378, "step": 153275 }, { "epoch": 7.15, - "learning_rate": 5.722188364352351e-06, - "loss": 0.0213, + "learning_rate": 1.5744465074189067e-05, + "loss": 0.0266, "step": 153280 }, { "epoch": 7.15, - "learning_rate": 5.721719563077212e-06, - "loss": 0.035, + "learning_rate": 1.574399700435305e-05, + "loss": 0.0072, "step": 153285 }, { "epoch": 7.15, - "learning_rate": 5.721250761802073e-06, - "loss": 0.0236, + "learning_rate": 1.574352893451703e-05, + "loss": 0.0024, "step": 153290 }, { "epoch": 7.15, - "learning_rate": 5.720781960526933e-06, - "loss": 0.0808, + "learning_rate": 1.574306086468101e-05, + "loss": 0.0327, "step": 153295 }, { "epoch": 7.15, - "learning_rate": 5.720313159251793e-06, - "loss": 0.0389, + "learning_rate": 1.574259279484499e-05, + "loss": 0.032, "step": 153300 }, { "epoch": 7.15, - "learning_rate": 5.719844357976654e-06, - "loss": 0.0818, + "learning_rate": 1.5742124725008973e-05, + "loss": 0.1362, "step": 153305 }, { "epoch": 7.15, - "learning_rate": 5.719375556701515e-06, - "loss": 0.1182, + "learning_rate": 1.5741656655172953e-05, + "loss": 0.0499, "step": 153310 }, { "epoch": 7.15, - "learning_rate": 5.718906755426376e-06, - "loss": 0.1394, + "learning_rate": 1.5741188585336932e-05, + "loss": 0.0739, "step": 153315 }, { "epoch": 7.15, - "learning_rate": 5.718437954151236e-06, - "loss": 0.2854, + "learning_rate": 1.5740720515500912e-05, + "loss": 0.1392, "step": 153320 }, { "epoch": 7.15, - "learning_rate": 5.717969152876096e-06, - "loss": 0.0105, + "learning_rate": 1.5740252445664896e-05, + "loss": 0.0335, "step": 153325 }, { "epoch": 7.15, - "learning_rate": 5.717500351600956e-06, - "loss": 0.0623, + "learning_rate": 1.5739784375828875e-05, + "loss": 0.0228, "step": 153330 }, { "epoch": 7.15, - "learning_rate": 5.717031550325818e-06, - "loss": 0.0169, + "learning_rate": 1.5739316305992855e-05, + "loss": 0.0318, "step": 153335 }, { "epoch": 7.16, - "learning_rate": 5.716562749050678e-06, - "loss": 0.052, + "learning_rate": 1.5738848236156835e-05, + "loss": 0.0375, "step": 153340 }, { "epoch": 7.16, - "learning_rate": 5.716093947775539e-06, - "loss": 0.0529, + "learning_rate": 1.5738380166320815e-05, + "loss": 0.0799, "step": 153345 }, { "epoch": 7.16, - "learning_rate": 5.715625146500399e-06, - "loss": 0.0328, + "learning_rate": 1.5737912096484795e-05, + "loss": 0.0106, "step": 153350 }, { "epoch": 7.16, - "learning_rate": 5.715156345225259e-06, - "loss": 0.0909, + "learning_rate": 1.5737444026648774e-05, + "loss": 0.0765, "step": 153355 }, { "epoch": 7.16, - "learning_rate": 5.71468754395012e-06, - "loss": 0.1405, + "learning_rate": 1.5736975956812758e-05, + "loss": 0.0672, "step": 153360 }, { "epoch": 7.16, - "learning_rate": 5.714218742674981e-06, - "loss": 0.3014, + "learning_rate": 1.5736507886976737e-05, + "loss": 0.1501, "step": 153365 }, { "epoch": 7.16, - "learning_rate": 5.713749941399841e-06, - "loss": 0.1566, + "learning_rate": 1.5736039817140717e-05, + "loss": 0.2315, "step": 153370 }, { "epoch": 7.16, - "learning_rate": 5.713281140124702e-06, - "loss": 0.0421, + "learning_rate": 1.5735571747304697e-05, + "loss": 0.0197, "step": 153375 }, { "epoch": 7.16, - "learning_rate": 5.7128123388495624e-06, - "loss": 0.0085, + "learning_rate": 1.573510367746868e-05, + "loss": 0.0142, "step": 153380 }, { "epoch": 7.16, - "learning_rate": 5.712343537574423e-06, - "loss": 0.0869, + "learning_rate": 1.573463560763266e-05, + "loss": 0.0532, "step": 153385 }, { "epoch": 7.16, - "learning_rate": 5.711874736299283e-06, - "loss": 0.025, + "learning_rate": 1.573416753779664e-05, + "loss": 0.0356, "step": 153390 }, { "epoch": 7.16, - "learning_rate": 5.711405935024144e-06, - "loss": 0.0703, + "learning_rate": 1.5733699467960623e-05, + "loss": 0.0182, "step": 153395 }, { "epoch": 7.16, - "learning_rate": 5.710937133749004e-06, - "loss": 0.0653, + "learning_rate": 1.57332313981246e-05, + "loss": 0.082, "step": 153400 }, { "epoch": 7.16, - "learning_rate": 5.7104683324738655e-06, - "loss": 0.0868, + "learning_rate": 1.573276332828858e-05, + "loss": 0.081, "step": 153405 }, { "epoch": 7.16, - "learning_rate": 5.7099995311987254e-06, - "loss": 0.1022, + "learning_rate": 1.573229525845256e-05, + "loss": 0.0692, "step": 153410 }, { "epoch": 7.16, - "learning_rate": 5.709530729923586e-06, - "loss": 0.0962, + "learning_rate": 1.5731827188616542e-05, + "loss": 0.2026, "step": 153415 }, { "epoch": 7.16, - "learning_rate": 5.709061928648446e-06, - "loss": 0.1988, + "learning_rate": 1.5731359118780522e-05, + "loss": 0.1419, "step": 153420 }, { "epoch": 7.16, - "learning_rate": 5.708593127373306e-06, - "loss": 0.0119, + "learning_rate": 1.5730891048944502e-05, + "loss": 0.0138, "step": 153425 }, { "epoch": 7.16, - "learning_rate": 5.708124326098168e-06, - "loss": 0.0056, + "learning_rate": 1.5730422979108482e-05, + "loss": 0.0078, "step": 153430 }, { "epoch": 7.16, - "learning_rate": 5.7076555248230285e-06, - "loss": 0.0142, + "learning_rate": 1.5729954909272465e-05, + "loss": 0.0139, "step": 153435 }, { "epoch": 7.16, - "learning_rate": 5.707186723547888e-06, - "loss": 0.0405, + "learning_rate": 1.5729486839436445e-05, + "loss": 0.0317, "step": 153440 }, { "epoch": 7.16, - "learning_rate": 5.706717922272748e-06, - "loss": 0.0487, + "learning_rate": 1.5729018769600425e-05, + "loss": 0.1098, "step": 153445 }, { "epoch": 7.16, - "learning_rate": 5.70624912099761e-06, - "loss": 0.0242, + "learning_rate": 1.5728550699764408e-05, + "loss": 0.0232, "step": 153450 }, { "epoch": 7.16, - "learning_rate": 5.705780319722471e-06, - "loss": 0.0702, + "learning_rate": 1.5728082629928388e-05, + "loss": 0.0545, "step": 153455 }, { "epoch": 7.16, - "learning_rate": 5.705311518447331e-06, - "loss": 0.0909, + "learning_rate": 1.5727614560092368e-05, + "loss": 0.1201, "step": 153460 }, { "epoch": 7.16, - "learning_rate": 5.704842717172191e-06, - "loss": 0.1855, + "learning_rate": 1.5727146490256344e-05, + "loss": 0.1181, "step": 153465 }, { "epoch": 7.16, - "learning_rate": 5.704373915897051e-06, - "loss": 0.2374, + "learning_rate": 1.5726678420420327e-05, + "loss": 0.2067, "step": 153470 }, { "epoch": 7.16, - "learning_rate": 5.703905114621913e-06, - "loss": 0.0153, + "learning_rate": 1.5726210350584307e-05, + "loss": 0.0145, "step": 153475 }, { "epoch": 7.16, - "learning_rate": 5.703436313346773e-06, - "loss": 0.0028, + "learning_rate": 1.5725742280748287e-05, + "loss": 0.0302, "step": 153480 }, { "epoch": 7.16, - "learning_rate": 5.702967512071633e-06, - "loss": 0.0331, + "learning_rate": 1.5725274210912267e-05, + "loss": 0.0248, "step": 153485 }, { "epoch": 7.16, - "learning_rate": 5.702498710796494e-06, - "loss": 0.0257, + "learning_rate": 1.572480614107625e-05, + "loss": 0.0239, "step": 153490 }, { "epoch": 7.16, - "learning_rate": 5.702029909521354e-06, - "loss": 0.0053, + "learning_rate": 1.572433807124023e-05, + "loss": 0.0708, "step": 153495 }, { "epoch": 7.16, - "learning_rate": 5.701561108246215e-06, - "loss": 0.0614, + "learning_rate": 1.572387000140421e-05, + "loss": 0.0694, "step": 153500 }, { "epoch": 7.16, - "learning_rate": 5.701092306971075e-06, - "loss": 0.0787, + "learning_rate": 1.572340193156819e-05, + "loss": 0.1701, "step": 153505 }, { "epoch": 7.16, - "learning_rate": 5.700623505695936e-06, - "loss": 0.189, + "learning_rate": 1.5722933861732172e-05, + "loss": 0.0781, "step": 153510 }, { "epoch": 7.16, - "learning_rate": 5.700154704420796e-06, - "loss": 0.2156, + "learning_rate": 1.5722465791896152e-05, + "loss": 0.2459, "step": 153515 }, { "epoch": 7.16, - "learning_rate": 5.6996859031456575e-06, - "loss": 0.3315, + "learning_rate": 1.5721997722060132e-05, + "loss": 0.2631, "step": 153520 }, { "epoch": 7.16, - "learning_rate": 5.6992171018705175e-06, - "loss": 0.0504, + "learning_rate": 1.5721529652224115e-05, + "loss": 0.0225, "step": 153525 }, { "epoch": 7.16, - "learning_rate": 5.698748300595378e-06, - "loss": 0.0264, + "learning_rate": 1.5721061582388092e-05, + "loss": 0.0192, "step": 153530 }, { "epoch": 7.16, - "learning_rate": 5.698279499320238e-06, - "loss": 0.0319, + "learning_rate": 1.572059351255207e-05, + "loss": 0.0205, "step": 153535 }, { "epoch": 7.16, - "learning_rate": 5.697810698045099e-06, - "loss": 0.0282, + "learning_rate": 1.572012544271605e-05, + "loss": 0.0349, "step": 153540 }, { "epoch": 7.16, - "learning_rate": 5.69734189676996e-06, - "loss": 0.0492, + "learning_rate": 1.5719657372880035e-05, + "loss": 0.0143, "step": 153545 }, { "epoch": 7.16, - "learning_rate": 5.6968730954948205e-06, - "loss": 0.0572, + "learning_rate": 1.5719189303044014e-05, + "loss": 0.0329, "step": 153550 }, { "epoch": 7.17, - "learning_rate": 5.6964042942196804e-06, - "loss": 0.0751, + "learning_rate": 1.5718721233207994e-05, + "loss": 0.1235, "step": 153555 }, { "epoch": 7.17, - "learning_rate": 5.695935492944541e-06, - "loss": 0.098, + "learning_rate": 1.5718253163371974e-05, + "loss": 0.1138, "step": 153560 }, { "epoch": 7.17, - "learning_rate": 5.695466691669402e-06, - "loss": 0.181, + "learning_rate": 1.5717785093535957e-05, + "loss": 0.0853, "step": 153565 }, { "epoch": 7.17, - "learning_rate": 5.694997890394263e-06, - "loss": 0.1072, + "learning_rate": 1.5717317023699937e-05, + "loss": 0.1149, "step": 153570 }, { "epoch": 7.17, - "learning_rate": 5.694529089119123e-06, - "loss": 0.0261, + "learning_rate": 1.5716848953863917e-05, + "loss": 0.041, "step": 153575 }, { "epoch": 7.17, - "learning_rate": 5.6940602878439835e-06, - "loss": 0.024, + "learning_rate": 1.57163808840279e-05, + "loss": 0.0164, "step": 153580 }, { "epoch": 7.17, - "learning_rate": 5.6935914865688434e-06, - "loss": 0.0074, + "learning_rate": 1.571591281419188e-05, + "loss": 0.0095, "step": 153585 }, { "epoch": 7.17, - "learning_rate": 5.693122685293705e-06, - "loss": 0.0324, + "learning_rate": 1.5715444744355856e-05, + "loss": 0.0435, "step": 153590 }, { "epoch": 7.17, - "learning_rate": 5.692653884018565e-06, - "loss": 0.0346, + "learning_rate": 1.5714976674519836e-05, + "loss": 0.0284, "step": 153595 }, { "epoch": 7.17, - "learning_rate": 5.692185082743426e-06, - "loss": 0.0141, + "learning_rate": 1.571450860468382e-05, + "loss": 0.0342, "step": 153600 }, { "epoch": 7.17, - "learning_rate": 5.691716281468286e-06, - "loss": 0.057, + "learning_rate": 1.57140405348478e-05, + "loss": 0.058, "step": 153605 }, { "epoch": 7.17, - "learning_rate": 5.6912474801931465e-06, - "loss": 0.0355, + "learning_rate": 1.571357246501178e-05, + "loss": 0.0409, "step": 153610 }, { "epoch": 7.17, - "learning_rate": 5.690778678918007e-06, - "loss": 0.1011, + "learning_rate": 1.571310439517576e-05, + "loss": 0.0625, "step": 153615 }, { "epoch": 7.17, - "learning_rate": 5.690309877642868e-06, - "loss": 0.2316, + "learning_rate": 1.5712636325339742e-05, + "loss": 0.1266, "step": 153620 }, { "epoch": 7.17, - "learning_rate": 5.689841076367728e-06, - "loss": 0.0141, + "learning_rate": 1.5712168255503722e-05, + "loss": 0.0054, "step": 153625 }, { "epoch": 7.17, - "learning_rate": 5.689372275092589e-06, - "loss": 0.0071, + "learning_rate": 1.57117001856677e-05, + "loss": 0.0136, "step": 153630 }, { "epoch": 7.17, - "learning_rate": 5.6889034738174495e-06, - "loss": 0.0134, + "learning_rate": 1.5711232115831685e-05, + "loss": 0.0229, "step": 153635 }, { "epoch": 7.17, - "learning_rate": 5.68843467254231e-06, - "loss": 0.0438, + "learning_rate": 1.5710764045995665e-05, + "loss": 0.0138, "step": 153640 }, { "epoch": 7.17, - "learning_rate": 5.68796587126717e-06, - "loss": 0.0194, + "learning_rate": 1.5710295976159645e-05, + "loss": 0.0341, "step": 153645 }, { "epoch": 7.17, - "learning_rate": 5.687497069992031e-06, - "loss": 0.0711, + "learning_rate": 1.5709827906323624e-05, + "loss": 0.0425, "step": 153650 }, { "epoch": 7.17, - "learning_rate": 5.687028268716891e-06, - "loss": 0.0746, + "learning_rate": 1.5709359836487604e-05, + "loss": 0.0751, "step": 153655 }, { "epoch": 7.17, - "learning_rate": 5.686559467441753e-06, - "loss": 0.0407, + "learning_rate": 1.5708891766651584e-05, + "loss": 0.0781, "step": 153660 }, { "epoch": 7.17, - "learning_rate": 5.6860906661666125e-06, - "loss": 0.1146, + "learning_rate": 1.5708423696815564e-05, + "loss": 0.1148, "step": 153665 }, { "epoch": 7.17, - "learning_rate": 5.685621864891473e-06, - "loss": 0.2838, + "learning_rate": 1.5707955626979544e-05, + "loss": 0.1081, "step": 153670 }, { "epoch": 7.17, - "learning_rate": 5.685153063616333e-06, - "loss": 0.0044, + "learning_rate": 1.5707487557143527e-05, + "loss": 0.0209, "step": 153675 }, { "epoch": 7.17, - "learning_rate": 5.684684262341193e-06, - "loss": 0.0687, + "learning_rate": 1.5707019487307507e-05, + "loss": 0.0215, "step": 153680 }, { "epoch": 7.17, - "learning_rate": 5.684215461066055e-06, - "loss": 0.0135, + "learning_rate": 1.5706551417471486e-05, + "loss": 0.0203, "step": 153685 }, { "epoch": 7.17, - "learning_rate": 5.683746659790916e-06, - "loss": 0.1, + "learning_rate": 1.570608334763547e-05, + "loss": 0.0255, "step": 153690 }, { "epoch": 7.17, - "learning_rate": 5.6832778585157755e-06, - "loss": 0.0778, + "learning_rate": 1.570561527779945e-05, + "loss": 0.0614, "step": 153695 }, { "epoch": 7.17, - "learning_rate": 5.6828090572406355e-06, - "loss": 0.0564, + "learning_rate": 1.570514720796343e-05, + "loss": 0.0971, "step": 153700 }, { "epoch": 7.17, - "learning_rate": 5.682340255965497e-06, - "loss": 0.047, + "learning_rate": 1.570467913812741e-05, + "loss": 0.0763, "step": 153705 }, { "epoch": 7.17, - "learning_rate": 5.681871454690358e-06, - "loss": 0.1076, + "learning_rate": 1.5704211068291392e-05, + "loss": 0.0804, "step": 153710 }, { "epoch": 7.17, - "learning_rate": 5.681402653415218e-06, - "loss": 0.1387, + "learning_rate": 1.5703742998455372e-05, + "loss": 0.1953, "step": 153715 }, { "epoch": 7.17, - "learning_rate": 5.680933852140078e-06, - "loss": 0.1229, + "learning_rate": 1.570327492861935e-05, + "loss": 0.2495, "step": 153720 }, { "epoch": 7.17, - "learning_rate": 5.6804650508649385e-06, - "loss": 0.0514, + "learning_rate": 1.570280685878333e-05, + "loss": 0.01, "step": 153725 }, { "epoch": 7.17, - "learning_rate": 5.6799962495898e-06, - "loss": 0.0044, + "learning_rate": 1.570233878894731e-05, + "loss": 0.0104, "step": 153730 }, { "epoch": 7.17, - "learning_rate": 5.67952744831466e-06, - "loss": 0.0377, + "learning_rate": 1.570187071911129e-05, + "loss": 0.0035, "step": 153735 }, { "epoch": 7.17, - "learning_rate": 5.67905864703952e-06, - "loss": 0.0354, + "learning_rate": 1.570140264927527e-05, + "loss": 0.0338, "step": 153740 }, { "epoch": 7.17, - "learning_rate": 5.678589845764381e-06, - "loss": 0.0445, + "learning_rate": 1.570093457943925e-05, + "loss": 0.0454, "step": 153745 }, { "epoch": 7.17, - "learning_rate": 5.678121044489241e-06, - "loss": 0.04, + "learning_rate": 1.5700466509603234e-05, + "loss": 0.0151, "step": 153750 }, { "epoch": 7.17, - "learning_rate": 5.677652243214102e-06, - "loss": 0.0534, + "learning_rate": 1.5699998439767214e-05, + "loss": 0.0554, "step": 153755 }, { "epoch": 7.17, - "learning_rate": 5.677183441938962e-06, - "loss": 0.1013, + "learning_rate": 1.5699530369931194e-05, + "loss": 0.0381, "step": 153760 }, { "epoch": 7.17, - "learning_rate": 5.676714640663823e-06, - "loss": 0.1085, + "learning_rate": 1.5699062300095177e-05, + "loss": 0.2031, "step": 153765 }, { "epoch": 7.18, - "learning_rate": 5.676245839388683e-06, - "loss": 0.2035, + "learning_rate": 1.5698594230259157e-05, + "loss": 0.2147, "step": 153770 }, { "epoch": 7.18, - "learning_rate": 5.675777038113545e-06, - "loss": 0.0228, + "learning_rate": 1.5698126160423137e-05, + "loss": 0.0179, "step": 153775 }, { "epoch": 7.18, - "learning_rate": 5.6753082368384046e-06, - "loss": 0.0161, + "learning_rate": 1.5697658090587113e-05, + "loss": 0.0156, "step": 153780 }, { "epoch": 7.18, - "learning_rate": 5.674839435563265e-06, - "loss": 0.0307, + "learning_rate": 1.5697190020751096e-05, + "loss": 0.0202, "step": 153785 }, { "epoch": 7.18, - "learning_rate": 5.674370634288125e-06, - "loss": 0.0844, + "learning_rate": 1.5696721950915076e-05, + "loss": 0.0222, "step": 153790 }, { "epoch": 7.18, - "learning_rate": 5.673901833012986e-06, - "loss": 0.0443, + "learning_rate": 1.5696253881079056e-05, + "loss": 0.0583, "step": 153795 }, { "epoch": 7.18, - "learning_rate": 5.673433031737847e-06, - "loss": 0.0999, + "learning_rate": 1.5695785811243036e-05, + "loss": 0.0795, "step": 153800 }, { "epoch": 7.18, - "learning_rate": 5.672964230462708e-06, - "loss": 0.0935, + "learning_rate": 1.569531774140702e-05, + "loss": 0.1009, "step": 153805 }, { "epoch": 7.18, - "learning_rate": 5.6724954291875676e-06, - "loss": 0.0444, + "learning_rate": 1.5694849671571e-05, + "loss": 0.0622, "step": 153810 }, { "epoch": 7.18, - "learning_rate": 5.672026627912428e-06, - "loss": 0.1496, + "learning_rate": 1.569438160173498e-05, + "loss": 0.1137, "step": 153815 }, { "epoch": 7.18, - "learning_rate": 5.671557826637288e-06, - "loss": 0.2125, + "learning_rate": 1.5693913531898962e-05, + "loss": 0.1352, "step": 153820 }, { "epoch": 7.18, - "learning_rate": 5.67108902536215e-06, - "loss": 0.012, + "learning_rate": 1.569344546206294e-05, + "loss": 0.02, "step": 153825 }, { "epoch": 7.18, - "learning_rate": 5.67062022408701e-06, - "loss": 0.0288, + "learning_rate": 1.569297739222692e-05, + "loss": 0.013, "step": 153830 }, { "epoch": 7.18, - "learning_rate": 5.670151422811871e-06, - "loss": 0.0614, + "learning_rate": 1.56925093223909e-05, + "loss": 0.0021, "step": 153835 }, { "epoch": 7.18, - "learning_rate": 5.6696826215367305e-06, - "loss": 0.0395, + "learning_rate": 1.5692041252554885e-05, + "loss": 0.0217, "step": 153840 }, { "epoch": 7.18, - "learning_rate": 5.669213820261592e-06, - "loss": 0.0086, + "learning_rate": 1.569157318271886e-05, + "loss": 0.0788, "step": 153845 }, { "epoch": 7.18, - "learning_rate": 5.668745018986452e-06, - "loss": 0.1203, + "learning_rate": 1.569110511288284e-05, + "loss": 0.0308, "step": 153850 }, { "epoch": 7.18, - "learning_rate": 5.668276217711313e-06, - "loss": 0.0484, + "learning_rate": 1.569063704304682e-05, + "loss": 0.0529, "step": 153855 }, { "epoch": 7.18, - "learning_rate": 5.667807416436173e-06, - "loss": 0.0456, + "learning_rate": 1.5690168973210804e-05, + "loss": 0.1608, "step": 153860 }, { "epoch": 7.18, - "learning_rate": 5.667338615161034e-06, - "loss": 0.1165, + "learning_rate": 1.5689700903374784e-05, + "loss": 0.201, "step": 153865 }, { "epoch": 7.18, - "learning_rate": 5.666869813885894e-06, - "loss": 0.0899, + "learning_rate": 1.5689232833538763e-05, + "loss": 0.189, "step": 153870 }, { "epoch": 7.18, - "learning_rate": 5.666401012610755e-06, - "loss": 0.0285, + "learning_rate": 1.5688764763702747e-05, + "loss": 0.0143, "step": 153875 }, { "epoch": 7.18, - "learning_rate": 5.665932211335615e-06, - "loss": 0.005, + "learning_rate": 1.5688296693866726e-05, + "loss": 0.0146, "step": 153880 }, { "epoch": 7.18, - "learning_rate": 5.665463410060476e-06, - "loss": 0.0089, + "learning_rate": 1.5687828624030706e-05, + "loss": 0.0071, "step": 153885 }, { "epoch": 7.18, - "learning_rate": 5.664994608785337e-06, - "loss": 0.0236, + "learning_rate": 1.5687360554194686e-05, + "loss": 0.0219, "step": 153890 }, { "epoch": 7.18, - "learning_rate": 5.6645258075101974e-06, - "loss": 0.0342, + "learning_rate": 1.568689248435867e-05, + "loss": 0.0256, "step": 153895 }, { "epoch": 7.18, - "learning_rate": 5.664057006235057e-06, - "loss": 0.0535, + "learning_rate": 1.568642441452265e-05, + "loss": 0.0423, "step": 153900 }, { "epoch": 7.18, - "learning_rate": 5.663588204959918e-06, + "learning_rate": 1.5685956344686626e-05, "loss": 0.0471, "step": 153905 }, { "epoch": 7.18, - "learning_rate": 5.663119403684778e-06, - "loss": 0.0511, + "learning_rate": 1.5685488274850605e-05, + "loss": 0.0902, "step": 153910 }, { "epoch": 7.18, - "learning_rate": 5.66265060240964e-06, - "loss": 0.1383, + "learning_rate": 1.568502020501459e-05, + "loss": 0.1161, "step": 153915 }, { "epoch": 7.18, - "learning_rate": 5.6621818011345e-06, - "loss": 0.3012, + "learning_rate": 1.568455213517857e-05, + "loss": 0.0834, "step": 153920 }, { "epoch": 7.18, - "learning_rate": 5.6617129998593604e-06, - "loss": 0.0381, + "learning_rate": 1.5684084065342548e-05, + "loss": 0.0244, "step": 153925 }, { "epoch": 7.18, - "learning_rate": 5.66124419858422e-06, - "loss": 0.0034, + "learning_rate": 1.5683615995506528e-05, + "loss": 0.0284, "step": 153930 }, { "epoch": 7.18, - "learning_rate": 5.66077539730908e-06, - "loss": 0.0136, + "learning_rate": 1.568314792567051e-05, + "loss": 0.0218, "step": 153935 }, { "epoch": 7.18, - "learning_rate": 5.660306596033942e-06, - "loss": 0.002, + "learning_rate": 1.568267985583449e-05, + "loss": 0.0054, "step": 153940 }, { "epoch": 7.18, - "learning_rate": 5.659837794758803e-06, - "loss": 0.0403, + "learning_rate": 1.568221178599847e-05, + "loss": 0.0927, "step": 153945 }, { "epoch": 7.18, - "learning_rate": 5.659368993483663e-06, - "loss": 0.1407, + "learning_rate": 1.5681743716162454e-05, + "loss": 0.1002, "step": 153950 }, { "epoch": 7.18, - "learning_rate": 5.6589001922085226e-06, - "loss": 0.0338, + "learning_rate": 1.5681275646326434e-05, + "loss": 0.0979, "step": 153955 }, { "epoch": 7.18, - "learning_rate": 5.658431390933384e-06, - "loss": 0.1137, + "learning_rate": 1.5680807576490414e-05, + "loss": 0.111, "step": 153960 }, { "epoch": 7.18, - "learning_rate": 5.657962589658245e-06, - "loss": 0.1629, + "learning_rate": 1.5680339506654393e-05, + "loss": 0.0775, "step": 153965 }, { "epoch": 7.18, - "learning_rate": 5.657493788383105e-06, - "loss": 0.2538, + "learning_rate": 1.5679871436818373e-05, + "loss": 0.1916, "step": 153970 }, { "epoch": 7.18, - "learning_rate": 5.657024987107965e-06, - "loss": 0.0717, + "learning_rate": 1.5679403366982353e-05, + "loss": 0.0023, "step": 153975 }, { "epoch": 7.18, - "learning_rate": 5.656556185832826e-06, - "loss": 0.0095, + "learning_rate": 1.5678935297146333e-05, + "loss": 0.0054, "step": 153980 }, { "epoch": 7.19, - "learning_rate": 5.656087384557687e-06, - "loss": 0.0204, + "learning_rate": 1.5678467227310313e-05, + "loss": 0.0781, "step": 153985 }, { "epoch": 7.19, - "learning_rate": 5.655618583282547e-06, - "loss": 0.0322, + "learning_rate": 1.5677999157474296e-05, + "loss": 0.0613, "step": 153990 }, { "epoch": 7.19, - "learning_rate": 5.655149782007407e-06, - "loss": 0.0405, + "learning_rate": 1.5677531087638276e-05, + "loss": 0.0137, "step": 153995 }, { "epoch": 7.19, - "learning_rate": 5.654680980732268e-06, - "loss": 0.0667, + "learning_rate": 1.5677063017802256e-05, + "loss": 0.1003, "step": 154000 }, { "epoch": 7.19, - "learning_rate": 5.654212179457128e-06, - "loss": 0.1003, + "learning_rate": 1.567659494796624e-05, + "loss": 0.0369, "step": 154005 }, { "epoch": 7.19, - "learning_rate": 5.6537433781819895e-06, - "loss": 0.1076, + "learning_rate": 1.567612687813022e-05, + "loss": 0.1593, "step": 154010 }, { "epoch": 7.19, - "learning_rate": 5.653274576906849e-06, - "loss": 0.1538, + "learning_rate": 1.56756588082942e-05, + "loss": 0.1624, "step": 154015 }, { "epoch": 7.19, - "learning_rate": 5.65280577563171e-06, - "loss": 0.1404, + "learning_rate": 1.5675190738458178e-05, + "loss": 0.2036, "step": 154020 }, { "epoch": 7.19, - "learning_rate": 5.65233697435657e-06, - "loss": 0.0048, + "learning_rate": 1.567472266862216e-05, + "loss": 0.0054, "step": 154025 }, { "epoch": 7.19, - "learning_rate": 5.651868173081432e-06, - "loss": 0.006, + "learning_rate": 1.567425459878614e-05, + "loss": 0.0147, "step": 154030 }, { "epoch": 7.19, - "learning_rate": 5.651399371806292e-06, - "loss": 0.0171, + "learning_rate": 1.5673786528950118e-05, + "loss": 0.0244, "step": 154035 }, { "epoch": 7.19, - "learning_rate": 5.6509305705311525e-06, - "loss": 0.0093, + "learning_rate": 1.5673318459114098e-05, + "loss": 0.0156, "step": 154040 }, { "epoch": 7.19, - "learning_rate": 5.650461769256012e-06, - "loss": 0.0245, + "learning_rate": 1.567285038927808e-05, + "loss": 0.0696, "step": 154045 }, { "epoch": 7.19, - "learning_rate": 5.649992967980873e-06, - "loss": 0.0175, + "learning_rate": 1.567238231944206e-05, + "loss": 0.0176, "step": 154050 }, { "epoch": 7.19, - "learning_rate": 5.649524166705734e-06, - "loss": 0.0578, + "learning_rate": 1.567191424960604e-05, + "loss": 0.0145, "step": 154055 }, { "epoch": 7.19, - "learning_rate": 5.649055365430595e-06, - "loss": 0.1092, + "learning_rate": 1.5671446179770024e-05, + "loss": 0.0859, "step": 154060 }, { "epoch": 7.19, - "learning_rate": 5.648586564155455e-06, - "loss": 0.1262, + "learning_rate": 1.5670978109934003e-05, + "loss": 0.2357, "step": 154065 }, { "epoch": 7.19, - "learning_rate": 5.6481177628803154e-06, - "loss": 0.2071, + "learning_rate": 1.5670510040097983e-05, + "loss": 0.2085, "step": 154070 }, { "epoch": 7.19, - "learning_rate": 5.647648961605175e-06, - "loss": 0.0323, + "learning_rate": 1.5670041970261963e-05, + "loss": 0.027, "step": 154075 }, { "epoch": 7.19, - "learning_rate": 5.647180160330037e-06, - "loss": 0.0067, + "learning_rate": 1.5669573900425946e-05, + "loss": 0.0512, "step": 154080 }, { "epoch": 7.19, - "learning_rate": 5.646711359054897e-06, - "loss": 0.0182, + "learning_rate": 1.5669105830589926e-05, + "loss": 0.0359, "step": 154085 }, { "epoch": 7.19, - "learning_rate": 5.646242557779758e-06, - "loss": 0.0157, + "learning_rate": 1.5668637760753906e-05, + "loss": 0.0205, "step": 154090 }, { "epoch": 7.19, - "learning_rate": 5.645773756504618e-06, - "loss": 0.1109, + "learning_rate": 1.5668169690917882e-05, + "loss": 0.0265, "step": 154095 }, { "epoch": 7.19, - "learning_rate": 5.645304955229479e-06, - "loss": 0.0166, + "learning_rate": 1.5667701621081866e-05, + "loss": 0.0333, "step": 154100 }, { "epoch": 7.19, - "learning_rate": 5.644836153954339e-06, - "loss": 0.0709, + "learning_rate": 1.5667233551245845e-05, + "loss": 0.1044, "step": 154105 }, { "epoch": 7.19, - "learning_rate": 5.6443673526792e-06, - "loss": 0.0854, + "learning_rate": 1.5666765481409825e-05, + "loss": 0.0305, "step": 154110 }, { "epoch": 7.19, - "learning_rate": 5.64389855140406e-06, - "loss": 0.1136, + "learning_rate": 1.5666297411573805e-05, + "loss": 0.2156, "step": 154115 }, { "epoch": 7.19, - "learning_rate": 5.643429750128921e-06, - "loss": 0.1547, + "learning_rate": 1.5665829341737788e-05, + "loss": 0.1407, "step": 154120 }, { "epoch": 7.19, - "learning_rate": 5.6429609488537815e-06, - "loss": 0.0038, + "learning_rate": 1.5665361271901768e-05, + "loss": 0.0164, "step": 154125 }, { "epoch": 7.19, - "learning_rate": 5.642492147578642e-06, - "loss": 0.0057, + "learning_rate": 1.5664893202065748e-05, + "loss": 0.0634, "step": 154130 }, { "epoch": 7.19, - "learning_rate": 5.642023346303502e-06, - "loss": 0.1308, + "learning_rate": 1.566442513222973e-05, + "loss": 0.0015, "step": 154135 }, { "epoch": 7.19, - "learning_rate": 5.641554545028363e-06, - "loss": 0.027, + "learning_rate": 1.566395706239371e-05, + "loss": 0.0447, "step": 154140 }, { "epoch": 7.19, - "learning_rate": 5.641085743753223e-06, - "loss": 0.0331, + "learning_rate": 1.566348899255769e-05, + "loss": 0.0418, "step": 154145 }, { "epoch": 7.19, - "learning_rate": 5.6406169424780845e-06, - "loss": 0.1032, + "learning_rate": 1.566302092272167e-05, + "loss": 0.0554, "step": 154150 }, { "epoch": 7.19, - "learning_rate": 5.6401481412029445e-06, - "loss": 0.124, + "learning_rate": 1.5662552852885654e-05, + "loss": 0.0419, "step": 154155 }, { "epoch": 7.19, - "learning_rate": 5.639679339927805e-06, - "loss": 0.1038, + "learning_rate": 1.566208478304963e-05, + "loss": 0.0895, "step": 154160 }, { "epoch": 7.19, - "learning_rate": 5.639210538652665e-06, - "loss": 0.0743, + "learning_rate": 1.566161671321361e-05, + "loss": 0.1015, "step": 154165 }, { "epoch": 7.19, - "learning_rate": 5.638741737377527e-06, - "loss": 0.0931, + "learning_rate": 1.566114864337759e-05, + "loss": 0.1199, "step": 154170 }, { "epoch": 7.19, - "learning_rate": 5.638272936102387e-06, - "loss": 0.0264, + "learning_rate": 1.5660680573541573e-05, + "loss": 0.0205, "step": 154175 }, { "epoch": 7.19, - "learning_rate": 5.6378041348272475e-06, - "loss": 0.0305, + "learning_rate": 1.5660212503705553e-05, + "loss": 0.0362, "step": 154180 }, { "epoch": 7.19, - "learning_rate": 5.6373353335521075e-06, - "loss": 0.0151, + "learning_rate": 1.5659744433869533e-05, + "loss": 0.0186, "step": 154185 }, { "epoch": 7.19, - "learning_rate": 5.636866532276967e-06, - "loss": 0.0217, + "learning_rate": 1.5659276364033516e-05, + "loss": 0.0399, "step": 154190 }, { "epoch": 7.19, - "learning_rate": 5.636397731001829e-06, - "loss": 0.0636, + "learning_rate": 1.5658808294197496e-05, + "loss": 0.0113, "step": 154195 }, { "epoch": 7.2, - "learning_rate": 5.63592892972669e-06, - "loss": 0.0379, + "learning_rate": 1.5658340224361475e-05, + "loss": 0.0942, "step": 154200 }, { "epoch": 7.2, - "learning_rate": 5.63546012845155e-06, - "loss": 0.07, + "learning_rate": 1.5657872154525455e-05, + "loss": 0.0726, "step": 154205 }, { "epoch": 7.2, - "learning_rate": 5.63499132717641e-06, - "loss": 0.0547, + "learning_rate": 1.565740408468944e-05, + "loss": 0.0739, "step": 154210 }, { "epoch": 7.2, - "learning_rate": 5.634522525901271e-06, - "loss": 0.1232, + "learning_rate": 1.5656936014853418e-05, + "loss": 0.0804, "step": 154215 }, { "epoch": 7.2, - "learning_rate": 5.634053724626132e-06, - "loss": 0.1071, + "learning_rate": 1.5656467945017398e-05, + "loss": 0.2833, "step": 154220 }, { "epoch": 7.2, - "learning_rate": 5.633584923350992e-06, - "loss": 0.0208, + "learning_rate": 1.5655999875181375e-05, + "loss": 0.0501, "step": 154225 }, { "epoch": 7.2, - "learning_rate": 5.633116122075852e-06, - "loss": 0.0391, + "learning_rate": 1.5655531805345358e-05, + "loss": 0.0088, "step": 154230 }, { "epoch": 7.2, - "learning_rate": 5.632647320800713e-06, - "loss": 0.0041, + "learning_rate": 1.5655063735509338e-05, + "loss": 0.0271, "step": 154235 }, { "epoch": 7.2, - "learning_rate": 5.632178519525574e-06, - "loss": 0.0569, + "learning_rate": 1.5654595665673317e-05, + "loss": 0.0274, "step": 154240 }, { "epoch": 7.2, - "learning_rate": 5.631709718250434e-06, - "loss": 0.0261, + "learning_rate": 1.56541275958373e-05, + "loss": 0.0436, "step": 154245 }, { "epoch": 7.2, - "learning_rate": 5.631240916975294e-06, - "loss": 0.0903, + "learning_rate": 1.565365952600128e-05, + "loss": 0.0462, "step": 154250 }, { "epoch": 7.2, - "learning_rate": 5.630772115700155e-06, - "loss": 0.0409, + "learning_rate": 1.565319145616526e-05, + "loss": 0.0445, "step": 154255 }, { "epoch": 7.2, - "learning_rate": 5.630303314425015e-06, - "loss": 0.1137, + "learning_rate": 1.565272338632924e-05, + "loss": 0.1848, "step": 154260 }, { "epoch": 7.2, - "learning_rate": 5.6298345131498766e-06, - "loss": 0.1206, + "learning_rate": 1.5652255316493223e-05, + "loss": 0.099, "step": 154265 }, { "epoch": 7.2, - "learning_rate": 5.6293657118747365e-06, - "loss": 0.145, + "learning_rate": 1.5651787246657203e-05, + "loss": 0.1418, "step": 154270 }, { "epoch": 7.2, - "learning_rate": 5.628896910599597e-06, - "loss": 0.0211, + "learning_rate": 1.5651319176821183e-05, + "loss": 0.0241, "step": 154275 }, { "epoch": 7.2, - "learning_rate": 5.628428109324457e-06, - "loss": 0.0007, + "learning_rate": 1.5650851106985163e-05, + "loss": 0.0087, "step": 154280 }, { "epoch": 7.2, - "learning_rate": 5.627959308049319e-06, - "loss": 0.0189, + "learning_rate": 1.5650383037149142e-05, + "loss": 0.0201, "step": 154285 }, { "epoch": 7.2, - "learning_rate": 5.627490506774179e-06, - "loss": 0.0417, + "learning_rate": 1.5649914967313122e-05, + "loss": 0.0196, "step": 154290 }, { "epoch": 7.2, - "learning_rate": 5.6270217054990396e-06, - "loss": 0.0141, + "learning_rate": 1.5649446897477102e-05, + "loss": 0.068, "step": 154295 }, { "epoch": 7.2, - "learning_rate": 5.6265529042238995e-06, - "loss": 0.0667, + "learning_rate": 1.5648978827641082e-05, + "loss": 0.0553, "step": 154300 }, { "epoch": 7.2, - "learning_rate": 5.62608410294876e-06, - "loss": 0.0699, + "learning_rate": 1.5648510757805065e-05, + "loss": 0.0725, "step": 154305 }, { "epoch": 7.2, - "learning_rate": 5.625615301673621e-06, - "loss": 0.0534, + "learning_rate": 1.5648042687969045e-05, + "loss": 0.1232, "step": 154310 }, { "epoch": 7.2, - "learning_rate": 5.625146500398482e-06, - "loss": 0.0844, + "learning_rate": 1.5647574618133025e-05, + "loss": 0.199, "step": 154315 }, { "epoch": 7.2, - "learning_rate": 5.624677699123342e-06, - "loss": 0.1711, + "learning_rate": 1.5647106548297008e-05, + "loss": 0.1772, "step": 154320 }, { "epoch": 7.2, - "learning_rate": 5.6242088978482026e-06, - "loss": 0.0145, + "learning_rate": 1.5646638478460988e-05, + "loss": 0.0372, "step": 154325 }, { "epoch": 7.2, - "learning_rate": 5.6237400965730625e-06, - "loss": 0.0224, + "learning_rate": 1.5646170408624968e-05, + "loss": 0.0517, "step": 154330 }, { "epoch": 7.2, - "learning_rate": 5.623271295297924e-06, - "loss": 0.0601, + "learning_rate": 1.5645702338788947e-05, + "loss": 0.0038, "step": 154335 }, { "epoch": 7.2, - "learning_rate": 5.622802494022784e-06, - "loss": 0.0287, + "learning_rate": 1.564523426895293e-05, + "loss": 0.0138, "step": 154340 }, { "epoch": 7.2, - "learning_rate": 5.622333692747645e-06, - "loss": 0.0736, + "learning_rate": 1.564476619911691e-05, + "loss": 0.0478, "step": 154345 }, { "epoch": 7.2, - "learning_rate": 5.621864891472505e-06, - "loss": 0.0534, + "learning_rate": 1.5644298129280887e-05, + "loss": 0.0805, "step": 154350 }, { "epoch": 7.2, - "learning_rate": 5.621396090197366e-06, - "loss": 0.0824, + "learning_rate": 1.5643830059444867e-05, + "loss": 0.1253, "step": 154355 }, { "epoch": 7.2, - "learning_rate": 5.620927288922226e-06, - "loss": 0.1083, + "learning_rate": 1.564336198960885e-05, + "loss": 0.133, "step": 154360 }, { "epoch": 7.2, - "learning_rate": 5.620458487647087e-06, - "loss": 0.1552, + "learning_rate": 1.564289391977283e-05, + "loss": 0.1194, "step": 154365 }, { "epoch": 7.2, - "learning_rate": 5.619989686371947e-06, - "loss": 0.1676, + "learning_rate": 1.564242584993681e-05, + "loss": 0.3266, "step": 154370 }, { "epoch": 7.2, - "learning_rate": 5.619520885096808e-06, - "loss": 0.0048, + "learning_rate": 1.5641957780100793e-05, + "loss": 0.0052, "step": 154375 }, { "epoch": 7.2, - "learning_rate": 5.619052083821669e-06, - "loss": 0.0192, + "learning_rate": 1.5641489710264773e-05, + "loss": 0.0252, "step": 154380 }, { "epoch": 7.2, - "learning_rate": 5.618583282546529e-06, - "loss": 0.0126, + "learning_rate": 1.5641021640428752e-05, + "loss": 0.0542, "step": 154385 }, { "epoch": 7.2, - "learning_rate": 5.618114481271389e-06, - "loss": 0.0534, + "learning_rate": 1.5640553570592732e-05, + "loss": 0.0927, "step": 154390 }, { "epoch": 7.2, - "learning_rate": 5.61764567999625e-06, - "loss": 0.0083, + "learning_rate": 1.5640085500756715e-05, + "loss": 0.0217, "step": 154395 }, { "epoch": 7.2, - "learning_rate": 5.61717687872111e-06, - "loss": 0.0437, + "learning_rate": 1.5639617430920695e-05, + "loss": 0.0309, "step": 154400 }, { "epoch": 7.2, - "learning_rate": 5.616708077445972e-06, - "loss": 0.0503, + "learning_rate": 1.5639149361084675e-05, + "loss": 0.0354, "step": 154405 }, { "epoch": 7.2, - "learning_rate": 5.616239276170832e-06, - "loss": 0.0756, + "learning_rate": 1.5638681291248655e-05, + "loss": 0.16, "step": 154410 }, { "epoch": 7.21, - "learning_rate": 5.615770474895692e-06, - "loss": 0.1752, + "learning_rate": 1.5638213221412635e-05, + "loss": 0.1104, "step": 154415 }, { "epoch": 7.21, - "learning_rate": 5.615301673620552e-06, - "loss": 0.1439, + "learning_rate": 1.5637745151576615e-05, + "loss": 0.1896, "step": 154420 }, { "epoch": 7.21, - "learning_rate": 5.614832872345414e-06, - "loss": 0.025, + "learning_rate": 1.5637277081740594e-05, + "loss": 0.0379, "step": 154425 }, { "epoch": 7.21, - "learning_rate": 5.614364071070274e-06, - "loss": 0.0554, + "learning_rate": 1.5636809011904578e-05, + "loss": 0.0157, "step": 154430 }, { "epoch": 7.21, - "learning_rate": 5.613895269795135e-06, - "loss": 0.021, + "learning_rate": 1.5636340942068557e-05, + "loss": 0.0235, "step": 154435 }, { "epoch": 7.21, - "learning_rate": 5.613426468519995e-06, - "loss": 0.0566, + "learning_rate": 1.5635872872232537e-05, + "loss": 0.033, "step": 154440 }, { "epoch": 7.21, - "learning_rate": 5.6129576672448545e-06, - "loss": 0.0872, + "learning_rate": 1.5635404802396517e-05, + "loss": 0.0251, "step": 154445 }, { "epoch": 7.21, - "learning_rate": 5.612488865969716e-06, - "loss": 0.0719, + "learning_rate": 1.56349367325605e-05, + "loss": 0.0434, "step": 154450 }, { "epoch": 7.21, - "learning_rate": 5.612020064694577e-06, - "loss": 0.0573, + "learning_rate": 1.563446866272448e-05, + "loss": 0.0452, "step": 154455 }, { "epoch": 7.21, - "learning_rate": 5.611551263419437e-06, - "loss": 0.1315, + "learning_rate": 1.563400059288846e-05, + "loss": 0.176, "step": 154460 }, { "epoch": 7.21, - "learning_rate": 5.611082462144297e-06, - "loss": 0.1213, + "learning_rate": 1.563353252305244e-05, + "loss": 0.1296, "step": 154465 }, { "epoch": 7.21, - "learning_rate": 5.6106136608691576e-06, - "loss": 0.1754, + "learning_rate": 1.5633064453216423e-05, + "loss": 0.2827, "step": 154470 }, { "epoch": 7.21, - "learning_rate": 5.610144859594019e-06, - "loss": 0.02, + "learning_rate": 1.56325963833804e-05, + "loss": 0.0256, "step": 154475 }, { "epoch": 7.21, - "learning_rate": 5.609676058318879e-06, - "loss": 0.0577, + "learning_rate": 1.563212831354438e-05, + "loss": 0.0141, "step": 154480 }, { "epoch": 7.21, - "learning_rate": 5.609207257043739e-06, - "loss": 0.0073, + "learning_rate": 1.5631660243708362e-05, + "loss": 0.0102, "step": 154485 }, { "epoch": 7.21, - "learning_rate": 5.6087384557686e-06, - "loss": 0.0374, + "learning_rate": 1.5631192173872342e-05, + "loss": 0.0242, "step": 154490 }, { "epoch": 7.21, - "learning_rate": 5.6082696544934615e-06, - "loss": 0.0232, + "learning_rate": 1.5630724104036322e-05, + "loss": 0.0338, "step": 154495 }, { "epoch": 7.21, - "learning_rate": 5.607800853218321e-06, - "loss": 0.0833, + "learning_rate": 1.5630256034200302e-05, + "loss": 0.0709, "step": 154500 }, { "epoch": 7.21, - "learning_rate": 5.607332051943181e-06, - "loss": 0.0707, + "learning_rate": 1.5629787964364285e-05, + "loss": 0.0403, "step": 154505 }, { "epoch": 7.21, - "learning_rate": 5.606863250668042e-06, - "loss": 0.0668, + "learning_rate": 1.5629319894528265e-05, + "loss": 0.0995, "step": 154510 }, { "epoch": 7.21, - "learning_rate": 5.606394449392902e-06, - "loss": 0.1316, + "learning_rate": 1.5628851824692245e-05, + "loss": 0.0653, "step": 154515 }, { "epoch": 7.21, - "learning_rate": 5.605925648117764e-06, - "loss": 0.1263, + "learning_rate": 1.5628383754856224e-05, + "loss": 0.1237, "step": 154520 }, { "epoch": 7.21, - "learning_rate": 5.605456846842624e-06, - "loss": 0.0133, + "learning_rate": 1.5627915685020208e-05, + "loss": 0.0656, "step": 154525 }, { "epoch": 7.21, - "learning_rate": 5.604988045567484e-06, - "loss": 0.0217, + "learning_rate": 1.5627447615184187e-05, + "loss": 0.0087, "step": 154530 }, { "epoch": 7.21, - "learning_rate": 5.604519244292344e-06, - "loss": 0.0259, + "learning_rate": 1.5626979545348167e-05, + "loss": 0.0261, "step": 154535 }, { "epoch": 7.21, - "learning_rate": 5.604050443017205e-06, - "loss": 0.0214, + "learning_rate": 1.5626511475512144e-05, + "loss": 0.0286, "step": 154540 }, { "epoch": 7.21, - "learning_rate": 5.603581641742067e-06, - "loss": 0.0541, + "learning_rate": 1.5626043405676127e-05, + "loss": 0.0181, "step": 154545 }, { "epoch": 7.21, - "learning_rate": 5.603112840466927e-06, - "loss": 0.0928, + "learning_rate": 1.5625575335840107e-05, + "loss": 0.0524, "step": 154550 }, { "epoch": 7.21, - "learning_rate": 5.602644039191787e-06, - "loss": 0.0701, + "learning_rate": 1.5625107266004087e-05, + "loss": 0.0724, "step": 154555 }, { "epoch": 7.21, - "learning_rate": 5.602175237916647e-06, - "loss": 0.094, + "learning_rate": 1.562463919616807e-05, + "loss": 0.076, "step": 154560 }, { "epoch": 7.21, - "learning_rate": 5.601706436641509e-06, - "loss": 0.1359, + "learning_rate": 1.562417112633205e-05, + "loss": 0.1268, "step": 154565 }, { "epoch": 7.21, - "learning_rate": 5.601237635366369e-06, - "loss": 0.1325, + "learning_rate": 1.562370305649603e-05, + "loss": 0.2168, "step": 154570 }, { "epoch": 7.21, - "learning_rate": 5.600768834091229e-06, - "loss": 0.0014, + "learning_rate": 1.562323498666001e-05, + "loss": 0.0213, "step": 154575 }, { "epoch": 7.21, - "learning_rate": 5.60030003281609e-06, - "loss": 0.0348, + "learning_rate": 1.5622766916823992e-05, + "loss": 0.0189, "step": 154580 }, { "epoch": 7.21, - "learning_rate": 5.59983123154095e-06, - "loss": 0.0434, + "learning_rate": 1.5622298846987972e-05, + "loss": 0.0294, "step": 154585 }, { "epoch": 7.21, - "learning_rate": 5.599362430265811e-06, - "loss": 0.0468, + "learning_rate": 1.5621830777151952e-05, + "loss": 0.0319, "step": 154590 }, { "epoch": 7.21, - "learning_rate": 5.598893628990671e-06, - "loss": 0.069, + "learning_rate": 1.5621362707315932e-05, + "loss": 0.0349, "step": 154595 }, { "epoch": 7.21, - "learning_rate": 5.598424827715532e-06, - "loss": 0.0206, + "learning_rate": 1.562089463747991e-05, + "loss": 0.0329, "step": 154600 }, { "epoch": 7.21, - "learning_rate": 5.597956026440392e-06, - "loss": 0.0301, + "learning_rate": 1.562042656764389e-05, + "loss": 0.0767, "step": 154605 }, { "epoch": 7.21, - "learning_rate": 5.5974872251652535e-06, - "loss": 0.1042, + "learning_rate": 1.561995849780787e-05, + "loss": 0.0763, "step": 154610 }, { "epoch": 7.21, - "learning_rate": 5.5970184238901134e-06, - "loss": 0.1425, + "learning_rate": 1.5619490427971854e-05, + "loss": 0.2753, "step": 154615 }, { "epoch": 7.21, - "learning_rate": 5.596549622614974e-06, - "loss": 0.2139, + "learning_rate": 1.5619022358135834e-05, + "loss": 0.2727, "step": 154620 }, { "epoch": 7.22, - "learning_rate": 5.596080821339834e-06, - "loss": 0.0054, + "learning_rate": 1.5618554288299814e-05, + "loss": 0.0325, "step": 154625 }, { "epoch": 7.22, - "learning_rate": 5.595612020064695e-06, - "loss": 0.0178, + "learning_rate": 1.5618086218463794e-05, + "loss": 0.0037, "step": 154630 }, { "epoch": 7.22, - "learning_rate": 5.595143218789556e-06, - "loss": 0.0141, + "learning_rate": 1.5617618148627777e-05, + "loss": 0.0377, "step": 154635 }, { "epoch": 7.22, - "learning_rate": 5.5946744175144165e-06, - "loss": 0.0621, + "learning_rate": 1.5617150078791757e-05, + "loss": 0.0299, "step": 154640 }, { "epoch": 7.22, - "learning_rate": 5.594205616239276e-06, - "loss": 0.0447, + "learning_rate": 1.5616682008955737e-05, + "loss": 0.0118, "step": 154645 }, { "epoch": 7.22, - "learning_rate": 5.593736814964137e-06, - "loss": 0.1183, + "learning_rate": 1.5616213939119717e-05, + "loss": 0.0908, "step": 154650 }, { "epoch": 7.22, - "learning_rate": 5.593268013688997e-06, - "loss": 0.0575, + "learning_rate": 1.56157458692837e-05, + "loss": 0.0778, "step": 154655 }, { "epoch": 7.22, - "learning_rate": 5.592799212413859e-06, - "loss": 0.1123, + "learning_rate": 1.561527779944768e-05, + "loss": 0.0765, "step": 154660 }, { "epoch": 7.22, - "learning_rate": 5.592330411138719e-06, - "loss": 0.0848, + "learning_rate": 1.5614809729611656e-05, + "loss": 0.1281, "step": 154665 }, { "epoch": 7.22, - "learning_rate": 5.5918616098635795e-06, - "loss": 0.1551, + "learning_rate": 1.561434165977564e-05, + "loss": 0.1907, "step": 154670 }, { "epoch": 7.22, - "learning_rate": 5.591392808588439e-06, - "loss": 0.0158, + "learning_rate": 1.561387358993962e-05, + "loss": 0.0157, "step": 154675 }, { "epoch": 7.22, - "learning_rate": 5.590924007313301e-06, - "loss": 0.0125, + "learning_rate": 1.56134055201036e-05, + "loss": 0.0239, "step": 154680 }, { "epoch": 7.22, - "learning_rate": 5.590455206038161e-06, - "loss": 0.0222, + "learning_rate": 1.561293745026758e-05, + "loss": 0.0375, "step": 154685 }, { "epoch": 7.22, - "learning_rate": 5.589986404763022e-06, - "loss": 0.0425, + "learning_rate": 1.5612469380431562e-05, + "loss": 0.0396, "step": 154690 }, { "epoch": 7.22, - "learning_rate": 5.589517603487882e-06, - "loss": 0.0392, + "learning_rate": 1.5612001310595542e-05, + "loss": 0.0385, "step": 154695 }, { "epoch": 7.22, - "learning_rate": 5.589048802212742e-06, - "loss": 0.0294, + "learning_rate": 1.561153324075952e-05, + "loss": 0.0313, "step": 154700 }, { "epoch": 7.22, - "learning_rate": 5.588580000937603e-06, - "loss": 0.0598, + "learning_rate": 1.56110651709235e-05, + "loss": 0.0313, "step": 154705 }, { "epoch": 7.22, - "learning_rate": 5.588111199662464e-06, - "loss": 0.0315, + "learning_rate": 1.5610597101087485e-05, + "loss": 0.0978, "step": 154710 }, { "epoch": 7.22, - "learning_rate": 5.587642398387324e-06, - "loss": 0.3259, + "learning_rate": 1.5610129031251464e-05, + "loss": 0.0794, "step": 154715 }, { "epoch": 7.22, - "learning_rate": 5.587173597112185e-06, - "loss": 0.1252, + "learning_rate": 1.5609660961415444e-05, + "loss": 0.0807, "step": 154720 }, { "epoch": 7.22, - "learning_rate": 5.586704795837045e-06, - "loss": 0.0447, + "learning_rate": 1.5609192891579424e-05, + "loss": 0.0724, "step": 154725 }, { "epoch": 7.22, - "learning_rate": 5.586235994561906e-06, - "loss": 0.0307, + "learning_rate": 1.5608724821743404e-05, + "loss": 0.0285, "step": 154730 }, { "epoch": 7.22, - "learning_rate": 5.585767193286766e-06, - "loss": 0.02, + "learning_rate": 1.5608256751907384e-05, + "loss": 0.0265, "step": 154735 }, { "epoch": 7.22, - "learning_rate": 5.585298392011627e-06, - "loss": 0.0282, + "learning_rate": 1.5607788682071363e-05, + "loss": 0.046, "step": 154740 }, { "epoch": 7.22, - "learning_rate": 5.584829590736487e-06, - "loss": 0.0152, + "learning_rate": 1.5607320612235347e-05, + "loss": 0.0224, "step": 154745 }, { "epoch": 7.22, - "learning_rate": 5.584360789461349e-06, - "loss": 0.0691, + "learning_rate": 1.5606852542399327e-05, + "loss": 0.0327, "step": 154750 }, { "epoch": 7.22, - "learning_rate": 5.5838919881862085e-06, - "loss": 0.0513, + "learning_rate": 1.5606384472563306e-05, + "loss": 0.1047, "step": 154755 }, { "epoch": 7.22, - "learning_rate": 5.583423186911069e-06, - "loss": 0.0399, + "learning_rate": 1.5605916402727286e-05, + "loss": 0.0862, "step": 154760 }, { "epoch": 7.22, - "learning_rate": 5.582954385635929e-06, - "loss": 0.1514, + "learning_rate": 1.560544833289127e-05, + "loss": 0.1443, "step": 154765 }, { "epoch": 7.22, - "learning_rate": 5.582485584360789e-06, - "loss": 0.1543, + "learning_rate": 1.560498026305525e-05, + "loss": 0.1986, "step": 154770 }, { "epoch": 7.22, - "learning_rate": 5.582016783085651e-06, - "loss": 0.0207, + "learning_rate": 1.560451219321923e-05, + "loss": 0.0306, "step": 154775 }, { "epoch": 7.22, - "learning_rate": 5.5815479818105116e-06, - "loss": 0.0212, + "learning_rate": 1.560404412338321e-05, + "loss": 0.0069, "step": 154780 }, { "epoch": 7.22, - "learning_rate": 5.5810791805353715e-06, - "loss": 0.0569, + "learning_rate": 1.5603576053547192e-05, + "loss": 0.0269, "step": 154785 }, { "epoch": 7.22, - "learning_rate": 5.5806103792602314e-06, - "loss": 0.0146, + "learning_rate": 1.560310798371117e-05, + "loss": 0.0221, "step": 154790 }, { "epoch": 7.22, - "learning_rate": 5.580141577985092e-06, - "loss": 0.0394, + "learning_rate": 1.5602639913875148e-05, + "loss": 0.0389, "step": 154795 }, { "epoch": 7.22, - "learning_rate": 5.579672776709954e-06, - "loss": 0.0346, + "learning_rate": 1.560217184403913e-05, + "loss": 0.044, "step": 154800 }, { "epoch": 7.22, - "learning_rate": 5.579203975434814e-06, - "loss": 0.0709, + "learning_rate": 1.560170377420311e-05, + "loss": 0.0416, "step": 154805 }, { "epoch": 7.22, - "learning_rate": 5.578735174159674e-06, - "loss": 0.1318, + "learning_rate": 1.560123570436709e-05, + "loss": 0.0723, "step": 154810 }, { "epoch": 7.22, - "learning_rate": 5.5782663728845345e-06, - "loss": 0.1359, + "learning_rate": 1.560076763453107e-05, + "loss": 0.1368, "step": 154815 }, { "epoch": 7.22, - "learning_rate": 5.577797571609396e-06, - "loss": 0.2657, + "learning_rate": 1.5600299564695054e-05, + "loss": 0.1183, "step": 154820 }, { "epoch": 7.22, - "learning_rate": 5.577328770334256e-06, - "loss": 0.0012, + "learning_rate": 1.5599831494859034e-05, + "loss": 0.0485, "step": 154825 }, { "epoch": 7.22, - "learning_rate": 5.576859969059116e-06, - "loss": 0.0332, + "learning_rate": 1.5599363425023014e-05, + "loss": 0.0266, "step": 154830 }, { "epoch": 7.22, - "learning_rate": 5.576391167783977e-06, - "loss": 0.0079, + "learning_rate": 1.5598895355186994e-05, + "loss": 0.0505, "step": 154835 }, { "epoch": 7.23, - "learning_rate": 5.575922366508837e-06, - "loss": 0.0125, + "learning_rate": 1.5598427285350977e-05, + "loss": 0.0388, "step": 154840 }, { "epoch": 7.23, - "learning_rate": 5.575453565233698e-06, - "loss": 0.019, + "learning_rate": 1.5597959215514957e-05, + "loss": 0.0285, "step": 154845 }, { "epoch": 7.23, - "learning_rate": 5.574984763958558e-06, - "loss": 0.0633, + "learning_rate": 1.5597491145678936e-05, + "loss": 0.0619, "step": 154850 }, { "epoch": 7.23, - "learning_rate": 5.574515962683419e-06, - "loss": 0.0384, + "learning_rate": 1.5597023075842916e-05, + "loss": 0.0311, "step": 154855 }, { "epoch": 7.23, - "learning_rate": 5.574047161408279e-06, - "loss": 0.1011, + "learning_rate": 1.5596555006006896e-05, + "loss": 0.0556, "step": 154860 }, { "epoch": 7.23, - "learning_rate": 5.57357836013314e-06, - "loss": 0.1186, + "learning_rate": 1.5596086936170876e-05, + "loss": 0.1029, "step": 154865 }, { "epoch": 7.23, - "learning_rate": 5.5731095588580005e-06, - "loss": 0.1975, + "learning_rate": 1.5595618866334856e-05, + "loss": 0.0864, "step": 154870 }, { "epoch": 7.23, - "learning_rate": 5.572640757582861e-06, - "loss": 0.0101, + "learning_rate": 1.559515079649884e-05, + "loss": 0.0158, "step": 154875 }, { "epoch": 7.23, - "learning_rate": 5.572171956307721e-06, - "loss": 0.0298, + "learning_rate": 1.559468272666282e-05, + "loss": 0.0184, "step": 154880 }, { "epoch": 7.23, - "learning_rate": 5.571703155032582e-06, - "loss": 0.0151, + "learning_rate": 1.55942146568268e-05, + "loss": 0.0424, "step": 154885 }, { "epoch": 7.23, - "learning_rate": 5.571234353757443e-06, - "loss": 0.0469, + "learning_rate": 1.559374658699078e-05, + "loss": 0.0139, "step": 154890 }, { "epoch": 7.23, - "learning_rate": 5.570765552482304e-06, - "loss": 0.0076, + "learning_rate": 1.559327851715476e-05, + "loss": 0.0158, "step": 154895 }, { "epoch": 7.23, - "learning_rate": 5.5702967512071635e-06, - "loss": 0.0357, + "learning_rate": 1.559281044731874e-05, + "loss": 0.1564, "step": 154900 }, { "epoch": 7.23, - "learning_rate": 5.569827949932024e-06, - "loss": 0.0684, + "learning_rate": 1.559234237748272e-05, + "loss": 0.112, "step": 154905 }, { "epoch": 7.23, - "learning_rate": 5.569359148656884e-06, - "loss": 0.1221, + "learning_rate": 1.55918743076467e-05, + "loss": 0.0836, "step": 154910 }, { "epoch": 7.23, - "learning_rate": 5.568890347381746e-06, - "loss": 0.1007, + "learning_rate": 1.5591406237810684e-05, + "loss": 0.1325, "step": 154915 }, { "epoch": 7.23, - "learning_rate": 5.568421546106606e-06, - "loss": 0.3027, + "learning_rate": 1.559093816797466e-05, + "loss": 0.1367, "step": 154920 }, { "epoch": 7.23, - "learning_rate": 5.567952744831467e-06, - "loss": 0.0417, + "learning_rate": 1.559047009813864e-05, + "loss": 0.0015, "step": 154925 }, { "epoch": 7.23, - "learning_rate": 5.5674839435563265e-06, - "loss": 0.019, + "learning_rate": 1.5590002028302624e-05, + "loss": 0.006, "step": 154930 }, { "epoch": 7.23, - "learning_rate": 5.567015142281188e-06, - "loss": 0.0132, + "learning_rate": 1.5589533958466603e-05, + "loss": 0.0234, "step": 154935 }, { "epoch": 7.23, - "learning_rate": 5.566546341006048e-06, - "loss": 0.0097, + "learning_rate": 1.5589065888630583e-05, + "loss": 0.0164, "step": 154940 }, { "epoch": 7.23, - "learning_rate": 5.566077539730909e-06, - "loss": 0.0447, + "learning_rate": 1.5588597818794563e-05, + "loss": 0.0612, "step": 154945 }, { "epoch": 7.23, - "learning_rate": 5.565608738455769e-06, - "loss": 0.0071, + "learning_rate": 1.5588129748958546e-05, + "loss": 0.039, "step": 154950 }, { "epoch": 7.23, - "learning_rate": 5.5651399371806296e-06, - "loss": 0.0457, + "learning_rate": 1.5587661679122526e-05, + "loss": 0.0804, "step": 154955 }, { "epoch": 7.23, - "learning_rate": 5.56467113590549e-06, - "loss": 0.072, + "learning_rate": 1.5587193609286506e-05, + "loss": 0.0857, "step": 154960 }, { "epoch": 7.23, - "learning_rate": 5.564202334630351e-06, - "loss": 0.1233, + "learning_rate": 1.5586725539450486e-05, + "loss": 0.2069, "step": 154965 }, { "epoch": 7.23, - "learning_rate": 5.563733533355211e-06, - "loss": 0.1358, + "learning_rate": 1.558625746961447e-05, + "loss": 0.0919, "step": 154970 }, { "epoch": 7.23, - "learning_rate": 5.563264732080072e-06, - "loss": 0.0008, + "learning_rate": 1.558578939977845e-05, + "loss": 0.0159, "step": 154975 }, { "epoch": 7.23, - "learning_rate": 5.562795930804932e-06, - "loss": 0.0159, + "learning_rate": 1.5585321329942425e-05, + "loss": 0.0145, "step": 154980 }, { "epoch": 7.23, - "learning_rate": 5.562327129529793e-06, - "loss": 0.0153, + "learning_rate": 1.558485326010641e-05, + "loss": 0.0019, "step": 154985 }, { "epoch": 7.23, - "learning_rate": 5.561858328254653e-06, - "loss": 0.0471, + "learning_rate": 1.5584385190270388e-05, + "loss": 0.0802, "step": 154990 }, { "epoch": 7.23, - "learning_rate": 5.561389526979514e-06, - "loss": 0.0211, + "learning_rate": 1.5583917120434368e-05, + "loss": 0.0378, "step": 154995 }, { "epoch": 7.23, - "learning_rate": 5.560920725704374e-06, - "loss": 0.025, + "learning_rate": 1.5583449050598348e-05, + "loss": 0.0516, "step": 155000 }, { "epoch": 7.23, - "learning_rate": 5.560451924429236e-06, - "loss": 0.0703, + "learning_rate": 1.558298098076233e-05, + "loss": 0.0519, "step": 155005 }, { "epoch": 7.23, - "learning_rate": 5.559983123154096e-06, - "loss": 0.1108, + "learning_rate": 1.558251291092631e-05, + "loss": 0.1302, "step": 155010 }, { "epoch": 7.23, - "learning_rate": 5.559514321878956e-06, - "loss": 0.1042, + "learning_rate": 1.558204484109029e-05, + "loss": 0.1562, "step": 155015 }, { "epoch": 7.23, - "learning_rate": 5.559045520603816e-06, - "loss": 0.1373, + "learning_rate": 1.558157677125427e-05, + "loss": 0.1435, "step": 155020 }, { "epoch": 7.23, - "learning_rate": 5.558576719328676e-06, - "loss": 0.0316, + "learning_rate": 1.5581108701418254e-05, + "loss": 0.0373, "step": 155025 }, { "epoch": 7.23, - "learning_rate": 5.558107918053538e-06, - "loss": 0.0111, + "learning_rate": 1.5580640631582234e-05, + "loss": 0.0195, "step": 155030 }, { "epoch": 7.23, - "learning_rate": 5.557639116778399e-06, - "loss": 0.0443, + "learning_rate": 1.5580172561746213e-05, + "loss": 0.0258, "step": 155035 }, { "epoch": 7.23, - "learning_rate": 5.557170315503259e-06, - "loss": 0.0146, + "learning_rate": 1.5579704491910197e-05, + "loss": 0.032, "step": 155040 }, { "epoch": 7.23, - "learning_rate": 5.5567015142281185e-06, - "loss": 0.0242, + "learning_rate": 1.5579236422074173e-05, + "loss": 0.0185, "step": 155045 }, { "epoch": 7.23, - "learning_rate": 5.556232712952979e-06, - "loss": 0.0463, + "learning_rate": 1.5578768352238153e-05, + "loss": 0.0939, "step": 155050 }, { "epoch": 7.24, - "learning_rate": 5.555763911677841e-06, - "loss": 0.0864, + "learning_rate": 1.5578300282402133e-05, + "loss": 0.1032, "step": 155055 }, { "epoch": 7.24, - "learning_rate": 5.555295110402701e-06, - "loss": 0.0943, + "learning_rate": 1.5577832212566116e-05, + "loss": 0.0834, "step": 155060 }, { "epoch": 7.24, - "learning_rate": 5.554826309127561e-06, - "loss": 0.1281, + "learning_rate": 1.5577364142730096e-05, + "loss": 0.119, "step": 155065 }, { "epoch": 7.24, - "learning_rate": 5.554357507852422e-06, - "loss": 0.1202, + "learning_rate": 1.5576896072894076e-05, + "loss": 0.0699, "step": 155070 }, { "epoch": 7.24, - "learning_rate": 5.553888706577283e-06, - "loss": 0.0095, + "learning_rate": 1.5576428003058055e-05, + "loss": 0.0172, "step": 155075 }, { "epoch": 7.24, - "learning_rate": 5.553419905302143e-06, - "loss": 0.0146, + "learning_rate": 1.557595993322204e-05, + "loss": 0.0071, "step": 155080 }, { "epoch": 7.24, - "learning_rate": 5.552951104027003e-06, - "loss": 0.0209, + "learning_rate": 1.557549186338602e-05, + "loss": 0.0011, "step": 155085 }, { "epoch": 7.24, - "learning_rate": 5.552482302751864e-06, - "loss": 0.0611, + "learning_rate": 1.5575023793549998e-05, + "loss": 0.0343, "step": 155090 }, { "epoch": 7.24, - "learning_rate": 5.552013501476724e-06, - "loss": 0.0285, + "learning_rate": 1.557455572371398e-05, + "loss": 0.0467, "step": 155095 }, { "epoch": 7.24, - "learning_rate": 5.5515447002015854e-06, - "loss": 0.0794, + "learning_rate": 1.557408765387796e-05, + "loss": 0.0736, "step": 155100 }, { "epoch": 7.24, - "learning_rate": 5.551075898926445e-06, - "loss": 0.0572, + "learning_rate": 1.5573619584041938e-05, + "loss": 0.0698, "step": 155105 }, { "epoch": 7.24, - "learning_rate": 5.550607097651306e-06, - "loss": 0.0797, + "learning_rate": 1.5573151514205917e-05, + "loss": 0.0842, "step": 155110 }, { "epoch": 7.24, - "learning_rate": 5.550138296376166e-06, - "loss": 0.1115, + "learning_rate": 1.55726834443699e-05, + "loss": 0.1389, "step": 155115 }, { "epoch": 7.24, - "learning_rate": 5.549669495101027e-06, - "loss": 0.1568, + "learning_rate": 1.557221537453388e-05, + "loss": 0.2036, "step": 155120 }, { "epoch": 7.24, - "learning_rate": 5.549200693825888e-06, - "loss": 0.0321, + "learning_rate": 1.557174730469786e-05, + "loss": 0.0165, "step": 155125 }, { "epoch": 7.24, - "learning_rate": 5.5487318925507484e-06, - "loss": 0.0213, + "learning_rate": 1.557127923486184e-05, + "loss": 0.014, "step": 155130 }, { "epoch": 7.24, - "learning_rate": 5.548263091275608e-06, - "loss": 0.0278, + "learning_rate": 1.5570811165025823e-05, + "loss": 0.0043, "step": 155135 }, { "epoch": 7.24, - "learning_rate": 5.547794290000469e-06, - "loss": 0.0195, + "learning_rate": 1.5570343095189803e-05, + "loss": 0.0342, "step": 155140 }, { "epoch": 7.24, - "learning_rate": 5.54732548872533e-06, - "loss": 0.0334, + "learning_rate": 1.5569875025353783e-05, + "loss": 0.0264, "step": 155145 }, { "epoch": 7.24, - "learning_rate": 5.546856687450191e-06, - "loss": 0.0607, + "learning_rate": 1.5569406955517763e-05, + "loss": 0.0681, "step": 155150 }, { "epoch": 7.24, - "learning_rate": 5.546387886175051e-06, - "loss": 0.0972, + "learning_rate": 1.5568938885681746e-05, + "loss": 0.18, "step": 155155 }, { "epoch": 7.24, - "learning_rate": 5.545919084899911e-06, - "loss": 0.0644, + "learning_rate": 1.5568470815845726e-05, + "loss": 0.1181, "step": 155160 }, { "epoch": 7.24, - "learning_rate": 5.545450283624771e-06, + "learning_rate": 1.5568002746009706e-05, "loss": 0.1773, "step": 155165 }, { "epoch": 7.24, - "learning_rate": 5.544981482349633e-06, - "loss": 0.1474, + "learning_rate": 1.5567534676173685e-05, + "loss": 0.2242, "step": 155170 }, { "epoch": 7.24, - "learning_rate": 5.544512681074493e-06, - "loss": 0.0347, + "learning_rate": 1.5567066606337665e-05, + "loss": 0.0234, "step": 155175 }, { "epoch": 7.24, - "learning_rate": 5.544043879799354e-06, - "loss": 0.0098, + "learning_rate": 1.5566598536501645e-05, + "loss": 0.0142, "step": 155180 }, { "epoch": 7.24, - "learning_rate": 5.543575078524214e-06, - "loss": 0.01, + "learning_rate": 1.5566130466665625e-05, + "loss": 0.019, "step": 155185 }, { "epoch": 7.24, - "learning_rate": 5.543106277249074e-06, - "loss": 0.0278, + "learning_rate": 1.5565662396829608e-05, + "loss": 0.0173, "step": 155190 }, { "epoch": 7.24, - "learning_rate": 5.542637475973935e-06, - "loss": 0.1009, + "learning_rate": 1.5565194326993588e-05, + "loss": 0.0669, "step": 155195 }, { "epoch": 7.24, - "learning_rate": 5.542168674698796e-06, - "loss": 0.1008, + "learning_rate": 1.5564726257157568e-05, + "loss": 0.0459, "step": 155200 }, { "epoch": 7.24, - "learning_rate": 5.541699873423656e-06, - "loss": 0.0631, + "learning_rate": 1.5564258187321548e-05, + "loss": 0.0531, "step": 155205 }, { "epoch": 7.24, - "learning_rate": 5.541231072148517e-06, - "loss": 0.0347, + "learning_rate": 1.556379011748553e-05, + "loss": 0.0942, "step": 155210 }, { "epoch": 7.24, - "learning_rate": 5.5407622708733775e-06, - "loss": 0.118, + "learning_rate": 1.556332204764951e-05, + "loss": 0.1837, "step": 155215 }, { "epoch": 7.24, - "learning_rate": 5.540293469598238e-06, - "loss": 0.1535, + "learning_rate": 1.556285397781349e-05, + "loss": 0.1598, "step": 155220 }, { "epoch": 7.24, - "learning_rate": 5.539824668323098e-06, - "loss": 0.0086, + "learning_rate": 1.5562385907977474e-05, + "loss": 0.0263, "step": 155225 }, { "epoch": 7.24, - "learning_rate": 5.539355867047959e-06, - "loss": 0.0033, + "learning_rate": 1.5561917838141453e-05, + "loss": 0.0218, "step": 155230 }, { "epoch": 7.24, - "learning_rate": 5.538887065772819e-06, - "loss": 0.0262, + "learning_rate": 1.556144976830543e-05, + "loss": 0.0158, "step": 155235 }, { "epoch": 7.24, - "learning_rate": 5.5384182644976805e-06, - "loss": 0.0071, + "learning_rate": 1.556098169846941e-05, + "loss": 0.0456, "step": 155240 }, { "epoch": 7.24, - "learning_rate": 5.5379494632225405e-06, - "loss": 0.0254, + "learning_rate": 1.5560513628633393e-05, + "loss": 0.0385, "step": 155245 }, { "epoch": 7.24, - "learning_rate": 5.537480661947401e-06, - "loss": 0.0259, + "learning_rate": 1.5560045558797373e-05, + "loss": 0.0408, "step": 155250 }, { "epoch": 7.24, - "learning_rate": 5.537011860672261e-06, - "loss": 0.0553, + "learning_rate": 1.5559577488961352e-05, + "loss": 0.0914, "step": 155255 }, { "epoch": 7.24, - "learning_rate": 5.536543059397123e-06, - "loss": 0.0491, + "learning_rate": 1.5559109419125332e-05, + "loss": 0.0594, "step": 155260 }, { "epoch": 7.24, - "learning_rate": 5.536074258121983e-06, - "loss": 0.179, + "learning_rate": 1.5558641349289315e-05, + "loss": 0.1741, "step": 155265 }, { "epoch": 7.25, - "learning_rate": 5.5356054568468435e-06, - "loss": 0.133, + "learning_rate": 1.5558173279453295e-05, + "loss": 0.2206, "step": 155270 }, { "epoch": 7.25, - "learning_rate": 5.5351366555717034e-06, - "loss": 0.0368, + "learning_rate": 1.5557705209617275e-05, + "loss": 0.0379, "step": 155275 }, { "epoch": 7.25, - "learning_rate": 5.534667854296563e-06, - "loss": 0.0151, + "learning_rate": 1.555723713978126e-05, + "loss": 0.0091, "step": 155280 }, { "epoch": 7.25, - "learning_rate": 5.534199053021425e-06, - "loss": 0.0453, + "learning_rate": 1.5556769069945238e-05, + "loss": 0.0484, "step": 155285 }, { "epoch": 7.25, - "learning_rate": 5.533730251746286e-06, - "loss": 0.0717, + "learning_rate": 1.5556301000109218e-05, + "loss": 0.0131, "step": 155290 }, { "epoch": 7.25, - "learning_rate": 5.533261450471146e-06, - "loss": 0.0381, + "learning_rate": 1.5555832930273194e-05, + "loss": 0.0144, "step": 155295 }, { "epoch": 7.25, - "learning_rate": 5.532792649196006e-06, - "loss": 0.0489, + "learning_rate": 1.5555364860437178e-05, + "loss": 0.0434, "step": 155300 }, { "epoch": 7.25, - "learning_rate": 5.5323238479208664e-06, - "loss": 0.0977, + "learning_rate": 1.5554896790601157e-05, + "loss": 0.0657, "step": 155305 }, { "epoch": 7.25, - "learning_rate": 5.531855046645728e-06, - "loss": 0.1329, + "learning_rate": 1.5554428720765137e-05, + "loss": 0.0917, "step": 155310 }, { "epoch": 7.25, - "learning_rate": 5.531386245370588e-06, - "loss": 0.1467, + "learning_rate": 1.5553960650929117e-05, + "loss": 0.0994, "step": 155315 }, { "epoch": 7.25, - "learning_rate": 5.530917444095448e-06, - "loss": 0.183, + "learning_rate": 1.55534925810931e-05, + "loss": 0.2969, "step": 155320 }, { "epoch": 7.25, - "learning_rate": 5.530448642820309e-06, - "loss": 0.0066, + "learning_rate": 1.555302451125708e-05, + "loss": 0.0323, "step": 155325 }, { "epoch": 7.25, - "learning_rate": 5.52997984154517e-06, - "loss": 0.0233, + "learning_rate": 1.555255644142106e-05, + "loss": 0.0262, "step": 155330 }, { "epoch": 7.25, - "learning_rate": 5.52951104027003e-06, - "loss": 0.0337, + "learning_rate": 1.555208837158504e-05, + "loss": 0.0922, "step": 155335 }, { "epoch": 7.25, - "learning_rate": 5.52904223899489e-06, - "loss": 0.0352, + "learning_rate": 1.5551620301749023e-05, + "loss": 0.045, "step": 155340 }, { "epoch": 7.25, - "learning_rate": 5.528573437719751e-06, - "loss": 0.0397, + "learning_rate": 1.5551152231913003e-05, + "loss": 0.0284, "step": 155345 }, { "epoch": 7.25, - "learning_rate": 5.528104636444611e-06, - "loss": 0.0463, + "learning_rate": 1.5550684162076983e-05, + "loss": 0.031, "step": 155350 }, { "epoch": 7.25, - "learning_rate": 5.5276358351694725e-06, - "loss": 0.0527, + "learning_rate": 1.5550216092240966e-05, + "loss": 0.0594, "step": 155355 }, { "epoch": 7.25, - "learning_rate": 5.5271670338943325e-06, - "loss": 0.0394, + "learning_rate": 1.5549748022404942e-05, + "loss": 0.0928, "step": 155360 }, { "epoch": 7.25, - "learning_rate": 5.526698232619193e-06, - "loss": 0.1115, + "learning_rate": 1.5549279952568922e-05, + "loss": 0.1022, "step": 155365 }, { "epoch": 7.25, - "learning_rate": 5.526229431344053e-06, - "loss": 0.1308, + "learning_rate": 1.5548811882732902e-05, + "loss": 0.1477, "step": 155370 }, { "epoch": 7.25, - "learning_rate": 5.525760630068914e-06, - "loss": 0.0405, + "learning_rate": 1.5548343812896885e-05, + "loss": 0.0189, "step": 155375 }, { "epoch": 7.25, - "learning_rate": 5.525291828793775e-06, - "loss": 0.0302, + "learning_rate": 1.5547875743060865e-05, + "loss": 0.0312, "step": 155380 }, { "epoch": 7.25, - "learning_rate": 5.5248230275186355e-06, - "loss": 0.0252, + "learning_rate": 1.5547407673224845e-05, + "loss": 0.0219, "step": 155385 }, { "epoch": 7.25, - "learning_rate": 5.5243542262434955e-06, - "loss": 0.0414, + "learning_rate": 1.5546939603388824e-05, + "loss": 0.0437, "step": 155390 }, { "epoch": 7.25, - "learning_rate": 5.523885424968356e-06, - "loss": 0.0351, + "learning_rate": 1.5546471533552808e-05, + "loss": 0.0369, "step": 155395 }, { "epoch": 7.25, - "learning_rate": 5.523416623693217e-06, - "loss": 0.0725, + "learning_rate": 1.5546003463716788e-05, + "loss": 0.0764, "step": 155400 }, { "epoch": 7.25, - "learning_rate": 5.522947822418078e-06, - "loss": 0.1105, + "learning_rate": 1.5545535393880767e-05, + "loss": 0.0615, "step": 155405 }, { "epoch": 7.25, - "learning_rate": 5.522479021142938e-06, - "loss": 0.059, + "learning_rate": 1.554506732404475e-05, + "loss": 0.0562, "step": 155410 }, { "epoch": 7.25, - "learning_rate": 5.5220102198677985e-06, - "loss": 0.0696, + "learning_rate": 1.554459925420873e-05, + "loss": 0.1726, "step": 155415 }, { "epoch": 7.25, - "learning_rate": 5.5215414185926585e-06, - "loss": 0.1947, + "learning_rate": 1.554413118437271e-05, + "loss": 0.1361, "step": 155420 }, { "epoch": 7.25, - "learning_rate": 5.52107261731752e-06, - "loss": 0.0154, + "learning_rate": 1.5543663114536687e-05, + "loss": 0.0182, "step": 155425 }, { "epoch": 7.25, - "learning_rate": 5.52060381604238e-06, - "loss": 0.012, + "learning_rate": 1.554319504470067e-05, + "loss": 0.043, "step": 155430 }, { "epoch": 7.25, - "learning_rate": 5.520135014767241e-06, - "loss": 0.0267, + "learning_rate": 1.554272697486465e-05, + "loss": 0.0215, "step": 155435 }, { "epoch": 7.25, - "learning_rate": 5.519666213492101e-06, - "loss": 0.0391, + "learning_rate": 1.554225890502863e-05, + "loss": 0.0402, "step": 155440 }, { "epoch": 7.25, - "learning_rate": 5.5191974122169615e-06, - "loss": 0.0345, + "learning_rate": 1.554179083519261e-05, + "loss": 0.0323, "step": 155445 }, { "epoch": 7.25, - "learning_rate": 5.518728610941822e-06, - "loss": 0.038, + "learning_rate": 1.5541322765356592e-05, + "loss": 0.0879, "step": 155450 }, { "epoch": 7.25, - "learning_rate": 5.518259809666683e-06, - "loss": 0.0508, + "learning_rate": 1.5540854695520572e-05, + "loss": 0.0858, "step": 155455 }, { "epoch": 7.25, - "learning_rate": 5.517791008391543e-06, - "loss": 0.156, + "learning_rate": 1.5540386625684552e-05, + "loss": 0.0666, "step": 155460 }, { "epoch": 7.25, - "learning_rate": 5.517322207116404e-06, - "loss": 0.0952, + "learning_rate": 1.5539918555848535e-05, + "loss": 0.1878, "step": 155465 }, { "epoch": 7.25, - "learning_rate": 5.5168534058412646e-06, - "loss": 0.1969, + "learning_rate": 1.5539450486012515e-05, + "loss": 0.2636, "step": 155470 }, { "epoch": 7.25, - "learning_rate": 5.516384604566125e-06, - "loss": 0.0281, + "learning_rate": 1.5538982416176495e-05, + "loss": 0.0408, "step": 155475 }, { "epoch": 7.25, - "learning_rate": 5.515915803290985e-06, - "loss": 0.0027, + "learning_rate": 1.5538514346340475e-05, + "loss": 0.0115, "step": 155480 }, { "epoch": 7.26, - "learning_rate": 5.515447002015846e-06, - "loss": 0.0133, + "learning_rate": 1.5538046276504455e-05, + "loss": 0.0098, "step": 155485 }, { "epoch": 7.26, - "learning_rate": 5.514978200740706e-06, - "loss": 0.0587, + "learning_rate": 1.5537578206668434e-05, + "loss": 0.0097, "step": 155490 }, { "epoch": 7.26, - "learning_rate": 5.514509399465568e-06, - "loss": 0.0738, + "learning_rate": 1.5537110136832414e-05, + "loss": 0.0555, "step": 155495 }, { "epoch": 7.26, - "learning_rate": 5.5140405981904276e-06, - "loss": 0.051, + "learning_rate": 1.5536642066996394e-05, + "loss": 0.1238, "step": 155500 }, { "epoch": 7.26, - "learning_rate": 5.513571796915288e-06, - "loss": 0.0388, + "learning_rate": 1.5536173997160377e-05, + "loss": 0.0481, "step": 155505 }, { "epoch": 7.26, - "learning_rate": 5.513102995640148e-06, - "loss": 0.1731, + "learning_rate": 1.5535705927324357e-05, + "loss": 0.0651, "step": 155510 }, { "epoch": 7.26, - "learning_rate": 5.512634194365008e-06, - "loss": 0.0677, + "learning_rate": 1.5535237857488337e-05, + "loss": 0.103, "step": 155515 }, { "epoch": 7.26, - "learning_rate": 5.51216539308987e-06, - "loss": 0.1402, + "learning_rate": 1.5534769787652317e-05, + "loss": 0.0874, "step": 155520 }, { "epoch": 7.26, - "learning_rate": 5.511696591814731e-06, - "loss": 0.0148, + "learning_rate": 1.55343017178163e-05, + "loss": 0.0293, "step": 155525 }, { "epoch": 7.26, - "learning_rate": 5.5112277905395906e-06, - "loss": 0.0273, + "learning_rate": 1.553383364798028e-05, + "loss": 0.0119, "step": 155530 }, { "epoch": 7.26, - "learning_rate": 5.5107589892644505e-06, - "loss": 0.0581, + "learning_rate": 1.553336557814426e-05, + "loss": 0.0266, "step": 155535 }, { "epoch": 7.26, - "learning_rate": 5.510290187989312e-06, - "loss": 0.0605, + "learning_rate": 1.5532897508308243e-05, + "loss": 0.0522, "step": 155540 }, { "epoch": 7.26, - "learning_rate": 5.509821386714173e-06, - "loss": 0.0251, + "learning_rate": 1.5532429438472223e-05, + "loss": 0.0636, "step": 155545 }, { "epoch": 7.26, - "learning_rate": 5.509352585439033e-06, - "loss": 0.0373, + "learning_rate": 1.55319613686362e-05, + "loss": 0.0516, "step": 155550 }, { "epoch": 7.26, - "learning_rate": 5.508883784163893e-06, - "loss": 0.0919, + "learning_rate": 1.553149329880018e-05, + "loss": 0.0545, "step": 155555 }, { "epoch": 7.26, - "learning_rate": 5.5084149828887535e-06, - "loss": 0.0703, + "learning_rate": 1.5531025228964162e-05, + "loss": 0.0671, "step": 155560 }, { "epoch": 7.26, - "learning_rate": 5.507946181613615e-06, - "loss": 0.2186, + "learning_rate": 1.5530557159128142e-05, + "loss": 0.1799, "step": 155565 }, { "epoch": 7.26, - "learning_rate": 5.507477380338475e-06, - "loss": 0.1992, + "learning_rate": 1.553008908929212e-05, + "loss": 0.1406, "step": 155570 }, { "epoch": 7.26, - "learning_rate": 5.507008579063335e-06, - "loss": 0.0347, + "learning_rate": 1.55296210194561e-05, + "loss": 0.032, "step": 155575 }, { "epoch": 7.26, - "learning_rate": 5.506539777788196e-06, - "loss": 0.0313, + "learning_rate": 1.5529152949620085e-05, + "loss": 0.0171, "step": 155580 }, { "epoch": 7.26, - "learning_rate": 5.5060709765130574e-06, - "loss": 0.0212, + "learning_rate": 1.5528684879784064e-05, + "loss": 0.0288, "step": 155585 }, { "epoch": 7.26, - "learning_rate": 5.505602175237917e-06, - "loss": 0.0328, + "learning_rate": 1.5528216809948044e-05, + "loss": 0.0113, "step": 155590 }, { "epoch": 7.26, - "learning_rate": 5.505133373962777e-06, - "loss": 0.0059, + "learning_rate": 1.5527748740112028e-05, + "loss": 0.0281, "step": 155595 }, { "epoch": 7.26, - "learning_rate": 5.504664572687638e-06, - "loss": 0.0182, + "learning_rate": 1.5527280670276007e-05, + "loss": 0.099, "step": 155600 }, { "epoch": 7.26, - "learning_rate": 5.504195771412498e-06, - "loss": 0.0254, + "learning_rate": 1.5526812600439987e-05, + "loss": 0.1209, "step": 155605 }, { "epoch": 7.26, - "learning_rate": 5.50372697013736e-06, - "loss": 0.1207, + "learning_rate": 1.5526344530603967e-05, + "loss": 0.14, "step": 155610 }, { "epoch": 7.26, - "learning_rate": 5.50325816886222e-06, - "loss": 0.0852, + "learning_rate": 1.5525876460767947e-05, + "loss": 0.0963, "step": 155615 }, { "epoch": 7.26, - "learning_rate": 5.50278936758708e-06, - "loss": 0.2516, + "learning_rate": 1.5525408390931927e-05, + "loss": 0.1686, "step": 155620 }, { "epoch": 7.26, - "learning_rate": 5.50232056631194e-06, - "loss": 0.0416, + "learning_rate": 1.5524940321095906e-05, + "loss": 0.0425, "step": 155625 }, { "epoch": 7.26, - "learning_rate": 5.501851765036801e-06, - "loss": 0.0328, + "learning_rate": 1.5524472251259886e-05, + "loss": 0.0189, "step": 155630 }, { "epoch": 7.26, - "learning_rate": 5.501382963761662e-06, - "loss": 0.0294, + "learning_rate": 1.552400418142387e-05, + "loss": 0.0177, "step": 155635 }, { "epoch": 7.26, - "learning_rate": 5.500914162486523e-06, - "loss": 0.0227, + "learning_rate": 1.552353611158785e-05, + "loss": 0.0301, "step": 155640 }, { "epoch": 7.26, - "learning_rate": 5.500445361211383e-06, - "loss": 0.0066, + "learning_rate": 1.552306804175183e-05, + "loss": 0.1448, "step": 155645 }, { "epoch": 7.26, - "learning_rate": 5.499976559936243e-06, - "loss": 0.0297, + "learning_rate": 1.5522599971915812e-05, + "loss": 0.0586, "step": 155650 }, { "epoch": 7.26, - "learning_rate": 5.499507758661104e-06, - "loss": 0.0742, + "learning_rate": 1.5522131902079792e-05, + "loss": 0.057, "step": 155655 }, { "epoch": 7.26, - "learning_rate": 5.499038957385965e-06, - "loss": 0.0876, + "learning_rate": 1.5521663832243772e-05, + "loss": 0.0686, "step": 155660 }, { "epoch": 7.26, - "learning_rate": 5.498570156110825e-06, - "loss": 0.1532, + "learning_rate": 1.5521195762407752e-05, + "loss": 0.1662, "step": 155665 }, { "epoch": 7.26, - "learning_rate": 5.498101354835686e-06, - "loss": 0.2263, + "learning_rate": 1.5520727692571735e-05, + "loss": 0.1446, "step": 155670 }, { "epoch": 7.26, - "learning_rate": 5.4976325535605456e-06, - "loss": 0.0224, + "learning_rate": 1.552025962273571e-05, + "loss": 0.0229, "step": 155675 }, { "epoch": 7.26, - "learning_rate": 5.497163752285407e-06, - "loss": 0.0279, + "learning_rate": 1.551979155289969e-05, + "loss": 0.0153, "step": 155680 }, { "epoch": 7.26, - "learning_rate": 5.496694951010267e-06, - "loss": 0.0237, + "learning_rate": 1.551932348306367e-05, + "loss": 0.0103, "step": 155685 }, { "epoch": 7.26, - "learning_rate": 5.496226149735128e-06, - "loss": 0.0308, + "learning_rate": 1.5518855413227654e-05, + "loss": 0.0182, "step": 155690 }, { "epoch": 7.26, - "learning_rate": 5.495757348459988e-06, - "loss": 0.0224, + "learning_rate": 1.5518387343391634e-05, + "loss": 0.0373, "step": 155695 }, { "epoch": 7.27, - "learning_rate": 5.495288547184849e-06, - "loss": 0.0605, + "learning_rate": 1.5517919273555614e-05, + "loss": 0.0462, "step": 155700 }, { "epoch": 7.27, - "learning_rate": 5.494819745909709e-06, - "loss": 0.0833, + "learning_rate": 1.5517451203719594e-05, + "loss": 0.041, "step": 155705 }, { "epoch": 7.27, - "learning_rate": 5.49435094463457e-06, - "loss": 0.1244, + "learning_rate": 1.5516983133883577e-05, + "loss": 0.1571, "step": 155710 }, { "epoch": 7.27, - "learning_rate": 5.49388214335943e-06, - "loss": 0.2253, + "learning_rate": 1.5516515064047557e-05, + "loss": 0.0804, "step": 155715 }, { "epoch": 7.27, - "learning_rate": 5.493413342084291e-06, - "loss": 0.2213, + "learning_rate": 1.5516046994211537e-05, + "loss": 0.1336, "step": 155720 }, { "epoch": 7.27, - "learning_rate": 5.492944540809152e-06, - "loss": 0.0519, + "learning_rate": 1.551557892437552e-05, + "loss": 0.0295, "step": 155725 }, { "epoch": 7.27, - "learning_rate": 5.4924757395340125e-06, - "loss": 0.0141, + "learning_rate": 1.55151108545395e-05, + "loss": 0.0513, "step": 155730 }, { "epoch": 7.27, - "learning_rate": 5.492006938258872e-06, - "loss": 0.0197, + "learning_rate": 1.551464278470348e-05, + "loss": 0.0171, "step": 155735 }, { "epoch": 7.27, - "learning_rate": 5.491538136983733e-06, - "loss": 0.0643, + "learning_rate": 1.5514174714867456e-05, + "loss": 0.0121, "step": 155740 }, { "epoch": 7.27, - "learning_rate": 5.491069335708593e-06, - "loss": 0.0767, + "learning_rate": 1.551370664503144e-05, + "loss": 0.0717, "step": 155745 }, { "epoch": 7.27, - "learning_rate": 5.490600534433455e-06, - "loss": 0.0433, + "learning_rate": 1.551323857519542e-05, + "loss": 0.0275, "step": 155750 }, { "epoch": 7.27, - "learning_rate": 5.490131733158315e-06, - "loss": 0.1568, + "learning_rate": 1.55127705053594e-05, + "loss": 0.0533, "step": 155755 }, { "epoch": 7.27, - "learning_rate": 5.4896629318831755e-06, - "loss": 0.099, + "learning_rate": 1.551230243552338e-05, + "loss": 0.0655, "step": 155760 }, { "epoch": 7.27, - "learning_rate": 5.489194130608035e-06, - "loss": 0.1908, + "learning_rate": 1.551183436568736e-05, + "loss": 0.2717, "step": 155765 }, { "epoch": 7.27, - "learning_rate": 5.488725329332895e-06, - "loss": 0.0816, + "learning_rate": 1.551136629585134e-05, + "loss": 0.1075, "step": 155770 }, { "epoch": 7.27, - "learning_rate": 5.488256528057757e-06, - "loss": 0.0426, + "learning_rate": 1.551089822601532e-05, + "loss": 0.0104, "step": 155775 }, { "epoch": 7.27, - "learning_rate": 5.487787726782618e-06, - "loss": 0.0287, + "learning_rate": 1.5510430156179304e-05, + "loss": 0.0316, "step": 155780 }, { "epoch": 7.27, - "learning_rate": 5.487318925507478e-06, - "loss": 0.0363, + "learning_rate": 1.5509962086343284e-05, + "loss": 0.0328, "step": 155785 }, { "epoch": 7.27, - "learning_rate": 5.486850124232338e-06, - "loss": 0.3074, + "learning_rate": 1.5509494016507264e-05, + "loss": 0.0146, "step": 155790 }, { "epoch": 7.27, - "learning_rate": 5.486381322957199e-06, - "loss": 0.0292, + "learning_rate": 1.5509025946671244e-05, + "loss": 0.0354, "step": 155795 }, { "epoch": 7.27, - "learning_rate": 5.48591252168206e-06, - "loss": 0.0623, + "learning_rate": 1.5508557876835224e-05, + "loss": 0.083, "step": 155800 }, { "epoch": 7.27, - "learning_rate": 5.48544372040692e-06, - "loss": 0.0647, + "learning_rate": 1.5508089806999204e-05, + "loss": 0.032, "step": 155805 }, { "epoch": 7.27, - "learning_rate": 5.48497491913178e-06, - "loss": 0.1158, + "learning_rate": 1.5507621737163183e-05, + "loss": 0.1513, "step": 155810 }, { "epoch": 7.27, - "learning_rate": 5.484506117856641e-06, - "loss": 0.2266, + "learning_rate": 1.5507153667327163e-05, + "loss": 0.1161, "step": 155815 }, { "epoch": 7.27, - "learning_rate": 5.484037316581502e-06, - "loss": 0.1587, + "learning_rate": 1.5506685597491146e-05, + "loss": 0.1769, "step": 155820 }, { "epoch": 7.27, - "learning_rate": 5.483568515306362e-06, - "loss": 0.0465, + "learning_rate": 1.5506217527655126e-05, + "loss": 0.0292, "step": 155825 }, { "epoch": 7.27, - "learning_rate": 5.483099714031222e-06, - "loss": 0.0227, + "learning_rate": 1.5505749457819106e-05, + "loss": 0.009, "step": 155830 }, { "epoch": 7.27, - "learning_rate": 5.482630912756083e-06, - "loss": 0.0426, + "learning_rate": 1.550528138798309e-05, + "loss": 0.0267, "step": 155835 }, { "epoch": 7.27, - "learning_rate": 5.482162111480943e-06, - "loss": 0.0625, + "learning_rate": 1.550481331814707e-05, + "loss": 0.005, "step": 155840 }, { "epoch": 7.27, - "learning_rate": 5.4816933102058045e-06, - "loss": 0.013, + "learning_rate": 1.550434524831105e-05, + "loss": 0.0163, "step": 155845 }, { "epoch": 7.27, - "learning_rate": 5.4812245089306644e-06, - "loss": 0.0216, + "learning_rate": 1.550387717847503e-05, + "loss": 0.0598, "step": 155850 }, { "epoch": 7.27, - "learning_rate": 5.480755707655525e-06, - "loss": 0.0354, + "learning_rate": 1.5503409108639012e-05, + "loss": 0.1227, "step": 155855 }, { "epoch": 7.27, - "learning_rate": 5.480286906380385e-06, - "loss": 0.1425, + "learning_rate": 1.5502941038802992e-05, + "loss": 0.0868, "step": 155860 }, { "epoch": 7.27, - "learning_rate": 5.479818105105247e-06, - "loss": 0.2203, + "learning_rate": 1.5502472968966968e-05, + "loss": 0.118, "step": 155865 }, { "epoch": 7.27, - "learning_rate": 5.479349303830107e-06, - "loss": 0.1932, + "learning_rate": 1.5502004899130948e-05, + "loss": 0.0798, "step": 155870 }, { "epoch": 7.27, - "learning_rate": 5.4788805025549675e-06, - "loss": 0.0344, + "learning_rate": 1.550153682929493e-05, + "loss": 0.0254, "step": 155875 }, { "epoch": 7.27, - "learning_rate": 5.478411701279827e-06, - "loss": 0.0016, + "learning_rate": 1.550106875945891e-05, + "loss": 0.0085, "step": 155880 }, { "epoch": 7.27, - "learning_rate": 5.477942900004688e-06, - "loss": 0.0082, + "learning_rate": 1.550060068962289e-05, + "loss": 0.0261, "step": 155885 }, { "epoch": 7.27, - "learning_rate": 5.477474098729549e-06, - "loss": 0.0593, + "learning_rate": 1.5500132619786874e-05, + "loss": 0.016, "step": 155890 }, { "epoch": 7.27, - "learning_rate": 5.47700529745441e-06, - "loss": 0.0914, + "learning_rate": 1.5499664549950854e-05, + "loss": 0.0457, "step": 155895 }, { "epoch": 7.27, - "learning_rate": 5.47653649617927e-06, - "loss": 0.0401, + "learning_rate": 1.5499196480114834e-05, + "loss": 0.0839, "step": 155900 }, { "epoch": 7.27, - "learning_rate": 5.4760676949041305e-06, - "loss": 0.0593, + "learning_rate": 1.5498728410278813e-05, + "loss": 0.0894, "step": 155905 }, { "epoch": 7.27, - "learning_rate": 5.475598893628991e-06, - "loss": 0.1142, + "learning_rate": 1.5498260340442797e-05, + "loss": 0.1238, "step": 155910 }, { "epoch": 7.28, - "learning_rate": 5.475130092353852e-06, - "loss": 0.2957, + "learning_rate": 1.5497792270606776e-05, + "loss": 0.1555, "step": 155915 }, { "epoch": 7.28, - "learning_rate": 5.474661291078712e-06, - "loss": 0.1715, + "learning_rate": 1.5497324200770756e-05, + "loss": 0.3632, "step": 155920 }, { "epoch": 7.28, - "learning_rate": 5.474192489803573e-06, - "loss": 0.0483, + "learning_rate": 1.5496856130934736e-05, + "loss": 0.043, "step": 155925 }, { "epoch": 7.28, - "learning_rate": 5.473723688528433e-06, - "loss": 0.0191, + "learning_rate": 1.5496388061098716e-05, + "loss": 0.0278, "step": 155930 }, { "epoch": 7.28, - "learning_rate": 5.473254887253294e-06, - "loss": 0.0278, + "learning_rate": 1.5495919991262696e-05, + "loss": 0.0234, "step": 155935 }, { "epoch": 7.28, - "learning_rate": 5.472786085978154e-06, - "loss": 0.0935, + "learning_rate": 1.5495451921426676e-05, + "loss": 0.06, "step": 155940 }, { "epoch": 7.28, - "learning_rate": 5.472317284703015e-06, - "loss": 0.092, + "learning_rate": 1.5494983851590655e-05, + "loss": 0.0306, "step": 155945 }, { "epoch": 7.28, - "learning_rate": 5.471848483427875e-06, - "loss": 0.0106, + "learning_rate": 1.549451578175464e-05, + "loss": 0.0339, "step": 155950 }, { "epoch": 7.28, - "learning_rate": 5.471379682152736e-06, - "loss": 0.1398, + "learning_rate": 1.549404771191862e-05, + "loss": 0.0625, "step": 155955 }, { "epoch": 7.28, - "learning_rate": 5.4709108808775965e-06, - "loss": 0.1352, + "learning_rate": 1.5493579642082598e-05, + "loss": 0.1268, "step": 155960 }, { "epoch": 7.28, - "learning_rate": 5.470442079602457e-06, - "loss": 0.1754, + "learning_rate": 1.549311157224658e-05, + "loss": 0.1082, "step": 155965 }, { "epoch": 7.28, - "learning_rate": 5.469973278327317e-06, - "loss": 0.1718, + "learning_rate": 1.549264350241056e-05, + "loss": 0.1291, "step": 155970 }, { "epoch": 7.28, - "learning_rate": 5.469504477052178e-06, - "loss": 0.0451, + "learning_rate": 1.549217543257454e-05, + "loss": 0.0389, "step": 155975 }, { "epoch": 7.28, - "learning_rate": 5.469035675777039e-06, - "loss": 0.0205, + "learning_rate": 1.549170736273852e-05, + "loss": 0.0285, "step": 155980 }, { "epoch": 7.28, - "learning_rate": 5.4685668745018996e-06, - "loss": 0.0016, + "learning_rate": 1.5491239292902504e-05, + "loss": 0.0043, "step": 155985 }, { "epoch": 7.28, - "learning_rate": 5.4680980732267595e-06, - "loss": 0.0179, + "learning_rate": 1.549077122306648e-05, + "loss": 0.0327, "step": 155990 }, { "epoch": 7.28, - "learning_rate": 5.46762927195162e-06, - "loss": 0.0149, + "learning_rate": 1.549030315323046e-05, + "loss": 0.0516, "step": 155995 }, { "epoch": 7.28, - "learning_rate": 5.46716047067648e-06, - "loss": 0.065, + "learning_rate": 1.548983508339444e-05, + "loss": 0.0221, "step": 156000 }, { "epoch": 7.28, - "learning_rate": 5.466691669401342e-06, - "loss": 0.0888, + "learning_rate": 1.5489367013558423e-05, + "loss": 0.0412, "step": 156005 }, { "epoch": 7.28, - "learning_rate": 5.466222868126202e-06, - "loss": 0.244, + "learning_rate": 1.5488898943722403e-05, + "loss": 0.1035, "step": 156010 }, { "epoch": 7.28, - "learning_rate": 5.4657540668510626e-06, - "loss": 0.1731, + "learning_rate": 1.5488430873886383e-05, + "loss": 0.0563, "step": 156015 }, { "epoch": 7.28, - "learning_rate": 5.4652852655759225e-06, - "loss": 0.1341, + "learning_rate": 1.5487962804050366e-05, + "loss": 0.1677, "step": 156020 }, { "epoch": 7.28, - "learning_rate": 5.4648164643007824e-06, - "loss": 0.0332, + "learning_rate": 1.5487494734214346e-05, + "loss": 0.0264, "step": 156025 }, { "epoch": 7.28, - "learning_rate": 5.464347663025644e-06, - "loss": 0.0506, + "learning_rate": 1.5487026664378326e-05, + "loss": 0.0221, "step": 156030 }, { "epoch": 7.28, - "learning_rate": 5.463878861750505e-06, - "loss": 0.0246, + "learning_rate": 1.5486558594542306e-05, + "loss": 0.0431, "step": 156035 }, { "epoch": 7.28, - "learning_rate": 5.463410060475365e-06, - "loss": 0.0615, + "learning_rate": 1.548609052470629e-05, + "loss": 0.0273, "step": 156040 }, { "epoch": 7.28, - "learning_rate": 5.462941259200225e-06, - "loss": 0.0246, + "learning_rate": 1.548562245487027e-05, + "loss": 0.0222, "step": 156045 }, { "epoch": 7.28, - "learning_rate": 5.462472457925086e-06, - "loss": 0.0693, + "learning_rate": 1.548515438503425e-05, + "loss": 0.0408, "step": 156050 }, { "epoch": 7.28, - "learning_rate": 5.462003656649947e-06, - "loss": 0.026, + "learning_rate": 1.5484686315198225e-05, + "loss": 0.0613, "step": 156055 }, { "epoch": 7.28, - "learning_rate": 5.461534855374807e-06, - "loss": 0.1002, + "learning_rate": 1.5484218245362208e-05, + "loss": 0.0955, "step": 156060 }, { "epoch": 7.28, - "learning_rate": 5.461066054099667e-06, - "loss": 0.1253, + "learning_rate": 1.5483750175526188e-05, + "loss": 0.1159, "step": 156065 }, { "epoch": 7.28, - "learning_rate": 5.460597252824528e-06, - "loss": 0.1469, + "learning_rate": 1.5483282105690168e-05, + "loss": 0.2409, "step": 156070 }, { "epoch": 7.28, - "learning_rate": 5.460128451549389e-06, - "loss": 0.0287, + "learning_rate": 1.548281403585415e-05, + "loss": 0.0366, "step": 156075 }, { "epoch": 7.28, - "learning_rate": 5.459659650274249e-06, - "loss": 0.0253, + "learning_rate": 1.548234596601813e-05, + "loss": 0.0153, "step": 156080 }, { "epoch": 7.28, - "learning_rate": 5.459190848999109e-06, - "loss": 0.0322, + "learning_rate": 1.548187789618211e-05, + "loss": 0.0265, "step": 156085 }, { "epoch": 7.28, - "learning_rate": 5.45872204772397e-06, - "loss": 0.0395, + "learning_rate": 1.548140982634609e-05, + "loss": 0.0138, "step": 156090 }, { "epoch": 7.28, - "learning_rate": 5.45825324644883e-06, - "loss": 0.0376, + "learning_rate": 1.5480941756510074e-05, + "loss": 0.0338, "step": 156095 }, { "epoch": 7.28, - "learning_rate": 5.457784445173692e-06, - "loss": 0.0309, + "learning_rate": 1.5480473686674053e-05, + "loss": 0.0337, "step": 156100 }, { "epoch": 7.28, - "learning_rate": 5.4573156438985515e-06, - "loss": 0.0272, + "learning_rate": 1.5480005616838033e-05, + "loss": 0.0887, "step": 156105 }, { "epoch": 7.28, - "learning_rate": 5.456846842623412e-06, - "loss": 0.0883, + "learning_rate": 1.5479537547002013e-05, + "loss": 0.0945, "step": 156110 }, { "epoch": 7.28, - "learning_rate": 5.456378041348272e-06, - "loss": 0.0929, + "learning_rate": 1.5479069477165996e-05, + "loss": 0.1481, "step": 156115 }, { "epoch": 7.28, - "learning_rate": 5.455909240073134e-06, - "loss": 0.1545, + "learning_rate": 1.5478601407329973e-05, + "loss": 0.168, "step": 156120 }, { "epoch": 7.29, - "learning_rate": 5.455440438797994e-06, - "loss": 0.0256, + "learning_rate": 1.5478133337493953e-05, + "loss": 0.0172, "step": 156125 }, { "epoch": 7.29, - "learning_rate": 5.454971637522855e-06, - "loss": 0.0132, + "learning_rate": 1.5477665267657932e-05, + "loss": 0.0311, "step": 156130 }, { "epoch": 7.29, - "learning_rate": 5.4545028362477145e-06, - "loss": 0.006, + "learning_rate": 1.5477197197821916e-05, + "loss": 0.0182, "step": 156135 }, { "epoch": 7.29, - "learning_rate": 5.454034034972575e-06, - "loss": 0.0717, + "learning_rate": 1.5476729127985895e-05, + "loss": 0.0442, "step": 156140 }, { "epoch": 7.29, - "learning_rate": 5.453565233697437e-06, - "loss": 0.116, + "learning_rate": 1.5476261058149875e-05, + "loss": 0.031, "step": 156145 }, { "epoch": 7.29, - "learning_rate": 5.453096432422297e-06, - "loss": 0.0841, + "learning_rate": 1.547579298831386e-05, + "loss": 0.0162, "step": 156150 }, { "epoch": 7.29, - "learning_rate": 5.452627631147157e-06, - "loss": 0.0723, + "learning_rate": 1.5475324918477838e-05, + "loss": 0.0584, "step": 156155 }, { "epoch": 7.29, - "learning_rate": 5.452158829872018e-06, - "loss": 0.1414, + "learning_rate": 1.5474856848641818e-05, + "loss": 0.119, "step": 156160 }, { "epoch": 7.29, - "learning_rate": 5.4516900285968775e-06, - "loss": 0.1066, + "learning_rate": 1.5474388778805798e-05, + "loss": 0.1044, "step": 156165 }, { "epoch": 7.29, - "learning_rate": 5.451221227321739e-06, - "loss": 0.155, + "learning_rate": 1.547392070896978e-05, + "loss": 0.1003, "step": 156170 }, { "epoch": 7.29, - "learning_rate": 5.450752426046599e-06, - "loss": 0.0074, + "learning_rate": 1.547345263913376e-05, + "loss": 0.0026, "step": 156175 }, { "epoch": 7.29, - "learning_rate": 5.45028362477146e-06, - "loss": 0.0122, + "learning_rate": 1.5472984569297737e-05, + "loss": 0.0336, "step": 156180 }, { "epoch": 7.29, - "learning_rate": 5.44981482349632e-06, - "loss": 0.0285, + "learning_rate": 1.5472516499461717e-05, + "loss": 0.0106, "step": 156185 }, { "epoch": 7.29, - "learning_rate": 5.449346022221181e-06, - "loss": 0.0376, + "learning_rate": 1.54720484296257e-05, + "loss": 0.0291, "step": 156190 }, { "epoch": 7.29, - "learning_rate": 5.448877220946041e-06, - "loss": 0.0719, + "learning_rate": 1.547158035978968e-05, + "loss": 0.0707, "step": 156195 }, { "epoch": 7.29, - "learning_rate": 5.448408419670902e-06, - "loss": 0.0711, + "learning_rate": 1.547111228995366e-05, + "loss": 0.013, "step": 156200 }, { "epoch": 7.29, - "learning_rate": 5.447939618395762e-06, - "loss": 0.0529, + "learning_rate": 1.5470644220117643e-05, + "loss": 0.0577, "step": 156205 }, { "epoch": 7.29, - "learning_rate": 5.447470817120623e-06, - "loss": 0.219, + "learning_rate": 1.5470176150281623e-05, + "loss": 0.1283, "step": 156210 }, { "epoch": 7.29, - "learning_rate": 5.447002015845484e-06, - "loss": 0.1322, + "learning_rate": 1.5469708080445603e-05, + "loss": 0.2202, "step": 156215 }, { "epoch": 7.29, - "learning_rate": 5.446533214570344e-06, - "loss": 0.1778, + "learning_rate": 1.5469240010609583e-05, + "loss": 0.1886, "step": 156220 }, { "epoch": 7.29, - "learning_rate": 5.446064413295204e-06, - "loss": 0.0257, + "learning_rate": 1.5468771940773566e-05, + "loss": 0.0354, "step": 156225 }, { "epoch": 7.29, - "learning_rate": 5.445595612020065e-06, - "loss": 0.0273, + "learning_rate": 1.5468303870937546e-05, + "loss": 0.0108, "step": 156230 }, { "epoch": 7.29, - "learning_rate": 5.445126810744926e-06, - "loss": 0.0079, + "learning_rate": 1.5467835801101525e-05, + "loss": 0.016, "step": 156235 }, { "epoch": 7.29, - "learning_rate": 5.444658009469787e-06, - "loss": 0.0323, + "learning_rate": 1.5467367731265505e-05, + "loss": 0.0063, "step": 156240 }, { "epoch": 7.29, - "learning_rate": 5.444189208194647e-06, - "loss": 0.0539, + "learning_rate": 1.5466899661429485e-05, + "loss": 0.1182, "step": 156245 }, { "epoch": 7.29, - "learning_rate": 5.443720406919507e-06, - "loss": 0.0577, + "learning_rate": 1.5466431591593465e-05, + "loss": 0.0299, "step": 156250 }, { "epoch": 7.29, - "learning_rate": 5.443251605644367e-06, - "loss": 0.0921, + "learning_rate": 1.5465963521757445e-05, + "loss": 0.1184, "step": 156255 }, { "epoch": 7.29, - "learning_rate": 5.442782804369229e-06, - "loss": 0.0379, + "learning_rate": 1.5465495451921428e-05, + "loss": 0.0783, "step": 156260 }, { "epoch": 7.29, - "learning_rate": 5.442314003094089e-06, - "loss": 0.1496, + "learning_rate": 1.5465027382085408e-05, + "loss": 0.1149, "step": 156265 }, { "epoch": 7.29, - "learning_rate": 5.44184520181895e-06, - "loss": 0.1625, + "learning_rate": 1.5464559312249388e-05, + "loss": 0.2581, "step": 156270 }, { "epoch": 7.29, - "learning_rate": 5.44137640054381e-06, - "loss": 0.0049, + "learning_rate": 1.5464091242413367e-05, + "loss": 0.0092, "step": 156275 }, { "epoch": 7.29, - "learning_rate": 5.4409075992686695e-06, - "loss": 0.0572, + "learning_rate": 1.546362317257735e-05, + "loss": 0.0172, "step": 156280 }, { "epoch": 7.29, - "learning_rate": 5.440438797993531e-06, - "loss": 0.0108, + "learning_rate": 1.546315510274133e-05, + "loss": 0.0249, "step": 156285 }, { "epoch": 7.29, - "learning_rate": 5.439969996718392e-06, - "loss": 0.0139, + "learning_rate": 1.546268703290531e-05, + "loss": 0.0159, "step": 156290 }, { "epoch": 7.29, - "learning_rate": 5.439501195443252e-06, - "loss": 0.0334, + "learning_rate": 1.546221896306929e-05, + "loss": 0.0733, "step": 156295 }, { "epoch": 7.29, - "learning_rate": 5.439032394168113e-06, - "loss": 0.0567, + "learning_rate": 1.5461750893233273e-05, + "loss": 0.0456, "step": 156300 }, { "epoch": 7.29, - "learning_rate": 5.4385635928929734e-06, - "loss": 0.0398, + "learning_rate": 1.5461282823397253e-05, + "loss": 0.0646, "step": 156305 }, { "epoch": 7.29, - "learning_rate": 5.438094791617834e-06, - "loss": 0.0497, + "learning_rate": 1.546081475356123e-05, + "loss": 0.0608, "step": 156310 }, { "epoch": 7.29, - "learning_rate": 5.437625990342694e-06, - "loss": 0.0622, + "learning_rate": 1.546034668372521e-05, + "loss": 0.0964, "step": 156315 }, { "epoch": 7.29, - "learning_rate": 5.437157189067555e-06, - "loss": 0.1841, + "learning_rate": 1.5459878613889193e-05, + "loss": 0.1581, "step": 156320 }, { "epoch": 7.29, - "learning_rate": 5.436688387792415e-06, - "loss": 0.0488, + "learning_rate": 1.5459410544053172e-05, + "loss": 0.0143, "step": 156325 }, { "epoch": 7.29, - "learning_rate": 5.4362195865172765e-06, - "loss": 0.0327, + "learning_rate": 1.5458942474217152e-05, + "loss": 0.03, "step": 156330 }, { "epoch": 7.29, - "learning_rate": 5.4357507852421364e-06, - "loss": 0.0226, + "learning_rate": 1.5458474404381135e-05, + "loss": 0.0271, "step": 156335 }, { "epoch": 7.3, - "learning_rate": 5.435281983966997e-06, - "loss": 0.0558, + "learning_rate": 1.5458006334545115e-05, + "loss": 0.0218, "step": 156340 }, { "epoch": 7.3, - "learning_rate": 5.434813182691857e-06, - "loss": 0.0772, + "learning_rate": 1.5457538264709095e-05, + "loss": 0.0481, "step": 156345 }, { "epoch": 7.3, - "learning_rate": 5.434344381416717e-06, - "loss": 0.034, + "learning_rate": 1.5457070194873075e-05, + "loss": 0.0509, "step": 156350 }, { "epoch": 7.3, - "learning_rate": 5.433875580141579e-06, - "loss": 0.0671, + "learning_rate": 1.5456602125037058e-05, + "loss": 0.0211, "step": 156355 }, { "epoch": 7.3, - "learning_rate": 5.4334067788664395e-06, - "loss": 0.0737, + "learning_rate": 1.5456134055201038e-05, + "loss": 0.0947, "step": 156360 }, { "epoch": 7.3, - "learning_rate": 5.432937977591299e-06, - "loss": 0.1431, + "learning_rate": 1.5455665985365018e-05, + "loss": 0.1245, "step": 156365 }, { "epoch": 7.3, - "learning_rate": 5.432469176316159e-06, - "loss": 0.2053, + "learning_rate": 1.5455197915528994e-05, + "loss": 0.099, "step": 156370 }, { "epoch": 7.3, - "learning_rate": 5.432000375041021e-06, - "loss": 0.05, + "learning_rate": 1.5454729845692977e-05, + "loss": 0.0035, "step": 156375 }, { "epoch": 7.3, - "learning_rate": 5.431531573765882e-06, - "loss": 0.0179, + "learning_rate": 1.5454261775856957e-05, + "loss": 0.0362, "step": 156380 }, { "epoch": 7.3, - "learning_rate": 5.431062772490742e-06, - "loss": 0.0138, + "learning_rate": 1.5453793706020937e-05, + "loss": 0.0135, "step": 156385 }, { "epoch": 7.3, - "learning_rate": 5.430593971215602e-06, - "loss": 0.0806, + "learning_rate": 1.545332563618492e-05, + "loss": 0.0099, "step": 156390 }, { "epoch": 7.3, - "learning_rate": 5.430125169940462e-06, - "loss": 0.0184, + "learning_rate": 1.54528575663489e-05, + "loss": 0.0464, "step": 156395 }, { "epoch": 7.3, - "learning_rate": 5.429656368665324e-06, - "loss": 0.0166, + "learning_rate": 1.545238949651288e-05, + "loss": 0.0558, "step": 156400 }, { "epoch": 7.3, - "learning_rate": 5.429187567390184e-06, - "loss": 0.0482, + "learning_rate": 1.545192142667686e-05, + "loss": 0.0433, "step": 156405 }, { "epoch": 7.3, - "learning_rate": 5.428718766115044e-06, - "loss": 0.1046, + "learning_rate": 1.5451453356840843e-05, + "loss": 0.0725, "step": 156410 }, { "epoch": 7.3, - "learning_rate": 5.428249964839905e-06, - "loss": 0.1924, + "learning_rate": 1.5450985287004823e-05, + "loss": 0.1598, "step": 156415 }, { "epoch": 7.3, - "learning_rate": 5.427781163564765e-06, - "loss": 0.207, + "learning_rate": 1.5450517217168802e-05, + "loss": 0.203, "step": 156420 }, { "epoch": 7.3, - "learning_rate": 5.427312362289626e-06, - "loss": 0.0131, + "learning_rate": 1.5450049147332782e-05, + "loss": 0.0294, "step": 156425 }, { "epoch": 7.3, - "learning_rate": 5.426843561014486e-06, - "loss": 0.0557, + "learning_rate": 1.5449581077496765e-05, + "loss": 0.0217, "step": 156430 }, { "epoch": 7.3, - "learning_rate": 5.426374759739347e-06, - "loss": 0.0225, + "learning_rate": 1.5449113007660742e-05, + "loss": 0.0088, "step": 156435 }, { "epoch": 7.3, - "learning_rate": 5.425905958464207e-06, - "loss": 0.0149, + "learning_rate": 1.5448644937824722e-05, + "loss": 0.0501, "step": 156440 }, { "epoch": 7.3, - "learning_rate": 5.4254371571890685e-06, - "loss": 0.0107, + "learning_rate": 1.5448176867988705e-05, + "loss": 0.027, "step": 156445 }, { "epoch": 7.3, - "learning_rate": 5.4249683559139285e-06, - "loss": 0.075, + "learning_rate": 1.5447708798152685e-05, + "loss": 0.0092, "step": 156450 }, { "epoch": 7.3, - "learning_rate": 5.424499554638789e-06, - "loss": 0.0923, + "learning_rate": 1.5447240728316665e-05, + "loss": 0.0756, "step": 156455 }, { "epoch": 7.3, - "learning_rate": 5.424030753363649e-06, - "loss": 0.0729, + "learning_rate": 1.5446772658480644e-05, + "loss": 0.0629, "step": 156460 }, { "epoch": 7.3, - "learning_rate": 5.42356195208851e-06, - "loss": 0.1251, + "learning_rate": 1.5446304588644628e-05, + "loss": 0.1995, "step": 156465 }, { "epoch": 7.3, - "learning_rate": 5.423093150813371e-06, - "loss": 0.1509, + "learning_rate": 1.5445836518808607e-05, + "loss": 0.1434, "step": 156470 }, { "epoch": 7.3, - "learning_rate": 5.4226243495382315e-06, - "loss": 0.0002, + "learning_rate": 1.5445368448972587e-05, + "loss": 0.0337, "step": 156475 }, { "epoch": 7.3, - "learning_rate": 5.4221555482630914e-06, - "loss": 0.0115, + "learning_rate": 1.5444900379136567e-05, + "loss": 0.0397, "step": 156480 }, { "epoch": 7.3, - "learning_rate": 5.421686746987952e-06, - "loss": 0.0309, + "learning_rate": 1.544443230930055e-05, + "loss": 0.0126, "step": 156485 }, { "epoch": 7.3, - "learning_rate": 5.421217945712812e-06, - "loss": 0.0577, + "learning_rate": 1.544396423946453e-05, + "loss": 0.0414, "step": 156490 }, { "epoch": 7.3, - "learning_rate": 5.420749144437674e-06, - "loss": 0.0156, + "learning_rate": 1.5443496169628506e-05, + "loss": 0.0173, "step": 156495 }, { "epoch": 7.3, - "learning_rate": 5.420280343162534e-06, - "loss": 0.0197, + "learning_rate": 1.544302809979249e-05, + "loss": 0.0722, "step": 156500 }, { "epoch": 7.3, - "learning_rate": 5.4198115418873945e-06, - "loss": 0.0217, + "learning_rate": 1.544256002995647e-05, + "loss": 0.0198, "step": 156505 }, { "epoch": 7.3, - "learning_rate": 5.4193427406122544e-06, - "loss": 0.0706, + "learning_rate": 1.544209196012045e-05, + "loss": 0.0696, "step": 156510 }, { "epoch": 7.3, - "learning_rate": 5.418873939337116e-06, - "loss": 0.138, + "learning_rate": 1.544162389028443e-05, + "loss": 0.1931, "step": 156515 }, { "epoch": 7.3, - "learning_rate": 5.418405138061976e-06, - "loss": 0.221, + "learning_rate": 1.5441155820448412e-05, + "loss": 0.1641, "step": 156520 }, { "epoch": 7.3, - "learning_rate": 5.417936336786837e-06, - "loss": 0.0476, + "learning_rate": 1.5440687750612392e-05, + "loss": 0.0025, "step": 156525 }, { "epoch": 7.3, - "learning_rate": 5.417467535511697e-06, - "loss": 0.0164, + "learning_rate": 1.5440219680776372e-05, + "loss": 0.0122, "step": 156530 }, { "epoch": 7.3, - "learning_rate": 5.4169987342365575e-06, - "loss": 0.0281, + "learning_rate": 1.5439751610940352e-05, + "loss": 0.032, "step": 156535 }, { "epoch": 7.3, - "learning_rate": 5.416529932961418e-06, - "loss": 0.0122, + "learning_rate": 1.5439283541104335e-05, + "loss": 0.0198, "step": 156540 }, { "epoch": 7.3, - "learning_rate": 5.416061131686279e-06, - "loss": 0.0832, + "learning_rate": 1.5438815471268315e-05, + "loss": 0.0102, "step": 156545 }, { "epoch": 7.3, - "learning_rate": 5.415592330411139e-06, - "loss": 0.0374, + "learning_rate": 1.5438347401432295e-05, + "loss": 0.0252, "step": 156550 }, { "epoch": 7.31, - "learning_rate": 5.415123529136e-06, - "loss": 0.0353, + "learning_rate": 1.5437879331596274e-05, + "loss": 0.0672, "step": 156555 }, { "epoch": 7.31, - "learning_rate": 5.4146547278608606e-06, - "loss": 0.0589, + "learning_rate": 1.5437411261760254e-05, + "loss": 0.0692, "step": 156560 }, { "epoch": 7.31, - "learning_rate": 5.414185926585721e-06, - "loss": 0.1066, + "learning_rate": 1.5436943191924234e-05, + "loss": 0.0722, "step": 156565 }, { "epoch": 7.31, - "learning_rate": 5.413717125310581e-06, - "loss": 0.1206, + "learning_rate": 1.5436475122088214e-05, + "loss": 0.1363, "step": 156570 }, { "epoch": 7.31, - "learning_rate": 5.413248324035442e-06, - "loss": 0.059, + "learning_rate": 1.5436007052252197e-05, + "loss": 0.0239, "step": 156575 }, { "epoch": 7.31, - "learning_rate": 5.412779522760302e-06, - "loss": 0.0197, + "learning_rate": 1.5435538982416177e-05, + "loss": 0.0168, "step": 156580 }, { "epoch": 7.31, - "learning_rate": 5.412310721485164e-06, - "loss": 0.017, + "learning_rate": 1.5435070912580157e-05, + "loss": 0.005, "step": 156585 }, { "epoch": 7.31, - "learning_rate": 5.4118419202100235e-06, - "loss": 0.0545, + "learning_rate": 1.5434602842744137e-05, + "loss": 0.0692, "step": 156590 }, { "epoch": 7.31, - "learning_rate": 5.411373118934884e-06, - "loss": 0.044, + "learning_rate": 1.543413477290812e-05, + "loss": 0.0533, "step": 156595 }, { "epoch": 7.31, - "learning_rate": 5.410904317659744e-06, - "loss": 0.0757, + "learning_rate": 1.54336667030721e-05, + "loss": 0.0599, "step": 156600 }, { "epoch": 7.31, - "learning_rate": 5.410435516384604e-06, - "loss": 0.0905, + "learning_rate": 1.543319863323608e-05, + "loss": 0.0814, "step": 156605 }, { "epoch": 7.31, - "learning_rate": 5.409966715109466e-06, - "loss": 0.0881, + "learning_rate": 1.543273056340006e-05, + "loss": 0.095, "step": 156610 }, { "epoch": 7.31, - "learning_rate": 5.409497913834327e-06, - "loss": 0.1331, + "learning_rate": 1.5432262493564042e-05, + "loss": 0.2443, "step": 156615 }, { "epoch": 7.31, - "learning_rate": 5.4090291125591865e-06, - "loss": 0.2732, + "learning_rate": 1.5431794423728022e-05, + "loss": 0.2525, "step": 156620 }, { "epoch": 7.31, - "learning_rate": 5.4085603112840465e-06, - "loss": 0.0188, + "learning_rate": 1.5431326353892e-05, + "loss": 0.0194, "step": 156625 }, { "epoch": 7.31, - "learning_rate": 5.408091510008908e-06, - "loss": 0.0248, + "learning_rate": 1.5430858284055982e-05, + "loss": 0.0092, "step": 156630 }, { "epoch": 7.31, - "learning_rate": 5.407622708733769e-06, + "learning_rate": 1.5430390214219962e-05, "loss": 0.007, "step": 156635 }, { "epoch": 7.31, - "learning_rate": 5.407153907458629e-06, - "loss": 0.0396, + "learning_rate": 1.542992214438394e-05, + "loss": 0.0097, "step": 156640 }, { "epoch": 7.31, - "learning_rate": 5.406685106183489e-06, - "loss": 0.0992, + "learning_rate": 1.542945407454792e-05, + "loss": 0.0975, "step": 156645 }, { "epoch": 7.31, - "learning_rate": 5.4062163049083495e-06, - "loss": 0.0412, + "learning_rate": 1.5428986004711905e-05, + "loss": 0.0446, "step": 156650 }, { "epoch": 7.31, - "learning_rate": 5.405747503633211e-06, - "loss": 0.1151, + "learning_rate": 1.5428517934875884e-05, + "loss": 0.028, "step": 156655 }, { "epoch": 7.31, - "learning_rate": 5.405278702358071e-06, - "loss": 0.0488, + "learning_rate": 1.5428049865039864e-05, + "loss": 0.0947, "step": 156660 }, { "epoch": 7.31, - "learning_rate": 5.404809901082931e-06, - "loss": 0.1393, + "learning_rate": 1.5427581795203844e-05, + "loss": 0.1429, "step": 156665 }, { "epoch": 7.31, - "learning_rate": 5.404341099807792e-06, - "loss": 0.1511, + "learning_rate": 1.5427113725367827e-05, + "loss": 0.1598, "step": 156670 }, { "epoch": 7.31, - "learning_rate": 5.403872298532652e-06, - "loss": 0.0085, + "learning_rate": 1.5426645655531807e-05, + "loss": 0.0039, "step": 156675 }, { "epoch": 7.31, - "learning_rate": 5.403403497257513e-06, - "loss": 0.0085, + "learning_rate": 1.5426177585695787e-05, + "loss": 0.013, "step": 156680 }, { "epoch": 7.31, - "learning_rate": 5.402934695982373e-06, - "loss": 0.0234, + "learning_rate": 1.5425709515859767e-05, + "loss": 0.0433, "step": 156685 }, { "epoch": 7.31, - "learning_rate": 5.402465894707234e-06, - "loss": 0.0222, + "learning_rate": 1.5425241446023746e-05, + "loss": 0.0479, "step": 156690 }, { "epoch": 7.31, - "learning_rate": 5.401997093432094e-06, - "loss": 0.0521, + "learning_rate": 1.5424773376187726e-05, + "loss": 0.0561, "step": 156695 }, { "epoch": 7.31, - "learning_rate": 5.401528292156956e-06, - "loss": 0.064, + "learning_rate": 1.5424305306351706e-05, + "loss": 0.0905, "step": 156700 }, { "epoch": 7.31, - "learning_rate": 5.4010594908818156e-06, - "loss": 0.0793, + "learning_rate": 1.542383723651569e-05, + "loss": 0.0709, "step": 156705 }, { "epoch": 7.31, - "learning_rate": 5.400590689606676e-06, - "loss": 0.0899, + "learning_rate": 1.542336916667967e-05, + "loss": 0.1108, "step": 156710 }, { "epoch": 7.31, - "learning_rate": 5.400121888331536e-06, - "loss": 0.2453, + "learning_rate": 1.542290109684365e-05, + "loss": 0.1258, "step": 156715 }, { "epoch": 7.31, - "learning_rate": 5.399653087056397e-06, - "loss": 0.1084, + "learning_rate": 1.542243302700763e-05, + "loss": 0.1056, "step": 156720 }, { "epoch": 7.31, - "learning_rate": 5.399184285781258e-06, - "loss": 0.0285, + "learning_rate": 1.5421964957171612e-05, + "loss": 0.0137, "step": 156725 }, { "epoch": 7.31, - "learning_rate": 5.398715484506119e-06, - "loss": 0.0178, + "learning_rate": 1.5421496887335592e-05, + "loss": 0.0203, "step": 156730 }, { "epoch": 7.31, - "learning_rate": 5.3982466832309786e-06, - "loss": 0.0107, + "learning_rate": 1.542102881749957e-05, + "loss": 0.0223, "step": 156735 }, { "epoch": 7.31, - "learning_rate": 5.397777881955839e-06, - "loss": 0.0211, + "learning_rate": 1.542056074766355e-05, + "loss": 0.0339, "step": 156740 }, { "epoch": 7.31, - "learning_rate": 5.397309080680699e-06, - "loss": 0.0383, + "learning_rate": 1.5420092677827535e-05, + "loss": 0.0267, "step": 156745 }, { "epoch": 7.31, - "learning_rate": 5.396840279405561e-06, - "loss": 0.0615, + "learning_rate": 1.541962460799151e-05, + "loss": 0.0895, "step": 156750 }, { "epoch": 7.31, - "learning_rate": 5.396371478130421e-06, - "loss": 0.0492, + "learning_rate": 1.541915653815549e-05, + "loss": 0.0771, "step": 156755 }, { "epoch": 7.31, - "learning_rate": 5.395902676855282e-06, - "loss": 0.0868, + "learning_rate": 1.5418688468319474e-05, + "loss": 0.1851, "step": 156760 }, { "epoch": 7.31, - "learning_rate": 5.3954338755801415e-06, - "loss": 0.1228, + "learning_rate": 1.5418220398483454e-05, + "loss": 0.0795, "step": 156765 }, { "epoch": 7.32, - "learning_rate": 5.394965074305003e-06, - "loss": 0.1256, + "learning_rate": 1.5417752328647434e-05, + "loss": 0.1611, "step": 156770 }, { "epoch": 7.32, - "learning_rate": 5.394496273029863e-06, - "loss": 0.03, + "learning_rate": 1.5417284258811414e-05, + "loss": 0.0396, "step": 156775 }, { "epoch": 7.32, - "learning_rate": 5.394027471754724e-06, - "loss": 0.0113, + "learning_rate": 1.5416816188975397e-05, + "loss": 0.0142, "step": 156780 }, { "epoch": 7.32, - "learning_rate": 5.393558670479584e-06, - "loss": 0.0486, + "learning_rate": 1.5416348119139377e-05, + "loss": 0.0597, "step": 156785 }, { "epoch": 7.32, - "learning_rate": 5.393089869204445e-06, - "loss": 0.0477, + "learning_rate": 1.5415880049303356e-05, + "loss": 0.0093, "step": 156790 }, { "epoch": 7.32, - "learning_rate": 5.392621067929305e-06, - "loss": 0.0106, + "learning_rate": 1.5415411979467336e-05, + "loss": 0.0168, "step": 156795 }, { "epoch": 7.32, - "learning_rate": 5.392152266654166e-06, - "loss": 0.0445, + "learning_rate": 1.541494390963132e-05, + "loss": 0.0331, "step": 156800 }, { "epoch": 7.32, - "learning_rate": 5.391683465379026e-06, - "loss": 0.0525, + "learning_rate": 1.54144758397953e-05, + "loss": 0.0893, "step": 156805 }, { "epoch": 7.32, - "learning_rate": 5.391214664103887e-06, - "loss": 0.1899, + "learning_rate": 1.541400776995928e-05, + "loss": 0.0678, "step": 156810 }, { "epoch": 7.32, - "learning_rate": 5.390745862828747e-06, - "loss": 0.1443, + "learning_rate": 1.541353970012326e-05, + "loss": 0.1381, "step": 156815 }, { "epoch": 7.32, - "learning_rate": 5.3902770615536084e-06, - "loss": 0.2493, + "learning_rate": 1.541307163028724e-05, + "loss": 0.178, "step": 156820 }, { "epoch": 7.32, - "learning_rate": 5.389808260278468e-06, - "loss": 0.0093, + "learning_rate": 1.541260356045122e-05, + "loss": 0.0351, "step": 156825 }, { "epoch": 7.32, - "learning_rate": 5.389339459003329e-06, - "loss": 0.0285, + "learning_rate": 1.54121354906152e-05, + "loss": 0.0026, "step": 156830 }, { "epoch": 7.32, - "learning_rate": 5.388870657728189e-06, - "loss": 0.0143, + "learning_rate": 1.541166742077918e-05, + "loss": 0.0282, "step": 156835 }, { "epoch": 7.32, - "learning_rate": 5.388401856453051e-06, - "loss": 0.0775, + "learning_rate": 1.541119935094316e-05, + "loss": 0.0078, "step": 156840 }, { "epoch": 7.32, - "learning_rate": 5.387933055177911e-06, - "loss": 0.011, + "learning_rate": 1.541073128110714e-05, + "loss": 0.7365, "step": 156845 }, { "epoch": 7.32, - "learning_rate": 5.3874642539027714e-06, - "loss": 0.0695, + "learning_rate": 1.541026321127112e-05, + "loss": 0.0687, "step": 156850 }, { "epoch": 7.32, - "learning_rate": 5.386995452627631e-06, - "loss": 0.0732, + "learning_rate": 1.5409795141435104e-05, + "loss": 0.1052, "step": 156855 }, { "epoch": 7.32, - "learning_rate": 5.386526651352491e-06, - "loss": 0.2085, + "learning_rate": 1.5409327071599084e-05, + "loss": 0.0432, "step": 156860 }, { "epoch": 7.32, - "learning_rate": 5.386057850077353e-06, - "loss": 0.0704, + "learning_rate": 1.5408859001763064e-05, + "loss": 0.1289, "step": 156865 }, { "epoch": 7.32, - "learning_rate": 5.385589048802214e-06, - "loss": 0.1552, + "learning_rate": 1.5408390931927047e-05, + "loss": 0.2648, "step": 156870 }, { "epoch": 7.32, - "learning_rate": 5.385120247527074e-06, - "loss": 0.0356, + "learning_rate": 1.5407922862091023e-05, + "loss": 0.0223, "step": 156875 }, { "epoch": 7.32, - "learning_rate": 5.3846514462519336e-06, - "loss": 0.0049, + "learning_rate": 1.5407454792255003e-05, + "loss": 0.0357, "step": 156880 }, { "epoch": 7.32, - "learning_rate": 5.384182644976795e-06, - "loss": 0.0087, + "learning_rate": 1.5406986722418983e-05, + "loss": 0.027, "step": 156885 }, { "epoch": 7.32, - "learning_rate": 5.383713843701656e-06, - "loss": 0.0527, + "learning_rate": 1.5406518652582966e-05, + "loss": 0.0537, "step": 156890 }, { "epoch": 7.32, - "learning_rate": 5.383245042426516e-06, - "loss": 0.0575, + "learning_rate": 1.5406050582746946e-05, + "loss": 0.0342, "step": 156895 }, { "epoch": 7.32, - "learning_rate": 5.382776241151376e-06, - "loss": 0.0474, + "learning_rate": 1.5405582512910926e-05, + "loss": 0.0399, "step": 156900 }, { "epoch": 7.32, - "learning_rate": 5.382307439876237e-06, - "loss": 0.1638, + "learning_rate": 1.5405114443074906e-05, + "loss": 0.0533, "step": 156905 }, { "epoch": 7.32, - "learning_rate": 5.381838638601098e-06, - "loss": 0.0399, + "learning_rate": 1.540464637323889e-05, + "loss": 0.0431, "step": 156910 }, { "epoch": 7.32, - "learning_rate": 5.381369837325958e-06, - "loss": 0.2229, + "learning_rate": 1.540417830340287e-05, + "loss": 0.1185, "step": 156915 }, { "epoch": 7.32, - "learning_rate": 5.380901036050818e-06, - "loss": 0.2734, + "learning_rate": 1.540371023356685e-05, + "loss": 0.241, "step": 156920 }, { "epoch": 7.32, - "learning_rate": 5.380432234775679e-06, - "loss": 0.0136, + "learning_rate": 1.540324216373083e-05, + "loss": 0.0587, "step": 156925 }, { "epoch": 7.32, - "learning_rate": 5.379963433500539e-06, - "loss": 0.0382, + "learning_rate": 1.540277409389481e-05, + "loss": 0.0038, "step": 156930 }, { "epoch": 7.32, - "learning_rate": 5.3794946322254005e-06, - "loss": 0.0499, + "learning_rate": 1.540230602405879e-05, + "loss": 0.0478, "step": 156935 }, { "epoch": 7.32, - "learning_rate": 5.37902583095026e-06, - "loss": 0.0431, + "learning_rate": 1.5401837954222768e-05, + "loss": 0.0317, "step": 156940 }, { "epoch": 7.32, - "learning_rate": 5.378557029675121e-06, - "loss": 0.04, + "learning_rate": 1.540136988438675e-05, + "loss": 0.0091, "step": 156945 }, { "epoch": 7.32, - "learning_rate": 5.378088228399981e-06, - "loss": 0.0645, + "learning_rate": 1.540090181455073e-05, + "loss": 0.0625, "step": 156950 }, { "epoch": 7.32, - "learning_rate": 5.377619427124843e-06, - "loss": 0.0479, + "learning_rate": 1.540043374471471e-05, + "loss": 0.0314, "step": 156955 }, { "epoch": 7.32, - "learning_rate": 5.377150625849703e-06, - "loss": 0.0619, + "learning_rate": 1.539996567487869e-05, + "loss": 0.1012, "step": 156960 }, { "epoch": 7.32, - "learning_rate": 5.3766818245745635e-06, - "loss": 0.1055, + "learning_rate": 1.5399497605042674e-05, + "loss": 0.0817, "step": 156965 }, { "epoch": 7.32, - "learning_rate": 5.376213023299423e-06, - "loss": 0.1588, + "learning_rate": 1.5399029535206654e-05, + "loss": 0.2612, "step": 156970 }, { "epoch": 7.32, - "learning_rate": 5.375744222024284e-06, - "loss": 0.0269, + "learning_rate": 1.5398561465370633e-05, + "loss": 0.0108, "step": 156975 }, { "epoch": 7.32, - "learning_rate": 5.375275420749145e-06, - "loss": 0.0311, + "learning_rate": 1.5398093395534613e-05, + "loss": 0.0023, "step": 156980 }, { "epoch": 7.33, - "learning_rate": 5.374806619474006e-06, - "loss": 0.0282, + "learning_rate": 1.5397625325698596e-05, + "loss": 0.0146, "step": 156985 }, { "epoch": 7.33, - "learning_rate": 5.374337818198866e-06, - "loss": 0.0172, + "learning_rate": 1.5397157255862576e-05, + "loss": 0.0118, "step": 156990 }, { "epoch": 7.33, - "learning_rate": 5.3738690169237264e-06, - "loss": 0.047, + "learning_rate": 1.5396689186026556e-05, + "loss": 0.0418, "step": 156995 }, { "epoch": 7.33, - "learning_rate": 5.373400215648586e-06, - "loss": 0.0477, + "learning_rate": 1.5396221116190536e-05, + "loss": 0.0816, "step": 157000 }, { "epoch": 7.33, - "learning_rate": 5.372931414373448e-06, - "loss": 0.0352, + "learning_rate": 1.5395753046354516e-05, + "loss": 0.0965, "step": 157005 }, { "epoch": 7.33, - "learning_rate": 5.372462613098308e-06, - "loss": 0.0734, + "learning_rate": 1.5395284976518495e-05, + "loss": 0.0678, "step": 157010 }, { "epoch": 7.33, - "learning_rate": 5.371993811823169e-06, - "loss": 0.0825, + "learning_rate": 1.5394816906682475e-05, + "loss": 0.0899, "step": 157015 }, { "epoch": 7.33, - "learning_rate": 5.371525010548029e-06, - "loss": 0.1306, + "learning_rate": 1.539434883684646e-05, + "loss": 0.1519, "step": 157020 }, { "epoch": 7.33, - "learning_rate": 5.37105620927289e-06, - "loss": 0.0393, + "learning_rate": 1.539388076701044e-05, + "loss": 0.0256, "step": 157025 }, { "epoch": 7.33, - "learning_rate": 5.37058740799775e-06, - "loss": 0.05, + "learning_rate": 1.5393412697174418e-05, + "loss": 0.0036, "step": 157030 }, { "epoch": 7.33, - "learning_rate": 5.370118606722611e-06, - "loss": 0.0142, + "learning_rate": 1.5392944627338398e-05, + "loss": 0.0048, "step": 157035 }, { "epoch": 7.33, - "learning_rate": 5.369649805447471e-06, - "loss": 0.0025, + "learning_rate": 1.539247655750238e-05, + "loss": 0.0255, "step": 157040 }, { "epoch": 7.33, - "learning_rate": 5.369181004172332e-06, - "loss": 0.0109, + "learning_rate": 1.539200848766636e-05, + "loss": 0.0584, "step": 157045 }, { "epoch": 7.33, - "learning_rate": 5.3687122028971925e-06, - "loss": 0.0493, + "learning_rate": 1.539154041783034e-05, + "loss": 0.0576, "step": 157050 }, { "epoch": 7.33, - "learning_rate": 5.368243401622053e-06, - "loss": 0.0822, + "learning_rate": 1.5391072347994324e-05, + "loss": 0.0939, "step": 157055 }, { "epoch": 7.33, - "learning_rate": 5.367774600346913e-06, - "loss": 0.1329, + "learning_rate": 1.5390604278158304e-05, + "loss": 0.0874, "step": 157060 }, { "epoch": 7.33, - "learning_rate": 5.367305799071774e-06, - "loss": 0.1853, + "learning_rate": 1.539013620832228e-05, + "loss": 0.1229, "step": 157065 }, { "epoch": 7.33, - "learning_rate": 5.366836997796634e-06, - "loss": 0.2168, + "learning_rate": 1.538966813848626e-05, + "loss": 0.1891, "step": 157070 }, { "epoch": 7.33, - "learning_rate": 5.3663681965214956e-06, - "loss": 0.0169, + "learning_rate": 1.5389200068650243e-05, + "loss": 0.0099, "step": 157075 }, { "epoch": 7.33, - "learning_rate": 5.3658993952463555e-06, - "loss": 0.0222, + "learning_rate": 1.5388731998814223e-05, + "loss": 0.0229, "step": 157080 }, { "epoch": 7.33, - "learning_rate": 5.365430593971216e-06, - "loss": 0.0239, + "learning_rate": 1.5388263928978203e-05, + "loss": 0.0264, "step": 157085 }, { "epoch": 7.33, - "learning_rate": 5.364961792696076e-06, - "loss": 0.0717, + "learning_rate": 1.5387795859142183e-05, + "loss": 0.0042, "step": 157090 }, { "epoch": 7.33, - "learning_rate": 5.364492991420938e-06, - "loss": 0.0168, + "learning_rate": 1.5387327789306166e-05, + "loss": 0.0197, "step": 157095 }, { "epoch": 7.33, - "learning_rate": 5.364024190145798e-06, - "loss": 0.0103, + "learning_rate": 1.5386859719470146e-05, + "loss": 0.0811, "step": 157100 }, { "epoch": 7.33, - "learning_rate": 5.3635553888706585e-06, - "loss": 0.0822, + "learning_rate": 1.5386391649634126e-05, + "loss": 0.0579, "step": 157105 }, { "epoch": 7.33, - "learning_rate": 5.3630865875955185e-06, - "loss": 0.0556, + "learning_rate": 1.538592357979811e-05, + "loss": 0.0741, "step": 157110 }, { "epoch": 7.33, - "learning_rate": 5.362617786320378e-06, - "loss": 0.1413, + "learning_rate": 1.538545550996209e-05, + "loss": 0.1485, "step": 157115 }, { "epoch": 7.33, - "learning_rate": 5.36214898504524e-06, - "loss": 0.1142, + "learning_rate": 1.538498744012607e-05, + "loss": 0.2459, "step": 157120 }, { "epoch": 7.33, - "learning_rate": 5.361680183770101e-06, - "loss": 0.0367, + "learning_rate": 1.5384519370290048e-05, + "loss": 0.063, "step": 157125 }, { "epoch": 7.33, - "learning_rate": 5.361211382494961e-06, - "loss": 0.0114, + "learning_rate": 1.5384051300454028e-05, + "loss": 0.0595, "step": 157130 }, { "epoch": 7.33, - "learning_rate": 5.360742581219821e-06, - "loss": 0.0195, + "learning_rate": 1.5383583230618008e-05, + "loss": 0.0465, "step": 157135 }, { "epoch": 7.33, - "learning_rate": 5.3602737799446815e-06, - "loss": 0.0022, + "learning_rate": 1.5383115160781988e-05, + "loss": 0.0278, "step": 157140 }, { "epoch": 7.33, - "learning_rate": 5.359804978669543e-06, - "loss": 0.0064, + "learning_rate": 1.5382647090945967e-05, + "loss": 0.0138, "step": 157145 }, { "epoch": 7.33, - "learning_rate": 5.359336177394403e-06, - "loss": 0.0472, + "learning_rate": 1.538217902110995e-05, + "loss": 0.1014, "step": 157150 }, { "epoch": 7.33, - "learning_rate": 5.358867376119263e-06, - "loss": 0.047, + "learning_rate": 1.538171095127393e-05, + "loss": 0.0912, "step": 157155 }, { "epoch": 7.33, - "learning_rate": 5.358398574844124e-06, - "loss": 0.0828, + "learning_rate": 1.538124288143791e-05, + "loss": 0.0249, "step": 157160 }, { "epoch": 7.33, - "learning_rate": 5.357929773568985e-06, - "loss": 0.1268, + "learning_rate": 1.538077481160189e-05, + "loss": 0.1206, "step": 157165 }, { "epoch": 7.33, - "learning_rate": 5.357460972293845e-06, - "loss": 0.1663, + "learning_rate": 1.5380306741765873e-05, + "loss": 0.1043, "step": 157170 }, { "epoch": 7.33, - "learning_rate": 5.356992171018705e-06, - "loss": 0.0246, + "learning_rate": 1.5379838671929853e-05, + "loss": 0.0471, "step": 157175 }, { "epoch": 7.33, - "learning_rate": 5.356523369743566e-06, - "loss": 0.0042, + "learning_rate": 1.5379370602093833e-05, + "loss": 0.0166, "step": 157180 }, { "epoch": 7.33, - "learning_rate": 5.356054568468426e-06, - "loss": 0.0517, + "learning_rate": 1.5378902532257816e-05, + "loss": 0.0182, "step": 157185 }, { "epoch": 7.33, - "learning_rate": 5.355585767193288e-06, - "loss": 0.0329, + "learning_rate": 1.5378434462421793e-05, + "loss": 0.0232, "step": 157190 }, { "epoch": 7.33, - "learning_rate": 5.3551169659181475e-06, - "loss": 0.0719, + "learning_rate": 1.5377966392585772e-05, + "loss": 0.0256, "step": 157195 }, { "epoch": 7.34, - "learning_rate": 5.354648164643008e-06, - "loss": 0.1103, + "learning_rate": 1.5377498322749752e-05, + "loss": 0.059, "step": 157200 }, { "epoch": 7.34, - "learning_rate": 5.354179363367868e-06, - "loss": 0.0558, + "learning_rate": 1.5377030252913735e-05, + "loss": 0.0458, "step": 157205 }, { "epoch": 7.34, - "learning_rate": 5.35371056209273e-06, - "loss": 0.0528, + "learning_rate": 1.5376562183077715e-05, + "loss": 0.0797, "step": 157210 }, { "epoch": 7.34, - "learning_rate": 5.35324176081759e-06, - "loss": 0.1309, + "learning_rate": 1.5376094113241695e-05, + "loss": 0.061, "step": 157215 }, { "epoch": 7.34, - "learning_rate": 5.3527729595424506e-06, - "loss": 0.191, + "learning_rate": 1.5375626043405675e-05, + "loss": 0.1182, "step": 157220 }, { "epoch": 7.34, - "learning_rate": 5.3523041582673105e-06, - "loss": 0.0615, + "learning_rate": 1.5375157973569658e-05, + "loss": 0.0556, "step": 157225 }, { "epoch": 7.34, - "learning_rate": 5.351835356992171e-06, - "loss": 0.0087, + "learning_rate": 1.5374689903733638e-05, + "loss": 0.0388, "step": 157230 }, { "epoch": 7.34, - "learning_rate": 5.351366555717032e-06, - "loss": 0.0381, + "learning_rate": 1.5374221833897618e-05, + "loss": 0.0578, "step": 157235 }, { "epoch": 7.34, - "learning_rate": 5.350897754441893e-06, - "loss": 0.0758, + "learning_rate": 1.53737537640616e-05, + "loss": 0.0156, "step": 157240 }, { "epoch": 7.34, - "learning_rate": 5.350428953166753e-06, - "loss": 0.0807, + "learning_rate": 1.537328569422558e-05, + "loss": 0.0056, "step": 157245 }, { "epoch": 7.34, - "learning_rate": 5.3499601518916136e-06, - "loss": 0.0463, + "learning_rate": 1.537281762438956e-05, + "loss": 0.0878, "step": 157250 }, { "epoch": 7.34, - "learning_rate": 5.3494913506164735e-06, - "loss": 0.0651, + "learning_rate": 1.5372349554553537e-05, + "loss": 0.0803, "step": 157255 }, { "epoch": 7.34, - "learning_rate": 5.349022549341335e-06, - "loss": 0.0934, + "learning_rate": 1.537188148471752e-05, + "loss": 0.0732, "step": 157260 }, { "epoch": 7.34, - "learning_rate": 5.348553748066195e-06, - "loss": 0.1151, + "learning_rate": 1.53714134148815e-05, + "loss": 0.2214, "step": 157265 }, { "epoch": 7.34, - "learning_rate": 5.348084946791056e-06, - "loss": 0.1807, + "learning_rate": 1.537094534504548e-05, + "loss": 0.0796, "step": 157270 }, { "epoch": 7.34, - "learning_rate": 5.347616145515916e-06, - "loss": 0.0333, + "learning_rate": 1.537047727520946e-05, + "loss": 0.0202, "step": 157275 }, { "epoch": 7.34, - "learning_rate": 5.347147344240777e-06, - "loss": 0.0162, + "learning_rate": 1.5370009205373443e-05, + "loss": 0.0032, "step": 157280 }, { "epoch": 7.34, - "learning_rate": 5.346678542965637e-06, - "loss": 0.0048, + "learning_rate": 1.5369541135537423e-05, + "loss": 0.0357, "step": 157285 }, { "epoch": 7.34, - "learning_rate": 5.346209741690498e-06, - "loss": 0.021, + "learning_rate": 1.5369073065701403e-05, + "loss": 0.0204, "step": 157290 }, { "epoch": 7.34, - "learning_rate": 5.345740940415358e-06, - "loss": 0.0492, + "learning_rate": 1.5368604995865386e-05, + "loss": 0.0373, "step": 157295 }, { "epoch": 7.34, - "learning_rate": 5.345272139140219e-06, - "loss": 0.1336, + "learning_rate": 1.5368136926029366e-05, + "loss": 0.1138, "step": 157300 }, { "epoch": 7.34, - "learning_rate": 5.34480333786508e-06, - "loss": 0.0278, + "learning_rate": 1.5367668856193345e-05, + "loss": 0.0462, "step": 157305 }, { "epoch": 7.34, - "learning_rate": 5.34433453658994e-06, - "loss": 0.0574, + "learning_rate": 1.5367200786357325e-05, + "loss": 0.1087, "step": 157310 }, { "epoch": 7.34, - "learning_rate": 5.3438657353148e-06, - "loss": 0.1229, + "learning_rate": 1.536673271652131e-05, + "loss": 0.0971, "step": 157315 }, { "epoch": 7.34, - "learning_rate": 5.343396934039661e-06, - "loss": 0.1847, + "learning_rate": 1.5366264646685285e-05, + "loss": 0.2016, "step": 157320 }, { "epoch": 7.34, - "learning_rate": 5.342928132764521e-06, - "loss": 0.0225, + "learning_rate": 1.5365796576849265e-05, + "loss": 0.0259, "step": 157325 }, { "epoch": 7.34, - "learning_rate": 5.342459331489383e-06, - "loss": 0.0092, + "learning_rate": 1.5365328507013244e-05, + "loss": 0.0066, "step": 157330 }, { "epoch": 7.34, - "learning_rate": 5.341990530214243e-06, - "loss": 0.0067, + "learning_rate": 1.5364860437177228e-05, + "loss": 0.021, "step": 157335 }, { "epoch": 7.34, - "learning_rate": 5.341521728939103e-06, - "loss": 0.0428, + "learning_rate": 1.5364392367341207e-05, + "loss": 0.0373, "step": 157340 }, { "epoch": 7.34, - "learning_rate": 5.341052927663963e-06, - "loss": 0.038, + "learning_rate": 1.5363924297505187e-05, + "loss": 0.0236, "step": 157345 }, { "epoch": 7.34, - "learning_rate": 5.340584126388825e-06, - "loss": 0.057, + "learning_rate": 1.5363456227669167e-05, + "loss": 0.066, "step": 157350 }, { "epoch": 7.34, - "learning_rate": 5.340115325113685e-06, - "loss": 0.038, + "learning_rate": 1.536298815783315e-05, + "loss": 0.0417, "step": 157355 }, { "epoch": 7.34, - "learning_rate": 5.339646523838546e-06, - "loss": 0.1339, + "learning_rate": 1.536252008799713e-05, + "loss": 0.049, "step": 157360 }, { "epoch": 7.34, - "learning_rate": 5.339177722563406e-06, - "loss": 0.1711, + "learning_rate": 1.536205201816111e-05, + "loss": 0.1677, "step": 157365 }, { "epoch": 7.34, - "learning_rate": 5.3387089212882655e-06, - "loss": 0.2606, + "learning_rate": 1.5361583948325093e-05, + "loss": 0.0967, "step": 157370 }, { "epoch": 7.34, - "learning_rate": 5.338240120013127e-06, - "loss": 0.036, + "learning_rate": 1.5361115878489073e-05, + "loss": 0.0024, "step": 157375 }, { "epoch": 7.34, - "learning_rate": 5.337771318737988e-06, - "loss": 0.0061, + "learning_rate": 1.536064780865305e-05, + "loss": 0.0051, "step": 157380 }, { "epoch": 7.34, - "learning_rate": 5.337302517462848e-06, - "loss": 0.0333, + "learning_rate": 1.536017973881703e-05, + "loss": 0.0042, "step": 157385 }, { "epoch": 7.34, - "learning_rate": 5.336833716187708e-06, - "loss": 0.0126, + "learning_rate": 1.5359711668981012e-05, + "loss": 0.0161, "step": 157390 }, { "epoch": 7.34, - "learning_rate": 5.3363649149125686e-06, - "loss": 0.0365, + "learning_rate": 1.5359243599144992e-05, + "loss": 0.0486, "step": 157395 }, { "epoch": 7.34, - "learning_rate": 5.33589611363743e-06, - "loss": 0.0407, + "learning_rate": 1.5358775529308972e-05, + "loss": 0.0947, "step": 157400 }, { "epoch": 7.34, - "learning_rate": 5.33542731236229e-06, - "loss": 0.0549, + "learning_rate": 1.5358307459472952e-05, + "loss": 0.0392, "step": 157405 }, { "epoch": 7.34, - "learning_rate": 5.33495851108715e-06, - "loss": 0.0569, + "learning_rate": 1.5357839389636935e-05, + "loss": 0.0432, "step": 157410 }, { "epoch": 7.35, - "learning_rate": 5.334489709812011e-06, - "loss": 0.1913, + "learning_rate": 1.5357371319800915e-05, + "loss": 0.1262, "step": 157415 }, { "epoch": 7.35, - "learning_rate": 5.3340209085368725e-06, - "loss": 0.222, + "learning_rate": 1.5356903249964895e-05, + "loss": 0.3605, "step": 157420 }, { "epoch": 7.35, - "learning_rate": 5.333552107261732e-06, - "loss": 0.0534, + "learning_rate": 1.5356435180128878e-05, + "loss": 0.024, "step": 157425 }, { "epoch": 7.35, - "learning_rate": 5.333083305986592e-06, - "loss": 0.0054, + "learning_rate": 1.5355967110292858e-05, + "loss": 0.0408, "step": 157430 }, { "epoch": 7.35, - "learning_rate": 5.332614504711453e-06, - "loss": 0.0171, + "learning_rate": 1.5355499040456838e-05, + "loss": 0.0439, "step": 157435 }, { "epoch": 7.35, - "learning_rate": 5.332145703436313e-06, - "loss": 0.0427, + "learning_rate": 1.5355030970620817e-05, + "loss": 0.0576, "step": 157440 }, { "epoch": 7.35, - "learning_rate": 5.331676902161175e-06, - "loss": 0.0918, + "learning_rate": 1.5354562900784797e-05, + "loss": 0.0088, "step": 157445 }, { "epoch": 7.35, - "learning_rate": 5.331208100886035e-06, - "loss": 0.1365, + "learning_rate": 1.5354094830948777e-05, + "loss": 0.043, "step": 157450 }, { "epoch": 7.35, - "learning_rate": 5.330739299610895e-06, - "loss": 0.0513, + "learning_rate": 1.5353626761112757e-05, + "loss": 0.072, "step": 157455 }, { "epoch": 7.35, - "learning_rate": 5.330270498335755e-06, - "loss": 0.1673, + "learning_rate": 1.5353158691276737e-05, + "loss": 0.1312, "step": 157460 }, { "epoch": 7.35, - "learning_rate": 5.329801697060616e-06, - "loss": 0.1287, + "learning_rate": 1.535269062144072e-05, + "loss": 0.0783, "step": 157465 }, { "epoch": 7.35, - "learning_rate": 5.329332895785477e-06, - "loss": 0.2625, + "learning_rate": 1.53522225516047e-05, + "loss": 0.2024, "step": 157470 }, { "epoch": 7.35, - "learning_rate": 5.328864094510338e-06, - "loss": 0.0371, + "learning_rate": 1.535175448176868e-05, + "loss": 0.0211, "step": 157475 }, { "epoch": 7.35, - "learning_rate": 5.328395293235198e-06, - "loss": 0.0342, + "learning_rate": 1.5351286411932663e-05, + "loss": 0.0054, "step": 157480 }, { "epoch": 7.35, - "learning_rate": 5.327926491960058e-06, - "loss": 0.0211, + "learning_rate": 1.5350818342096643e-05, + "loss": 0.0193, "step": 157485 }, { "epoch": 7.35, - "learning_rate": 5.327457690684919e-06, - "loss": 0.0298, + "learning_rate": 1.5350350272260622e-05, + "loss": 0.0568, "step": 157490 }, { "epoch": 7.35, - "learning_rate": 5.32698888940978e-06, - "loss": 0.0343, + "learning_rate": 1.5349882202424602e-05, + "loss": 0.0244, "step": 157495 }, { "epoch": 7.35, - "learning_rate": 5.32652008813464e-06, - "loss": 0.0684, + "learning_rate": 1.5349414132588585e-05, + "loss": 0.016, "step": 157500 }, { "epoch": 7.35, - "learning_rate": 5.326051286859501e-06, - "loss": 0.0732, + "learning_rate": 1.5348946062752565e-05, + "loss": 0.0682, "step": 157505 }, { "epoch": 7.35, - "learning_rate": 5.325582485584361e-06, - "loss": 0.0686, + "learning_rate": 1.534847799291654e-05, + "loss": 0.0711, "step": 157510 }, { "epoch": 7.35, - "learning_rate": 5.325113684309222e-06, - "loss": 0.1399, + "learning_rate": 1.534800992308052e-05, + "loss": 0.0924, "step": 157515 }, { "epoch": 7.35, - "learning_rate": 5.324644883034082e-06, - "loss": 0.1888, + "learning_rate": 1.5347541853244505e-05, + "loss": 0.0839, "step": 157520 }, { "epoch": 7.35, - "learning_rate": 5.324176081758943e-06, - "loss": 0.0201, + "learning_rate": 1.5347073783408484e-05, + "loss": 0.041, "step": 157525 }, { "epoch": 7.35, - "learning_rate": 5.323707280483803e-06, - "loss": 0.0372, + "learning_rate": 1.5346605713572464e-05, + "loss": 0.0123, "step": 157530 }, { "epoch": 7.35, - "learning_rate": 5.3232384792086645e-06, - "loss": 0.0159, + "learning_rate": 1.5346137643736444e-05, + "loss": 0.0116, "step": 157535 }, { "epoch": 7.35, - "learning_rate": 5.3227696779335244e-06, - "loss": 0.0112, + "learning_rate": 1.5345669573900427e-05, + "loss": 0.0102, "step": 157540 }, { "epoch": 7.35, - "learning_rate": 5.322300876658385e-06, - "loss": 0.0709, + "learning_rate": 1.5345201504064407e-05, + "loss": 0.0269, "step": 157545 }, { "epoch": 7.35, - "learning_rate": 5.321832075383245e-06, - "loss": 0.086, + "learning_rate": 1.5344733434228387e-05, + "loss": 0.0391, "step": 157550 }, { "epoch": 7.35, - "learning_rate": 5.321363274108106e-06, - "loss": 0.0334, + "learning_rate": 1.534426536439237e-05, + "loss": 0.0367, "step": 157555 }, { "epoch": 7.35, - "learning_rate": 5.320894472832967e-06, - "loss": 0.1198, + "learning_rate": 1.534379729455635e-05, + "loss": 0.084, "step": 157560 }, { "epoch": 7.35, - "learning_rate": 5.3204256715578275e-06, - "loss": 0.1256, + "learning_rate": 1.534332922472033e-05, + "loss": 0.061, "step": 157565 }, { "epoch": 7.35, - "learning_rate": 5.3199568702826874e-06, - "loss": 0.2144, + "learning_rate": 1.5342861154884306e-05, + "loss": 0.2325, "step": 157570 }, { "epoch": 7.35, - "learning_rate": 5.319488069007548e-06, - "loss": 0.0539, + "learning_rate": 1.534239308504829e-05, + "loss": 0.0401, "step": 157575 }, { "epoch": 7.35, - "learning_rate": 5.319019267732408e-06, - "loss": 0.0408, + "learning_rate": 1.534192501521227e-05, + "loss": 0.005, "step": 157580 }, { "epoch": 7.35, - "learning_rate": 5.31855046645727e-06, - "loss": 0.0316, + "learning_rate": 1.534145694537625e-05, + "loss": 0.013, "step": 157585 }, { "epoch": 7.35, - "learning_rate": 5.31808166518213e-06, - "loss": 0.0633, + "learning_rate": 1.534098887554023e-05, + "loss": 0.0215, "step": 157590 }, { "epoch": 7.35, - "learning_rate": 5.3176128639069905e-06, - "loss": 0.0303, + "learning_rate": 1.5340520805704212e-05, + "loss": 0.0579, "step": 157595 }, { "epoch": 7.35, - "learning_rate": 5.31714406263185e-06, - "loss": 0.0383, + "learning_rate": 1.5340052735868192e-05, + "loss": 0.0615, "step": 157600 }, { "epoch": 7.35, - "learning_rate": 5.316675261356712e-06, - "loss": 0.0447, + "learning_rate": 1.5339584666032172e-05, + "loss": 0.091, "step": 157605 }, { "epoch": 7.35, - "learning_rate": 5.316206460081572e-06, - "loss": 0.1574, + "learning_rate": 1.5339116596196155e-05, + "loss": 0.0679, "step": 157610 }, { "epoch": 7.35, - "learning_rate": 5.315737658806433e-06, - "loss": 0.1585, + "learning_rate": 1.5338648526360135e-05, + "loss": 0.2171, "step": 157615 }, { "epoch": 7.35, - "learning_rate": 5.315268857531293e-06, - "loss": 0.1813, + "learning_rate": 1.5338180456524115e-05, + "loss": 0.1811, "step": 157620 }, { "epoch": 7.35, - "learning_rate": 5.314800056256153e-06, - "loss": 0.0481, + "learning_rate": 1.5337712386688094e-05, + "loss": 0.02, "step": 157625 }, { "epoch": 7.36, - "learning_rate": 5.314331254981014e-06, - "loss": 0.0171, + "learning_rate": 1.5337244316852078e-05, + "loss": 0.0098, "step": 157630 }, { "epoch": 7.36, - "learning_rate": 5.313862453705875e-06, - "loss": 0.0289, + "learning_rate": 1.5336776247016054e-05, + "loss": 0.0175, "step": 157635 }, { "epoch": 7.36, - "learning_rate": 5.313393652430735e-06, - "loss": 0.038, + "learning_rate": 1.5336308177180034e-05, + "loss": 0.0125, "step": 157640 }, { "epoch": 7.36, - "learning_rate": 5.312924851155595e-06, - "loss": 0.0567, + "learning_rate": 1.5335840107344014e-05, + "loss": 0.0371, "step": 157645 }, { "epoch": 7.36, - "learning_rate": 5.312456049880456e-06, - "loss": 0.0322, + "learning_rate": 1.5335372037507997e-05, + "loss": 0.0404, "step": 157650 }, { "epoch": 7.36, - "learning_rate": 5.311987248605317e-06, - "loss": 0.1252, + "learning_rate": 1.5334903967671977e-05, + "loss": 0.0721, "step": 157655 }, { "epoch": 7.36, - "learning_rate": 5.311518447330177e-06, - "loss": 0.0911, + "learning_rate": 1.5334435897835956e-05, + "loss": 0.1623, "step": 157660 }, { "epoch": 7.36, - "learning_rate": 5.311049646055037e-06, - "loss": 0.0894, + "learning_rate": 1.533396782799994e-05, + "loss": 0.116, "step": 157665 }, { "epoch": 7.36, - "learning_rate": 5.310580844779898e-06, - "loss": 0.1562, + "learning_rate": 1.533349975816392e-05, + "loss": 0.2594, "step": 157670 }, { "epoch": 7.36, - "learning_rate": 5.31011204350476e-06, - "loss": 0.0281, + "learning_rate": 1.53330316883279e-05, + "loss": 0.0797, "step": 157675 }, { "epoch": 7.36, - "learning_rate": 5.3096432422296195e-06, - "loss": 0.0029, + "learning_rate": 1.533256361849188e-05, + "loss": 0.0466, "step": 157680 }, { "epoch": 7.36, - "learning_rate": 5.3091744409544795e-06, - "loss": 0.004, + "learning_rate": 1.5332095548655862e-05, + "loss": 0.0264, "step": 157685 }, { "epoch": 7.36, - "learning_rate": 5.30870563967934e-06, - "loss": 0.0429, + "learning_rate": 1.5331627478819842e-05, + "loss": 0.0234, "step": 157690 }, { "epoch": 7.36, - "learning_rate": 5.3082368384042e-06, - "loss": 0.05, + "learning_rate": 1.5331159408983822e-05, + "loss": 0.0765, "step": 157695 }, { "epoch": 7.36, - "learning_rate": 5.307768037129062e-06, - "loss": 0.0603, + "learning_rate": 1.53306913391478e-05, + "loss": 0.0225, "step": 157700 }, { "epoch": 7.36, - "learning_rate": 5.307299235853922e-06, - "loss": 0.0605, + "learning_rate": 1.533022326931178e-05, + "loss": 0.0858, "step": 157705 }, { "epoch": 7.36, - "learning_rate": 5.3068304345787825e-06, - "loss": 0.0798, + "learning_rate": 1.532975519947576e-05, + "loss": 0.0511, "step": 157710 }, { "epoch": 7.36, - "learning_rate": 5.3063616333036424e-06, - "loss": 0.2277, + "learning_rate": 1.532928712963974e-05, + "loss": 0.1755, "step": 157715 }, { "epoch": 7.36, - "learning_rate": 5.305892832028503e-06, - "loss": 0.1352, + "learning_rate": 1.532881905980372e-05, + "loss": 0.1754, "step": 157720 }, { "epoch": 7.36, - "learning_rate": 5.305424030753365e-06, - "loss": 0.0328, + "learning_rate": 1.5328350989967704e-05, + "loss": 0.0151, "step": 157725 }, { "epoch": 7.36, - "learning_rate": 5.304955229478225e-06, - "loss": 0.0139, + "learning_rate": 1.5327882920131684e-05, + "loss": 0.0405, "step": 157730 }, { "epoch": 7.36, - "learning_rate": 5.304486428203085e-06, - "loss": 0.009, + "learning_rate": 1.5327414850295664e-05, + "loss": 0.0086, "step": 157735 }, { "epoch": 7.36, - "learning_rate": 5.3040176269279455e-06, - "loss": 0.0194, + "learning_rate": 1.5326946780459647e-05, + "loss": 0.0611, "step": 157740 }, { "epoch": 7.36, - "learning_rate": 5.303548825652807e-06, - "loss": 0.0296, + "learning_rate": 1.5326478710623627e-05, + "loss": 0.0194, "step": 157745 }, { "epoch": 7.36, - "learning_rate": 5.303080024377667e-06, - "loss": 0.0317, + "learning_rate": 1.5326010640787607e-05, + "loss": 0.0619, "step": 157750 }, { "epoch": 7.36, - "learning_rate": 5.302611223102527e-06, - "loss": 0.0627, + "learning_rate": 1.5325542570951587e-05, + "loss": 0.0365, "step": 157755 }, { "epoch": 7.36, - "learning_rate": 5.302142421827388e-06, - "loss": 0.0887, + "learning_rate": 1.5325074501115566e-05, + "loss": 0.1648, "step": 157760 }, { "epoch": 7.36, - "learning_rate": 5.301673620552248e-06, - "loss": 0.0636, + "learning_rate": 1.5324606431279546e-05, + "loss": 0.2059, "step": 157765 }, { "epoch": 7.36, - "learning_rate": 5.301204819277109e-06, - "loss": 0.1206, + "learning_rate": 1.5324138361443526e-05, + "loss": 0.183, "step": 157770 }, { "epoch": 7.36, - "learning_rate": 5.300736018001969e-06, - "loss": 0.0522, + "learning_rate": 1.5323670291607506e-05, + "loss": 0.0154, "step": 157775 }, { "epoch": 7.36, - "learning_rate": 5.30026721672683e-06, - "loss": 0.0573, + "learning_rate": 1.532320222177149e-05, + "loss": 0.0094, "step": 157780 }, { "epoch": 7.36, - "learning_rate": 5.29979841545169e-06, - "loss": 0.022, + "learning_rate": 1.532273415193547e-05, + "loss": 0.0362, "step": 157785 }, { "epoch": 7.36, - "learning_rate": 5.299329614176551e-06, - "loss": 0.0165, + "learning_rate": 1.532226608209945e-05, + "loss": 0.0201, "step": 157790 }, { "epoch": 7.36, - "learning_rate": 5.2988608129014115e-06, - "loss": 0.0634, + "learning_rate": 1.5321798012263432e-05, + "loss": 0.0327, "step": 157795 }, { "epoch": 7.36, - "learning_rate": 5.298392011626272e-06, - "loss": 0.062, + "learning_rate": 1.5321329942427412e-05, + "loss": 0.028, "step": 157800 }, { "epoch": 7.36, - "learning_rate": 5.297923210351132e-06, - "loss": 0.1061, + "learning_rate": 1.532086187259139e-05, + "loss": 0.1099, "step": 157805 }, { "epoch": 7.36, - "learning_rate": 5.297454409075993e-06, - "loss": 0.0398, + "learning_rate": 1.532039380275537e-05, + "loss": 0.0811, "step": 157810 }, { "epoch": 7.36, - "learning_rate": 5.296985607800854e-06, - "loss": 0.179, + "learning_rate": 1.5319925732919355e-05, + "loss": 0.1017, "step": 157815 }, { "epoch": 7.36, - "learning_rate": 5.296516806525715e-06, - "loss": 0.2115, + "learning_rate": 1.5319457663083334e-05, + "loss": 0.2105, "step": 157820 }, { "epoch": 7.36, - "learning_rate": 5.2960480052505745e-06, - "loss": 0.0029, + "learning_rate": 1.531898959324731e-05, + "loss": 0.0241, "step": 157825 }, { "epoch": 7.36, - "learning_rate": 5.295579203975435e-06, - "loss": 0.0145, + "learning_rate": 1.531852152341129e-05, + "loss": 0.012, "step": 157830 }, { "epoch": 7.36, - "learning_rate": 5.295110402700295e-06, - "loss": 0.0221, + "learning_rate": 1.5318053453575274e-05, + "loss": 0.0163, "step": 157835 }, { "epoch": 7.37, - "learning_rate": 5.294641601425157e-06, - "loss": 0.0306, + "learning_rate": 1.5317585383739254e-05, + "loss": 0.0301, "step": 157840 }, { "epoch": 7.37, - "learning_rate": 5.294172800150017e-06, - "loss": 0.1058, + "learning_rate": 1.5317117313903233e-05, + "loss": 0.0163, "step": 157845 }, { "epoch": 7.37, - "learning_rate": 5.293703998874878e-06, - "loss": 0.034, + "learning_rate": 1.5316649244067217e-05, + "loss": 0.0856, "step": 157850 }, { "epoch": 7.37, - "learning_rate": 5.2932351975997375e-06, - "loss": 0.0455, + "learning_rate": 1.5316181174231196e-05, + "loss": 0.0399, "step": 157855 }, { "epoch": 7.37, - "learning_rate": 5.292766396324599e-06, - "loss": 0.0956, + "learning_rate": 1.5315713104395176e-05, + "loss": 0.0499, "step": 157860 }, { "epoch": 7.37, - "learning_rate": 5.292297595049459e-06, - "loss": 0.0956, + "learning_rate": 1.5315245034559156e-05, + "loss": 0.1262, "step": 157865 }, { "epoch": 7.37, - "learning_rate": 5.29182879377432e-06, - "loss": 0.1582, + "learning_rate": 1.531477696472314e-05, + "loss": 0.1656, "step": 157870 }, { "epoch": 7.37, - "learning_rate": 5.29135999249918e-06, - "loss": 0.0312, + "learning_rate": 1.531430889488712e-05, + "loss": 0.0311, "step": 157875 }, { "epoch": 7.37, - "learning_rate": 5.29089119122404e-06, - "loss": 0.0432, + "learning_rate": 1.53138408250511e-05, + "loss": 0.0237, "step": 157880 }, { "epoch": 7.37, - "learning_rate": 5.290422389948901e-06, - "loss": 0.0117, + "learning_rate": 1.5313372755215075e-05, + "loss": 0.0231, "step": 157885 }, { "epoch": 7.37, - "learning_rate": 5.289953588673762e-06, - "loss": 0.0235, + "learning_rate": 1.531290468537906e-05, + "loss": 0.0547, "step": 157890 }, { "epoch": 7.37, - "learning_rate": 5.289484787398622e-06, - "loss": 0.0856, + "learning_rate": 1.531243661554304e-05, + "loss": 0.0627, "step": 157895 }, { "epoch": 7.37, - "learning_rate": 5.289015986123483e-06, - "loss": 0.0193, + "learning_rate": 1.5311968545707018e-05, + "loss": 0.051, "step": 157900 }, { "epoch": 7.37, - "learning_rate": 5.288547184848343e-06, - "loss": 0.09, + "learning_rate": 1.5311500475871e-05, + "loss": 0.0707, "step": 157905 }, { "epoch": 7.37, - "learning_rate": 5.288078383573204e-06, - "loss": 0.0984, + "learning_rate": 1.531103240603498e-05, + "loss": 0.0383, "step": 157910 }, { "epoch": 7.37, - "learning_rate": 5.287609582298064e-06, - "loss": 0.1506, + "learning_rate": 1.531056433619896e-05, + "loss": 0.2551, "step": 157915 }, { "epoch": 7.37, - "learning_rate": 5.287140781022925e-06, - "loss": 0.2192, + "learning_rate": 1.531009626636294e-05, + "loss": 0.0989, "step": 157920 }, { "epoch": 7.37, - "learning_rate": 5.286671979747785e-06, - "loss": 0.0014, + "learning_rate": 1.5309628196526924e-05, + "loss": 0.0114, "step": 157925 }, { "epoch": 7.37, - "learning_rate": 5.286203178472647e-06, - "loss": 0.0122, + "learning_rate": 1.5309160126690904e-05, + "loss": 0.0532, "step": 157930 }, { "epoch": 7.37, - "learning_rate": 5.285734377197507e-06, - "loss": 0.0061, + "learning_rate": 1.5308692056854884e-05, + "loss": 0.0119, "step": 157935 }, { "epoch": 7.37, - "learning_rate": 5.285265575922367e-06, - "loss": 0.03, + "learning_rate": 1.5308223987018864e-05, + "loss": 0.0801, "step": 157940 }, { "epoch": 7.37, - "learning_rate": 5.284796774647227e-06, - "loss": 0.0347, + "learning_rate": 1.5307755917182847e-05, + "loss": 0.0595, "step": 157945 }, { "epoch": 7.37, - "learning_rate": 5.284327973372087e-06, - "loss": 0.0721, + "learning_rate": 1.5307287847346823e-05, + "loss": 0.0487, "step": 157950 }, { "epoch": 7.37, - "learning_rate": 5.283859172096949e-06, - "loss": 0.0933, + "learning_rate": 1.5306819777510803e-05, + "loss": 0.0808, "step": 157955 }, { "epoch": 7.37, - "learning_rate": 5.28339037082181e-06, - "loss": 0.1153, + "learning_rate": 1.5306351707674783e-05, + "loss": 0.0724, "step": 157960 }, { "epoch": 7.37, - "learning_rate": 5.28292156954667e-06, - "loss": 0.1862, + "learning_rate": 1.5305883637838766e-05, + "loss": 0.1449, "step": 157965 }, { "epoch": 7.37, - "learning_rate": 5.2824527682715296e-06, - "loss": 0.1985, + "learning_rate": 1.5305415568002746e-05, + "loss": 0.2436, "step": 157970 }, { "epoch": 7.37, - "learning_rate": 5.28198396699639e-06, - "loss": 0.0631, + "learning_rate": 1.5304947498166726e-05, + "loss": 0.0291, "step": 157975 }, { "epoch": 7.37, - "learning_rate": 5.281515165721252e-06, - "loss": 0.0179, + "learning_rate": 1.530447942833071e-05, + "loss": 0.0108, "step": 157980 }, { "epoch": 7.37, - "learning_rate": 5.281046364446112e-06, - "loss": 0.0252, + "learning_rate": 1.530401135849469e-05, + "loss": 0.0098, "step": 157985 }, { "epoch": 7.37, - "learning_rate": 5.280577563170972e-06, - "loss": 0.1052, + "learning_rate": 1.530354328865867e-05, + "loss": 0.0565, "step": 157990 }, { "epoch": 7.37, - "learning_rate": 5.280108761895833e-06, - "loss": 0.082, + "learning_rate": 1.530307521882265e-05, + "loss": 0.027, "step": 157995 }, { "epoch": 7.37, - "learning_rate": 5.279639960620694e-06, - "loss": 0.0555, + "learning_rate": 1.530260714898663e-05, + "loss": 0.0435, "step": 158000 }, { "epoch": 7.37, - "learning_rate": 5.279171159345554e-06, - "loss": 0.0518, + "learning_rate": 1.530213907915061e-05, + "loss": 0.0909, "step": 158005 }, { "epoch": 7.37, - "learning_rate": 5.278702358070414e-06, - "loss": 0.1141, + "learning_rate": 1.530167100931459e-05, + "loss": 0.1229, "step": 158010 }, { "epoch": 7.37, - "learning_rate": 5.278233556795275e-06, - "loss": 0.1509, + "learning_rate": 1.5301202939478568e-05, + "loss": 0.2224, "step": 158015 }, { "epoch": 7.37, - "learning_rate": 5.277764755520135e-06, - "loss": 0.2687, + "learning_rate": 1.530073486964255e-05, + "loss": 0.2039, "step": 158020 }, { "epoch": 7.37, - "learning_rate": 5.2772959542449964e-06, - "loss": 0.0028, + "learning_rate": 1.530026679980653e-05, + "loss": 0.0073, "step": 158025 }, { "epoch": 7.37, - "learning_rate": 5.276827152969856e-06, - "loss": 0.0046, + "learning_rate": 1.529979872997051e-05, + "loss": 0.0058, "step": 158030 }, { "epoch": 7.37, - "learning_rate": 5.276358351694717e-06, - "loss": 0.0252, + "learning_rate": 1.5299330660134494e-05, + "loss": 0.0419, "step": 158035 }, { "epoch": 7.37, - "learning_rate": 5.275889550419577e-06, - "loss": 0.1007, + "learning_rate": 1.5298862590298473e-05, + "loss": 0.0682, "step": 158040 }, { "epoch": 7.37, - "learning_rate": 5.275420749144438e-06, - "loss": 0.0377, + "learning_rate": 1.5298394520462453e-05, + "loss": 0.0493, "step": 158045 }, { "epoch": 7.37, - "learning_rate": 5.274951947869299e-06, - "loss": 0.0475, + "learning_rate": 1.5297926450626433e-05, + "loss": 0.0077, "step": 158050 }, { "epoch": 7.38, - "learning_rate": 5.2744831465941594e-06, - "loss": 0.0364, + "learning_rate": 1.5297458380790416e-05, + "loss": 0.0986, "step": 158055 }, { "epoch": 7.38, - "learning_rate": 5.274014345319019e-06, - "loss": 0.1316, + "learning_rate": 1.5296990310954396e-05, + "loss": 0.0336, "step": 158060 }, { "epoch": 7.38, - "learning_rate": 5.27354554404388e-06, - "loss": 0.1567, + "learning_rate": 1.5296522241118376e-05, + "loss": 0.1676, "step": 158065 }, { "epoch": 7.38, - "learning_rate": 5.273076742768741e-06, - "loss": 0.1946, + "learning_rate": 1.5296054171282356e-05, + "loss": 0.0555, "step": 158070 }, { "epoch": 7.38, - "learning_rate": 5.272607941493602e-06, - "loss": 0.0007, + "learning_rate": 1.5295586101446336e-05, + "loss": 0.0085, "step": 158075 }, { "epoch": 7.38, - "learning_rate": 5.272139140218462e-06, - "loss": 0.0154, + "learning_rate": 1.5295118031610315e-05, + "loss": 0.0126, "step": 158080 }, { "epoch": 7.38, - "learning_rate": 5.2716703389433224e-06, - "loss": 0.0415, + "learning_rate": 1.5294649961774295e-05, + "loss": 0.0152, "step": 158085 }, { "epoch": 7.38, - "learning_rate": 5.271201537668182e-06, - "loss": 0.0416, + "learning_rate": 1.529418189193828e-05, + "loss": 0.014, "step": 158090 }, { "epoch": 7.38, - "learning_rate": 5.270732736393044e-06, - "loss": 0.1025, + "learning_rate": 1.5293713822102258e-05, + "loss": 0.0467, "step": 158095 }, { "epoch": 7.38, - "learning_rate": 5.270263935117904e-06, - "loss": 0.0616, + "learning_rate": 1.5293245752266238e-05, + "loss": 0.03, "step": 158100 }, { "epoch": 7.38, - "learning_rate": 5.269795133842765e-06, - "loss": 0.0596, + "learning_rate": 1.5292777682430218e-05, + "loss": 0.0612, "step": 158105 }, { "epoch": 7.38, - "learning_rate": 5.269326332567625e-06, - "loss": 0.0341, + "learning_rate": 1.52923096125942e-05, + "loss": 0.0816, "step": 158110 }, { "epoch": 7.38, - "learning_rate": 5.268857531292485e-06, - "loss": 0.0852, + "learning_rate": 1.529184154275818e-05, + "loss": 0.0926, "step": 158115 }, { "epoch": 7.38, - "learning_rate": 5.268388730017346e-06, - "loss": 0.1944, + "learning_rate": 1.529137347292216e-05, + "loss": 0.1703, "step": 158120 }, { "epoch": 7.38, - "learning_rate": 5.267919928742207e-06, - "loss": 0.0138, + "learning_rate": 1.529090540308614e-05, + "loss": 0.0532, "step": 158125 }, { "epoch": 7.38, - "learning_rate": 5.267451127467067e-06, - "loss": 0.0276, + "learning_rate": 1.5290437333250124e-05, + "loss": 0.0047, "step": 158130 }, { "epoch": 7.38, - "learning_rate": 5.266982326191928e-06, - "loss": 0.0395, + "learning_rate": 1.5289969263414104e-05, + "loss": 0.0112, "step": 158135 }, { "epoch": 7.38, - "learning_rate": 5.2665135249167885e-06, - "loss": 0.0404, + "learning_rate": 1.528950119357808e-05, + "loss": 0.0242, "step": 158140 }, { "epoch": 7.38, - "learning_rate": 5.266044723641649e-06, - "loss": 0.0529, + "learning_rate": 1.528903312374206e-05, + "loss": 0.0309, "step": 158145 }, { "epoch": 7.38, - "learning_rate": 5.265575922366509e-06, - "loss": 0.0389, + "learning_rate": 1.5288565053906043e-05, + "loss": 0.0259, "step": 158150 }, { "epoch": 7.38, - "learning_rate": 5.26510712109137e-06, - "loss": 0.0255, + "learning_rate": 1.5288096984070023e-05, + "loss": 0.0611, "step": 158155 }, { "epoch": 7.38, - "learning_rate": 5.26463831981623e-06, - "loss": 0.0535, + "learning_rate": 1.5287628914234003e-05, + "loss": 0.0401, "step": 158160 }, { "epoch": 7.38, - "learning_rate": 5.2641695185410915e-06, - "loss": 0.12, + "learning_rate": 1.5287160844397986e-05, + "loss": 0.179, "step": 158165 }, { "epoch": 7.38, - "learning_rate": 5.2637007172659515e-06, - "loss": 0.0838, + "learning_rate": 1.5286692774561966e-05, + "loss": 0.1796, "step": 158170 }, { "epoch": 7.38, - "learning_rate": 5.263231915990812e-06, - "loss": 0.034, + "learning_rate": 1.5286224704725945e-05, + "loss": 0.0324, "step": 158175 }, { "epoch": 7.38, - "learning_rate": 5.262763114715672e-06, - "loss": 0.0183, + "learning_rate": 1.5285756634889925e-05, + "loss": 0.0216, "step": 158180 }, { "epoch": 7.38, - "learning_rate": 5.262294313440534e-06, - "loss": 0.013, + "learning_rate": 1.528528856505391e-05, + "loss": 0.0181, "step": 158185 }, { "epoch": 7.38, - "learning_rate": 5.261825512165394e-06, - "loss": 0.0024, + "learning_rate": 1.5284820495217888e-05, + "loss": 0.0569, "step": 158190 }, { "epoch": 7.38, - "learning_rate": 5.2613567108902545e-06, - "loss": 0.0427, + "learning_rate": 1.5284352425381868e-05, + "loss": 0.0247, "step": 158195 }, { "epoch": 7.38, - "learning_rate": 5.2608879096151145e-06, - "loss": 0.0661, + "learning_rate": 1.5283884355545848e-05, + "loss": 0.052, "step": 158200 }, { "epoch": 7.38, - "learning_rate": 5.260419108339974e-06, - "loss": 0.0379, + "learning_rate": 1.5283416285709828e-05, + "loss": 0.0812, "step": 158205 }, { "epoch": 7.38, - "learning_rate": 5.259950307064836e-06, - "loss": 0.0373, + "learning_rate": 1.5282948215873808e-05, + "loss": 0.0924, "step": 158210 }, { "epoch": 7.38, - "learning_rate": 5.259481505789697e-06, - "loss": 0.1285, + "learning_rate": 1.5282480146037787e-05, + "loss": 0.0875, "step": 158215 }, { "epoch": 7.38, - "learning_rate": 5.259012704514557e-06, - "loss": 0.1139, + "learning_rate": 1.528201207620177e-05, + "loss": 0.1873, "step": 158220 }, { "epoch": 7.38, - "learning_rate": 5.258543903239417e-06, - "loss": 0.0432, + "learning_rate": 1.528154400636575e-05, + "loss": 0.0373, "step": 158225 }, { "epoch": 7.38, - "learning_rate": 5.2580751019642774e-06, - "loss": 0.0264, + "learning_rate": 1.528107593652973e-05, + "loss": 0.0128, "step": 158230 }, { "epoch": 7.38, - "learning_rate": 5.257606300689139e-06, - "loss": 0.08, + "learning_rate": 1.528060786669371e-05, + "loss": 0.02, "step": 158235 }, { "epoch": 7.38, - "learning_rate": 5.257137499413999e-06, - "loss": 0.051, + "learning_rate": 1.5280139796857693e-05, + "loss": 0.0093, "step": 158240 }, { "epoch": 7.38, - "learning_rate": 5.256668698138859e-06, - "loss": 0.0814, + "learning_rate": 1.5279671727021673e-05, + "loss": 0.0568, "step": 158245 }, { "epoch": 7.38, - "learning_rate": 5.25619989686372e-06, - "loss": 0.0191, + "learning_rate": 1.5279203657185653e-05, + "loss": 0.0749, "step": 158250 }, { "epoch": 7.38, - "learning_rate": 5.255731095588581e-06, - "loss": 0.0227, + "learning_rate": 1.5278735587349633e-05, + "loss": 0.0241, "step": 158255 }, { "epoch": 7.38, - "learning_rate": 5.255262294313441e-06, - "loss": 0.1208, + "learning_rate": 1.5278267517513616e-05, + "loss": 0.0562, "step": 158260 }, { "epoch": 7.38, - "learning_rate": 5.254793493038301e-06, - "loss": 0.1883, + "learning_rate": 1.5277799447677592e-05, + "loss": 0.1568, "step": 158265 }, { "epoch": 7.39, - "learning_rate": 5.254324691763162e-06, - "loss": 0.1271, + "learning_rate": 1.5277331377841572e-05, + "loss": 0.0692, "step": 158270 }, { "epoch": 7.39, - "learning_rate": 5.253855890488022e-06, - "loss": 0.0088, + "learning_rate": 1.5276863308005555e-05, + "loss": 0.0246, "step": 158275 }, { "epoch": 7.39, - "learning_rate": 5.2533870892128836e-06, - "loss": 0.0175, + "learning_rate": 1.5276395238169535e-05, + "loss": 0.0211, "step": 158280 }, { "epoch": 7.39, - "learning_rate": 5.2529182879377435e-06, - "loss": 0.0156, + "learning_rate": 1.5275927168333515e-05, + "loss": 0.0138, "step": 158285 }, { "epoch": 7.39, - "learning_rate": 5.252449486662604e-06, - "loss": 0.0328, + "learning_rate": 1.5275459098497495e-05, + "loss": 0.0229, "step": 158290 }, { "epoch": 7.39, - "learning_rate": 5.251980685387464e-06, - "loss": 0.0933, + "learning_rate": 1.5274991028661478e-05, + "loss": 0.0092, "step": 158295 }, { "epoch": 7.39, - "learning_rate": 5.251511884112325e-06, - "loss": 0.0211, + "learning_rate": 1.5274522958825458e-05, + "loss": 0.0497, "step": 158300 }, { "epoch": 7.39, - "learning_rate": 5.251043082837186e-06, - "loss": 0.031, + "learning_rate": 1.5274054888989438e-05, + "loss": 0.0458, "step": 158305 }, { "epoch": 7.39, - "learning_rate": 5.2505742815620465e-06, - "loss": 0.1428, + "learning_rate": 1.5273586819153417e-05, + "loss": 0.1255, "step": 158310 }, { "epoch": 7.39, - "learning_rate": 5.2501054802869065e-06, - "loss": 0.1164, + "learning_rate": 1.52731187493174e-05, + "loss": 0.1504, "step": 158315 }, { "epoch": 7.39, - "learning_rate": 5.249636679011767e-06, - "loss": 0.1913, + "learning_rate": 1.527265067948138e-05, + "loss": 0.1878, "step": 158320 }, { "epoch": 7.39, - "learning_rate": 5.249167877736628e-06, - "loss": 0.0104, + "learning_rate": 1.527218260964536e-05, + "loss": 0.0267, "step": 158325 }, { "epoch": 7.39, - "learning_rate": 5.248699076461489e-06, - "loss": 0.0271, + "learning_rate": 1.5271714539809337e-05, + "loss": 0.0093, "step": 158330 }, { "epoch": 7.39, - "learning_rate": 5.248230275186349e-06, - "loss": 0.0099, + "learning_rate": 1.527124646997332e-05, + "loss": 0.0417, "step": 158335 }, { "epoch": 7.39, - "learning_rate": 5.2477614739112095e-06, - "loss": 0.0359, + "learning_rate": 1.52707784001373e-05, + "loss": 0.0202, "step": 158340 }, { "epoch": 7.39, - "learning_rate": 5.2472926726360695e-06, - "loss": 0.0096, + "learning_rate": 1.527031033030128e-05, + "loss": 0.0142, "step": 158345 }, { "epoch": 7.39, - "learning_rate": 5.246823871360931e-06, - "loss": 0.1444, + "learning_rate": 1.5269842260465263e-05, + "loss": 0.0393, "step": 158350 }, { "epoch": 7.39, - "learning_rate": 5.246355070085791e-06, - "loss": 0.0477, + "learning_rate": 1.5269374190629243e-05, + "loss": 0.0272, "step": 158355 }, { "epoch": 7.39, - "learning_rate": 5.245886268810652e-06, - "loss": 0.0804, + "learning_rate": 1.5268906120793222e-05, + "loss": 0.0578, "step": 158360 }, { "epoch": 7.39, - "learning_rate": 5.245417467535512e-06, - "loss": 0.1788, + "learning_rate": 1.5268438050957202e-05, + "loss": 0.0802, "step": 158365 }, { "epoch": 7.39, - "learning_rate": 5.2449486662603725e-06, - "loss": 0.2783, + "learning_rate": 1.5267969981121185e-05, + "loss": 0.2166, "step": 158370 }, { "epoch": 7.39, - "learning_rate": 5.244479864985233e-06, - "loss": 0.0226, + "learning_rate": 1.5267501911285165e-05, + "loss": 0.0126, "step": 158375 }, { "epoch": 7.39, - "learning_rate": 5.244011063710094e-06, - "loss": 0.0104, + "learning_rate": 1.5267033841449145e-05, + "loss": 0.0047, "step": 158380 }, { "epoch": 7.39, - "learning_rate": 5.243542262434954e-06, - "loss": 0.0066, + "learning_rate": 1.5266565771613125e-05, + "loss": 0.0308, "step": 158385 }, { "epoch": 7.39, - "learning_rate": 5.243073461159815e-06, - "loss": 0.0297, + "learning_rate": 1.5266097701777105e-05, + "loss": 0.028, "step": 158390 }, { "epoch": 7.39, - "learning_rate": 5.242604659884676e-06, - "loss": 0.0685, + "learning_rate": 1.5265629631941085e-05, + "loss": 0.0595, "step": 158395 }, { "epoch": 7.39, - "learning_rate": 5.242135858609536e-06, - "loss": 0.0621, + "learning_rate": 1.5265161562105064e-05, + "loss": 0.0164, "step": 158400 }, { "epoch": 7.39, - "learning_rate": 5.241667057334396e-06, - "loss": 0.11, + "learning_rate": 1.5264693492269048e-05, + "loss": 0.1139, "step": 158405 }, { "epoch": 7.39, - "learning_rate": 5.241198256059257e-06, - "loss": 0.0851, + "learning_rate": 1.5264225422433027e-05, + "loss": 0.1117, "step": 158410 }, { "epoch": 7.39, - "learning_rate": 5.240729454784117e-06, - "loss": 0.0436, + "learning_rate": 1.5263757352597007e-05, + "loss": 0.1639, "step": 158415 }, { "epoch": 7.39, - "learning_rate": 5.240260653508979e-06, - "loss": 0.1651, + "learning_rate": 1.5263289282760987e-05, + "loss": 0.1014, "step": 158420 }, { "epoch": 7.39, - "learning_rate": 5.2397918522338386e-06, - "loss": 0.0149, + "learning_rate": 1.526282121292497e-05, + "loss": 0.0298, "step": 158425 }, { "epoch": 7.39, - "learning_rate": 5.239323050958699e-06, - "loss": 0.0077, + "learning_rate": 1.526235314308895e-05, + "loss": 0.0033, "step": 158430 }, { "epoch": 7.39, - "learning_rate": 5.238854249683559e-06, - "loss": 0.0156, + "learning_rate": 1.526188507325293e-05, + "loss": 0.0152, "step": 158435 }, { "epoch": 7.39, - "learning_rate": 5.238385448408419e-06, - "loss": 0.0542, + "learning_rate": 1.526141700341691e-05, + "loss": 0.0279, "step": 158440 }, { "epoch": 7.39, - "learning_rate": 5.237916647133281e-06, - "loss": 0.0823, + "learning_rate": 1.5260948933580893e-05, + "loss": 0.0639, "step": 158445 }, { "epoch": 7.39, - "learning_rate": 5.237447845858142e-06, - "loss": 0.0413, + "learning_rate": 1.5260480863744873e-05, + "loss": 0.0685, "step": 158450 }, { "epoch": 7.39, - "learning_rate": 5.2369790445830016e-06, - "loss": 0.0928, + "learning_rate": 1.526001279390885e-05, + "loss": 0.0611, "step": 158455 }, { "epoch": 7.39, - "learning_rate": 5.2365102433078615e-06, - "loss": 0.1003, + "learning_rate": 1.5259544724072832e-05, + "loss": 0.0824, "step": 158460 }, { "epoch": 7.39, - "learning_rate": 5.236041442032723e-06, - "loss": 0.1214, + "learning_rate": 1.5259076654236812e-05, + "loss": 0.1204, "step": 158465 }, { "epoch": 7.39, - "learning_rate": 5.235572640757584e-06, - "loss": 0.2223, + "learning_rate": 1.5258608584400792e-05, + "loss": 0.2659, "step": 158470 }, { "epoch": 7.39, - "learning_rate": 5.235103839482444e-06, - "loss": 0.0217, + "learning_rate": 1.5258140514564772e-05, + "loss": 0.0862, "step": 158475 }, { "epoch": 7.39, - "learning_rate": 5.234635038207304e-06, - "loss": 0.0539, + "learning_rate": 1.5257672444728755e-05, + "loss": 0.0162, "step": 158480 }, { "epoch": 7.4, - "learning_rate": 5.2341662369321646e-06, - "loss": 0.0307, + "learning_rate": 1.5257204374892735e-05, + "loss": 0.0291, "step": 158485 }, { "epoch": 7.4, - "learning_rate": 5.233697435657026e-06, - "loss": 0.0306, + "learning_rate": 1.5256736305056715e-05, + "loss": 0.0681, "step": 158490 }, { "epoch": 7.4, - "learning_rate": 5.233228634381886e-06, - "loss": 0.0564, + "learning_rate": 1.5256268235220694e-05, + "loss": 0.0442, "step": 158495 }, { "epoch": 7.4, - "learning_rate": 5.232759833106746e-06, - "loss": 0.3383, + "learning_rate": 1.5255800165384678e-05, + "loss": 0.066, "step": 158500 }, { "epoch": 7.4, - "learning_rate": 5.232291031831607e-06, - "loss": 0.0258, + "learning_rate": 1.5255332095548656e-05, + "loss": 0.0368, "step": 158505 }, { "epoch": 7.4, - "learning_rate": 5.2318222305564685e-06, - "loss": 0.0528, + "learning_rate": 1.5254864025712636e-05, + "loss": 0.0905, "step": 158510 }, { "epoch": 7.4, - "learning_rate": 5.231353429281328e-06, - "loss": 0.0968, + "learning_rate": 1.5254395955876619e-05, + "loss": 0.201, "step": 158515 }, { "epoch": 7.4, - "learning_rate": 5.230884628006188e-06, - "loss": 0.2363, + "learning_rate": 1.5253927886040599e-05, + "loss": 0.1497, "step": 158520 }, { "epoch": 7.4, - "learning_rate": 5.230415826731049e-06, - "loss": 0.0696, + "learning_rate": 1.5253459816204578e-05, + "loss": 0.0229, "step": 158525 }, { "epoch": 7.4, - "learning_rate": 5.229947025455909e-06, - "loss": 0.0125, + "learning_rate": 1.5252991746368558e-05, + "loss": 0.0343, "step": 158530 }, { "epoch": 7.4, - "learning_rate": 5.229478224180771e-06, - "loss": 0.0322, + "learning_rate": 1.525252367653254e-05, + "loss": 0.0556, "step": 158535 }, { "epoch": 7.4, - "learning_rate": 5.229009422905631e-06, - "loss": 0.0451, + "learning_rate": 1.525205560669652e-05, + "loss": 0.0486, "step": 158540 }, { "epoch": 7.4, - "learning_rate": 5.228540621630491e-06, - "loss": 0.0311, + "learning_rate": 1.52515875368605e-05, + "loss": 0.0105, "step": 158545 }, { "epoch": 7.4, - "learning_rate": 5.228071820355351e-06, - "loss": 0.0536, + "learning_rate": 1.525111946702448e-05, + "loss": 0.1113, "step": 158550 }, { "epoch": 7.4, - "learning_rate": 5.227603019080212e-06, - "loss": 0.0422, + "learning_rate": 1.5250651397188462e-05, + "loss": 0.0675, "step": 158555 }, { "epoch": 7.4, - "learning_rate": 5.227134217805073e-06, - "loss": 0.1032, + "learning_rate": 1.5250183327352442e-05, + "loss": 0.0848, "step": 158560 }, { "epoch": 7.4, - "learning_rate": 5.226665416529934e-06, - "loss": 0.0438, + "learning_rate": 1.524971525751642e-05, + "loss": 0.1333, "step": 158565 }, { "epoch": 7.4, - "learning_rate": 5.226196615254794e-06, - "loss": 0.1571, + "learning_rate": 1.52492471876804e-05, + "loss": 0.2258, "step": 158570 }, { "epoch": 7.4, - "learning_rate": 5.225727813979654e-06, - "loss": 0.0236, + "learning_rate": 1.5248779117844383e-05, + "loss": 0.0316, "step": 158575 }, { "epoch": 7.4, - "learning_rate": 5.225259012704515e-06, - "loss": 0.0043, + "learning_rate": 1.5248311048008363e-05, + "loss": 0.0024, "step": 158580 }, { "epoch": 7.4, - "learning_rate": 5.224790211429376e-06, - "loss": 0.0313, + "learning_rate": 1.5247842978172343e-05, + "loss": 0.0578, "step": 158585 }, { "epoch": 7.4, - "learning_rate": 5.224321410154236e-06, - "loss": 0.0161, + "learning_rate": 1.5247374908336326e-05, + "loss": 0.0102, "step": 158590 }, { "epoch": 7.4, - "learning_rate": 5.223852608879097e-06, - "loss": 0.0133, + "learning_rate": 1.5246906838500304e-05, + "loss": 0.0289, "step": 158595 }, { "epoch": 7.4, - "learning_rate": 5.223383807603957e-06, - "loss": 0.0677, + "learning_rate": 1.5246438768664284e-05, + "loss": 0.0442, "step": 158600 }, { "epoch": 7.4, - "learning_rate": 5.222915006328818e-06, - "loss": 0.0336, + "learning_rate": 1.5245970698828264e-05, + "loss": 0.0961, "step": 158605 }, { "epoch": 7.4, - "learning_rate": 5.222446205053678e-06, - "loss": 0.1153, + "learning_rate": 1.5245502628992247e-05, + "loss": 0.0554, "step": 158610 }, { "epoch": 7.4, - "learning_rate": 5.221977403778539e-06, - "loss": 0.1265, + "learning_rate": 1.5245034559156227e-05, + "loss": 0.1229, "step": 158615 }, { "epoch": 7.4, - "learning_rate": 5.221508602503399e-06, - "loss": 0.2323, + "learning_rate": 1.5244566489320207e-05, + "loss": 0.1312, "step": 158620 }, { "epoch": 7.4, - "learning_rate": 5.22103980122826e-06, - "loss": 0.0184, + "learning_rate": 1.5244098419484187e-05, + "loss": 0.0196, "step": 158625 }, { "epoch": 7.4, - "learning_rate": 5.22057099995312e-06, - "loss": 0.0139, + "learning_rate": 1.5243630349648168e-05, + "loss": 0.018, "step": 158630 }, { "epoch": 7.4, - "learning_rate": 5.220102198677981e-06, - "loss": 0.0179, + "learning_rate": 1.5243162279812148e-05, + "loss": 0.0312, "step": 158635 }, { "epoch": 7.4, - "learning_rate": 5.219633397402841e-06, - "loss": 0.0047, + "learning_rate": 1.5242694209976128e-05, + "loss": 0.0576, "step": 158640 }, { "epoch": 7.4, - "learning_rate": 5.219164596127702e-06, - "loss": 0.0629, + "learning_rate": 1.5242226140140111e-05, + "loss": 0.0369, "step": 158645 }, { "epoch": 7.4, - "learning_rate": 5.218695794852563e-06, - "loss": 0.0429, + "learning_rate": 1.524175807030409e-05, + "loss": 0.0398, "step": 158650 }, { "epoch": 7.4, - "learning_rate": 5.2182269935774235e-06, - "loss": 0.0628, + "learning_rate": 1.524129000046807e-05, + "loss": 0.1161, "step": 158655 }, { "epoch": 7.4, - "learning_rate": 5.217758192302283e-06, - "loss": 0.0454, + "learning_rate": 1.5240821930632049e-05, + "loss": 0.0674, "step": 158660 }, { "epoch": 7.4, - "learning_rate": 5.217289391027144e-06, - "loss": 0.1341, + "learning_rate": 1.5240353860796032e-05, + "loss": 0.0738, "step": 158665 }, { "epoch": 7.4, - "learning_rate": 5.216820589752004e-06, - "loss": 0.1942, + "learning_rate": 1.5239885790960012e-05, + "loss": 0.187, "step": 158670 }, { "epoch": 7.4, - "learning_rate": 5.216351788476866e-06, - "loss": 0.0214, + "learning_rate": 1.5239417721123992e-05, + "loss": 0.0687, "step": 158675 }, { "epoch": 7.4, - "learning_rate": 5.215882987201726e-06, - "loss": 0.0075, + "learning_rate": 1.5238949651287971e-05, + "loss": 0.0071, "step": 158680 }, { "epoch": 7.4, - "learning_rate": 5.2154141859265865e-06, - "loss": 0.0346, + "learning_rate": 1.5238481581451955e-05, + "loss": 0.0411, "step": 158685 }, { "epoch": 7.4, - "learning_rate": 5.214945384651446e-06, - "loss": 0.0699, + "learning_rate": 1.5238013511615933e-05, + "loss": 0.0668, "step": 158690 }, { "epoch": 7.4, - "learning_rate": 5.214476583376306e-06, - "loss": 0.0356, + "learning_rate": 1.5237545441779913e-05, + "loss": 0.0531, "step": 158695 }, { "epoch": 7.41, - "learning_rate": 5.214007782101168e-06, - "loss": 0.088, + "learning_rate": 1.5237077371943896e-05, + "loss": 0.0706, "step": 158700 }, { "epoch": 7.41, - "learning_rate": 5.213538980826029e-06, - "loss": 0.043, + "learning_rate": 1.5236609302107876e-05, + "loss": 0.0542, "step": 158705 }, { "epoch": 7.41, - "learning_rate": 5.213070179550889e-06, - "loss": 0.1098, + "learning_rate": 1.5236141232271855e-05, + "loss": 0.1127, "step": 158710 }, { "epoch": 7.41, - "learning_rate": 5.212601378275749e-06, - "loss": 0.1304, + "learning_rate": 1.5235673162435835e-05, + "loss": 0.1531, "step": 158715 }, { "epoch": 7.41, - "learning_rate": 5.21213257700061e-06, - "loss": 0.1959, + "learning_rate": 1.5235205092599818e-05, + "loss": 0.2182, "step": 158720 }, { "epoch": 7.41, - "learning_rate": 5.211663775725471e-06, - "loss": 0.0396, + "learning_rate": 1.5234737022763797e-05, + "loss": 0.0188, "step": 158725 }, { "epoch": 7.41, - "learning_rate": 5.211194974450331e-06, - "loss": 0.0272, + "learning_rate": 1.5234268952927776e-05, + "loss": 0.0172, "step": 158730 }, { "epoch": 7.41, - "learning_rate": 5.210726173175191e-06, - "loss": 0.0228, + "learning_rate": 1.5233800883091756e-05, + "loss": 0.0096, "step": 158735 }, { "epoch": 7.41, - "learning_rate": 5.210257371900052e-06, - "loss": 0.0126, + "learning_rate": 1.523333281325574e-05, + "loss": 0.0434, "step": 158740 }, { "epoch": 7.41, - "learning_rate": 5.209788570624913e-06, - "loss": 0.0244, + "learning_rate": 1.523286474341972e-05, + "loss": 0.0075, "step": 158745 }, { "epoch": 7.41, - "learning_rate": 5.209319769349773e-06, - "loss": 0.0325, + "learning_rate": 1.5232396673583699e-05, + "loss": 0.017, "step": 158750 }, { "epoch": 7.41, - "learning_rate": 5.208850968074633e-06, - "loss": 0.0695, + "learning_rate": 1.5231928603747677e-05, + "loss": 0.0557, "step": 158755 }, { "epoch": 7.41, - "learning_rate": 5.208382166799494e-06, - "loss": 0.0871, + "learning_rate": 1.523146053391166e-05, + "loss": 0.0848, "step": 158760 }, { "epoch": 7.41, - "learning_rate": 5.207913365524354e-06, - "loss": 0.2423, + "learning_rate": 1.523099246407564e-05, + "loss": 0.0675, "step": 158765 }, { "epoch": 7.41, - "learning_rate": 5.2074445642492155e-06, - "loss": 0.0662, + "learning_rate": 1.523052439423962e-05, + "loss": 0.1789, "step": 158770 }, { "epoch": 7.41, - "learning_rate": 5.2069757629740754e-06, - "loss": 0.007, + "learning_rate": 1.5230056324403603e-05, + "loss": 0.0208, "step": 158775 }, { "epoch": 7.41, - "learning_rate": 5.206506961698936e-06, - "loss": 0.0201, + "learning_rate": 1.5229588254567583e-05, + "loss": 0.006, "step": 158780 }, { "epoch": 7.41, - "learning_rate": 5.206038160423796e-06, - "loss": 0.0338, + "learning_rate": 1.5229120184731561e-05, + "loss": 0.0062, "step": 158785 }, { "epoch": 7.41, - "learning_rate": 5.205569359148658e-06, - "loss": 0.0311, + "learning_rate": 1.5228652114895541e-05, + "loss": 0.0401, "step": 158790 }, { "epoch": 7.41, - "learning_rate": 5.205100557873518e-06, - "loss": 0.0252, + "learning_rate": 1.5228184045059524e-05, + "loss": 0.0438, "step": 158795 }, { "epoch": 7.41, - "learning_rate": 5.2046317565983785e-06, - "loss": 0.0447, + "learning_rate": 1.5227715975223504e-05, + "loss": 0.0829, "step": 158800 }, { "epoch": 7.41, - "learning_rate": 5.204162955323238e-06, - "loss": 0.0892, + "learning_rate": 1.5227247905387484e-05, + "loss": 0.0367, "step": 158805 }, { "epoch": 7.41, - "learning_rate": 5.203694154048099e-06, - "loss": 0.0644, + "learning_rate": 1.5226779835551464e-05, + "loss": 0.1044, "step": 158810 }, { "epoch": 7.41, - "learning_rate": 5.20322535277296e-06, - "loss": 0.3424, + "learning_rate": 1.5226311765715447e-05, + "loss": 0.205, "step": 158815 }, { "epoch": 7.41, - "learning_rate": 5.202756551497821e-06, - "loss": 0.3015, + "learning_rate": 1.5225843695879425e-05, + "loss": 0.113, "step": 158820 }, { "epoch": 7.41, - "learning_rate": 5.202287750222681e-06, - "loss": 0.0415, + "learning_rate": 1.5225375626043405e-05, + "loss": 0.0347, "step": 158825 }, { "epoch": 7.41, - "learning_rate": 5.2018189489475415e-06, - "loss": 0.0389, + "learning_rate": 1.5224907556207388e-05, + "loss": 0.0014, "step": 158830 }, { "epoch": 7.41, - "learning_rate": 5.201350147672401e-06, - "loss": 0.01, + "learning_rate": 1.5224439486371368e-05, + "loss": 0.0167, "step": 158835 }, { "epoch": 7.41, - "learning_rate": 5.200881346397263e-06, - "loss": 0.0468, + "learning_rate": 1.5223971416535348e-05, + "loss": 0.0099, "step": 158840 }, { "epoch": 7.41, - "learning_rate": 5.200412545122123e-06, - "loss": 0.023, + "learning_rate": 1.5223503346699327e-05, + "loss": 0.0157, "step": 158845 }, { "epoch": 7.41, - "learning_rate": 5.199943743846984e-06, - "loss": 0.0285, + "learning_rate": 1.5223035276863309e-05, + "loss": 0.0268, "step": 158850 }, { "epoch": 7.41, - "learning_rate": 5.199474942571844e-06, - "loss": 0.1084, + "learning_rate": 1.5222567207027289e-05, + "loss": 0.1083, "step": 158855 }, { "epoch": 7.41, - "learning_rate": 5.199006141296705e-06, - "loss": 0.1511, + "learning_rate": 1.5222099137191269e-05, + "loss": 0.1017, "step": 158860 }, { "epoch": 7.41, - "learning_rate": 5.198537340021565e-06, - "loss": 0.1269, + "learning_rate": 1.5221631067355248e-05, + "loss": 0.1296, "step": 158865 }, { "epoch": 7.41, - "learning_rate": 5.198068538746426e-06, - "loss": 0.2161, + "learning_rate": 1.5221162997519232e-05, + "loss": 0.0896, "step": 158870 }, { "epoch": 7.41, - "learning_rate": 5.197599737471286e-06, - "loss": 0.0579, + "learning_rate": 1.5220694927683211e-05, + "loss": 0.0203, "step": 158875 }, { "epoch": 7.41, - "learning_rate": 5.197130936196147e-06, - "loss": 0.0324, + "learning_rate": 1.522022685784719e-05, + "loss": 0.0193, "step": 158880 }, { "epoch": 7.41, - "learning_rate": 5.1966621349210075e-06, - "loss": 0.01, + "learning_rate": 1.5219758788011173e-05, + "loss": 0.0126, "step": 158885 }, { "epoch": 7.41, - "learning_rate": 5.196193333645868e-06, - "loss": 0.0248, + "learning_rate": 1.5219290718175153e-05, + "loss": 0.0171, "step": 158890 }, { "epoch": 7.41, - "learning_rate": 5.195724532370728e-06, - "loss": 0.044, + "learning_rate": 1.5218822648339132e-05, + "loss": 0.0271, "step": 158895 }, { "epoch": 7.41, - "learning_rate": 5.195255731095589e-06, - "loss": 0.08, + "learning_rate": 1.5218354578503112e-05, + "loss": 0.036, "step": 158900 }, { "epoch": 7.41, - "learning_rate": 5.19478692982045e-06, - "loss": 0.1451, + "learning_rate": 1.5217886508667095e-05, + "loss": 0.1456, "step": 158905 }, { "epoch": 7.41, - "learning_rate": 5.194318128545311e-06, - "loss": 0.0569, + "learning_rate": 1.5217418438831075e-05, + "loss": 0.0759, "step": 158910 }, { "epoch": 7.42, - "learning_rate": 5.1938493272701705e-06, - "loss": 0.0923, + "learning_rate": 1.5216950368995053e-05, + "loss": 0.1639, "step": 158915 }, { "epoch": 7.42, - "learning_rate": 5.193380525995031e-06, - "loss": 0.2769, + "learning_rate": 1.5216482299159033e-05, + "loss": 0.1815, "step": 158920 }, { "epoch": 7.42, - "learning_rate": 5.192911724719891e-06, - "loss": 0.0284, + "learning_rate": 1.5216014229323016e-05, + "loss": 0.0017, "step": 158925 }, { "epoch": 7.42, - "learning_rate": 5.192442923444753e-06, - "loss": 0.0549, + "learning_rate": 1.5215546159486996e-05, + "loss": 0.0058, "step": 158930 }, { "epoch": 7.42, - "learning_rate": 5.191974122169613e-06, - "loss": 0.0168, + "learning_rate": 1.5215078089650976e-05, + "loss": 0.0539, "step": 158935 }, { "epoch": 7.42, - "learning_rate": 5.1915053208944736e-06, - "loss": 0.0464, + "learning_rate": 1.5214610019814956e-05, + "loss": 0.0063, "step": 158940 }, { "epoch": 7.42, - "learning_rate": 5.1910365196193335e-06, - "loss": 0.076, + "learning_rate": 1.5214141949978937e-05, + "loss": 0.0291, "step": 158945 }, { "epoch": 7.42, - "learning_rate": 5.1905677183441934e-06, - "loss": 0.0658, + "learning_rate": 1.5213673880142917e-05, + "loss": 0.0364, "step": 158950 }, { "epoch": 7.42, - "learning_rate": 5.190098917069055e-06, - "loss": 0.049, + "learning_rate": 1.5213205810306897e-05, + "loss": 0.0625, "step": 158955 }, { "epoch": 7.42, - "learning_rate": 5.189630115793916e-06, - "loss": 0.1534, + "learning_rate": 1.521273774047088e-05, + "loss": 0.0717, "step": 158960 }, { "epoch": 7.42, - "learning_rate": 5.189161314518776e-06, - "loss": 0.182, + "learning_rate": 1.521226967063486e-05, + "loss": 0.1304, "step": 158965 }, { "epoch": 7.42, - "learning_rate": 5.188692513243636e-06, - "loss": 0.0917, + "learning_rate": 1.521180160079884e-05, + "loss": 0.1269, "step": 158970 }, { "epoch": 7.42, - "learning_rate": 5.188223711968497e-06, - "loss": 0.0008, + "learning_rate": 1.5211333530962818e-05, + "loss": 0.0119, "step": 158975 }, { "epoch": 7.42, - "learning_rate": 5.187754910693358e-06, - "loss": 0.0148, + "learning_rate": 1.5210865461126801e-05, + "loss": 0.0066, "step": 158980 }, { "epoch": 7.42, - "learning_rate": 5.187286109418218e-06, - "loss": 0.0511, + "learning_rate": 1.5210397391290781e-05, + "loss": 0.0143, "step": 158985 }, { "epoch": 7.42, - "learning_rate": 5.186817308143078e-06, - "loss": 0.0458, + "learning_rate": 1.520992932145476e-05, + "loss": 0.0337, "step": 158990 }, { "epoch": 7.42, - "learning_rate": 5.186348506867939e-06, - "loss": 0.0935, + "learning_rate": 1.520946125161874e-05, + "loss": 0.0336, "step": 158995 }, { "epoch": 7.42, - "learning_rate": 5.1858797055928e-06, - "loss": 0.0396, + "learning_rate": 1.5208993181782724e-05, + "loss": 0.0506, "step": 159000 }, { "epoch": 7.42, - "learning_rate": 5.18541090431766e-06, - "loss": 0.1021, + "learning_rate": 1.5208525111946704e-05, + "loss": 0.035, "step": 159005 }, { "epoch": 7.42, - "learning_rate": 5.18494210304252e-06, - "loss": 0.0455, + "learning_rate": 1.5208057042110682e-05, + "loss": 0.0943, "step": 159010 }, { "epoch": 7.42, - "learning_rate": 5.184473301767381e-06, - "loss": 0.2049, + "learning_rate": 1.5207588972274665e-05, + "loss": 0.1642, "step": 159015 }, { "epoch": 7.42, - "learning_rate": 5.184004500492241e-06, - "loss": 0.2614, + "learning_rate": 1.5207120902438645e-05, + "loss": 0.1318, "step": 159020 }, { "epoch": 7.42, - "learning_rate": 5.183535699217103e-06, - "loss": 0.0451, + "learning_rate": 1.5206652832602625e-05, + "loss": 0.0347, "step": 159025 }, { "epoch": 7.42, - "learning_rate": 5.1830668979419625e-06, - "loss": 0.011, + "learning_rate": 1.5206184762766604e-05, + "loss": 0.0069, "step": 159030 }, { "epoch": 7.42, - "learning_rate": 5.182598096666823e-06, - "loss": 0.0265, + "learning_rate": 1.5205716692930588e-05, + "loss": 0.026, "step": 159035 }, { "epoch": 7.42, - "learning_rate": 5.182129295391683e-06, - "loss": 0.0347, + "learning_rate": 1.5205248623094566e-05, + "loss": 0.0163, "step": 159040 }, { "epoch": 7.42, - "learning_rate": 5.181660494116545e-06, - "loss": 0.0231, + "learning_rate": 1.5204780553258546e-05, + "loss": 0.0553, "step": 159045 }, { "epoch": 7.42, - "learning_rate": 5.181191692841405e-06, - "loss": 0.0816, + "learning_rate": 1.5204312483422525e-05, + "loss": 0.1, "step": 159050 }, { "epoch": 7.42, - "learning_rate": 5.180722891566266e-06, - "loss": 0.1025, + "learning_rate": 1.5203844413586509e-05, + "loss": 0.0866, "step": 159055 }, { "epoch": 7.42, - "learning_rate": 5.1802540902911255e-06, - "loss": 0.0668, + "learning_rate": 1.5203376343750488e-05, + "loss": 0.0749, "step": 159060 }, { "epoch": 7.42, - "learning_rate": 5.179785289015986e-06, - "loss": 0.0598, + "learning_rate": 1.5202908273914468e-05, + "loss": 0.1291, "step": 159065 }, { "epoch": 7.42, - "learning_rate": 5.179316487740847e-06, - "loss": 0.1203, + "learning_rate": 1.520244020407845e-05, + "loss": 0.1242, "step": 159070 }, { "epoch": 7.42, - "learning_rate": 5.178847686465708e-06, - "loss": 0.0161, + "learning_rate": 1.520197213424243e-05, + "loss": 0.0195, "step": 159075 }, { "epoch": 7.42, - "learning_rate": 5.178378885190568e-06, - "loss": 0.0051, + "learning_rate": 1.520150406440641e-05, + "loss": 0.0123, "step": 159080 }, { "epoch": 7.42, - "learning_rate": 5.177910083915429e-06, - "loss": 0.0154, + "learning_rate": 1.520103599457039e-05, + "loss": 0.0135, "step": 159085 }, { "epoch": 7.42, - "learning_rate": 5.1774412826402885e-06, - "loss": 0.0241, + "learning_rate": 1.5200567924734372e-05, + "loss": 0.0546, "step": 159090 }, { "epoch": 7.42, - "learning_rate": 5.17697248136515e-06, - "loss": 0.0695, + "learning_rate": 1.5200099854898352e-05, + "loss": 0.0349, "step": 159095 }, { "epoch": 7.42, - "learning_rate": 5.17650368009001e-06, - "loss": 0.0401, + "learning_rate": 1.5199631785062332e-05, + "loss": 0.0487, "step": 159100 }, { "epoch": 7.42, - "learning_rate": 5.176034878814871e-06, - "loss": 0.0645, + "learning_rate": 1.519916371522631e-05, + "loss": 0.0738, "step": 159105 }, { "epoch": 7.42, - "learning_rate": 5.175566077539731e-06, - "loss": 0.0482, + "learning_rate": 1.5198695645390293e-05, + "loss": 0.0288, "step": 159110 }, { "epoch": 7.42, - "learning_rate": 5.175097276264592e-06, - "loss": 0.1444, + "learning_rate": 1.5198227575554273e-05, + "loss": 0.1211, "step": 159115 }, { "epoch": 7.42, - "learning_rate": 5.174628474989452e-06, - "loss": 0.1795, + "learning_rate": 1.5197759505718253e-05, + "loss": 0.1305, "step": 159120 }, { "epoch": 7.42, - "learning_rate": 5.174159673714313e-06, - "loss": 0.0218, + "learning_rate": 1.5197291435882233e-05, + "loss": 0.0038, "step": 159125 }, { "epoch": 7.43, - "learning_rate": 5.173690872439173e-06, - "loss": 0.0292, + "learning_rate": 1.5196823366046216e-05, + "loss": 0.0074, "step": 159130 }, { "epoch": 7.43, - "learning_rate": 5.173222071164034e-06, - "loss": 0.018, + "learning_rate": 1.5196355296210194e-05, + "loss": 0.0346, "step": 159135 }, { "epoch": 7.43, - "learning_rate": 5.172753269888895e-06, - "loss": 0.0394, + "learning_rate": 1.5195887226374174e-05, + "loss": 0.0315, "step": 159140 }, { "epoch": 7.43, - "learning_rate": 5.172284468613755e-06, - "loss": 0.0395, + "learning_rate": 1.5195419156538157e-05, + "loss": 0.0056, "step": 159145 }, { "epoch": 7.43, - "learning_rate": 5.171815667338615e-06, - "loss": 0.0311, + "learning_rate": 1.5194951086702137e-05, + "loss": 0.108, "step": 159150 }, { "epoch": 7.43, - "learning_rate": 5.171346866063476e-06, - "loss": 0.1073, + "learning_rate": 1.5194483016866117e-05, + "loss": 0.0652, "step": 159155 }, { "epoch": 7.43, - "learning_rate": 5.170878064788336e-06, - "loss": 0.0894, + "learning_rate": 1.5194014947030097e-05, + "loss": 0.1063, "step": 159160 }, { "epoch": 7.43, - "learning_rate": 5.170409263513198e-06, - "loss": 0.1114, + "learning_rate": 1.5193546877194078e-05, + "loss": 0.0639, "step": 159165 }, { "epoch": 7.43, - "learning_rate": 5.169940462238058e-06, - "loss": 0.2483, + "learning_rate": 1.5193078807358058e-05, + "loss": 0.1335, "step": 159170 }, { "epoch": 7.43, - "learning_rate": 5.169471660962918e-06, - "loss": 0.0516, + "learning_rate": 1.5192610737522038e-05, + "loss": 0.0224, "step": 159175 }, { "epoch": 7.43, - "learning_rate": 5.169002859687778e-06, - "loss": 0.0007, + "learning_rate": 1.5192142667686018e-05, + "loss": 0.0266, "step": 159180 }, { "epoch": 7.43, - "learning_rate": 5.16853405841264e-06, - "loss": 0.0196, + "learning_rate": 1.519167459785e-05, + "loss": 0.0311, "step": 159185 }, { "epoch": 7.43, - "learning_rate": 5.1680652571375e-06, - "loss": 0.0287, + "learning_rate": 1.519120652801398e-05, + "loss": 0.0303, "step": 159190 }, { "epoch": 7.43, - "learning_rate": 5.167596455862361e-06, - "loss": 0.0189, + "learning_rate": 1.519073845817796e-05, + "loss": 0.0268, "step": 159195 }, { "epoch": 7.43, - "learning_rate": 5.167127654587221e-06, - "loss": 0.0455, + "learning_rate": 1.5190270388341942e-05, + "loss": 0.0679, "step": 159200 }, { "epoch": 7.43, - "learning_rate": 5.1666588533120805e-06, - "loss": 0.0681, + "learning_rate": 1.5189802318505922e-05, + "loss": 0.0819, "step": 159205 }, { "epoch": 7.43, - "learning_rate": 5.166190052036942e-06, - "loss": 0.0883, + "learning_rate": 1.5189334248669902e-05, + "loss": 0.1026, "step": 159210 }, { "epoch": 7.43, - "learning_rate": 5.165721250761803e-06, - "loss": 0.2003, + "learning_rate": 1.5188866178833881e-05, + "loss": 0.2275, "step": 159215 }, { "epoch": 7.43, - "learning_rate": 5.165252449486663e-06, - "loss": 0.2037, + "learning_rate": 1.5188398108997865e-05, + "loss": 0.234, "step": 159220 }, { "epoch": 7.43, - "learning_rate": 5.164783648211523e-06, - "loss": 0.0459, + "learning_rate": 1.5187930039161844e-05, + "loss": 0.0071, "step": 159225 }, { "epoch": 7.43, - "learning_rate": 5.1643148469363844e-06, - "loss": 0.002, + "learning_rate": 1.5187461969325823e-05, + "loss": 0.0048, "step": 159230 }, { "epoch": 7.43, - "learning_rate": 5.163846045661245e-06, - "loss": 0.0214, + "learning_rate": 1.5186993899489802e-05, + "loss": 0.0044, "step": 159235 }, { "epoch": 7.43, - "learning_rate": 5.163377244386105e-06, - "loss": 0.0608, + "learning_rate": 1.5186525829653786e-05, + "loss": 0.0588, "step": 159240 }, { "epoch": 7.43, - "learning_rate": 5.162908443110965e-06, - "loss": 0.0047, + "learning_rate": 1.5186057759817765e-05, + "loss": 0.0174, "step": 159245 }, { "epoch": 7.43, - "learning_rate": 5.162439641835826e-06, - "loss": 0.0509, + "learning_rate": 1.5185589689981745e-05, + "loss": 0.0341, "step": 159250 }, { "epoch": 7.43, - "learning_rate": 5.1619708405606875e-06, - "loss": 0.0627, + "learning_rate": 1.5185121620145728e-05, + "loss": 0.0271, "step": 159255 }, { "epoch": 7.43, - "learning_rate": 5.1615020392855474e-06, - "loss": 0.0774, + "learning_rate": 1.5184653550309707e-05, + "loss": 0.118, "step": 159260 }, { "epoch": 7.43, - "learning_rate": 5.161033238010407e-06, - "loss": 0.1779, + "learning_rate": 1.5184185480473686e-05, + "loss": 0.2513, "step": 159265 }, { "epoch": 7.43, - "learning_rate": 5.160564436735268e-06, - "loss": 0.2345, + "learning_rate": 1.5183717410637666e-05, + "loss": 0.1575, "step": 159270 }, { "epoch": 7.43, - "learning_rate": 5.160095635460128e-06, - "loss": 0.002, + "learning_rate": 1.518324934080165e-05, + "loss": 0.026, "step": 159275 }, { "epoch": 7.43, - "learning_rate": 5.15962683418499e-06, - "loss": 0.0046, + "learning_rate": 1.518278127096563e-05, + "loss": 0.0059, "step": 159280 }, { "epoch": 7.43, - "learning_rate": 5.15915803290985e-06, - "loss": 0.0191, + "learning_rate": 1.5182313201129609e-05, + "loss": 0.0637, "step": 159285 }, { "epoch": 7.43, - "learning_rate": 5.1586892316347104e-06, - "loss": 0.0585, + "learning_rate": 1.5181845131293589e-05, + "loss": 0.0552, "step": 159290 }, { "epoch": 7.43, - "learning_rate": 5.15822043035957e-06, - "loss": 0.0163, + "learning_rate": 1.518137706145757e-05, + "loss": 0.0134, "step": 159295 }, { "epoch": 7.43, - "learning_rate": 5.157751629084432e-06, - "loss": 0.0528, + "learning_rate": 1.518090899162155e-05, + "loss": 0.0253, "step": 159300 }, { "epoch": 7.43, - "learning_rate": 5.157282827809292e-06, - "loss": 0.0524, + "learning_rate": 1.518044092178553e-05, + "loss": 0.0482, "step": 159305 }, { "epoch": 7.43, - "learning_rate": 5.156814026534153e-06, - "loss": 0.0569, + "learning_rate": 1.5179972851949513e-05, + "loss": 0.1152, "step": 159310 }, { "epoch": 7.43, - "learning_rate": 5.156345225259013e-06, - "loss": 0.1555, + "learning_rate": 1.5179504782113493e-05, + "loss": 0.1868, "step": 159315 }, { "epoch": 7.43, - "learning_rate": 5.155876423983873e-06, - "loss": 0.1486, + "learning_rate": 1.5179036712277473e-05, + "loss": 0.3551, "step": 159320 }, { "epoch": 7.43, - "learning_rate": 5.155407622708735e-06, - "loss": 0.0325, + "learning_rate": 1.5178568642441451e-05, + "loss": 0.0495, "step": 159325 }, { "epoch": 7.43, - "learning_rate": 5.154938821433595e-06, - "loss": 0.0207, + "learning_rate": 1.5178100572605434e-05, + "loss": 0.007, "step": 159330 }, { "epoch": 7.43, - "learning_rate": 5.154470020158455e-06, - "loss": 0.0176, + "learning_rate": 1.5177632502769414e-05, + "loss": 0.0134, "step": 159335 }, { "epoch": 7.44, - "learning_rate": 5.154001218883316e-06, - "loss": 0.0262, + "learning_rate": 1.5177164432933394e-05, + "loss": 0.0298, "step": 159340 }, { "epoch": 7.44, - "learning_rate": 5.153532417608176e-06, - "loss": 0.0477, + "learning_rate": 1.5176696363097374e-05, + "loss": 0.0514, "step": 159345 }, { "epoch": 7.44, - "learning_rate": 5.153063616333037e-06, - "loss": 0.0723, + "learning_rate": 1.5176228293261357e-05, + "loss": 0.0581, "step": 159350 }, { "epoch": 7.44, - "learning_rate": 5.152594815057897e-06, - "loss": 0.0775, + "learning_rate": 1.5175760223425335e-05, + "loss": 0.021, "step": 159355 }, { "epoch": 7.44, - "learning_rate": 5.152126013782758e-06, - "loss": 0.0471, + "learning_rate": 1.5175292153589315e-05, + "loss": 0.1311, "step": 159360 }, { "epoch": 7.44, - "learning_rate": 5.151657212507618e-06, - "loss": 0.1486, + "learning_rate": 1.5174824083753295e-05, + "loss": 0.1366, "step": 159365 }, { "epoch": 7.44, - "learning_rate": 5.1511884112324795e-06, - "loss": 0.1825, + "learning_rate": 1.5174356013917278e-05, + "loss": 0.1645, "step": 159370 }, { "epoch": 7.44, - "learning_rate": 5.1507196099573395e-06, - "loss": 0.0168, + "learning_rate": 1.5173887944081258e-05, + "loss": 0.0144, "step": 159375 }, { "epoch": 7.44, - "learning_rate": 5.1502508086822e-06, - "loss": 0.0067, + "learning_rate": 1.5173419874245237e-05, + "loss": 0.0336, "step": 159380 }, { "epoch": 7.44, - "learning_rate": 5.14978200740706e-06, - "loss": 0.0444, + "learning_rate": 1.5172951804409219e-05, + "loss": 0.0316, "step": 159385 }, { "epoch": 7.44, - "learning_rate": 5.149313206131921e-06, - "loss": 0.0494, + "learning_rate": 1.5172483734573199e-05, + "loss": 0.001, "step": 159390 }, { "epoch": 7.44, - "learning_rate": 5.148844404856782e-06, - "loss": 0.0458, + "learning_rate": 1.5172015664737179e-05, + "loss": 0.0149, "step": 159395 }, { "epoch": 7.44, - "learning_rate": 5.1483756035816425e-06, - "loss": 0.0616, + "learning_rate": 1.5171547594901158e-05, + "loss": 0.092, "step": 159400 }, { "epoch": 7.44, - "learning_rate": 5.1479068023065025e-06, - "loss": 0.1227, + "learning_rate": 1.5171079525065142e-05, + "loss": 0.0499, "step": 159405 }, { "epoch": 7.44, - "learning_rate": 5.147438001031363e-06, - "loss": 0.092, + "learning_rate": 1.5170611455229121e-05, + "loss": 0.0728, "step": 159410 }, { "epoch": 7.44, - "learning_rate": 5.146969199756223e-06, - "loss": 0.2132, + "learning_rate": 1.5170143385393101e-05, + "loss": 0.1475, "step": 159415 }, { "epoch": 7.44, - "learning_rate": 5.146500398481085e-06, - "loss": 0.2035, + "learning_rate": 1.516967531555708e-05, + "loss": 0.1185, "step": 159420 }, { "epoch": 7.44, - "learning_rate": 5.146031597205945e-06, - "loss": 0.0469, + "learning_rate": 1.5169207245721063e-05, + "loss": 0.0215, "step": 159425 }, { "epoch": 7.44, - "learning_rate": 5.1455627959308055e-06, - "loss": 0.0309, + "learning_rate": 1.5168739175885042e-05, + "loss": 0.0254, "step": 159430 }, { "epoch": 7.44, - "learning_rate": 5.1450939946556654e-06, - "loss": 0.0013, + "learning_rate": 1.5168271106049022e-05, + "loss": 0.0225, "step": 159435 }, { "epoch": 7.44, - "learning_rate": 5.144625193380527e-06, - "loss": 0.0058, + "learning_rate": 1.5167803036213005e-05, + "loss": 0.0351, "step": 159440 }, { "epoch": 7.44, - "learning_rate": 5.144156392105387e-06, - "loss": 0.0395, + "learning_rate": 1.5167334966376985e-05, + "loss": 0.0552, "step": 159445 }, { "epoch": 7.44, - "learning_rate": 5.143687590830248e-06, - "loss": 0.0262, + "learning_rate": 1.5166866896540963e-05, + "loss": 0.0245, "step": 159450 }, { "epoch": 7.44, - "learning_rate": 5.143218789555108e-06, - "loss": 0.077, + "learning_rate": 1.5166398826704943e-05, + "loss": 0.0251, "step": 159455 }, { "epoch": 7.44, - "learning_rate": 5.142749988279968e-06, - "loss": 0.0806, + "learning_rate": 1.5165930756868926e-05, + "loss": 0.0471, "step": 159460 }, { "epoch": 7.44, - "learning_rate": 5.142281187004829e-06, - "loss": 0.1837, + "learning_rate": 1.5165462687032906e-05, + "loss": 0.0807, "step": 159465 }, { "epoch": 7.44, - "learning_rate": 5.14181238572969e-06, - "loss": 0.1664, + "learning_rate": 1.5164994617196886e-05, + "loss": 0.2157, "step": 159470 }, { "epoch": 7.44, - "learning_rate": 5.14134358445455e-06, - "loss": 0.0405, + "learning_rate": 1.5164526547360866e-05, + "loss": 0.0324, "step": 159475 }, { "epoch": 7.44, - "learning_rate": 5.140874783179411e-06, - "loss": 0.0085, + "learning_rate": 1.5164058477524847e-05, + "loss": 0.0409, "step": 159480 }, { "epoch": 7.44, - "learning_rate": 5.140405981904271e-06, - "loss": 0.009, + "learning_rate": 1.5163590407688827e-05, + "loss": 0.004, "step": 159485 }, { "epoch": 7.44, - "learning_rate": 5.139937180629132e-06, - "loss": 0.0176, + "learning_rate": 1.5163122337852807e-05, + "loss": 0.0106, "step": 159490 }, { "epoch": 7.44, - "learning_rate": 5.139468379353992e-06, - "loss": 0.0466, + "learning_rate": 1.516265426801679e-05, + "loss": 0.0397, "step": 159495 }, { "epoch": 7.44, - "learning_rate": 5.138999578078853e-06, - "loss": 0.058, + "learning_rate": 1.516218619818077e-05, + "loss": 0.0375, "step": 159500 }, { "epoch": 7.44, - "learning_rate": 5.138530776803713e-06, - "loss": 0.0418, + "learning_rate": 1.516171812834475e-05, + "loss": 0.0509, "step": 159505 }, { "epoch": 7.44, - "learning_rate": 5.138061975528575e-06, - "loss": 0.0331, + "learning_rate": 1.516125005850873e-05, + "loss": 0.0709, "step": 159510 }, { "epoch": 7.44, - "learning_rate": 5.1375931742534345e-06, - "loss": 0.1614, + "learning_rate": 1.5160781988672711e-05, + "loss": 0.1582, "step": 159515 }, { "epoch": 7.44, - "learning_rate": 5.137124372978295e-06, - "loss": 0.2732, + "learning_rate": 1.5160313918836691e-05, + "loss": 0.1133, "step": 159520 }, { "epoch": 7.44, - "learning_rate": 5.136655571703155e-06, - "loss": 0.0105, + "learning_rate": 1.515984584900067e-05, + "loss": 0.0169, "step": 159525 }, { "epoch": 7.44, - "learning_rate": 5.136186770428015e-06, - "loss": 0.0382, + "learning_rate": 1.515937777916465e-05, + "loss": 0.0217, "step": 159530 }, { "epoch": 7.44, - "learning_rate": 5.135717969152877e-06, - "loss": 0.0412, + "learning_rate": 1.5158909709328634e-05, + "loss": 0.0247, "step": 159535 }, { "epoch": 7.44, - "learning_rate": 5.135249167877738e-06, - "loss": 0.0789, + "learning_rate": 1.5158441639492614e-05, + "loss": 0.0156, "step": 159540 }, { "epoch": 7.44, - "learning_rate": 5.1347803666025975e-06, - "loss": 0.0862, + "learning_rate": 1.5157973569656592e-05, + "loss": 0.023, "step": 159545 }, { "epoch": 7.44, - "learning_rate": 5.1343115653274575e-06, - "loss": 0.0558, + "learning_rate": 1.5157505499820572e-05, + "loss": 0.0223, "step": 159550 }, { "epoch": 7.45, - "learning_rate": 5.133842764052319e-06, - "loss": 0.0622, + "learning_rate": 1.5157037429984555e-05, + "loss": 0.0273, "step": 159555 }, { "epoch": 7.45, - "learning_rate": 5.13337396277718e-06, - "loss": 0.0569, + "learning_rate": 1.5156569360148535e-05, + "loss": 0.0972, "step": 159560 }, { "epoch": 7.45, - "learning_rate": 5.13290516150204e-06, - "loss": 0.1177, + "learning_rate": 1.5156101290312514e-05, + "loss": 0.1106, "step": 159565 }, { "epoch": 7.45, - "learning_rate": 5.1324363602269e-06, - "loss": 0.1859, + "learning_rate": 1.5155633220476498e-05, + "loss": 0.1426, "step": 159570 }, { "epoch": 7.45, - "learning_rate": 5.1319675589517605e-06, - "loss": 0.0403, + "learning_rate": 1.5155165150640476e-05, + "loss": 0.0298, "step": 159575 }, { "epoch": 7.45, - "learning_rate": 5.131498757676622e-06, - "loss": 0.0217, + "learning_rate": 1.5154697080804455e-05, + "loss": 0.0263, "step": 159580 }, { "epoch": 7.45, - "learning_rate": 5.131029956401482e-06, - "loss": 0.0087, + "learning_rate": 1.5154229010968435e-05, + "loss": 0.011, "step": 159585 }, { "epoch": 7.45, - "learning_rate": 5.130561155126342e-06, - "loss": 0.0612, + "learning_rate": 1.5153760941132419e-05, + "loss": 0.015, "step": 159590 }, { "epoch": 7.45, - "learning_rate": 5.130092353851203e-06, - "loss": 0.0288, + "learning_rate": 1.5153292871296398e-05, + "loss": 0.0372, "step": 159595 }, { "epoch": 7.45, - "learning_rate": 5.129623552576063e-06, - "loss": 0.06, + "learning_rate": 1.5152824801460378e-05, + "loss": 0.0525, "step": 159600 }, { "epoch": 7.45, - "learning_rate": 5.129154751300924e-06, - "loss": 0.0441, + "learning_rate": 1.5152356731624358e-05, + "loss": 0.0391, "step": 159605 }, { "epoch": 7.45, - "learning_rate": 5.128685950025784e-06, - "loss": 0.0946, + "learning_rate": 1.515188866178834e-05, + "loss": 0.0808, "step": 159610 }, { "epoch": 7.45, - "learning_rate": 5.128217148750645e-06, - "loss": 0.1896, + "learning_rate": 1.515142059195232e-05, + "loss": 0.1518, "step": 159615 }, { "epoch": 7.45, - "learning_rate": 5.127748347475505e-06, - "loss": 0.2331, + "learning_rate": 1.5150952522116299e-05, + "loss": 0.1287, "step": 159620 }, { "epoch": 7.45, - "learning_rate": 5.127279546200367e-06, - "loss": 0.0062, + "learning_rate": 1.5150484452280282e-05, + "loss": 0.028, "step": 159625 }, { "epoch": 7.45, - "learning_rate": 5.1268107449252266e-06, - "loss": 0.0106, + "learning_rate": 1.5150016382444262e-05, + "loss": 0.0043, "step": 159630 }, { "epoch": 7.45, - "learning_rate": 5.126341943650087e-06, - "loss": 0.0256, + "learning_rate": 1.5149548312608242e-05, + "loss": 0.0419, "step": 159635 }, { "epoch": 7.45, - "learning_rate": 5.125873142374947e-06, - "loss": 0.0209, + "learning_rate": 1.514908024277222e-05, + "loss": 0.0149, "step": 159640 }, { "epoch": 7.45, - "learning_rate": 5.125404341099808e-06, - "loss": 0.1144, + "learning_rate": 1.5148612172936203e-05, + "loss": 0.0182, "step": 159645 }, { "epoch": 7.45, - "learning_rate": 5.124935539824669e-06, - "loss": 0.0759, + "learning_rate": 1.5148144103100183e-05, + "loss": 0.0283, "step": 159650 }, { "epoch": 7.45, - "learning_rate": 5.12446673854953e-06, - "loss": 0.0211, + "learning_rate": 1.5147676033264163e-05, + "loss": 0.0628, "step": 159655 }, { "epoch": 7.45, - "learning_rate": 5.1239979372743896e-06, - "loss": 0.0807, + "learning_rate": 1.5147207963428143e-05, + "loss": 0.0702, "step": 159660 }, { "epoch": 7.45, - "learning_rate": 5.12352913599925e-06, - "loss": 0.1418, + "learning_rate": 1.5146739893592126e-05, + "loss": 0.126, "step": 159665 }, { "epoch": 7.45, - "learning_rate": 5.12306033472411e-06, - "loss": 0.1225, + "learning_rate": 1.5146271823756104e-05, + "loss": 0.1554, "step": 159670 }, { "epoch": 7.45, - "learning_rate": 5.122591533448972e-06, - "loss": 0.0442, + "learning_rate": 1.5145803753920084e-05, + "loss": 0.0385, "step": 159675 }, { "epoch": 7.45, - "learning_rate": 5.122122732173832e-06, - "loss": 0.0199, + "learning_rate": 1.5145335684084067e-05, + "loss": 0.0133, "step": 159680 }, { "epoch": 7.45, - "learning_rate": 5.121653930898693e-06, - "loss": 0.0233, + "learning_rate": 1.5144867614248047e-05, + "loss": 0.0081, "step": 159685 }, { "epoch": 7.45, - "learning_rate": 5.1211851296235526e-06, - "loss": 0.0026, + "learning_rate": 1.5144399544412027e-05, + "loss": 0.0764, "step": 159690 }, { "epoch": 7.45, - "learning_rate": 5.120716328348414e-06, - "loss": 0.046, + "learning_rate": 1.5143931474576007e-05, + "loss": 0.0875, "step": 159695 }, { "epoch": 7.45, - "learning_rate": 5.120247527073274e-06, - "loss": 0.0308, + "learning_rate": 1.514346340473999e-05, + "loss": 0.0314, "step": 159700 }, { "epoch": 7.45, - "learning_rate": 5.119778725798135e-06, - "loss": 0.042, + "learning_rate": 1.5142995334903968e-05, + "loss": 0.0628, "step": 159705 }, { "epoch": 7.45, - "learning_rate": 5.119309924522995e-06, - "loss": 0.1686, + "learning_rate": 1.5142527265067948e-05, + "loss": 0.2057, "step": 159710 }, { "epoch": 7.45, - "learning_rate": 5.118841123247856e-06, - "loss": 0.125, + "learning_rate": 1.5142059195231928e-05, + "loss": 0.1606, "step": 159715 }, { "epoch": 7.45, - "learning_rate": 5.118372321972716e-06, - "loss": 0.0828, + "learning_rate": 1.514159112539591e-05, + "loss": 0.0769, "step": 159720 }, { "epoch": 7.45, - "learning_rate": 5.117903520697577e-06, - "loss": 0.061, + "learning_rate": 1.514112305555989e-05, + "loss": 0.0252, "step": 159725 }, { "epoch": 7.45, - "learning_rate": 5.117434719422437e-06, - "loss": 0.0452, + "learning_rate": 1.514065498572387e-05, + "loss": 0.0342, "step": 159730 }, { "epoch": 7.45, - "learning_rate": 5.116965918147298e-06, - "loss": 0.0184, + "learning_rate": 1.5140186915887848e-05, + "loss": 0.0138, "step": 159735 }, { "epoch": 7.45, - "learning_rate": 5.116497116872158e-06, - "loss": 0.0517, + "learning_rate": 1.5139718846051832e-05, + "loss": 0.039, "step": 159740 }, { "epoch": 7.45, - "learning_rate": 5.1160283155970194e-06, - "loss": 0.0657, + "learning_rate": 1.5139250776215811e-05, + "loss": 0.0206, "step": 159745 }, { "epoch": 7.45, - "learning_rate": 5.115559514321879e-06, - "loss": 0.0843, + "learning_rate": 1.5138782706379791e-05, + "loss": 0.0826, "step": 159750 }, { "epoch": 7.45, - "learning_rate": 5.11509071304674e-06, - "loss": 0.0691, + "learning_rate": 1.5138314636543775e-05, + "loss": 0.1062, "step": 159755 }, { "epoch": 7.45, - "learning_rate": 5.1146219117716e-06, - "loss": 0.0422, + "learning_rate": 1.5137846566707754e-05, + "loss": 0.0357, "step": 159760 }, { "epoch": 7.45, - "learning_rate": 5.114153110496462e-06, - "loss": 0.087, + "learning_rate": 1.5137378496871732e-05, + "loss": 0.0992, "step": 159765 }, { "epoch": 7.46, - "learning_rate": 5.113684309221322e-06, - "loss": 0.1379, + "learning_rate": 1.5136910427035712e-05, + "loss": 0.1907, "step": 159770 }, { "epoch": 7.46, - "learning_rate": 5.1132155079461824e-06, - "loss": 0.0689, + "learning_rate": 1.5136442357199695e-05, + "loss": 0.0191, "step": 159775 }, { "epoch": 7.46, - "learning_rate": 5.112746706671042e-06, - "loss": 0.0005, + "learning_rate": 1.5135974287363675e-05, + "loss": 0.0125, "step": 159780 }, { "epoch": 7.46, - "learning_rate": 5.112277905395902e-06, - "loss": 0.0096, + "learning_rate": 1.5135506217527655e-05, + "loss": 0.0821, "step": 159785 }, { "epoch": 7.46, - "learning_rate": 5.111809104120764e-06, - "loss": 0.0173, + "learning_rate": 1.5135038147691635e-05, + "loss": 0.009, "step": 159790 }, { "epoch": 7.46, - "learning_rate": 5.111340302845625e-06, - "loss": 0.0171, + "learning_rate": 1.5134570077855618e-05, + "loss": 0.0356, "step": 159795 }, { "epoch": 7.46, - "learning_rate": 5.110871501570485e-06, - "loss": 0.0855, + "learning_rate": 1.5134102008019596e-05, + "loss": 0.0395, "step": 159800 }, { "epoch": 7.46, - "learning_rate": 5.110402700295345e-06, - "loss": 0.0373, + "learning_rate": 1.5133633938183576e-05, + "loss": 0.0576, "step": 159805 }, { "epoch": 7.46, - "learning_rate": 5.109933899020205e-06, - "loss": 0.0715, + "learning_rate": 1.513316586834756e-05, + "loss": 0.0701, "step": 159810 }, { "epoch": 7.46, - "learning_rate": 5.109465097745067e-06, - "loss": 0.1262, + "learning_rate": 1.5132697798511539e-05, + "loss": 0.0881, "step": 159815 }, { "epoch": 7.46, - "learning_rate": 5.108996296469927e-06, - "loss": 0.2423, + "learning_rate": 1.5132229728675519e-05, + "loss": 0.1253, "step": 159820 }, { "epoch": 7.46, - "learning_rate": 5.108527495194787e-06, - "loss": 0.0232, + "learning_rate": 1.5131761658839499e-05, + "loss": 0.03, "step": 159825 }, { "epoch": 7.46, - "learning_rate": 5.108058693919648e-06, - "loss": 0.0366, + "learning_rate": 1.513129358900348e-05, + "loss": 0.0272, "step": 159830 }, { "epoch": 7.46, - "learning_rate": 5.107589892644509e-06, - "loss": 0.0303, + "learning_rate": 1.513082551916746e-05, + "loss": 0.0505, "step": 159835 }, { "epoch": 7.46, - "learning_rate": 5.107121091369369e-06, - "loss": 0.014, + "learning_rate": 1.513035744933144e-05, + "loss": 0.0605, "step": 159840 }, { "epoch": 7.46, - "learning_rate": 5.106652290094229e-06, - "loss": 0.0658, + "learning_rate": 1.512988937949542e-05, + "loss": 0.0566, "step": 159845 }, { "epoch": 7.46, - "learning_rate": 5.10618348881909e-06, - "loss": 0.05, + "learning_rate": 1.5129421309659403e-05, + "loss": 0.064, "step": 159850 }, { "epoch": 7.46, - "learning_rate": 5.10571468754395e-06, - "loss": 0.0503, + "learning_rate": 1.5128953239823383e-05, + "loss": 0.0641, "step": 159855 }, { "epoch": 7.46, - "learning_rate": 5.1052458862688115e-06, - "loss": 0.0736, + "learning_rate": 1.5128485169987361e-05, + "loss": 0.0703, "step": 159860 }, { "epoch": 7.46, - "learning_rate": 5.104777084993671e-06, - "loss": 0.181, + "learning_rate": 1.5128017100151344e-05, + "loss": 0.1119, "step": 159865 }, { "epoch": 7.46, - "learning_rate": 5.104308283718532e-06, - "loss": 0.2725, + "learning_rate": 1.5127549030315324e-05, + "loss": 0.1193, "step": 159870 }, { "epoch": 7.46, - "learning_rate": 5.103839482443392e-06, - "loss": 0.0116, + "learning_rate": 1.5127080960479304e-05, + "loss": 0.0272, "step": 159875 }, { "epoch": 7.46, - "learning_rate": 5.103370681168254e-06, - "loss": 0.0072, + "learning_rate": 1.5126612890643284e-05, + "loss": 0.0185, "step": 159880 }, { "epoch": 7.46, - "learning_rate": 5.102901879893114e-06, - "loss": 0.0044, + "learning_rate": 1.5126144820807267e-05, + "loss": 0.0269, "step": 159885 }, { "epoch": 7.46, - "learning_rate": 5.1024330786179745e-06, - "loss": 0.0752, + "learning_rate": 1.5125676750971245e-05, + "loss": 0.0277, "step": 159890 }, { "epoch": 7.46, - "learning_rate": 5.101964277342834e-06, - "loss": 0.0322, + "learning_rate": 1.5125208681135225e-05, + "loss": 0.0309, "step": 159895 }, { "epoch": 7.46, - "learning_rate": 5.101495476067695e-06, - "loss": 0.0811, + "learning_rate": 1.5124740611299204e-05, + "loss": 0.0242, "step": 159900 }, { "epoch": 7.46, - "learning_rate": 5.101026674792556e-06, - "loss": 0.1005, + "learning_rate": 1.5124272541463188e-05, + "loss": 0.0477, "step": 159905 }, { "epoch": 7.46, - "learning_rate": 5.100557873517417e-06, - "loss": 0.0673, + "learning_rate": 1.5123804471627168e-05, + "loss": 0.0892, "step": 159910 }, { "epoch": 7.46, - "learning_rate": 5.100089072242277e-06, - "loss": 0.1553, + "learning_rate": 1.5123336401791147e-05, + "loss": 0.1327, "step": 159915 }, { "epoch": 7.46, - "learning_rate": 5.0996202709671375e-06, - "loss": 0.1425, + "learning_rate": 1.512286833195513e-05, + "loss": 0.1531, "step": 159920 }, { "epoch": 7.46, - "learning_rate": 5.099151469691997e-06, - "loss": 0.0033, + "learning_rate": 1.5122400262119109e-05, + "loss": 0.0201, "step": 159925 }, { "epoch": 7.46, - "learning_rate": 5.098682668416859e-06, - "loss": 0.0649, + "learning_rate": 1.5121932192283088e-05, + "loss": 0.0325, "step": 159930 }, { "epoch": 7.46, - "learning_rate": 5.098213867141719e-06, - "loss": 0.0097, + "learning_rate": 1.5121464122447068e-05, + "loss": 0.0342, "step": 159935 }, { "epoch": 7.46, - "learning_rate": 5.09774506586658e-06, - "loss": 0.0488, + "learning_rate": 1.5120996052611051e-05, + "loss": 0.0302, "step": 159940 }, { "epoch": 7.46, - "learning_rate": 5.09727626459144e-06, - "loss": 0.0456, + "learning_rate": 1.5120527982775031e-05, + "loss": 0.0211, "step": 159945 }, { "epoch": 7.46, - "learning_rate": 5.096807463316301e-06, - "loss": 0.0307, + "learning_rate": 1.5120059912939011e-05, + "loss": 0.0262, "step": 159950 }, { "epoch": 7.46, - "learning_rate": 5.096338662041161e-06, - "loss": 0.0265, + "learning_rate": 1.511959184310299e-05, + "loss": 0.0834, "step": 159955 }, { "epoch": 7.46, - "learning_rate": 5.095869860766022e-06, - "loss": 0.0466, + "learning_rate": 1.5119123773266972e-05, + "loss": 0.1227, "step": 159960 }, { "epoch": 7.46, - "learning_rate": 5.095401059490882e-06, - "loss": 0.1167, + "learning_rate": 1.5118655703430952e-05, + "loss": 0.1294, "step": 159965 }, { "epoch": 7.46, - "learning_rate": 5.094932258215743e-06, - "loss": 0.1983, + "learning_rate": 1.5118187633594932e-05, + "loss": 0.225, "step": 159970 }, { "epoch": 7.46, - "learning_rate": 5.0944634569406035e-06, - "loss": 0.0067, + "learning_rate": 1.5117719563758912e-05, + "loss": 0.03, "step": 159975 }, { "epoch": 7.46, - "learning_rate": 5.093994655665464e-06, - "loss": 0.0191, + "learning_rate": 1.5117251493922895e-05, + "loss": 0.0205, "step": 159980 }, { "epoch": 7.47, - "learning_rate": 5.093525854390324e-06, - "loss": 0.0233, + "learning_rate": 1.5116783424086873e-05, + "loss": 0.0069, "step": 159985 }, { "epoch": 7.47, - "learning_rate": 5.093057053115185e-06, - "loss": 0.01, + "learning_rate": 1.5116315354250853e-05, + "loss": 0.0382, "step": 159990 }, { "epoch": 7.47, - "learning_rate": 5.092588251840045e-06, - "loss": 0.0361, + "learning_rate": 1.5115847284414836e-05, + "loss": 0.0219, "step": 159995 }, { "epoch": 7.47, - "learning_rate": 5.0921194505649066e-06, - "loss": 0.0235, + "learning_rate": 1.5115379214578816e-05, + "loss": 0.051, "step": 160000 }, { "epoch": 7.47, - "learning_rate": 5.0916506492897665e-06, - "loss": 0.0748, + "learning_rate": 1.5114911144742796e-05, + "loss": 0.0203, "step": 160005 }, { "epoch": 7.47, - "learning_rate": 5.091181848014627e-06, - "loss": 0.087, + "learning_rate": 1.5114443074906776e-05, + "loss": 0.1501, "step": 160010 }, { "epoch": 7.47, - "learning_rate": 5.090713046739487e-06, - "loss": 0.224, + "learning_rate": 1.5113975005070759e-05, + "loss": 0.0979, "step": 160015 }, { "epoch": 7.47, - "learning_rate": 5.090244245464349e-06, - "loss": 0.1089, + "learning_rate": 1.5113506935234737e-05, + "loss": 0.1614, "step": 160020 }, { "epoch": 7.47, - "learning_rate": 5.089775444189209e-06, - "loss": 0.0018, + "learning_rate": 1.5113038865398717e-05, + "loss": 0.0215, "step": 160025 }, { "epoch": 7.47, - "learning_rate": 5.0893066429140695e-06, - "loss": 0.0506, + "learning_rate": 1.5112570795562697e-05, + "loss": 0.03, "step": 160030 }, { "epoch": 7.47, - "learning_rate": 5.0888378416389295e-06, - "loss": 0.0061, + "learning_rate": 1.511210272572668e-05, + "loss": 0.0108, "step": 160035 }, { "epoch": 7.47, - "learning_rate": 5.088369040363789e-06, - "loss": 0.0215, + "learning_rate": 1.511163465589066e-05, + "loss": 0.0347, "step": 160040 }, { "epoch": 7.47, - "learning_rate": 5.087900239088651e-06, - "loss": 0.0635, + "learning_rate": 1.511116658605464e-05, + "loss": 0.038, "step": 160045 }, { "epoch": 7.47, - "learning_rate": 5.087431437813512e-06, - "loss": 0.0592, + "learning_rate": 1.5110698516218621e-05, + "loss": 0.0539, "step": 160050 }, { "epoch": 7.47, - "learning_rate": 5.086962636538372e-06, - "loss": 0.0808, + "learning_rate": 1.5110230446382601e-05, + "loss": 0.0329, "step": 160055 }, { "epoch": 7.47, - "learning_rate": 5.086493835263232e-06, - "loss": 0.0429, + "learning_rate": 1.510976237654658e-05, + "loss": 0.0702, "step": 160060 }, { "epoch": 7.47, - "learning_rate": 5.0860250339880925e-06, - "loss": 0.1387, + "learning_rate": 1.510929430671056e-05, + "loss": 0.1518, "step": 160065 }, { "epoch": 7.47, - "learning_rate": 5.085556232712954e-06, - "loss": 0.2801, + "learning_rate": 1.5108826236874544e-05, + "loss": 0.1841, "step": 160070 }, { "epoch": 7.47, - "learning_rate": 5.085087431437814e-06, - "loss": 0.0308, + "learning_rate": 1.5108358167038524e-05, + "loss": 0.0013, "step": 160075 }, { "epoch": 7.47, - "learning_rate": 5.084618630162674e-06, - "loss": 0.0141, + "learning_rate": 1.5107890097202502e-05, + "loss": 0.0214, "step": 160080 }, { "epoch": 7.47, - "learning_rate": 5.084149828887535e-06, - "loss": 0.012, + "learning_rate": 1.5107422027366481e-05, + "loss": 0.0113, "step": 160085 }, { "epoch": 7.47, - "learning_rate": 5.083681027612396e-06, - "loss": 0.0451, + "learning_rate": 1.5106953957530465e-05, + "loss": 0.032, "step": 160090 }, { "epoch": 7.47, - "learning_rate": 5.083212226337256e-06, - "loss": 0.0697, + "learning_rate": 1.5106485887694444e-05, + "loss": 0.0382, "step": 160095 }, { "epoch": 7.47, - "learning_rate": 5.082743425062116e-06, - "loss": 0.0588, + "learning_rate": 1.5106017817858424e-05, + "loss": 0.0228, "step": 160100 }, { "epoch": 7.47, - "learning_rate": 5.082274623786977e-06, - "loss": 0.0384, + "learning_rate": 1.5105549748022407e-05, + "loss": 0.0705, "step": 160105 }, { "epoch": 7.47, - "learning_rate": 5.081805822511837e-06, - "loss": 0.092, + "learning_rate": 1.5105081678186387e-05, + "loss": 0.0614, "step": 160110 }, { "epoch": 7.47, - "learning_rate": 5.081337021236699e-06, - "loss": 0.1325, + "learning_rate": 1.5104613608350365e-05, + "loss": 0.1673, "step": 160115 }, { "epoch": 7.47, - "learning_rate": 5.0808682199615585e-06, - "loss": 0.2414, + "learning_rate": 1.5104145538514345e-05, + "loss": 0.1607, "step": 160120 }, { "epoch": 7.47, - "learning_rate": 5.080399418686419e-06, - "loss": 0.0139, + "learning_rate": 1.5103677468678328e-05, + "loss": 0.0179, "step": 160125 }, { "epoch": 7.47, - "learning_rate": 5.079930617411279e-06, - "loss": 0.0156, + "learning_rate": 1.5103209398842308e-05, + "loss": 0.0286, "step": 160130 }, { "epoch": 7.47, - "learning_rate": 5.07946181613614e-06, - "loss": 0.0492, + "learning_rate": 1.5102741329006288e-05, + "loss": 0.0241, "step": 160135 }, { "epoch": 7.47, - "learning_rate": 5.078993014861001e-06, - "loss": 0.0348, + "learning_rate": 1.5102273259170268e-05, + "loss": 0.0239, "step": 160140 }, { "epoch": 7.47, - "learning_rate": 5.0785242135858616e-06, - "loss": 0.0356, + "learning_rate": 1.510180518933425e-05, + "loss": 0.084, "step": 160145 }, { "epoch": 7.47, - "learning_rate": 5.0780554123107215e-06, - "loss": 0.0028, + "learning_rate": 1.510133711949823e-05, + "loss": 0.0608, "step": 160150 }, { "epoch": 7.47, - "learning_rate": 5.077586611035582e-06, - "loss": 0.096, + "learning_rate": 1.5100869049662209e-05, + "loss": 0.0325, "step": 160155 }, { "epoch": 7.47, - "learning_rate": 5.077117809760443e-06, - "loss": 0.0838, + "learning_rate": 1.5100400979826189e-05, + "loss": 0.064, "step": 160160 }, { "epoch": 7.47, - "learning_rate": 5.076649008485304e-06, - "loss": 0.1638, + "learning_rate": 1.5099932909990172e-05, + "loss": 0.1513, "step": 160165 }, { "epoch": 7.47, - "learning_rate": 5.076180207210164e-06, - "loss": 0.1055, + "learning_rate": 1.5099464840154152e-05, + "loss": 0.1546, "step": 160170 }, { "epoch": 7.47, - "learning_rate": 5.0757114059350246e-06, - "loss": 0.036, + "learning_rate": 1.509899677031813e-05, + "loss": 0.062, "step": 160175 }, { "epoch": 7.47, - "learning_rate": 5.0752426046598845e-06, - "loss": 0.0182, + "learning_rate": 1.5098528700482113e-05, + "loss": 0.0308, "step": 160180 }, { "epoch": 7.47, - "learning_rate": 5.074773803384746e-06, - "loss": 0.0124, + "learning_rate": 1.5098060630646093e-05, + "loss": 0.021, "step": 160185 }, { "epoch": 7.47, - "learning_rate": 5.074305002109606e-06, - "loss": 0.051, + "learning_rate": 1.5097592560810073e-05, + "loss": 0.0503, "step": 160190 }, { "epoch": 7.47, - "learning_rate": 5.073836200834467e-06, - "loss": 0.0118, + "learning_rate": 1.5097124490974053e-05, + "loss": 0.0071, "step": 160195 }, { "epoch": 7.48, - "learning_rate": 5.073367399559327e-06, - "loss": 0.0696, + "learning_rate": 1.5096656421138036e-05, + "loss": 0.0685, "step": 160200 }, { "epoch": 7.48, - "learning_rate": 5.072898598284188e-06, - "loss": 0.0678, + "learning_rate": 1.5096188351302016e-05, + "loss": 0.0399, "step": 160205 }, { "epoch": 7.48, - "learning_rate": 5.072429797009048e-06, - "loss": 0.1381, + "learning_rate": 1.5095720281465994e-05, + "loss": 0.0674, "step": 160210 }, { "epoch": 7.48, - "learning_rate": 5.071960995733909e-06, - "loss": 0.0865, + "learning_rate": 1.5095252211629974e-05, + "loss": 0.1204, "step": 160215 }, { "epoch": 7.48, - "learning_rate": 5.071492194458769e-06, - "loss": 0.1874, + "learning_rate": 1.5094784141793957e-05, + "loss": 0.1655, "step": 160220 }, { "epoch": 7.48, - "learning_rate": 5.07102339318363e-06, - "loss": 0.0039, + "learning_rate": 1.5094316071957937e-05, + "loss": 0.0324, "step": 160225 }, { "epoch": 7.48, - "learning_rate": 5.070554591908491e-06, - "loss": 0.0143, + "learning_rate": 1.5093848002121916e-05, + "loss": 0.0204, "step": 160230 }, { "epoch": 7.48, - "learning_rate": 5.070085790633351e-06, - "loss": 0.0563, + "learning_rate": 1.50933799322859e-05, + "loss": 0.0441, "step": 160235 }, { "epoch": 7.48, - "learning_rate": 5.069616989358211e-06, - "loss": 0.0243, + "learning_rate": 1.5092911862449878e-05, + "loss": 0.0178, "step": 160240 }, { "epoch": 7.48, - "learning_rate": 5.069148188083072e-06, - "loss": 0.0353, + "learning_rate": 1.5092443792613858e-05, + "loss": 0.039, "step": 160245 }, { "epoch": 7.48, - "learning_rate": 5.068679386807932e-06, - "loss": 0.0945, + "learning_rate": 1.5091975722777837e-05, + "loss": 0.082, "step": 160250 }, { "epoch": 7.48, - "learning_rate": 5.068210585532794e-06, - "loss": 0.0439, + "learning_rate": 1.509150765294182e-05, + "loss": 0.0354, "step": 160255 }, { "epoch": 7.48, - "learning_rate": 5.067741784257654e-06, - "loss": 0.1369, + "learning_rate": 1.50910395831058e-05, + "loss": 0.0825, "step": 160260 }, { "epoch": 7.48, - "learning_rate": 5.067272982982514e-06, - "loss": 0.2729, + "learning_rate": 1.509057151326978e-05, + "loss": 0.1265, "step": 160265 }, { "epoch": 7.48, - "learning_rate": 5.066804181707374e-06, - "loss": 0.1041, + "learning_rate": 1.5090103443433758e-05, + "loss": 0.1346, "step": 160270 }, { "epoch": 7.48, - "learning_rate": 5.066335380432236e-06, - "loss": 0.0569, + "learning_rate": 1.5089635373597742e-05, + "loss": 0.0243, "step": 160275 }, { "epoch": 7.48, - "learning_rate": 5.065866579157096e-06, - "loss": 0.0095, + "learning_rate": 1.5089167303761721e-05, + "loss": 0.039, "step": 160280 }, { "epoch": 7.48, - "learning_rate": 5.065397777881957e-06, - "loss": 0.0451, + "learning_rate": 1.5088699233925701e-05, + "loss": 0.0239, "step": 160285 }, { "epoch": 7.48, - "learning_rate": 5.064928976606817e-06, - "loss": 0.0229, + "learning_rate": 1.5088231164089684e-05, + "loss": 0.0177, "step": 160290 }, { "epoch": 7.48, - "learning_rate": 5.0644601753316765e-06, - "loss": 0.0371, + "learning_rate": 1.5087763094253664e-05, + "loss": 0.0236, "step": 160295 }, { "epoch": 7.48, - "learning_rate": 5.063991374056538e-06, - "loss": 0.0397, + "learning_rate": 1.5087295024417644e-05, + "loss": 0.0172, "step": 160300 }, { "epoch": 7.48, - "learning_rate": 5.063522572781399e-06, - "loss": 0.0591, + "learning_rate": 1.5086826954581622e-05, + "loss": 0.0723, "step": 160305 }, { "epoch": 7.48, - "learning_rate": 5.063053771506259e-06, - "loss": 0.1006, + "learning_rate": 1.5086358884745605e-05, + "loss": 0.0441, "step": 160310 }, { "epoch": 7.48, - "learning_rate": 5.062584970231119e-06, - "loss": 0.1339, + "learning_rate": 1.5085890814909585e-05, + "loss": 0.2332, "step": 160315 }, { "epoch": 7.48, - "learning_rate": 5.06211616895598e-06, - "loss": 0.125, + "learning_rate": 1.5085422745073565e-05, + "loss": 0.2201, "step": 160320 }, { "epoch": 7.48, - "learning_rate": 5.061647367680841e-06, - "loss": 0.0068, + "learning_rate": 1.5084954675237545e-05, + "loss": 0.0259, "step": 160325 }, { "epoch": 7.48, - "learning_rate": 5.061178566405701e-06, - "loss": 0.0329, + "learning_rate": 1.5084486605401528e-05, + "loss": 0.0325, "step": 160330 }, { "epoch": 7.48, - "learning_rate": 5.060709765130561e-06, - "loss": 0.0188, + "learning_rate": 1.5084018535565506e-05, + "loss": 0.025, "step": 160335 }, { "epoch": 7.48, - "learning_rate": 5.060240963855422e-06, - "loss": 0.0351, + "learning_rate": 1.5083550465729486e-05, + "loss": 0.0516, "step": 160340 }, { "epoch": 7.48, - "learning_rate": 5.0597721625802835e-06, - "loss": 0.0295, + "learning_rate": 1.5083082395893466e-05, + "loss": 0.0258, "step": 160345 }, { "epoch": 7.48, - "learning_rate": 5.059303361305143e-06, - "loss": 0.1311, + "learning_rate": 1.5082614326057449e-05, + "loss": 0.0603, "step": 160350 }, { "epoch": 7.48, - "learning_rate": 5.058834560030003e-06, - "loss": 0.099, + "learning_rate": 1.5082146256221429e-05, + "loss": 0.0232, "step": 160355 }, { "epoch": 7.48, - "learning_rate": 5.058365758754864e-06, - "loss": 0.1301, + "learning_rate": 1.5081678186385409e-05, + "loss": 0.0921, "step": 160360 }, { "epoch": 7.48, - "learning_rate": 5.057896957479724e-06, - "loss": 0.2462, + "learning_rate": 1.508121011654939e-05, + "loss": 0.1228, "step": 160365 }, { "epoch": 7.48, - "learning_rate": 5.057428156204586e-06, - "loss": 0.4285, + "learning_rate": 1.508074204671337e-05, + "loss": 0.2851, "step": 160370 }, { "epoch": 7.48, - "learning_rate": 5.056959354929446e-06, - "loss": 0.0029, + "learning_rate": 1.508027397687735e-05, + "loss": 0.0326, "step": 160375 }, { "epoch": 7.48, - "learning_rate": 5.056490553654306e-06, - "loss": 0.0118, + "learning_rate": 1.507980590704133e-05, + "loss": 0.0424, "step": 160380 }, { "epoch": 7.48, - "learning_rate": 5.056021752379166e-06, - "loss": 0.0434, + "learning_rate": 1.5079337837205313e-05, + "loss": 0.0062, "step": 160385 }, { "epoch": 7.48, - "learning_rate": 5.055552951104027e-06, - "loss": 0.0368, + "learning_rate": 1.5078869767369293e-05, + "loss": 0.0192, "step": 160390 }, { "epoch": 7.48, - "learning_rate": 5.055084149828888e-06, - "loss": 0.0886, + "learning_rate": 1.5078401697533272e-05, + "loss": 0.0913, "step": 160395 }, { "epoch": 7.48, - "learning_rate": 5.054615348553749e-06, - "loss": 0.057, + "learning_rate": 1.507793362769725e-05, + "loss": 0.0595, "step": 160400 }, { "epoch": 7.48, - "learning_rate": 5.054146547278609e-06, - "loss": 0.0528, + "learning_rate": 1.5077465557861234e-05, + "loss": 0.0435, "step": 160405 }, { "epoch": 7.48, - "learning_rate": 5.053677746003469e-06, - "loss": 0.1028, + "learning_rate": 1.5076997488025214e-05, + "loss": 0.0733, "step": 160410 }, { "epoch": 7.49, - "learning_rate": 5.05320894472833e-06, - "loss": 0.1347, + "learning_rate": 1.5076529418189193e-05, + "loss": 0.129, "step": 160415 }, { "epoch": 7.49, - "learning_rate": 5.052740143453191e-06, - "loss": 0.1794, + "learning_rate": 1.5076061348353177e-05, + "loss": 0.2102, "step": 160420 }, { "epoch": 7.49, - "learning_rate": 5.052271342178051e-06, - "loss": 0.0038, + "learning_rate": 1.5075593278517156e-05, + "loss": 0.0121, "step": 160425 }, { "epoch": 7.49, - "learning_rate": 5.051802540902912e-06, - "loss": 0.0037, + "learning_rate": 1.5075125208681135e-05, + "loss": 0.0119, "step": 160430 }, { "epoch": 7.49, - "learning_rate": 5.051333739627772e-06, - "loss": 0.03, + "learning_rate": 1.5074657138845114e-05, + "loss": 0.0271, "step": 160435 }, { "epoch": 7.49, - "learning_rate": 5.050864938352633e-06, - "loss": 0.018, + "learning_rate": 1.5074189069009098e-05, + "loss": 0.0668, "step": 160440 }, { "epoch": 7.49, - "learning_rate": 5.050396137077493e-06, - "loss": 0.0233, + "learning_rate": 1.5073720999173077e-05, + "loss": 0.0179, "step": 160445 }, { "epoch": 7.49, - "learning_rate": 5.049927335802354e-06, - "loss": 0.0543, + "learning_rate": 1.5073252929337057e-05, + "loss": 0.0219, "step": 160450 }, { "epoch": 7.49, - "learning_rate": 5.049458534527214e-06, - "loss": 0.0532, + "learning_rate": 1.5072784859501037e-05, + "loss": 0.0343, "step": 160455 }, { "epoch": 7.49, - "learning_rate": 5.048989733252075e-06, - "loss": 0.1181, + "learning_rate": 1.5072316789665019e-05, + "loss": 0.0626, "step": 160460 }, { "epoch": 7.49, - "learning_rate": 5.0485209319769354e-06, - "loss": 0.1163, + "learning_rate": 1.5071848719828998e-05, + "loss": 0.1028, "step": 160465 }, { "epoch": 7.49, - "learning_rate": 5.048052130701796e-06, - "loss": 0.3157, + "learning_rate": 1.5071380649992978e-05, + "loss": 0.1436, "step": 160470 }, { "epoch": 7.49, - "learning_rate": 5.047583329426656e-06, - "loss": 0.0628, + "learning_rate": 1.5070912580156961e-05, + "loss": 0.0269, "step": 160475 }, { "epoch": 7.49, - "learning_rate": 5.047114528151517e-06, - "loss": 0.0189, + "learning_rate": 1.5070444510320941e-05, + "loss": 0.0048, "step": 160480 }, { "epoch": 7.49, - "learning_rate": 5.046645726876378e-06, - "loss": 0.0434, + "learning_rate": 1.5069976440484921e-05, + "loss": 0.0186, "step": 160485 }, { "epoch": 7.49, - "learning_rate": 5.0461769256012385e-06, - "loss": 0.0094, + "learning_rate": 1.5069508370648901e-05, + "loss": 0.0206, "step": 160490 }, { "epoch": 7.49, - "learning_rate": 5.0457081243260984e-06, - "loss": 0.0486, + "learning_rate": 1.5069040300812882e-05, + "loss": 0.0155, "step": 160495 }, { "epoch": 7.49, - "learning_rate": 5.045239323050959e-06, - "loss": 0.069, + "learning_rate": 1.5068572230976862e-05, + "loss": 0.0216, "step": 160500 }, { "epoch": 7.49, - "learning_rate": 5.044770521775819e-06, - "loss": 0.0522, + "learning_rate": 1.5068104161140842e-05, + "loss": 0.0418, "step": 160505 }, { "epoch": 7.49, - "learning_rate": 5.044301720500681e-06, - "loss": 0.1028, + "learning_rate": 1.5067636091304822e-05, + "loss": 0.0648, "step": 160510 }, { "epoch": 7.49, - "learning_rate": 5.043832919225541e-06, - "loss": 0.109, + "learning_rate": 1.5067168021468805e-05, + "loss": 0.0582, "step": 160515 }, { "epoch": 7.49, - "learning_rate": 5.0433641179504015e-06, - "loss": 0.2326, + "learning_rate": 1.5066699951632785e-05, + "loss": 0.1658, "step": 160520 }, { "epoch": 7.49, - "learning_rate": 5.042895316675261e-06, - "loss": 0.0175, + "learning_rate": 1.5066231881796763e-05, + "loss": 0.0394, "step": 160525 }, { "epoch": 7.49, - "learning_rate": 5.042426515400123e-06, - "loss": 0.0065, + "learning_rate": 1.5065763811960743e-05, + "loss": 0.0335, "step": 160530 }, { "epoch": 7.49, - "learning_rate": 5.041957714124983e-06, - "loss": 0.0189, + "learning_rate": 1.5065295742124726e-05, + "loss": 0.0274, "step": 160535 }, { "epoch": 7.49, - "learning_rate": 5.041488912849844e-06, - "loss": 0.0634, + "learning_rate": 1.5064827672288706e-05, + "loss": 0.0287, "step": 160540 }, { "epoch": 7.49, - "learning_rate": 5.041020111574704e-06, - "loss": 0.0627, + "learning_rate": 1.5064359602452686e-05, + "loss": 0.0356, "step": 160545 }, { "epoch": 7.49, - "learning_rate": 5.040551310299564e-06, - "loss": 0.0432, + "learning_rate": 1.5063891532616669e-05, + "loss": 0.0678, "step": 160550 }, { "epoch": 7.49, - "learning_rate": 5.040082509024425e-06, - "loss": 0.1048, + "learning_rate": 1.5063423462780647e-05, + "loss": 0.0837, "step": 160555 }, { "epoch": 7.49, - "learning_rate": 5.039613707749286e-06, - "loss": 0.1006, + "learning_rate": 1.5062955392944627e-05, + "loss": 0.0669, "step": 160560 }, { "epoch": 7.49, - "learning_rate": 5.039144906474146e-06, - "loss": 0.254, + "learning_rate": 1.5062487323108607e-05, + "loss": 0.2637, "step": 160565 }, { "epoch": 7.49, - "learning_rate": 5.038676105199006e-06, - "loss": 0.192, + "learning_rate": 1.506201925327259e-05, + "loss": 0.1843, "step": 160570 }, { "epoch": 7.49, - "learning_rate": 5.038207303923867e-06, - "loss": 0.0277, + "learning_rate": 1.506155118343657e-05, + "loss": 0.024, "step": 160575 }, { "epoch": 7.49, - "learning_rate": 5.037738502648728e-06, - "loss": 0.0326, + "learning_rate": 1.506108311360055e-05, + "loss": 0.0143, "step": 160580 }, { "epoch": 7.49, - "learning_rate": 5.037269701373588e-06, - "loss": 0.0105, + "learning_rate": 1.506061504376453e-05, + "loss": 0.0152, "step": 160585 }, { "epoch": 7.49, - "learning_rate": 5.036800900098448e-06, - "loss": 0.0261, + "learning_rate": 1.506014697392851e-05, + "loss": 0.0298, "step": 160590 }, { "epoch": 7.49, - "learning_rate": 5.036332098823309e-06, - "loss": 0.0361, + "learning_rate": 1.505967890409249e-05, + "loss": 0.0784, "step": 160595 }, { "epoch": 7.49, - "learning_rate": 5.035863297548171e-06, - "loss": 0.0275, + "learning_rate": 1.505921083425647e-05, + "loss": 0.0406, "step": 160600 }, { "epoch": 7.49, - "learning_rate": 5.0353944962730305e-06, - "loss": 0.0879, + "learning_rate": 1.5058742764420454e-05, + "loss": 0.0447, "step": 160605 }, { "epoch": 7.49, - "learning_rate": 5.0349256949978905e-06, - "loss": 0.1431, + "learning_rate": 1.5058274694584433e-05, + "loss": 0.087, "step": 160610 }, { "epoch": 7.49, - "learning_rate": 5.034456893722751e-06, - "loss": 0.1567, + "learning_rate": 1.5057806624748413e-05, + "loss": 0.1101, "step": 160615 }, { "epoch": 7.49, - "learning_rate": 5.033988092447611e-06, - "loss": 0.2614, + "learning_rate": 1.5057338554912391e-05, + "loss": 0.1108, "step": 160620 }, { "epoch": 7.49, - "learning_rate": 5.033519291172473e-06, - "loss": 0.022, + "learning_rate": 1.5056870485076375e-05, + "loss": 0.0603, "step": 160625 }, { "epoch": 7.5, - "learning_rate": 5.033050489897333e-06, - "loss": 0.0244, + "learning_rate": 1.5056402415240354e-05, + "loss": 0.0303, "step": 160630 }, { "epoch": 7.5, - "learning_rate": 5.0325816886221935e-06, - "loss": 0.0675, + "learning_rate": 1.5055934345404334e-05, + "loss": 0.0043, "step": 160635 }, { "epoch": 7.5, - "learning_rate": 5.0321128873470534e-06, - "loss": 0.0635, + "learning_rate": 1.5055466275568314e-05, + "loss": 0.068, "step": 160640 }, { "epoch": 7.5, - "learning_rate": 5.031644086071914e-06, - "loss": 0.0198, + "learning_rate": 1.5054998205732297e-05, + "loss": 0.0564, "step": 160645 }, { "epoch": 7.5, - "learning_rate": 5.031175284796775e-06, - "loss": 0.0757, + "learning_rate": 1.5054530135896275e-05, + "loss": 0.0463, "step": 160650 }, { "epoch": 7.5, - "learning_rate": 5.030706483521636e-06, - "loss": 0.0449, + "learning_rate": 1.5054062066060255e-05, + "loss": 0.1472, "step": 160655 }, { "epoch": 7.5, - "learning_rate": 5.030237682246496e-06, - "loss": 0.0938, + "learning_rate": 1.5053593996224238e-05, + "loss": 0.1013, "step": 160660 }, { "epoch": 7.5, - "learning_rate": 5.0297688809713565e-06, - "loss": 0.1782, + "learning_rate": 1.5053125926388218e-05, + "loss": 0.2154, "step": 160665 }, { "epoch": 7.5, - "learning_rate": 5.029300079696217e-06, - "loss": 0.2056, + "learning_rate": 1.5052657856552198e-05, + "loss": 0.1778, "step": 160670 }, { "epoch": 7.5, - "learning_rate": 5.028831278421078e-06, - "loss": 0.0065, + "learning_rate": 1.5052189786716178e-05, + "loss": 0.0333, "step": 160675 }, { "epoch": 7.5, - "learning_rate": 5.028362477145938e-06, - "loss": 0.0393, + "learning_rate": 1.505172171688016e-05, + "loss": 0.0085, "step": 160680 }, { "epoch": 7.5, - "learning_rate": 5.027893675870799e-06, - "loss": 0.0176, + "learning_rate": 1.505125364704414e-05, + "loss": 0.0126, "step": 160685 }, { "epoch": 7.5, - "learning_rate": 5.027424874595659e-06, - "loss": 0.0364, + "learning_rate": 1.5050785577208119e-05, + "loss": 0.0151, "step": 160690 }, { "epoch": 7.5, - "learning_rate": 5.02695607332052e-06, - "loss": 0.0387, + "learning_rate": 1.5050317507372099e-05, + "loss": 0.0353, "step": 160695 }, { "epoch": 7.5, - "learning_rate": 5.02648727204538e-06, - "loss": 0.1082, + "learning_rate": 1.5049849437536082e-05, + "loss": 0.0215, "step": 160700 }, { "epoch": 7.5, - "learning_rate": 5.026018470770241e-06, - "loss": 0.0371, + "learning_rate": 1.5049381367700062e-05, + "loss": 0.1134, "step": 160705 }, { "epoch": 7.5, - "learning_rate": 5.025549669495101e-06, - "loss": 0.0487, + "learning_rate": 1.5048913297864042e-05, + "loss": 0.1097, "step": 160710 }, { "epoch": 7.5, - "learning_rate": 5.025080868219962e-06, - "loss": 0.1481, + "learning_rate": 1.5048445228028023e-05, + "loss": 0.1265, "step": 160715 }, { "epoch": 7.5, - "learning_rate": 5.0246120669448226e-06, - "loss": 0.2579, + "learning_rate": 1.5047977158192003e-05, + "loss": 0.1718, "step": 160720 }, { "epoch": 7.5, - "learning_rate": 5.024143265669683e-06, - "loss": 0.026, + "learning_rate": 1.5047509088355983e-05, + "loss": 0.0266, "step": 160725 }, { "epoch": 7.5, - "learning_rate": 5.023674464394543e-06, - "loss": 0.034, + "learning_rate": 1.5047041018519963e-05, + "loss": 0.014, "step": 160730 }, { "epoch": 7.5, - "learning_rate": 5.023205663119404e-06, - "loss": 0.0406, + "learning_rate": 1.5046572948683946e-05, + "loss": 0.0541, "step": 160735 }, { "epoch": 7.5, - "learning_rate": 5.022736861844265e-06, - "loss": 0.0409, + "learning_rate": 1.5046104878847926e-05, + "loss": 0.0342, "step": 160740 }, { "epoch": 7.5, - "learning_rate": 5.022268060569126e-06, - "loss": 0.061, + "learning_rate": 1.5045636809011904e-05, + "loss": 0.0337, "step": 160745 }, { "epoch": 7.5, - "learning_rate": 5.0217992592939855e-06, - "loss": 0.0351, + "learning_rate": 1.5045168739175884e-05, + "loss": 0.0608, "step": 160750 }, { "epoch": 7.5, - "learning_rate": 5.021330458018846e-06, - "loss": 0.1014, + "learning_rate": 1.5044700669339867e-05, + "loss": 0.1048, "step": 160755 }, { "epoch": 7.5, - "learning_rate": 5.020861656743706e-06, - "loss": 0.1463, + "learning_rate": 1.5044232599503847e-05, + "loss": 0.1004, "step": 160760 }, { "epoch": 7.5, - "learning_rate": 5.020392855468568e-06, - "loss": 0.0938, + "learning_rate": 1.5043764529667826e-05, + "loss": 0.1352, "step": 160765 }, { "epoch": 7.5, - "learning_rate": 5.019924054193428e-06, - "loss": 0.2164, + "learning_rate": 1.5043296459831806e-05, + "loss": 0.2464, "step": 160770 }, { "epoch": 7.5, - "learning_rate": 5.019455252918289e-06, - "loss": 0.0075, + "learning_rate": 1.5042828389995788e-05, + "loss": 0.0131, "step": 160775 }, { "epoch": 7.5, - "learning_rate": 5.0189864516431485e-06, - "loss": 0.0215, + "learning_rate": 1.5042360320159768e-05, + "loss": 0.0158, "step": 160780 }, { "epoch": 7.5, - "learning_rate": 5.0185176503680085e-06, - "loss": 0.0305, + "learning_rate": 1.5041892250323747e-05, + "loss": 0.0191, "step": 160785 }, { "epoch": 7.5, - "learning_rate": 5.01804884909287e-06, - "loss": 0.0253, + "learning_rate": 1.504142418048773e-05, + "loss": 0.015, "step": 160790 }, { "epoch": 7.5, - "learning_rate": 5.017580047817731e-06, - "loss": 0.0709, + "learning_rate": 1.504095611065171e-05, + "loss": 0.0246, "step": 160795 }, { "epoch": 7.5, - "learning_rate": 5.017111246542591e-06, - "loss": 0.059, + "learning_rate": 1.504048804081569e-05, + "loss": 0.0871, "step": 160800 }, { "epoch": 7.5, - "learning_rate": 5.016642445267451e-06, - "loss": 0.0246, + "learning_rate": 1.504001997097967e-05, + "loss": 0.0367, "step": 160805 }, { "epoch": 7.5, - "learning_rate": 5.016173643992312e-06, - "loss": 0.0929, + "learning_rate": 1.5039551901143652e-05, + "loss": 0.0605, "step": 160810 }, { "epoch": 7.5, - "learning_rate": 5.015704842717173e-06, - "loss": 0.1629, + "learning_rate": 1.5039083831307631e-05, + "loss": 0.1689, "step": 160815 }, { "epoch": 7.5, - "learning_rate": 5.015236041442033e-06, - "loss": 0.1235, + "learning_rate": 1.5038615761471611e-05, + "loss": 0.2008, "step": 160820 }, { "epoch": 7.5, - "learning_rate": 5.014767240166893e-06, - "loss": 0.0035, + "learning_rate": 1.5038147691635591e-05, + "loss": 0.0613, "step": 160825 }, { "epoch": 7.5, - "learning_rate": 5.014298438891754e-06, - "loss": 0.0144, + "learning_rate": 1.5037679621799574e-05, + "loss": 0.0105, "step": 160830 }, { "epoch": 7.5, - "learning_rate": 5.0138296376166154e-06, - "loss": 0.0162, + "learning_rate": 1.5037211551963554e-05, + "loss": 0.0107, "step": 160835 }, { "epoch": 7.51, - "learning_rate": 5.013360836341475e-06, - "loss": 0.0126, + "learning_rate": 1.5036743482127532e-05, + "loss": 0.0403, "step": 160840 }, { "epoch": 7.51, - "learning_rate": 5.012892035066335e-06, - "loss": 0.0094, + "learning_rate": 1.5036275412291515e-05, + "loss": 0.0191, "step": 160845 }, { "epoch": 7.51, - "learning_rate": 5.012423233791196e-06, - "loss": 0.0525, + "learning_rate": 1.5035807342455495e-05, + "loss": 0.0607, "step": 160850 }, { "epoch": 7.51, - "learning_rate": 5.011954432516058e-06, - "loss": 0.1009, + "learning_rate": 1.5035339272619475e-05, + "loss": 0.0799, "step": 160855 }, { "epoch": 7.51, - "learning_rate": 5.011485631240918e-06, - "loss": 0.1523, + "learning_rate": 1.5034871202783455e-05, + "loss": 0.1522, "step": 160860 }, { "epoch": 7.51, - "learning_rate": 5.0110168299657776e-06, - "loss": 0.0705, + "learning_rate": 1.5034403132947438e-05, + "loss": 0.1674, "step": 160865 }, { "epoch": 7.51, - "learning_rate": 5.010548028690638e-06, - "loss": 0.2017, + "learning_rate": 1.5033935063111416e-05, + "loss": 0.2354, "step": 160870 }, { "epoch": 7.51, - "learning_rate": 5.010079227415498e-06, - "loss": 0.0052, + "learning_rate": 1.5033466993275396e-05, + "loss": 0.0131, "step": 160875 }, { "epoch": 7.51, - "learning_rate": 5.00961042614036e-06, - "loss": 0.0097, + "learning_rate": 1.5032998923439376e-05, + "loss": 0.0094, "step": 160880 }, { "epoch": 7.51, - "learning_rate": 5.00914162486522e-06, - "loss": 0.0364, + "learning_rate": 1.5032530853603359e-05, + "loss": 0.0245, "step": 160885 }, { "epoch": 7.51, - "learning_rate": 5.008672823590081e-06, - "loss": 0.0353, + "learning_rate": 1.5032062783767339e-05, + "loss": 0.0335, "step": 160890 }, { "epoch": 7.51, - "learning_rate": 5.0082040223149406e-06, - "loss": 0.0587, + "learning_rate": 1.5031594713931319e-05, + "loss": 0.0478, "step": 160895 }, { "epoch": 7.51, - "learning_rate": 5.007735221039801e-06, - "loss": 0.0619, + "learning_rate": 1.5031126644095302e-05, + "loss": 0.039, "step": 160900 }, { "epoch": 7.51, - "learning_rate": 5.007266419764663e-06, - "loss": 0.0743, + "learning_rate": 1.503065857425928e-05, + "loss": 0.0475, "step": 160905 }, { "epoch": 7.51, - "learning_rate": 5.006797618489523e-06, - "loss": 0.2104, + "learning_rate": 1.503019050442326e-05, + "loss": 0.0543, "step": 160910 }, { "epoch": 7.51, - "learning_rate": 5.006328817214383e-06, - "loss": 0.1545, + "learning_rate": 1.502972243458724e-05, + "loss": 0.0799, "step": 160915 }, { "epoch": 7.51, - "learning_rate": 5.005860015939244e-06, - "loss": 0.292, + "learning_rate": 1.5029254364751223e-05, + "loss": 0.1504, "step": 160920 }, { "epoch": 7.51, - "learning_rate": 5.005391214664105e-06, - "loss": 0.0349, + "learning_rate": 1.5028786294915203e-05, + "loss": 0.0298, "step": 160925 }, { "epoch": 7.51, - "learning_rate": 5.004922413388965e-06, - "loss": 0.0262, + "learning_rate": 1.5028318225079182e-05, + "loss": 0.0137, "step": 160930 }, { "epoch": 7.51, - "learning_rate": 5.004453612113825e-06, - "loss": 0.0331, + "learning_rate": 1.502785015524316e-05, + "loss": 0.0647, "step": 160935 }, { "epoch": 7.51, - "learning_rate": 5.003984810838686e-06, - "loss": 0.1208, + "learning_rate": 1.5027382085407144e-05, + "loss": 0.0524, "step": 160940 }, { "epoch": 7.51, - "learning_rate": 5.003516009563546e-06, - "loss": 0.044, + "learning_rate": 1.5026914015571124e-05, + "loss": 0.0195, "step": 160945 }, { "epoch": 7.51, - "learning_rate": 5.0030472082884075e-06, - "loss": 0.0335, + "learning_rate": 1.5026445945735103e-05, + "loss": 0.0257, "step": 160950 }, { "epoch": 7.51, - "learning_rate": 5.002578407013267e-06, - "loss": 0.0699, + "learning_rate": 1.5025977875899083e-05, + "loss": 0.0247, "step": 160955 }, { "epoch": 7.51, - "learning_rate": 5.002109605738128e-06, - "loss": 0.137, + "learning_rate": 1.5025509806063066e-05, + "loss": 0.0532, "step": 160960 }, { "epoch": 7.51, - "learning_rate": 5.001640804462988e-06, - "loss": 0.0721, + "learning_rate": 1.5025041736227045e-05, + "loss": 0.1582, "step": 160965 }, { "epoch": 7.51, - "learning_rate": 5.001172003187849e-06, - "loss": 0.129, + "learning_rate": 1.5024573666391024e-05, + "loss": 0.2296, "step": 160970 }, { "epoch": 7.51, - "learning_rate": 5.00070320191271e-06, - "loss": 0.0177, + "learning_rate": 1.5024105596555008e-05, + "loss": 0.0333, "step": 160975 }, { "epoch": 7.51, - "learning_rate": 5.0002344006375704e-06, - "loss": 0.0031, + "learning_rate": 1.5023637526718987e-05, + "loss": 0.0252, "step": 160980 }, { "epoch": 7.51, - "learning_rate": 4.99976559936243e-06, - "loss": 0.0199, + "learning_rate": 1.5023169456882967e-05, + "loss": 0.0092, "step": 160985 }, { "epoch": 7.51, - "learning_rate": 4.999296798087291e-06, - "loss": 0.0605, + "learning_rate": 1.5022701387046947e-05, + "loss": 0.0206, "step": 160990 }, { "epoch": 7.51, - "learning_rate": 4.998827996812152e-06, - "loss": 0.0631, + "learning_rate": 1.502223331721093e-05, + "loss": 0.0456, "step": 160995 }, { "epoch": 7.51, - "learning_rate": 4.998359195537013e-06, - "loss": 0.0769, + "learning_rate": 1.5021765247374908e-05, + "loss": 0.0393, "step": 161000 }, { "epoch": 7.51, - "learning_rate": 4.997890394261873e-06, - "loss": 0.0681, + "learning_rate": 1.5021297177538888e-05, + "loss": 0.0162, "step": 161005 }, { "epoch": 7.51, - "learning_rate": 4.9974215929867334e-06, - "loss": 0.0876, + "learning_rate": 1.5020829107702868e-05, + "loss": 0.0295, "step": 161010 }, { "epoch": 7.51, - "learning_rate": 4.996952791711594e-06, - "loss": 0.1417, + "learning_rate": 1.5020361037866851e-05, + "loss": 0.1373, "step": 161015 }, { "epoch": 7.51, - "learning_rate": 4.996483990436454e-06, - "loss": 0.1829, + "learning_rate": 1.5019892968030831e-05, + "loss": 0.1537, "step": 161020 }, { "epoch": 7.51, - "learning_rate": 4.996015189161315e-06, - "loss": 0.0261, + "learning_rate": 1.501942489819481e-05, + "loss": 0.0104, "step": 161025 }, { "epoch": 7.51, - "learning_rate": 4.995546387886176e-06, - "loss": 0.0167, + "learning_rate": 1.5018956828358792e-05, + "loss": 0.0104, "step": 161030 }, { "epoch": 7.51, - "learning_rate": 4.9950775866110365e-06, - "loss": 0.0257, + "learning_rate": 1.5018488758522772e-05, + "loss": 0.028, "step": 161035 }, { "epoch": 7.51, - "learning_rate": 4.994608785335896e-06, - "loss": 0.0379, + "learning_rate": 1.5018020688686752e-05, + "loss": 0.0094, "step": 161040 }, { "epoch": 7.51, - "learning_rate": 4.994139984060757e-06, - "loss": 0.0256, + "learning_rate": 1.5017552618850732e-05, + "loss": 0.0341, "step": 161045 }, { "epoch": 7.51, - "learning_rate": 4.993671182785618e-06, - "loss": 0.0979, + "learning_rate": 1.5017084549014715e-05, + "loss": 0.0415, "step": 161050 }, { "epoch": 7.52, - "learning_rate": 4.993202381510478e-06, - "loss": 0.0893, + "learning_rate": 1.5016616479178695e-05, + "loss": 0.156, "step": 161055 }, { "epoch": 7.52, - "learning_rate": 4.992733580235339e-06, - "loss": 0.0793, + "learning_rate": 1.5016148409342673e-05, + "loss": 0.0539, "step": 161060 }, { "epoch": 7.52, - "learning_rate": 4.9922647789601995e-06, - "loss": 0.1833, + "learning_rate": 1.5015680339506653e-05, + "loss": 0.1059, "step": 161065 }, { "epoch": 7.52, - "learning_rate": 4.99179597768506e-06, - "loss": 0.2742, + "learning_rate": 1.5015212269670636e-05, + "loss": 0.1357, "step": 161070 }, { "epoch": 7.52, - "learning_rate": 4.99132717640992e-06, - "loss": 0.048, + "learning_rate": 1.5014744199834616e-05, + "loss": 0.0104, "step": 161075 }, { "epoch": 7.52, - "learning_rate": 4.990858375134781e-06, - "loss": 0.0115, + "learning_rate": 1.5014276129998596e-05, + "loss": 0.0078, "step": 161080 }, { "epoch": 7.52, - "learning_rate": 4.990389573859642e-06, - "loss": 0.0232, + "learning_rate": 1.5013808060162579e-05, + "loss": 0.0059, "step": 161085 }, { "epoch": 7.52, - "learning_rate": 4.989920772584502e-06, - "loss": 0.0371, + "learning_rate": 1.5013339990326559e-05, + "loss": 0.0311, "step": 161090 }, { "epoch": 7.52, - "learning_rate": 4.9894519713093625e-06, - "loss": 0.0312, + "learning_rate": 1.5012871920490537e-05, + "loss": 0.0268, "step": 161095 }, { "epoch": 7.52, - "learning_rate": 4.988983170034223e-06, - "loss": 0.0437, + "learning_rate": 1.5012403850654517e-05, + "loss": 0.0672, "step": 161100 }, { "epoch": 7.52, - "learning_rate": 4.988514368759084e-06, - "loss": 0.0805, + "learning_rate": 1.50119357808185e-05, + "loss": 0.0688, "step": 161105 }, { "epoch": 7.52, - "learning_rate": 4.988045567483944e-06, - "loss": 0.1034, + "learning_rate": 1.501146771098248e-05, + "loss": 0.0447, "step": 161110 }, { "epoch": 7.52, - "learning_rate": 4.987576766208804e-06, - "loss": 0.0716, + "learning_rate": 1.501099964114646e-05, + "loss": 0.1091, "step": 161115 }, { "epoch": 7.52, - "learning_rate": 4.9871079649336655e-06, - "loss": 0.1738, + "learning_rate": 1.501053157131044e-05, + "loss": 0.1577, "step": 161120 }, { "epoch": 7.52, - "learning_rate": 4.9866391636585255e-06, - "loss": 0.0198, + "learning_rate": 1.501006350147442e-05, + "loss": 0.0269, "step": 161125 }, { "epoch": 7.52, - "learning_rate": 4.986170362383386e-06, - "loss": 0.0171, + "learning_rate": 1.50095954316384e-05, + "loss": 0.0013, "step": 161130 }, { "epoch": 7.52, - "learning_rate": 4.985701561108246e-06, - "loss": 0.0197, + "learning_rate": 1.500912736180238e-05, + "loss": 0.023, "step": 161135 }, { "epoch": 7.52, - "learning_rate": 4.985232759833108e-06, - "loss": 0.032, + "learning_rate": 1.500865929196636e-05, + "loss": 0.0213, "step": 161140 }, { "epoch": 7.52, - "learning_rate": 4.984763958557968e-06, - "loss": 0.0653, + "learning_rate": 1.5008191222130343e-05, + "loss": 0.0225, "step": 161145 }, { "epoch": 7.52, - "learning_rate": 4.984295157282828e-06, - "loss": 0.0832, + "learning_rate": 1.5007723152294323e-05, + "loss": 0.051, "step": 161150 }, { "epoch": 7.52, - "learning_rate": 4.9838263560076884e-06, - "loss": 0.1261, + "learning_rate": 1.5007255082458301e-05, + "loss": 0.0507, "step": 161155 }, { "epoch": 7.52, - "learning_rate": 4.983357554732549e-06, - "loss": 0.0988, + "learning_rate": 1.5006787012622285e-05, + "loss": 0.1089, "step": 161160 }, { "epoch": 7.52, - "learning_rate": 4.98288875345741e-06, - "loss": 0.2726, + "learning_rate": 1.5006318942786264e-05, + "loss": 0.1598, "step": 161165 }, { "epoch": 7.52, - "learning_rate": 4.98241995218227e-06, - "loss": 0.1542, + "learning_rate": 1.5005850872950244e-05, + "loss": 0.0954, "step": 161170 }, { "epoch": 7.52, - "learning_rate": 4.981951150907131e-06, - "loss": 0.0191, + "learning_rate": 1.5005382803114224e-05, + "loss": 0.0446, "step": 161175 }, { "epoch": 7.52, - "learning_rate": 4.9814823496319915e-06, - "loss": 0.014, + "learning_rate": 1.5004914733278207e-05, + "loss": 0.0078, "step": 161180 }, { "epoch": 7.52, - "learning_rate": 4.9810135483568514e-06, - "loss": 0.0194, + "learning_rate": 1.5004446663442187e-05, + "loss": 0.005, "step": 161185 }, { "epoch": 7.52, - "learning_rate": 4.980544747081712e-06, - "loss": 0.0062, + "learning_rate": 1.5003978593606165e-05, + "loss": 0.0174, "step": 161190 }, { "epoch": 7.52, - "learning_rate": 4.980075945806573e-06, - "loss": 0.0282, + "learning_rate": 1.5003510523770145e-05, + "loss": 0.0755, "step": 161195 }, { "epoch": 7.52, - "learning_rate": 4.979607144531434e-06, - "loss": 0.035, + "learning_rate": 1.5003042453934128e-05, + "loss": 0.0461, "step": 161200 }, { "epoch": 7.52, - "learning_rate": 4.979138343256294e-06, - "loss": 0.0251, + "learning_rate": 1.5002574384098108e-05, + "loss": 0.0654, "step": 161205 }, { "epoch": 7.52, - "learning_rate": 4.9786695419811545e-06, - "loss": 0.1007, + "learning_rate": 1.5002106314262088e-05, + "loss": 0.0418, "step": 161210 }, { "epoch": 7.52, - "learning_rate": 4.978200740706015e-06, - "loss": 0.1452, + "learning_rate": 1.5001638244426071e-05, + "loss": 0.2019, "step": 161215 }, { "epoch": 7.52, - "learning_rate": 4.977731939430875e-06, - "loss": 0.1743, + "learning_rate": 1.5001170174590049e-05, + "loss": 0.2355, "step": 161220 }, { "epoch": 7.52, - "learning_rate": 4.977263138155736e-06, - "loss": 0.0376, + "learning_rate": 1.5000702104754029e-05, + "loss": 0.006, "step": 161225 }, { "epoch": 7.52, - "learning_rate": 4.976794336880597e-06, - "loss": 0.0217, + "learning_rate": 1.5000234034918009e-05, + "loss": 0.0209, "step": 161230 }, { "epoch": 7.52, - "learning_rate": 4.9763255356054576e-06, - "loss": 0.004, + "learning_rate": 1.499976596508199e-05, + "loss": 0.008, "step": 161235 }, { "epoch": 7.52, - "learning_rate": 4.9758567343303175e-06, - "loss": 0.0062, + "learning_rate": 1.4999297895245972e-05, + "loss": 0.0014, "step": 161240 }, { "epoch": 7.52, - "learning_rate": 4.975387933055178e-06, - "loss": 0.007, + "learning_rate": 1.4998829825409952e-05, + "loss": 0.0323, "step": 161245 }, { "epoch": 7.52, - "learning_rate": 4.974919131780039e-06, - "loss": 0.0503, + "learning_rate": 1.4998361755573931e-05, + "loss": 0.0619, "step": 161250 }, { "epoch": 7.52, - "learning_rate": 4.974450330504899e-06, - "loss": 0.0835, + "learning_rate": 1.4997893685737911e-05, + "loss": 0.032, "step": 161255 }, { "epoch": 7.52, - "learning_rate": 4.97398152922976e-06, - "loss": 0.0761, + "learning_rate": 1.4997425615901893e-05, + "loss": 0.1637, "step": 161260 }, { "epoch": 7.52, - "learning_rate": 4.9735127279546205e-06, - "loss": 0.1859, + "learning_rate": 1.4996957546065874e-05, + "loss": 0.1362, "step": 161265 }, { "epoch": 7.53, - "learning_rate": 4.973043926679481e-06, - "loss": 0.1918, + "learning_rate": 1.4996489476229854e-05, + "loss": 0.1945, "step": 161270 }, { "epoch": 7.53, - "learning_rate": 4.972575125404341e-06, - "loss": 0.0614, + "learning_rate": 1.4996021406393836e-05, + "loss": 0.0565, "step": 161275 }, { "epoch": 7.53, - "learning_rate": 4.972106324129202e-06, - "loss": 0.0007, + "learning_rate": 1.4995553336557814e-05, + "loss": 0.0126, "step": 161280 }, { "epoch": 7.53, - "learning_rate": 4.971637522854063e-06, - "loss": 0.0089, + "learning_rate": 1.4995085266721795e-05, + "loss": 0.0118, "step": 161285 }, { "epoch": 7.53, - "learning_rate": 4.971168721578923e-06, - "loss": 0.0346, + "learning_rate": 1.4994617196885775e-05, + "loss": 0.0175, "step": 161290 }, { "epoch": 7.53, - "learning_rate": 4.9706999203037835e-06, - "loss": 0.0128, + "learning_rate": 1.4994149127049757e-05, + "loss": 0.0151, "step": 161295 }, { "epoch": 7.53, - "learning_rate": 4.970231119028644e-06, - "loss": 0.0296, + "learning_rate": 1.4993681057213736e-05, + "loss": 0.0375, "step": 161300 }, { "epoch": 7.53, - "learning_rate": 4.969762317753505e-06, - "loss": 0.0844, + "learning_rate": 1.4993212987377718e-05, + "loss": 0.0538, "step": 161305 }, { "epoch": 7.53, - "learning_rate": 4.969293516478365e-06, - "loss": 0.0711, + "learning_rate": 1.4992744917541698e-05, + "loss": 0.0906, "step": 161310 }, { "epoch": 7.53, - "learning_rate": 4.968824715203226e-06, - "loss": 0.1063, + "learning_rate": 1.4992276847705678e-05, + "loss": 0.099, "step": 161315 }, { "epoch": 7.53, - "learning_rate": 4.968355913928087e-06, - "loss": 0.2344, + "learning_rate": 1.4991808777869657e-05, + "loss": 0.1672, "step": 161320 }, { "epoch": 7.53, - "learning_rate": 4.967887112652947e-06, - "loss": 0.0129, + "learning_rate": 1.4991340708033639e-05, + "loss": 0.0555, "step": 161325 }, { "epoch": 7.53, - "learning_rate": 4.967418311377807e-06, - "loss": 0.0392, + "learning_rate": 1.499087263819762e-05, + "loss": 0.0619, "step": 161330 }, { "epoch": 7.53, - "learning_rate": 4.966949510102668e-06, - "loss": 0.0199, + "learning_rate": 1.49904045683616e-05, + "loss": 0.0107, "step": 161335 }, { "epoch": 7.53, - "learning_rate": 4.966480708827529e-06, - "loss": 0.0286, + "learning_rate": 1.4989936498525582e-05, + "loss": 0.0599, "step": 161340 }, { "epoch": 7.53, - "learning_rate": 4.966011907552389e-06, - "loss": 0.006, + "learning_rate": 1.498946842868956e-05, + "loss": 0.0389, "step": 161345 }, { "epoch": 7.53, - "learning_rate": 4.96554310627725e-06, - "loss": 0.0143, + "learning_rate": 1.4989000358853541e-05, + "loss": 0.0412, "step": 161350 }, { "epoch": 7.53, - "learning_rate": 4.96507430500211e-06, - "loss": 0.0601, + "learning_rate": 1.4988532289017521e-05, + "loss": 0.0392, "step": 161355 }, { "epoch": 7.53, - "learning_rate": 4.964605503726971e-06, - "loss": 0.0952, + "learning_rate": 1.4988064219181503e-05, + "loss": 0.0383, "step": 161360 }, { "epoch": 7.53, - "learning_rate": 4.964136702451831e-06, - "loss": 0.1428, + "learning_rate": 1.4987596149345482e-05, + "loss": 0.1156, "step": 161365 }, { "epoch": 7.53, - "learning_rate": 4.963667901176691e-06, - "loss": 0.2963, + "learning_rate": 1.4987128079509464e-05, + "loss": 0.1286, "step": 161370 }, { "epoch": 7.53, - "learning_rate": 4.963199099901553e-06, - "loss": 0.048, + "learning_rate": 1.4986660009673442e-05, + "loss": 0.0099, "step": 161375 }, { "epoch": 7.53, - "learning_rate": 4.9627302986264126e-06, - "loss": 0.0096, + "learning_rate": 1.4986191939837424e-05, + "loss": 0.0126, "step": 161380 }, { "epoch": 7.53, - "learning_rate": 4.962261497351273e-06, - "loss": 0.0146, + "learning_rate": 1.4985723870001403e-05, + "loss": 0.0023, "step": 161385 }, { "epoch": 7.53, - "learning_rate": 4.961792696076133e-06, - "loss": 0.0632, + "learning_rate": 1.4985255800165385e-05, + "loss": 0.0457, "step": 161390 }, { "epoch": 7.53, - "learning_rate": 4.961323894800995e-06, - "loss": 0.0998, + "learning_rate": 1.4984787730329366e-05, + "loss": 0.0133, "step": 161395 }, { "epoch": 7.53, - "learning_rate": 4.960855093525855e-06, - "loss": 0.0381, + "learning_rate": 1.4984319660493346e-05, + "loss": 0.0225, "step": 161400 }, { "epoch": 7.53, - "learning_rate": 4.960386292250715e-06, - "loss": 0.0576, + "learning_rate": 1.4983851590657328e-05, + "loss": 0.0804, "step": 161405 }, { "epoch": 7.53, - "learning_rate": 4.9599174909755756e-06, - "loss": 0.0786, + "learning_rate": 1.4983383520821306e-05, + "loss": 0.1382, "step": 161410 }, { "epoch": 7.53, - "learning_rate": 4.959448689700436e-06, - "loss": 0.1541, + "learning_rate": 1.4982915450985287e-05, + "loss": 0.1679, "step": 161415 }, { "epoch": 7.53, - "learning_rate": 4.958979888425297e-06, - "loss": 0.1233, + "learning_rate": 1.4982447381149267e-05, + "loss": 0.2811, "step": 161420 }, { "epoch": 7.53, - "learning_rate": 4.958511087150157e-06, - "loss": 0.0158, + "learning_rate": 1.4981979311313249e-05, + "loss": 0.0365, "step": 161425 }, { "epoch": 7.53, - "learning_rate": 4.958042285875018e-06, - "loss": 0.0119, + "learning_rate": 1.4981511241477229e-05, + "loss": 0.0225, "step": 161430 }, { "epoch": 7.53, - "learning_rate": 4.957573484599879e-06, - "loss": 0.0777, + "learning_rate": 1.498104317164121e-05, + "loss": 0.0502, "step": 161435 }, { "epoch": 7.53, - "learning_rate": 4.9571046833247385e-06, - "loss": 0.0354, + "learning_rate": 1.4980575101805188e-05, + "loss": 0.0218, "step": 161440 }, { "epoch": 7.53, - "learning_rate": 4.956635882049599e-06, - "loss": 0.0384, + "learning_rate": 1.498010703196917e-05, + "loss": 0.076, "step": 161445 }, { "epoch": 7.53, - "learning_rate": 4.95616708077446e-06, - "loss": 0.0281, + "learning_rate": 1.4979638962133151e-05, + "loss": 0.0332, "step": 161450 }, { "epoch": 7.53, - "learning_rate": 4.955698279499321e-06, - "loss": 0.0327, + "learning_rate": 1.4979170892297131e-05, + "loss": 0.0662, "step": 161455 }, { "epoch": 7.53, - "learning_rate": 4.955229478224181e-06, - "loss": 0.0491, + "learning_rate": 1.4978702822461113e-05, + "loss": 0.0909, "step": 161460 }, { "epoch": 7.53, - "learning_rate": 4.954760676949042e-06, - "loss": 0.1628, + "learning_rate": 1.4978234752625092e-05, + "loss": 0.1019, "step": 161465 }, { "epoch": 7.53, - "learning_rate": 4.954291875673902e-06, - "loss": 0.1404, + "learning_rate": 1.4977766682789072e-05, + "loss": 0.1132, "step": 161470 }, { "epoch": 7.53, - "learning_rate": 4.953823074398762e-06, - "loss": 0.0217, + "learning_rate": 1.4977298612953052e-05, + "loss": 0.0296, "step": 161475 }, { "epoch": 7.53, - "learning_rate": 4.953354273123623e-06, - "loss": 0.0249, + "learning_rate": 1.4976830543117034e-05, + "loss": 0.0095, "step": 161480 }, { "epoch": 7.54, - "learning_rate": 4.952885471848484e-06, - "loss": 0.0409, + "learning_rate": 1.4976362473281013e-05, + "loss": 0.0498, "step": 161485 }, { "epoch": 7.54, - "learning_rate": 4.952416670573345e-06, - "loss": 0.0237, + "learning_rate": 1.4975894403444995e-05, + "loss": 0.0046, "step": 161490 }, { "epoch": 7.54, - "learning_rate": 4.951947869298205e-06, - "loss": 0.0413, + "learning_rate": 1.4975426333608975e-05, + "loss": 0.0129, "step": 161495 }, { "epoch": 7.54, - "learning_rate": 4.951479068023065e-06, - "loss": 0.0483, + "learning_rate": 1.4974958263772956e-05, + "loss": 0.0213, "step": 161500 }, { "epoch": 7.54, - "learning_rate": 4.951010266747926e-06, - "loss": 0.0117, + "learning_rate": 1.4974490193936934e-05, + "loss": 0.0689, "step": 161505 }, { "epoch": 7.54, - "learning_rate": 4.950541465472786e-06, - "loss": 0.1341, + "learning_rate": 1.4974022124100916e-05, + "loss": 0.0603, "step": 161510 }, { "epoch": 7.54, - "learning_rate": 4.950072664197647e-06, - "loss": 0.1477, + "learning_rate": 1.4973554054264897e-05, + "loss": 0.2143, "step": 161515 }, { "epoch": 7.54, - "learning_rate": 4.949603862922508e-06, - "loss": 0.1559, + "learning_rate": 1.4973085984428877e-05, + "loss": 0.3035, "step": 161520 }, { "epoch": 7.54, - "learning_rate": 4.9491350616473684e-06, - "loss": 0.0267, + "learning_rate": 1.4972617914592859e-05, + "loss": 0.01, "step": 161525 }, { "epoch": 7.54, - "learning_rate": 4.948666260372228e-06, - "loss": 0.0544, + "learning_rate": 1.4972149844756838e-05, + "loss": 0.0005, "step": 161530 }, { "epoch": 7.54, - "learning_rate": 4.948197459097089e-06, - "loss": 0.0257, + "learning_rate": 1.4971681774920818e-05, + "loss": 0.012, "step": 161535 }, { "epoch": 7.54, - "learning_rate": 4.94772865782195e-06, - "loss": 0.0299, + "learning_rate": 1.4971213705084798e-05, + "loss": 0.0425, "step": 161540 }, { "epoch": 7.54, - "learning_rate": 4.94725985654681e-06, - "loss": 0.0315, + "learning_rate": 1.497074563524878e-05, + "loss": 0.0417, "step": 161545 }, { "epoch": 7.54, - "learning_rate": 4.946791055271671e-06, - "loss": 0.1159, + "learning_rate": 1.497027756541276e-05, + "loss": 0.0249, "step": 161550 }, { "epoch": 7.54, - "learning_rate": 4.946322253996531e-06, - "loss": 0.0446, + "learning_rate": 1.4969809495576741e-05, + "loss": 0.1216, "step": 161555 }, { "epoch": 7.54, - "learning_rate": 4.945853452721392e-06, - "loss": 0.0818, + "learning_rate": 1.496934142574072e-05, + "loss": 0.041, "step": 161560 }, { "epoch": 7.54, - "learning_rate": 4.945384651446252e-06, - "loss": 0.1331, + "learning_rate": 1.49688733559047e-05, + "loss": 0.0761, "step": 161565 }, { "epoch": 7.54, - "learning_rate": 4.944915850171113e-06, - "loss": 0.33, + "learning_rate": 1.4968405286068682e-05, + "loss": 0.148, "step": 161570 }, { "epoch": 7.54, - "learning_rate": 4.944447048895974e-06, - "loss": 0.0177, + "learning_rate": 1.4967937216232662e-05, + "loss": 0.01, "step": 161575 }, { "epoch": 7.54, - "learning_rate": 4.943978247620834e-06, - "loss": 0.0211, + "learning_rate": 1.4967469146396643e-05, + "loss": 0.0107, "step": 161580 }, { "epoch": 7.54, - "learning_rate": 4.943509446345694e-06, - "loss": 0.0162, + "learning_rate": 1.4967001076560623e-05, + "loss": 0.0665, "step": 161585 }, { "epoch": 7.54, - "learning_rate": 4.943040645070555e-06, - "loss": 0.0462, + "learning_rate": 1.4966533006724605e-05, + "loss": 0.0302, "step": 161590 }, { "epoch": 7.54, - "learning_rate": 4.942571843795416e-06, - "loss": 0.0689, + "learning_rate": 1.4966064936888585e-05, + "loss": 0.0314, "step": 161595 }, { "epoch": 7.54, - "learning_rate": 4.942103042520276e-06, - "loss": 0.0511, + "learning_rate": 1.4965596867052564e-05, + "loss": 0.0664, "step": 161600 }, { "epoch": 7.54, - "learning_rate": 4.941634241245137e-06, - "loss": 0.0481, + "learning_rate": 1.4965128797216544e-05, + "loss": 0.06, "step": 161605 }, { "epoch": 7.54, - "learning_rate": 4.9411654399699975e-06, - "loss": 0.0736, + "learning_rate": 1.4964660727380526e-05, + "loss": 0.1168, "step": 161610 }, { "epoch": 7.54, - "learning_rate": 4.940696638694857e-06, - "loss": 0.1041, + "learning_rate": 1.4964192657544506e-05, + "loss": 0.0834, "step": 161615 }, { "epoch": 7.54, - "learning_rate": 4.940227837419718e-06, - "loss": 0.2441, + "learning_rate": 1.4963724587708487e-05, + "loss": 0.1522, "step": 161620 }, { "epoch": 7.54, - "learning_rate": 4.939759036144578e-06, - "loss": 0.0157, + "learning_rate": 1.4963256517872467e-05, + "loss": 0.0056, "step": 161625 }, { "epoch": 7.54, - "learning_rate": 4.93929023486944e-06, - "loss": 0.0214, + "learning_rate": 1.4962788448036447e-05, + "loss": 0.0034, "step": 161630 }, { "epoch": 7.54, - "learning_rate": 4.9388214335943e-06, - "loss": 0.0277, + "learning_rate": 1.4962320378200428e-05, + "loss": 0.0224, "step": 161635 }, { "epoch": 7.54, - "learning_rate": 4.9383526323191605e-06, - "loss": 0.0132, + "learning_rate": 1.4961852308364408e-05, + "loss": 0.0364, "step": 161640 }, { "epoch": 7.54, - "learning_rate": 4.93788383104402e-06, - "loss": 0.0426, + "learning_rate": 1.496138423852839e-05, + "loss": 0.0544, "step": 161645 }, { "epoch": 7.54, - "learning_rate": 4.937415029768882e-06, - "loss": 0.0351, + "learning_rate": 1.496091616869237e-05, + "loss": 0.0251, "step": 161650 }, { "epoch": 7.54, - "learning_rate": 4.936946228493742e-06, - "loss": 0.0604, + "learning_rate": 1.4960448098856351e-05, + "loss": 0.026, "step": 161655 }, { "epoch": 7.54, - "learning_rate": 4.936477427218602e-06, - "loss": 0.0662, + "learning_rate": 1.4959980029020329e-05, + "loss": 0.0999, "step": 161660 }, { "epoch": 7.54, - "learning_rate": 4.936008625943463e-06, - "loss": 0.1182, + "learning_rate": 1.495951195918431e-05, + "loss": 0.0815, "step": 161665 }, { "epoch": 7.54, - "learning_rate": 4.9355398246683234e-06, - "loss": 0.1406, + "learning_rate": 1.495904388934829e-05, + "loss": 0.2028, "step": 161670 }, { "epoch": 7.54, - "learning_rate": 4.935071023393184e-06, - "loss": 0.0221, + "learning_rate": 1.4958575819512272e-05, + "loss": 0.0247, "step": 161675 }, { "epoch": 7.54, - "learning_rate": 4.934602222118044e-06, - "loss": 0.0363, + "learning_rate": 1.4958107749676252e-05, + "loss": 0.0223, "step": 161680 }, { "epoch": 7.54, - "learning_rate": 4.934133420842905e-06, - "loss": 0.021, + "learning_rate": 1.4957639679840233e-05, + "loss": 0.0522, "step": 161685 }, { "epoch": 7.54, - "learning_rate": 4.933664619567766e-06, - "loss": 0.0085, + "learning_rate": 1.4957171610004213e-05, + "loss": 0.0691, "step": 161690 }, { "epoch": 7.54, - "learning_rate": 4.933195818292626e-06, - "loss": 0.0496, + "learning_rate": 1.4956703540168193e-05, + "loss": 0.0382, "step": 161695 }, { "epoch": 7.55, - "learning_rate": 4.9327270170174864e-06, - "loss": 0.1134, + "learning_rate": 1.4956235470332174e-05, + "loss": 0.0933, "step": 161700 }, { "epoch": 7.55, - "learning_rate": 4.932258215742347e-06, - "loss": 0.0214, + "learning_rate": 1.4955767400496154e-05, + "loss": 0.0619, "step": 161705 }, { "epoch": 7.55, - "learning_rate": 4.931789414467208e-06, - "loss": 0.0884, + "learning_rate": 1.4955299330660136e-05, + "loss": 0.0892, "step": 161710 }, { "epoch": 7.55, - "learning_rate": 4.931320613192068e-06, - "loss": 0.1711, + "learning_rate": 1.4954831260824115e-05, + "loss": 0.1686, "step": 161715 }, { "epoch": 7.55, - "learning_rate": 4.930851811916929e-06, - "loss": 0.2612, + "learning_rate": 1.4954363190988097e-05, + "loss": 0.129, "step": 161720 }, { "epoch": 7.55, - "learning_rate": 4.9303830106417895e-06, - "loss": 0.0403, + "learning_rate": 1.4953895121152075e-05, + "loss": 0.0311, "step": 161725 }, { "epoch": 7.55, - "learning_rate": 4.9299142093666494e-06, - "loss": 0.0158, + "learning_rate": 1.4953427051316057e-05, + "loss": 0.0234, "step": 161730 }, { "epoch": 7.55, - "learning_rate": 4.92944540809151e-06, - "loss": 0.013, + "learning_rate": 1.4952958981480036e-05, + "loss": 0.0163, "step": 161735 }, { "epoch": 7.55, - "learning_rate": 4.928976606816371e-06, - "loss": 0.05, + "learning_rate": 1.4952490911644018e-05, + "loss": 0.0108, "step": 161740 }, { "epoch": 7.55, - "learning_rate": 4.928507805541232e-06, - "loss": 0.0096, + "learning_rate": 1.4952022841807998e-05, + "loss": 0.0401, "step": 161745 }, { "epoch": 7.55, - "learning_rate": 4.928039004266092e-06, - "loss": 0.0554, + "learning_rate": 1.495155477197198e-05, + "loss": 0.0619, "step": 161750 }, { "epoch": 7.55, - "learning_rate": 4.9275702029909525e-06, - "loss": 0.0761, + "learning_rate": 1.4951086702135959e-05, + "loss": 0.0407, "step": 161755 }, { "epoch": 7.55, - "learning_rate": 4.927101401715813e-06, - "loss": 0.066, + "learning_rate": 1.4950618632299939e-05, + "loss": 0.1212, "step": 161760 }, { "epoch": 7.55, - "learning_rate": 4.926632600440673e-06, - "loss": 0.1068, + "learning_rate": 1.495015056246392e-05, + "loss": 0.0885, "step": 161765 }, { "epoch": 7.55, - "learning_rate": 4.926163799165534e-06, - "loss": 0.1359, + "learning_rate": 1.49496824926279e-05, + "loss": 0.1211, "step": 161770 }, { "epoch": 7.55, - "learning_rate": 4.925694997890395e-06, - "loss": 0.0326, + "learning_rate": 1.4949214422791882e-05, + "loss": 0.0171, "step": 161775 }, { "epoch": 7.55, - "learning_rate": 4.9252261966152555e-06, - "loss": 0.0021, + "learning_rate": 1.4948746352955862e-05, + "loss": 0.0254, "step": 161780 }, { "epoch": 7.55, - "learning_rate": 4.9247573953401155e-06, - "loss": 0.0362, + "learning_rate": 1.4948278283119843e-05, + "loss": 0.023, "step": 161785 }, { "epoch": 7.55, - "learning_rate": 4.924288594064976e-06, - "loss": 0.0082, + "learning_rate": 1.4947810213283821e-05, + "loss": 0.0204, "step": 161790 }, { "epoch": 7.55, - "learning_rate": 4.923819792789837e-06, - "loss": 0.0533, + "learning_rate": 1.4947342143447803e-05, + "loss": 0.0229, "step": 161795 }, { "epoch": 7.55, - "learning_rate": 4.923350991514697e-06, - "loss": 0.062, + "learning_rate": 1.4946874073611783e-05, + "loss": 0.0189, "step": 161800 }, { "epoch": 7.55, - "learning_rate": 4.922882190239558e-06, - "loss": 0.0762, + "learning_rate": 1.4946406003775764e-05, + "loss": 0.065, "step": 161805 }, { "epoch": 7.55, - "learning_rate": 4.9224133889644185e-06, - "loss": 0.1967, + "learning_rate": 1.4945937933939744e-05, + "loss": 0.1468, "step": 161810 }, { "epoch": 7.55, - "learning_rate": 4.921944587689279e-06, - "loss": 0.1538, + "learning_rate": 1.4945469864103725e-05, + "loss": 0.1646, "step": 161815 }, { "epoch": 7.55, - "learning_rate": 4.921475786414139e-06, - "loss": 0.1373, + "learning_rate": 1.4945001794267705e-05, + "loss": 0.1786, "step": 161820 }, { "epoch": 7.55, - "learning_rate": 4.921006985139e-06, - "loss": 0.0149, + "learning_rate": 1.4944533724431685e-05, + "loss": 0.0074, "step": 161825 }, { "epoch": 7.55, - "learning_rate": 4.920538183863861e-06, - "loss": 0.0147, + "learning_rate": 1.4944065654595667e-05, + "loss": 0.0295, "step": 161830 }, { "epoch": 7.55, - "learning_rate": 4.920069382588721e-06, - "loss": 0.0502, + "learning_rate": 1.4943597584759646e-05, + "loss": 0.0304, "step": 161835 }, { "epoch": 7.55, - "learning_rate": 4.9196005813135815e-06, - "loss": 0.0229, + "learning_rate": 1.4943129514923628e-05, + "loss": 0.0225, "step": 161840 }, { "epoch": 7.55, - "learning_rate": 4.919131780038442e-06, - "loss": 0.0175, + "learning_rate": 1.4942661445087608e-05, + "loss": 0.0209, "step": 161845 }, { "epoch": 7.55, - "learning_rate": 4.918662978763303e-06, - "loss": 0.0573, + "learning_rate": 1.4942193375251587e-05, + "loss": 0.1292, "step": 161850 }, { "epoch": 7.55, - "learning_rate": 4.918194177488163e-06, - "loss": 0.0911, + "learning_rate": 1.4941725305415567e-05, + "loss": 0.076, "step": 161855 }, { "epoch": 7.55, - "learning_rate": 4.917725376213024e-06, - "loss": 0.1609, + "learning_rate": 1.4941257235579549e-05, + "loss": 0.0788, "step": 161860 }, { "epoch": 7.55, - "learning_rate": 4.9172565749378846e-06, - "loss": 0.1197, + "learning_rate": 1.4940789165743529e-05, + "loss": 0.118, "step": 161865 }, { "epoch": 7.55, - "learning_rate": 4.9167877736627445e-06, - "loss": 0.2128, + "learning_rate": 1.494032109590751e-05, + "loss": 0.0967, "step": 161870 }, { "epoch": 7.55, - "learning_rate": 4.916318972387605e-06, - "loss": 0.033, + "learning_rate": 1.4939853026071492e-05, + "loss": 0.024, "step": 161875 }, { "epoch": 7.55, - "learning_rate": 4.915850171112466e-06, - "loss": 0.0242, + "learning_rate": 1.4939384956235471e-05, + "loss": 0.0146, "step": 161880 }, { "epoch": 7.55, - "learning_rate": 4.915381369837327e-06, - "loss": 0.0307, + "learning_rate": 1.4938916886399451e-05, + "loss": 0.0136, "step": 161885 }, { "epoch": 7.55, - "learning_rate": 4.914912568562187e-06, - "loss": 0.0341, + "learning_rate": 1.4938448816563431e-05, + "loss": 0.0057, "step": 161890 }, { "epoch": 7.55, - "learning_rate": 4.9144437672870476e-06, - "loss": 0.0497, + "learning_rate": 1.4937980746727413e-05, + "loss": 0.016, "step": 161895 }, { "epoch": 7.55, - "learning_rate": 4.913974966011908e-06, - "loss": 0.0637, + "learning_rate": 1.4937512676891392e-05, + "loss": 0.0239, "step": 161900 }, { "epoch": 7.55, - "learning_rate": 4.913506164736768e-06, - "loss": 0.0685, + "learning_rate": 1.4937044607055374e-05, + "loss": 0.0769, "step": 161905 }, { "epoch": 7.55, - "learning_rate": 4.913037363461629e-06, - "loss": 0.0604, + "learning_rate": 1.4936576537219354e-05, + "loss": 0.0497, "step": 161910 }, { "epoch": 7.56, - "learning_rate": 4.912568562186489e-06, - "loss": 0.2024, + "learning_rate": 1.4936108467383334e-05, + "loss": 0.1529, "step": 161915 }, { "epoch": 7.56, - "learning_rate": 4.912099760911351e-06, - "loss": 0.1956, + "learning_rate": 1.4935640397547313e-05, + "loss": 0.2434, "step": 161920 }, { "epoch": 7.56, - "learning_rate": 4.9116309596362106e-06, - "loss": 0.0292, + "learning_rate": 1.4935172327711295e-05, + "loss": 0.0254, "step": 161925 }, { "epoch": 7.56, - "learning_rate": 4.911162158361071e-06, - "loss": 0.0093, + "learning_rate": 1.4934704257875275e-05, + "loss": 0.0137, "step": 161930 }, { "epoch": 7.56, - "learning_rate": 4.910693357085931e-06, - "loss": 0.0248, + "learning_rate": 1.4934236188039256e-05, + "loss": 0.0217, "step": 161935 }, { "epoch": 7.56, - "learning_rate": 4.910224555810792e-06, - "loss": 0.0465, + "learning_rate": 1.4933768118203238e-05, + "loss": 0.0166, "step": 161940 }, { "epoch": 7.56, - "learning_rate": 4.909755754535653e-06, - "loss": 0.0129, + "learning_rate": 1.4933300048367216e-05, + "loss": 0.0677, "step": 161945 }, { "epoch": 7.56, - "learning_rate": 4.909286953260513e-06, - "loss": 0.0371, + "learning_rate": 1.4932831978531197e-05, + "loss": 0.0058, "step": 161950 }, { "epoch": 7.56, - "learning_rate": 4.9088181519853735e-06, - "loss": 0.0137, + "learning_rate": 1.4932363908695177e-05, + "loss": 0.1045, "step": 161955 }, { "epoch": 7.56, - "learning_rate": 4.908349350710234e-06, - "loss": 0.0631, + "learning_rate": 1.4931895838859159e-05, + "loss": 0.0787, "step": 161960 }, { "epoch": 7.56, - "learning_rate": 4.907880549435095e-06, - "loss": 0.1551, + "learning_rate": 1.4931427769023139e-05, + "loss": 0.1421, "step": 161965 }, { "epoch": 7.56, - "learning_rate": 4.907411748159955e-06, - "loss": 0.1769, + "learning_rate": 1.493095969918712e-05, + "loss": 0.1644, "step": 161970 }, { "epoch": 7.56, - "learning_rate": 4.906942946884816e-06, - "loss": 0.0288, + "learning_rate": 1.4930491629351098e-05, + "loss": 0.0081, "step": 161975 }, { "epoch": 7.56, - "learning_rate": 4.906474145609677e-06, - "loss": 0.0127, + "learning_rate": 1.493002355951508e-05, + "loss": 0.0137, "step": 161980 }, { "epoch": 7.56, - "learning_rate": 4.9060053443345365e-06, - "loss": 0.0159, + "learning_rate": 1.492955548967906e-05, + "loss": 0.0043, "step": 161985 }, { "epoch": 7.56, - "learning_rate": 4.905536543059397e-06, - "loss": 0.0542, + "learning_rate": 1.4929087419843041e-05, + "loss": 0.01, "step": 161990 }, { "epoch": 7.56, - "learning_rate": 4.905067741784258e-06, - "loss": 0.0503, + "learning_rate": 1.492861935000702e-05, + "loss": 0.0515, "step": 161995 }, { "epoch": 7.56, - "learning_rate": 4.904598940509119e-06, - "loss": 0.0547, + "learning_rate": 1.4928151280171002e-05, + "loss": 0.052, "step": 162000 }, { "epoch": 7.56, - "learning_rate": 4.904130139233979e-06, - "loss": 0.0816, + "learning_rate": 1.4927683210334984e-05, + "loss": 0.0219, "step": 162005 }, { "epoch": 7.56, - "learning_rate": 4.90366133795884e-06, - "loss": 0.0638, + "learning_rate": 1.4927215140498962e-05, + "loss": 0.1425, "step": 162010 }, { "epoch": 7.56, - "learning_rate": 4.9031925366837e-06, - "loss": 0.1609, + "learning_rate": 1.4926747070662943e-05, + "loss": 0.0446, "step": 162015 }, { "epoch": 7.56, - "learning_rate": 4.90272373540856e-06, - "loss": 0.1392, + "learning_rate": 1.4926279000826923e-05, + "loss": 0.215, "step": 162020 }, { "epoch": 7.56, - "learning_rate": 4.902254934133421e-06, - "loss": 0.0125, + "learning_rate": 1.4925810930990905e-05, + "loss": 0.0391, "step": 162025 }, { "epoch": 7.56, - "learning_rate": 4.901786132858282e-06, - "loss": 0.0265, + "learning_rate": 1.4925342861154885e-05, + "loss": 0.0189, "step": 162030 }, { "epoch": 7.56, - "learning_rate": 4.901317331583143e-06, - "loss": 0.0269, + "learning_rate": 1.4924874791318866e-05, + "loss": 0.023, "step": 162035 }, { "epoch": 7.56, - "learning_rate": 4.900848530308003e-06, - "loss": 0.0086, + "learning_rate": 1.4924406721482844e-05, + "loss": 0.0255, "step": 162040 }, { "epoch": 7.56, - "learning_rate": 4.900379729032863e-06, - "loss": 0.1068, + "learning_rate": 1.4923938651646826e-05, + "loss": 0.0409, "step": 162045 }, { "epoch": 7.56, - "learning_rate": 4.899910927757724e-06, - "loss": 0.0656, + "learning_rate": 1.4923470581810806e-05, + "loss": 0.0679, "step": 162050 }, { "epoch": 7.56, - "learning_rate": 4.899442126482584e-06, - "loss": 0.0446, + "learning_rate": 1.4923002511974787e-05, + "loss": 0.0967, "step": 162055 }, { "epoch": 7.56, - "learning_rate": 4.898973325207445e-06, - "loss": 0.0449, + "learning_rate": 1.4922534442138769e-05, + "loss": 0.0864, "step": 162060 }, { "epoch": 7.56, - "learning_rate": 4.898504523932306e-06, - "loss": 0.1671, + "learning_rate": 1.4922066372302748e-05, + "loss": 0.0788, "step": 162065 }, { "epoch": 7.56, - "learning_rate": 4.898035722657166e-06, - "loss": 0.239, + "learning_rate": 1.4921598302466728e-05, + "loss": 0.1454, "step": 162070 }, { "epoch": 7.56, - "learning_rate": 4.897566921382026e-06, - "loss": 0.0185, + "learning_rate": 1.4921130232630708e-05, + "loss": 0.0337, "step": 162075 }, { "epoch": 7.56, - "learning_rate": 4.897098120106887e-06, - "loss": 0.0392, + "learning_rate": 1.492066216279469e-05, + "loss": 0.0105, "step": 162080 }, { "epoch": 7.56, - "learning_rate": 4.896629318831748e-06, - "loss": 0.0115, + "learning_rate": 1.492019409295867e-05, + "loss": 0.0382, "step": 162085 }, { "epoch": 7.56, - "learning_rate": 4.896160517556608e-06, - "loss": 0.0084, + "learning_rate": 1.4919726023122651e-05, + "loss": 0.0316, "step": 162090 }, { "epoch": 7.56, - "learning_rate": 4.895691716281469e-06, - "loss": 0.0201, + "learning_rate": 1.491925795328663e-05, + "loss": 0.0366, "step": 162095 }, { "epoch": 7.56, - "learning_rate": 4.895222915006329e-06, - "loss": 0.0421, + "learning_rate": 1.4918789883450612e-05, + "loss": 0.0368, "step": 162100 }, { "epoch": 7.56, - "learning_rate": 4.89475411373119e-06, - "loss": 0.0784, + "learning_rate": 1.491832181361459e-05, + "loss": 0.0729, "step": 162105 }, { "epoch": 7.56, - "learning_rate": 4.89428531245605e-06, - "loss": 0.1023, + "learning_rate": 1.4917853743778572e-05, + "loss": 0.099, "step": 162110 }, { "epoch": 7.56, - "learning_rate": 4.893816511180911e-06, - "loss": 0.1167, + "learning_rate": 1.4917385673942552e-05, + "loss": 0.1201, "step": 162115 }, { "epoch": 7.56, - "learning_rate": 4.893347709905772e-06, - "loss": 0.1756, + "learning_rate": 1.4916917604106533e-05, + "loss": 0.1073, "step": 162120 }, { "epoch": 7.56, - "learning_rate": 4.892878908630632e-06, - "loss": 0.0338, + "learning_rate": 1.4916449534270515e-05, + "loss": 0.0239, "step": 162125 }, { "epoch": 7.57, - "learning_rate": 4.892410107355492e-06, - "loss": 0.0289, + "learning_rate": 1.4915981464434495e-05, + "loss": 0.0067, "step": 162130 }, { "epoch": 7.57, - "learning_rate": 4.891941306080353e-06, - "loss": 0.0152, + "learning_rate": 1.4915513394598474e-05, + "loss": 0.0559, "step": 162135 }, { "epoch": 7.57, - "learning_rate": 4.891472504805214e-06, - "loss": 0.0281, + "learning_rate": 1.4915045324762454e-05, + "loss": 0.0044, "step": 162140 }, { "epoch": 7.57, - "learning_rate": 4.891003703530074e-06, - "loss": 0.0307, + "learning_rate": 1.4914577254926436e-05, + "loss": 0.0197, "step": 162145 }, { "epoch": 7.57, - "learning_rate": 4.890534902254935e-06, - "loss": 0.0707, + "learning_rate": 1.4914109185090416e-05, + "loss": 0.0556, "step": 162150 }, { "epoch": 7.57, - "learning_rate": 4.8900661009797955e-06, - "loss": 0.0815, + "learning_rate": 1.4913641115254397e-05, + "loss": 0.0435, "step": 162155 }, { "epoch": 7.57, - "learning_rate": 4.889597299704655e-06, - "loss": 0.0845, + "learning_rate": 1.4913173045418377e-05, + "loss": 0.0904, "step": 162160 }, { "epoch": 7.57, - "learning_rate": 4.889128498429516e-06, - "loss": 0.128, + "learning_rate": 1.4912704975582357e-05, + "loss": 0.1974, "step": 162165 }, { "epoch": 7.57, - "learning_rate": 4.888659697154376e-06, - "loss": 0.1196, + "learning_rate": 1.4912236905746336e-05, + "loss": 0.1361, "step": 162170 }, { "epoch": 7.57, - "learning_rate": 4.888190895879238e-06, - "loss": 0.0126, + "learning_rate": 1.4911768835910318e-05, + "loss": 0.0286, "step": 162175 }, { "epoch": 7.57, - "learning_rate": 4.887722094604098e-06, - "loss": 0.0024, + "learning_rate": 1.49113007660743e-05, + "loss": 0.0555, "step": 162180 }, { "epoch": 7.57, - "learning_rate": 4.8872532933289584e-06, - "loss": 0.0129, + "learning_rate": 1.491083269623828e-05, + "loss": 0.0147, "step": 162185 }, { "epoch": 7.57, - "learning_rate": 4.886784492053818e-06, - "loss": 0.0801, + "learning_rate": 1.491036462640226e-05, + "loss": 0.0333, "step": 162190 }, { "epoch": 7.57, - "learning_rate": 4.886315690778679e-06, - "loss": 0.0686, + "learning_rate": 1.490989655656624e-05, + "loss": 0.0554, "step": 162195 }, { "epoch": 7.57, - "learning_rate": 4.88584688950354e-06, - "loss": 0.0313, + "learning_rate": 1.490942848673022e-05, + "loss": 0.0746, "step": 162200 }, { "epoch": 7.57, - "learning_rate": 4.8853780882284e-06, - "loss": 0.0503, + "learning_rate": 1.49089604168942e-05, + "loss": 0.0918, "step": 162205 }, { "epoch": 7.57, - "learning_rate": 4.884909286953261e-06, - "loss": 0.0801, + "learning_rate": 1.4908492347058182e-05, + "loss": 0.0671, "step": 162210 }, { "epoch": 7.57, - "learning_rate": 4.8844404856781214e-06, - "loss": 0.304, + "learning_rate": 1.4908024277222162e-05, + "loss": 0.0953, "step": 162215 }, { "epoch": 7.57, - "learning_rate": 4.883971684402982e-06, - "loss": 0.2219, + "learning_rate": 1.4907556207386143e-05, + "loss": 0.2817, "step": 162220 }, { "epoch": 7.57, - "learning_rate": 4.883502883127842e-06, - "loss": 0.0027, + "learning_rate": 1.4907088137550123e-05, + "loss": 0.0421, "step": 162225 }, { "epoch": 7.57, - "learning_rate": 4.883034081852703e-06, - "loss": 0.0111, + "learning_rate": 1.4906620067714103e-05, + "loss": 0.0123, "step": 162230 }, { "epoch": 7.57, - "learning_rate": 4.882565280577564e-06, - "loss": 0.0146, + "learning_rate": 1.4906151997878083e-05, + "loss": 0.0056, "step": 162235 }, { "epoch": 7.57, - "learning_rate": 4.882096479302424e-06, - "loss": 0.0419, + "learning_rate": 1.4905683928042064e-05, + "loss": 0.0199, "step": 162240 }, { "epoch": 7.57, - "learning_rate": 4.8816276780272844e-06, - "loss": 0.0762, + "learning_rate": 1.4905215858206046e-05, + "loss": 0.0469, "step": 162245 }, { "epoch": 7.57, - "learning_rate": 4.881158876752145e-06, - "loss": 0.0279, + "learning_rate": 1.4904747788370025e-05, + "loss": 0.0503, "step": 162250 }, { "epoch": 7.57, - "learning_rate": 4.880690075477006e-06, - "loss": 0.127, + "learning_rate": 1.4904279718534007e-05, + "loss": 0.0598, "step": 162255 }, { "epoch": 7.57, - "learning_rate": 4.880221274201866e-06, - "loss": 0.067, + "learning_rate": 1.4903811648697985e-05, + "loss": 0.0444, "step": 162260 }, { "epoch": 7.57, - "learning_rate": 4.879752472926727e-06, - "loss": 0.084, + "learning_rate": 1.4903343578861967e-05, + "loss": 0.0938, "step": 162265 }, { "epoch": 7.57, - "learning_rate": 4.8792836716515875e-06, - "loss": 0.2722, + "learning_rate": 1.4902875509025946e-05, + "loss": 0.1794, "step": 162270 }, { "epoch": 7.57, - "learning_rate": 4.878814870376447e-06, - "loss": 0.0229, + "learning_rate": 1.4902407439189928e-05, + "loss": 0.0051, "step": 162275 }, { "epoch": 7.57, - "learning_rate": 4.878346069101308e-06, - "loss": 0.0188, + "learning_rate": 1.4901939369353908e-05, + "loss": 0.0388, "step": 162280 }, { "epoch": 7.57, - "learning_rate": 4.877877267826169e-06, - "loss": 0.078, + "learning_rate": 1.490147129951789e-05, + "loss": 0.0228, "step": 162285 }, { "epoch": 7.57, - "learning_rate": 4.87740846655103e-06, - "loss": 0.0566, + "learning_rate": 1.4901003229681869e-05, + "loss": 0.0259, "step": 162290 }, { "epoch": 7.57, - "learning_rate": 4.87693966527589e-06, - "loss": 0.0593, + "learning_rate": 1.4900535159845849e-05, + "loss": 0.0426, "step": 162295 }, { "epoch": 7.57, - "learning_rate": 4.8764708640007505e-06, - "loss": 0.0363, + "learning_rate": 1.4900067090009829e-05, + "loss": 0.0484, "step": 162300 }, { "epoch": 7.57, - "learning_rate": 4.876002062725611e-06, - "loss": 0.0468, + "learning_rate": 1.489959902017381e-05, + "loss": 0.0772, "step": 162305 }, { "epoch": 7.57, - "learning_rate": 4.875533261450471e-06, - "loss": 0.0981, + "learning_rate": 1.4899130950337792e-05, + "loss": 0.1419, "step": 162310 }, { "epoch": 7.57, - "learning_rate": 4.875064460175332e-06, - "loss": 0.0778, + "learning_rate": 1.4898662880501772e-05, + "loss": 0.1003, "step": 162315 }, { "epoch": 7.57, - "learning_rate": 4.874595658900193e-06, - "loss": 0.2372, + "learning_rate": 1.4898194810665753e-05, + "loss": 0.1597, "step": 162320 }, { "epoch": 7.57, - "learning_rate": 4.8741268576250535e-06, - "loss": 0.0379, + "learning_rate": 1.4897726740829731e-05, + "loss": 0.0334, "step": 162325 }, { "epoch": 7.57, - "learning_rate": 4.8736580563499135e-06, - "loss": 0.0597, + "learning_rate": 1.4897258670993713e-05, + "loss": 0.0077, "step": 162330 }, { "epoch": 7.57, - "learning_rate": 4.873189255074774e-06, - "loss": 0.0468, + "learning_rate": 1.4896790601157692e-05, + "loss": 0.017, "step": 162335 }, { "epoch": 7.58, - "learning_rate": 4.872720453799635e-06, - "loss": 0.0314, + "learning_rate": 1.4896322531321674e-05, + "loss": 0.0578, "step": 162340 }, { "epoch": 7.58, - "learning_rate": 4.872251652524495e-06, - "loss": 0.0388, + "learning_rate": 1.4895854461485654e-05, + "loss": 0.0542, "step": 162345 }, { "epoch": 7.58, - "learning_rate": 4.871782851249356e-06, - "loss": 0.0341, + "learning_rate": 1.4895386391649635e-05, + "loss": 0.0395, "step": 162350 }, { "epoch": 7.58, - "learning_rate": 4.8713140499742165e-06, - "loss": 0.1452, + "learning_rate": 1.4894918321813613e-05, + "loss": 0.0591, "step": 162355 }, { "epoch": 7.58, - "learning_rate": 4.870845248699077e-06, - "loss": 0.1227, + "learning_rate": 1.4894450251977595e-05, + "loss": 0.0482, "step": 162360 }, { "epoch": 7.58, - "learning_rate": 4.870376447423937e-06, - "loss": 0.1489, + "learning_rate": 1.4893982182141576e-05, + "loss": 0.157, "step": 162365 }, { "epoch": 7.58, - "learning_rate": 4.869907646148798e-06, - "loss": 0.0554, + "learning_rate": 1.4893514112305556e-05, + "loss": 0.2269, "step": 162370 }, { "epoch": 7.58, - "learning_rate": 4.869438844873659e-06, - "loss": 0.0147, + "learning_rate": 1.4893046042469538e-05, + "loss": 0.0037, "step": 162375 }, { "epoch": 7.58, - "learning_rate": 4.868970043598519e-06, - "loss": 0.0335, + "learning_rate": 1.4892577972633518e-05, + "loss": 0.0144, "step": 162380 }, { "epoch": 7.58, - "learning_rate": 4.8685012423233795e-06, - "loss": 0.0244, + "learning_rate": 1.4892109902797499e-05, + "loss": 0.0067, "step": 162385 }, { "epoch": 7.58, - "learning_rate": 4.86803244104824e-06, - "loss": 0.0202, + "learning_rate": 1.4891641832961477e-05, + "loss": 0.0148, "step": 162390 }, { "epoch": 7.58, - "learning_rate": 4.867563639773101e-06, - "loss": 0.0433, + "learning_rate": 1.4891173763125459e-05, + "loss": 0.0248, "step": 162395 }, { "epoch": 7.58, - "learning_rate": 4.867094838497961e-06, - "loss": 0.0809, + "learning_rate": 1.4890705693289439e-05, + "loss": 0.0384, "step": 162400 }, { "epoch": 7.58, - "learning_rate": 4.866626037222822e-06, - "loss": 0.0455, + "learning_rate": 1.489023762345342e-05, + "loss": 0.107, "step": 162405 }, { "epoch": 7.58, - "learning_rate": 4.8661572359476826e-06, - "loss": 0.0841, + "learning_rate": 1.48897695536174e-05, + "loss": 0.059, "step": 162410 }, { "epoch": 7.58, - "learning_rate": 4.8656884346725425e-06, - "loss": 0.0874, + "learning_rate": 1.4889301483781381e-05, + "loss": 0.148, "step": 162415 }, { "epoch": 7.58, - "learning_rate": 4.865219633397403e-06, - "loss": 0.1627, + "learning_rate": 1.488883341394536e-05, + "loss": 0.2095, "step": 162420 }, { "epoch": 7.58, - "learning_rate": 4.864750832122263e-06, - "loss": 0.0367, + "learning_rate": 1.4888365344109341e-05, + "loss": 0.0116, "step": 162425 }, { "epoch": 7.58, - "learning_rate": 4.864282030847125e-06, - "loss": 0.0394, + "learning_rate": 1.4887897274273323e-05, + "loss": 0.0125, "step": 162430 }, { "epoch": 7.58, - "learning_rate": 4.863813229571985e-06, - "loss": 0.0244, + "learning_rate": 1.4887429204437302e-05, + "loss": 0.019, "step": 162435 }, { "epoch": 7.58, - "learning_rate": 4.8633444282968456e-06, - "loss": 0.0362, + "learning_rate": 1.4886961134601284e-05, + "loss": 0.0394, "step": 162440 }, { "epoch": 7.58, - "learning_rate": 4.8628756270217055e-06, - "loss": 0.0535, + "learning_rate": 1.4886493064765264e-05, + "loss": 0.0111, "step": 162445 }, { "epoch": 7.58, - "learning_rate": 4.862406825746566e-06, - "loss": 0.0525, + "learning_rate": 1.4886024994929244e-05, + "loss": 0.0402, "step": 162450 }, { "epoch": 7.58, - "learning_rate": 4.861938024471427e-06, - "loss": 0.061, + "learning_rate": 1.4885556925093223e-05, + "loss": 0.1228, "step": 162455 }, { "epoch": 7.58, - "learning_rate": 4.861469223196287e-06, - "loss": 0.1567, + "learning_rate": 1.4885088855257205e-05, + "loss": 0.0954, "step": 162460 }, { "epoch": 7.58, - "learning_rate": 4.861000421921148e-06, - "loss": 0.099, + "learning_rate": 1.4884620785421185e-05, + "loss": 0.1288, "step": 162465 }, { "epoch": 7.58, - "learning_rate": 4.8605316206460085e-06, - "loss": 0.0766, + "learning_rate": 1.4884152715585166e-05, + "loss": 0.244, "step": 162470 }, { "epoch": 7.58, - "learning_rate": 4.860062819370869e-06, - "loss": 0.0518, + "learning_rate": 1.4883684645749146e-05, + "loss": 0.0105, "step": 162475 }, { "epoch": 7.58, - "learning_rate": 4.859594018095729e-06, - "loss": 0.0267, + "learning_rate": 1.4883216575913128e-05, + "loss": 0.0074, "step": 162480 }, { "epoch": 7.58, - "learning_rate": 4.85912521682059e-06, - "loss": 0.0136, + "learning_rate": 1.4882748506077107e-05, + "loss": 0.011, "step": 162485 }, { "epoch": 7.58, - "learning_rate": 4.858656415545451e-06, - "loss": 0.0103, + "learning_rate": 1.4882280436241087e-05, + "loss": 0.0631, "step": 162490 }, { "epoch": 7.58, - "learning_rate": 4.858187614270311e-06, - "loss": 0.0819, + "learning_rate": 1.4881812366405069e-05, + "loss": 0.0163, "step": 162495 }, { "epoch": 7.58, - "learning_rate": 4.8577188129951715e-06, - "loss": 0.0286, + "learning_rate": 1.4881344296569048e-05, + "loss": 0.0553, "step": 162500 }, { "epoch": 7.58, - "learning_rate": 4.857250011720032e-06, - "loss": 0.1347, + "learning_rate": 1.488087622673303e-05, + "loss": 0.0657, "step": 162505 }, { "epoch": 7.58, - "learning_rate": 4.856781210444893e-06, - "loss": 0.0301, + "learning_rate": 1.488040815689701e-05, + "loss": 0.0674, "step": 162510 }, { "epoch": 7.58, - "learning_rate": 4.856312409169753e-06, - "loss": 0.1853, + "learning_rate": 1.487994008706099e-05, + "loss": 0.1236, "step": 162515 }, { "epoch": 7.58, - "learning_rate": 4.855843607894614e-06, - "loss": 0.1978, + "learning_rate": 1.487947201722497e-05, + "loss": 0.2565, "step": 162520 }, { "epoch": 7.58, - "learning_rate": 4.855374806619475e-06, - "loss": 0.0389, + "learning_rate": 1.4879003947388951e-05, + "loss": 0.0147, "step": 162525 }, { "epoch": 7.58, - "learning_rate": 4.8549060053443345e-06, - "loss": 0.0106, + "learning_rate": 1.487853587755293e-05, + "loss": 0.012, "step": 162530 }, { "epoch": 7.58, - "learning_rate": 4.854437204069195e-06, - "loss": 0.011, + "learning_rate": 1.4878067807716912e-05, + "loss": 0.0275, "step": 162535 }, { "epoch": 7.58, - "learning_rate": 4.853968402794056e-06, - "loss": 0.016, + "learning_rate": 1.4877599737880892e-05, + "loss": 0.0269, "step": 162540 }, { "epoch": 7.58, - "learning_rate": 4.853499601518917e-06, - "loss": 0.024, + "learning_rate": 1.4877131668044872e-05, + "loss": 0.0726, "step": 162545 }, { "epoch": 7.58, - "learning_rate": 4.853030800243777e-06, - "loss": 0.0414, + "learning_rate": 1.4876663598208853e-05, + "loss": 0.0266, "step": 162550 }, { "epoch": 7.59, - "learning_rate": 4.852561998968638e-06, - "loss": 0.0501, + "learning_rate": 1.4876195528372833e-05, + "loss": 0.0929, "step": 162555 }, { "epoch": 7.59, - "learning_rate": 4.852093197693498e-06, - "loss": 0.0807, + "learning_rate": 1.4875727458536815e-05, + "loss": 0.0788, "step": 162560 }, { "epoch": 7.59, - "learning_rate": 4.851624396418358e-06, - "loss": 0.2774, + "learning_rate": 1.4875259388700795e-05, + "loss": 0.0336, "step": 162565 }, { "epoch": 7.59, - "learning_rate": 4.851155595143219e-06, - "loss": 0.2431, + "learning_rate": 1.4874791318864776e-05, + "loss": 0.1656, "step": 162570 }, { "epoch": 7.59, - "learning_rate": 4.85068679386808e-06, - "loss": 0.0596, + "learning_rate": 1.4874323249028756e-05, + "loss": 0.0276, "step": 162575 }, { "epoch": 7.59, - "learning_rate": 4.850217992592941e-06, - "loss": 0.0324, + "learning_rate": 1.4873855179192736e-05, + "loss": 0.0136, "step": 162580 }, { "epoch": 7.59, - "learning_rate": 4.8497491913178006e-06, - "loss": 0.0041, + "learning_rate": 1.4873387109356716e-05, + "loss": 0.0271, "step": 162585 }, { "epoch": 7.59, - "learning_rate": 4.849280390042661e-06, - "loss": 0.0577, + "learning_rate": 1.4872919039520697e-05, + "loss": 0.0463, "step": 162590 }, { "epoch": 7.59, - "learning_rate": 4.848811588767522e-06, - "loss": 0.0321, + "learning_rate": 1.4872450969684677e-05, + "loss": 0.0376, "step": 162595 }, { "epoch": 7.59, - "learning_rate": 4.848342787492382e-06, - "loss": 0.0303, + "learning_rate": 1.4871982899848658e-05, + "loss": 0.0103, "step": 162600 }, { "epoch": 7.59, - "learning_rate": 4.847873986217243e-06, - "loss": 0.0583, + "learning_rate": 1.4871514830012638e-05, + "loss": 0.0553, "step": 162605 }, { "epoch": 7.59, - "learning_rate": 4.847405184942104e-06, - "loss": 0.0687, + "learning_rate": 1.4871046760176618e-05, + "loss": 0.0804, "step": 162610 }, { "epoch": 7.59, - "learning_rate": 4.846936383666964e-06, - "loss": 0.1244, + "learning_rate": 1.48705786903406e-05, + "loss": 0.0766, "step": 162615 }, { "epoch": 7.59, - "learning_rate": 4.846467582391824e-06, - "loss": 0.1197, + "learning_rate": 1.487011062050458e-05, + "loss": 0.2674, "step": 162620 }, { "epoch": 7.59, - "learning_rate": 4.845998781116685e-06, - "loss": 0.0324, + "learning_rate": 1.4869642550668561e-05, + "loss": 0.0196, "step": 162625 }, { "epoch": 7.59, - "learning_rate": 4.845529979841546e-06, - "loss": 0.0206, + "learning_rate": 1.486917448083254e-05, + "loss": 0.0499, "step": 162630 }, { "epoch": 7.59, - "learning_rate": 4.845061178566406e-06, - "loss": 0.0152, + "learning_rate": 1.4868706410996522e-05, + "loss": 0.0059, "step": 162635 }, { "epoch": 7.59, - "learning_rate": 4.844592377291267e-06, - "loss": 0.0219, + "learning_rate": 1.48682383411605e-05, + "loss": 0.0661, "step": 162640 }, { "epoch": 7.59, - "learning_rate": 4.844123576016127e-06, - "loss": 0.0202, + "learning_rate": 1.4867770271324482e-05, + "loss": 0.0586, "step": 162645 }, { "epoch": 7.59, - "learning_rate": 4.843654774740988e-06, - "loss": 0.036, + "learning_rate": 1.4867302201488462e-05, + "loss": 0.0461, "step": 162650 }, { "epoch": 7.59, - "learning_rate": 4.843185973465848e-06, - "loss": 0.0339, + "learning_rate": 1.4866834131652443e-05, + "loss": 0.0646, "step": 162655 }, { "epoch": 7.59, - "learning_rate": 4.842717172190709e-06, - "loss": 0.1013, + "learning_rate": 1.4866366061816423e-05, + "loss": 0.055, "step": 162660 }, { "epoch": 7.59, - "learning_rate": 4.84224837091557e-06, - "loss": 0.1164, + "learning_rate": 1.4865897991980404e-05, + "loss": 0.2058, "step": 162665 }, { "epoch": 7.59, - "learning_rate": 4.84177956964043e-06, - "loss": 0.0849, + "learning_rate": 1.4865429922144384e-05, + "loss": 0.1997, "step": 162670 }, { "epoch": 7.59, - "learning_rate": 4.84131076836529e-06, - "loss": 0.0151, + "learning_rate": 1.4864961852308364e-05, + "loss": 0.0358, "step": 162675 }, { "epoch": 7.59, - "learning_rate": 4.840841967090151e-06, - "loss": 0.0056, + "learning_rate": 1.4864493782472346e-05, + "loss": 0.0135, "step": 162680 }, { "epoch": 7.59, - "learning_rate": 4.840373165815012e-06, - "loss": 0.051, + "learning_rate": 1.4864025712636325e-05, + "loss": 0.0733, "step": 162685 }, { "epoch": 7.59, - "learning_rate": 4.839904364539872e-06, - "loss": 0.0593, + "learning_rate": 1.4863557642800307e-05, + "loss": 0.0171, "step": 162690 }, { "epoch": 7.59, - "learning_rate": 4.839435563264733e-06, - "loss": 0.03, + "learning_rate": 1.4863089572964287e-05, + "loss": 0.0908, "step": 162695 }, { "epoch": 7.59, - "learning_rate": 4.8389667619895934e-06, - "loss": 0.0413, + "learning_rate": 1.4862621503128268e-05, + "loss": 0.0317, "step": 162700 }, { "epoch": 7.59, - "learning_rate": 4.838497960714453e-06, - "loss": 0.0313, + "learning_rate": 1.4862153433292246e-05, + "loss": 0.0945, "step": 162705 }, { "epoch": 7.59, - "learning_rate": 4.838029159439314e-06, - "loss": 0.0928, + "learning_rate": 1.4861685363456228e-05, + "loss": 0.0761, "step": 162710 }, { "epoch": 7.59, - "learning_rate": 4.837560358164174e-06, - "loss": 0.1043, + "learning_rate": 1.4861217293620208e-05, + "loss": 0.1802, "step": 162715 }, { "epoch": 7.59, - "learning_rate": 4.837091556889036e-06, - "loss": 0.1097, + "learning_rate": 1.486074922378419e-05, + "loss": 0.1257, "step": 162720 }, { "epoch": 7.59, - "learning_rate": 4.836622755613896e-06, - "loss": 0.0272, + "learning_rate": 1.4860281153948169e-05, + "loss": 0.0018, "step": 162725 }, { "epoch": 7.59, - "learning_rate": 4.8361539543387564e-06, - "loss": 0.0262, + "learning_rate": 1.485981308411215e-05, + "loss": 0.0036, "step": 162730 }, { "epoch": 7.59, - "learning_rate": 4.835685153063616e-06, - "loss": 0.0477, + "learning_rate": 1.485934501427613e-05, + "loss": 0.034, "step": 162735 }, { "epoch": 7.59, - "learning_rate": 4.835216351788477e-06, - "loss": 0.0398, + "learning_rate": 1.485887694444011e-05, + "loss": 0.0405, "step": 162740 }, { "epoch": 7.59, - "learning_rate": 4.834747550513338e-06, - "loss": 0.0409, + "learning_rate": 1.4858408874604092e-05, + "loss": 0.0511, "step": 162745 }, { "epoch": 7.59, - "learning_rate": 4.834278749238198e-06, - "loss": 0.0588, + "learning_rate": 1.4857940804768072e-05, + "loss": 0.0278, "step": 162750 }, { "epoch": 7.59, - "learning_rate": 4.833809947963059e-06, - "loss": 0.0448, + "learning_rate": 1.4857472734932053e-05, + "loss": 0.0703, "step": 162755 }, { "epoch": 7.59, - "learning_rate": 4.8333411466879194e-06, - "loss": 0.1243, + "learning_rate": 1.4857004665096033e-05, + "loss": 0.0554, "step": 162760 }, { "epoch": 7.59, - "learning_rate": 4.83287234541278e-06, - "loss": 0.1399, + "learning_rate": 1.4856536595260013e-05, + "loss": 0.2158, "step": 162765 }, { "epoch": 7.6, - "learning_rate": 4.83240354413764e-06, - "loss": 0.1794, + "learning_rate": 1.4856068525423993e-05, + "loss": 0.2603, "step": 162770 }, { "epoch": 7.6, - "learning_rate": 4.831934742862501e-06, - "loss": 0.0631, + "learning_rate": 1.4855600455587974e-05, + "loss": 0.0351, "step": 162775 }, { "epoch": 7.6, - "learning_rate": 4.831465941587362e-06, - "loss": 0.0095, + "learning_rate": 1.4855132385751954e-05, + "loss": 0.024, "step": 162780 }, { "epoch": 7.6, - "learning_rate": 4.830997140312222e-06, - "loss": 0.0151, + "learning_rate": 1.4854664315915935e-05, + "loss": 0.0422, "step": 162785 }, { "epoch": 7.6, - "learning_rate": 4.830528339037082e-06, - "loss": 0.015, + "learning_rate": 1.4854196246079917e-05, + "loss": 0.0639, "step": 162790 }, { "epoch": 7.6, - "learning_rate": 4.830059537761943e-06, - "loss": 0.0209, + "learning_rate": 1.4853728176243897e-05, + "loss": 0.0135, "step": 162795 }, { "epoch": 7.6, - "learning_rate": 4.829590736486804e-06, - "loss": 0.077, + "learning_rate": 1.4853260106407877e-05, + "loss": 0.0276, "step": 162800 }, { "epoch": 7.6, - "learning_rate": 4.829121935211664e-06, - "loss": 0.1114, + "learning_rate": 1.4852792036571856e-05, + "loss": 0.095, "step": 162805 }, { "epoch": 7.6, - "learning_rate": 4.828653133936525e-06, - "loss": 0.1274, + "learning_rate": 1.4852323966735838e-05, + "loss": 0.0945, "step": 162810 }, { "epoch": 7.6, - "learning_rate": 4.8281843326613855e-06, - "loss": 0.0624, + "learning_rate": 1.4851855896899818e-05, + "loss": 0.113, "step": 162815 }, { "epoch": 7.6, - "learning_rate": 4.827715531386245e-06, - "loss": 0.0818, + "learning_rate": 1.48513878270638e-05, + "loss": 0.2764, "step": 162820 }, { "epoch": 7.6, - "learning_rate": 4.827246730111106e-06, - "loss": 0.0194, + "learning_rate": 1.4850919757227779e-05, + "loss": 0.0722, "step": 162825 }, { "epoch": 7.6, - "learning_rate": 4.826777928835967e-06, - "loss": 0.0022, + "learning_rate": 1.4850451687391759e-05, + "loss": 0.0168, "step": 162830 }, { "epoch": 7.6, - "learning_rate": 4.826309127560828e-06, - "loss": 0.0572, + "learning_rate": 1.4849983617555739e-05, + "loss": 0.0055, "step": 162835 }, { "epoch": 7.6, - "learning_rate": 4.825840326285688e-06, - "loss": 0.0714, + "learning_rate": 1.484951554771972e-05, + "loss": 0.0303, "step": 162840 }, { "epoch": 7.6, - "learning_rate": 4.8253715250105485e-06, - "loss": 0.0202, + "learning_rate": 1.48490474778837e-05, + "loss": 0.0452, "step": 162845 }, { "epoch": 7.6, - "learning_rate": 4.824902723735409e-06, - "loss": 0.0409, + "learning_rate": 1.4848579408047681e-05, + "loss": 0.0322, "step": 162850 }, { "epoch": 7.6, - "learning_rate": 4.824433922460269e-06, - "loss": 0.0574, + "learning_rate": 1.4848111338211663e-05, + "loss": 0.0364, "step": 162855 }, { "epoch": 7.6, - "learning_rate": 4.82396512118513e-06, - "loss": 0.0542, + "learning_rate": 1.4847643268375641e-05, + "loss": 0.0612, "step": 162860 }, { "epoch": 7.6, - "learning_rate": 4.823496319909991e-06, - "loss": 0.1829, + "learning_rate": 1.4847175198539623e-05, + "loss": 0.2313, "step": 162865 }, { "epoch": 7.6, - "learning_rate": 4.8230275186348515e-06, - "loss": 0.2426, + "learning_rate": 1.4846707128703602e-05, + "loss": 0.1469, "step": 162870 }, { "epoch": 7.6, - "learning_rate": 4.8225587173597115e-06, - "loss": 0.0417, + "learning_rate": 1.4846239058867584e-05, + "loss": 0.008, "step": 162875 }, { "epoch": 7.6, - "learning_rate": 4.822089916084572e-06, - "loss": 0.0043, + "learning_rate": 1.4845770989031564e-05, + "loss": 0.0177, "step": 162880 }, { "epoch": 7.6, - "learning_rate": 4.821621114809433e-06, - "loss": 0.0213, + "learning_rate": 1.4845302919195545e-05, + "loss": 0.0458, "step": 162885 }, { "epoch": 7.6, - "learning_rate": 4.821152313534293e-06, - "loss": 0.0544, + "learning_rate": 1.4844834849359525e-05, + "loss": 0.0194, "step": 162890 }, { "epoch": 7.6, - "learning_rate": 4.820683512259154e-06, - "loss": 0.0118, + "learning_rate": 1.4844366779523505e-05, + "loss": 0.0886, "step": 162895 }, { "epoch": 7.6, - "learning_rate": 4.8202147109840145e-06, - "loss": 0.0924, + "learning_rate": 1.4843898709687485e-05, + "loss": 0.0333, "step": 162900 }, { "epoch": 7.6, - "learning_rate": 4.819745909708875e-06, - "loss": 0.0837, + "learning_rate": 1.4843430639851466e-05, + "loss": 0.1157, "step": 162905 }, { "epoch": 7.6, - "learning_rate": 4.819277108433735e-06, - "loss": 0.1101, + "learning_rate": 1.4842962570015446e-05, + "loss": 0.0667, "step": 162910 }, { "epoch": 7.6, - "learning_rate": 4.818808307158596e-06, - "loss": 0.1182, + "learning_rate": 1.4842494500179428e-05, + "loss": 0.1186, "step": 162915 }, { "epoch": 7.6, - "learning_rate": 4.818339505883457e-06, - "loss": 0.1757, + "learning_rate": 1.4842026430343409e-05, + "loss": 0.2178, "step": 162920 }, { "epoch": 7.6, - "learning_rate": 4.817870704608317e-06, - "loss": 0.0023, + "learning_rate": 1.4841558360507387e-05, + "loss": 0.041, "step": 162925 }, { "epoch": 7.6, - "learning_rate": 4.8174019033331775e-06, - "loss": 0.0481, + "learning_rate": 1.4841090290671369e-05, + "loss": 0.0114, "step": 162930 }, { "epoch": 7.6, - "learning_rate": 4.816933102058038e-06, - "loss": 0.0503, + "learning_rate": 1.4840622220835349e-05, + "loss": 0.024, "step": 162935 }, { "epoch": 7.6, - "learning_rate": 4.816464300782899e-06, - "loss": 0.0302, + "learning_rate": 1.484015415099933e-05, + "loss": 0.0309, "step": 162940 }, { "epoch": 7.6, - "learning_rate": 4.815995499507759e-06, - "loss": 0.0532, + "learning_rate": 1.483968608116331e-05, + "loss": 0.0514, "step": 162945 }, { "epoch": 7.6, - "learning_rate": 4.815526698232619e-06, - "loss": 0.0359, + "learning_rate": 1.4839218011327291e-05, + "loss": 0.0483, "step": 162950 }, { "epoch": 7.6, - "learning_rate": 4.8150578969574806e-06, - "loss": 0.078, + "learning_rate": 1.483874994149127e-05, + "loss": 0.046, "step": 162955 }, { "epoch": 7.6, - "learning_rate": 4.8145890956823405e-06, - "loss": 0.0698, + "learning_rate": 1.4838281871655251e-05, + "loss": 0.0484, "step": 162960 }, { "epoch": 7.6, - "learning_rate": 4.814120294407201e-06, - "loss": 0.1181, + "learning_rate": 1.483781380181923e-05, + "loss": 0.0968, "step": 162965 }, { "epoch": 7.6, - "learning_rate": 4.813651493132061e-06, - "loss": 0.1155, + "learning_rate": 1.4837345731983212e-05, + "loss": 0.243, "step": 162970 }, { "epoch": 7.6, - "learning_rate": 4.813182691856923e-06, - "loss": 0.0462, + "learning_rate": 1.4836877662147194e-05, + "loss": 0.0611, "step": 162975 }, { "epoch": 7.6, - "learning_rate": 4.812713890581783e-06, - "loss": 0.0348, + "learning_rate": 1.4836409592311174e-05, + "loss": 0.0033, "step": 162980 }, { "epoch": 7.61, - "learning_rate": 4.8122450893066435e-06, - "loss": 0.0721, + "learning_rate": 1.4835941522475155e-05, + "loss": 0.0197, "step": 162985 }, { "epoch": 7.61, - "learning_rate": 4.8117762880315035e-06, - "loss": 0.0559, + "learning_rate": 1.4835473452639133e-05, + "loss": 0.0308, "step": 162990 }, { "epoch": 7.61, - "learning_rate": 4.811307486756364e-06, - "loss": 0.0287, + "learning_rate": 1.4835005382803115e-05, + "loss": 0.0302, "step": 162995 }, { "epoch": 7.61, - "learning_rate": 4.810838685481225e-06, - "loss": 0.0587, + "learning_rate": 1.4834537312967095e-05, + "loss": 0.0318, "step": 163000 }, { "epoch": 7.61, - "learning_rate": 4.810369884206085e-06, - "loss": 0.0867, + "learning_rate": 1.4834069243131076e-05, + "loss": 0.0382, "step": 163005 }, { "epoch": 7.61, - "learning_rate": 4.809901082930946e-06, - "loss": 0.1212, + "learning_rate": 1.4833601173295056e-05, + "loss": 0.0763, "step": 163010 }, { "epoch": 7.61, - "learning_rate": 4.8094322816558065e-06, - "loss": 0.1705, + "learning_rate": 1.4833133103459037e-05, + "loss": 0.0446, "step": 163015 }, { "epoch": 7.61, - "learning_rate": 4.808963480380667e-06, - "loss": 0.1298, + "learning_rate": 1.4832665033623016e-05, + "loss": 0.2753, "step": 163020 }, { "epoch": 7.61, - "learning_rate": 4.808494679105527e-06, - "loss": 0.035, + "learning_rate": 1.4832196963786997e-05, + "loss": 0.016, "step": 163025 }, { "epoch": 7.61, - "learning_rate": 4.808025877830388e-06, - "loss": 0.0025, + "learning_rate": 1.4831728893950977e-05, + "loss": 0.0118, "step": 163030 }, { "epoch": 7.61, - "learning_rate": 4.807557076555249e-06, - "loss": 0.0679, + "learning_rate": 1.4831260824114958e-05, + "loss": 0.0386, "step": 163035 }, { "epoch": 7.61, - "learning_rate": 4.807088275280109e-06, - "loss": 0.0416, + "learning_rate": 1.483079275427894e-05, + "loss": 0.0204, "step": 163040 }, { "epoch": 7.61, - "learning_rate": 4.8066194740049695e-06, - "loss": 0.068, + "learning_rate": 1.483032468444292e-05, + "loss": 0.0771, "step": 163045 }, { "epoch": 7.61, - "learning_rate": 4.80615067272983e-06, - "loss": 0.0303, + "learning_rate": 1.48298566146069e-05, + "loss": 0.0433, "step": 163050 }, { "epoch": 7.61, - "learning_rate": 4.805681871454691e-06, - "loss": 0.0656, + "learning_rate": 1.482938854477088e-05, + "loss": 0.0716, "step": 163055 }, { "epoch": 7.61, - "learning_rate": 4.805213070179551e-06, - "loss": 0.0472, + "learning_rate": 1.4828920474934861e-05, + "loss": 0.0852, "step": 163060 }, { "epoch": 7.61, - "learning_rate": 4.804744268904412e-06, - "loss": 0.1321, + "learning_rate": 1.482845240509884e-05, + "loss": 0.1583, "step": 163065 }, { "epoch": 7.61, - "learning_rate": 4.804275467629273e-06, - "loss": 0.1455, + "learning_rate": 1.4827984335262822e-05, + "loss": 0.2036, "step": 163070 }, { "epoch": 7.61, - "learning_rate": 4.8038066663541325e-06, - "loss": 0.0217, + "learning_rate": 1.4827516265426802e-05, + "loss": 0.0017, "step": 163075 }, { "epoch": 7.61, - "learning_rate": 4.803337865078993e-06, - "loss": 0.0132, + "learning_rate": 1.4827048195590784e-05, + "loss": 0.0241, "step": 163080 }, { "epoch": 7.61, - "learning_rate": 4.802869063803854e-06, - "loss": 0.0318, + "learning_rate": 1.4826580125754762e-05, + "loss": 0.0352, "step": 163085 }, { "epoch": 7.61, - "learning_rate": 4.802400262528715e-06, - "loss": 0.1067, + "learning_rate": 1.4826112055918743e-05, + "loss": 0.0086, "step": 163090 }, { "epoch": 7.61, - "learning_rate": 4.801931461253575e-06, - "loss": 0.0511, + "learning_rate": 1.4825643986082723e-05, + "loss": 0.0575, "step": 163095 }, { "epoch": 7.61, - "learning_rate": 4.8014626599784356e-06, - "loss": 0.0411, + "learning_rate": 1.4825175916246705e-05, + "loss": 0.0266, "step": 163100 }, { "epoch": 7.61, - "learning_rate": 4.800993858703296e-06, - "loss": 0.0341, + "learning_rate": 1.4824707846410686e-05, + "loss": 0.0408, "step": 163105 }, { "epoch": 7.61, - "learning_rate": 4.800525057428156e-06, - "loss": 0.1155, + "learning_rate": 1.4824239776574666e-05, + "loss": 0.0537, "step": 163110 }, { "epoch": 7.61, - "learning_rate": 4.800056256153017e-06, - "loss": 0.1383, + "learning_rate": 1.4823771706738646e-05, + "loss": 0.1593, "step": 163115 }, { "epoch": 7.61, - "learning_rate": 4.799587454877878e-06, - "loss": 0.1017, + "learning_rate": 1.4823303636902625e-05, + "loss": 0.1423, "step": 163120 }, { "epoch": 7.61, - "learning_rate": 4.799118653602739e-06, - "loss": 0.008, + "learning_rate": 1.4822835567066607e-05, + "loss": 0.0191, "step": 163125 }, { "epoch": 7.61, - "learning_rate": 4.7986498523275986e-06, - "loss": 0.0112, + "learning_rate": 1.4822367497230587e-05, + "loss": 0.0419, "step": 163130 }, { "epoch": 7.61, - "learning_rate": 4.798181051052459e-06, - "loss": 0.0244, + "learning_rate": 1.4821899427394568e-05, + "loss": 0.0291, "step": 163135 }, { "epoch": 7.61, - "learning_rate": 4.79771224977732e-06, - "loss": 0.031, + "learning_rate": 1.4821431357558548e-05, + "loss": 0.0301, "step": 163140 }, { "epoch": 7.61, - "learning_rate": 4.79724344850218e-06, - "loss": 0.0649, + "learning_rate": 1.4820963287722528e-05, + "loss": 0.0232, "step": 163145 }, { "epoch": 7.61, - "learning_rate": 4.796774647227041e-06, - "loss": 0.0712, + "learning_rate": 1.4820495217886508e-05, + "loss": 0.0566, "step": 163150 }, { "epoch": 7.61, - "learning_rate": 4.796305845951902e-06, - "loss": 0.0673, + "learning_rate": 1.482002714805049e-05, + "loss": 0.0829, "step": 163155 }, { "epoch": 7.61, - "learning_rate": 4.795837044676762e-06, - "loss": 0.1025, + "learning_rate": 1.481955907821447e-05, + "loss": 0.05, "step": 163160 }, { "epoch": 7.61, - "learning_rate": 4.795368243401622e-06, - "loss": 0.2194, + "learning_rate": 1.481909100837845e-05, + "loss": 0.2518, "step": 163165 }, { "epoch": 7.61, - "learning_rate": 4.794899442126483e-06, - "loss": 0.1746, + "learning_rate": 1.4818622938542432e-05, + "loss": 0.1431, "step": 163170 }, { "epoch": 7.61, - "learning_rate": 4.794430640851344e-06, - "loss": 0.0229, + "learning_rate": 1.4818154868706412e-05, + "loss": 0.0373, "step": 163175 }, { "epoch": 7.61, - "learning_rate": 4.793961839576204e-06, - "loss": 0.0032, + "learning_rate": 1.4817686798870392e-05, + "loss": 0.0055, "step": 163180 }, { "epoch": 7.61, - "learning_rate": 4.793493038301065e-06, - "loss": 0.0522, + "learning_rate": 1.4817218729034372e-05, + "loss": 0.0089, "step": 163185 }, { "epoch": 7.61, - "learning_rate": 4.793024237025925e-06, - "loss": 0.0039, + "learning_rate": 1.4816750659198353e-05, + "loss": 0.0071, "step": 163190 }, { "epoch": 7.61, - "learning_rate": 4.792555435750786e-06, - "loss": 0.0226, + "learning_rate": 1.4816282589362333e-05, + "loss": 0.031, "step": 163195 }, { "epoch": 7.62, - "learning_rate": 4.792086634475646e-06, - "loss": 0.0482, + "learning_rate": 1.4815814519526314e-05, + "loss": 0.0685, "step": 163200 }, { "epoch": 7.62, - "learning_rate": 4.791617833200506e-06, - "loss": 0.0651, + "learning_rate": 1.4815346449690294e-05, + "loss": 0.1915, "step": 163205 }, { "epoch": 7.62, - "learning_rate": 4.791149031925368e-06, - "loss": 0.0783, + "learning_rate": 1.4814878379854274e-05, + "loss": 0.0956, "step": 163210 }, { "epoch": 7.62, - "learning_rate": 4.790680230650228e-06, - "loss": 0.3574, + "learning_rate": 1.4814410310018254e-05, + "loss": 0.1421, "step": 163215 }, { "epoch": 7.62, - "learning_rate": 4.790211429375088e-06, - "loss": 0.1407, + "learning_rate": 1.4813942240182235e-05, + "loss": 0.1733, "step": 163220 }, { "epoch": 7.62, - "learning_rate": 4.789742628099948e-06, - "loss": 0.0551, + "learning_rate": 1.4813474170346217e-05, + "loss": 0.0212, "step": 163225 }, { "epoch": 7.62, - "learning_rate": 4.78927382682481e-06, - "loss": 0.0445, + "learning_rate": 1.4813006100510197e-05, + "loss": 0.0069, "step": 163230 }, { "epoch": 7.62, - "learning_rate": 4.78880502554967e-06, - "loss": 0.0155, + "learning_rate": 1.4812538030674178e-05, + "loss": 0.0232, "step": 163235 }, { "epoch": 7.62, - "learning_rate": 4.78833622427453e-06, - "loss": 0.0119, + "learning_rate": 1.4812069960838156e-05, + "loss": 0.0412, "step": 163240 }, { "epoch": 7.62, - "learning_rate": 4.787867422999391e-06, - "loss": 0.0316, + "learning_rate": 1.4811601891002138e-05, + "loss": 0.0138, "step": 163245 }, { "epoch": 7.62, - "learning_rate": 4.787398621724251e-06, - "loss": 0.0723, + "learning_rate": 1.4811133821166118e-05, + "loss": 0.0644, "step": 163250 }, { "epoch": 7.62, - "learning_rate": 4.786929820449112e-06, - "loss": 0.0688, + "learning_rate": 1.48106657513301e-05, + "loss": 0.0325, "step": 163255 }, { "epoch": 7.62, - "learning_rate": 4.786461019173972e-06, - "loss": 0.0659, + "learning_rate": 1.4810197681494079e-05, + "loss": 0.0596, "step": 163260 }, { "epoch": 7.62, - "learning_rate": 4.785992217898833e-06, - "loss": 0.0923, + "learning_rate": 1.480972961165806e-05, + "loss": 0.1237, "step": 163265 }, { "epoch": 7.62, - "learning_rate": 4.785523416623694e-06, - "loss": 0.2703, + "learning_rate": 1.4809261541822039e-05, + "loss": 0.1882, "step": 163270 }, { "epoch": 7.62, - "learning_rate": 4.7850546153485536e-06, - "loss": 0.004, + "learning_rate": 1.480879347198602e-05, + "loss": 0.0323, "step": 163275 }, { "epoch": 7.62, - "learning_rate": 4.784585814073414e-06, - "loss": 0.0279, + "learning_rate": 1.4808325402150002e-05, + "loss": 0.0114, "step": 163280 }, { "epoch": 7.62, - "learning_rate": 4.784117012798275e-06, - "loss": 0.0037, + "learning_rate": 1.4807857332313982e-05, + "loss": 0.0358, "step": 163285 }, { "epoch": 7.62, - "learning_rate": 4.783648211523136e-06, - "loss": 0.0884, + "learning_rate": 1.4807389262477963e-05, + "loss": 0.0687, "step": 163290 }, { "epoch": 7.62, - "learning_rate": 4.783179410247996e-06, - "loss": 0.0636, + "learning_rate": 1.4806921192641943e-05, + "loss": 0.0398, "step": 163295 }, { "epoch": 7.62, - "learning_rate": 4.782710608972857e-06, - "loss": 0.0631, + "learning_rate": 1.4806453122805924e-05, + "loss": 0.0557, "step": 163300 }, { "epoch": 7.62, - "learning_rate": 4.782241807697717e-06, - "loss": 0.0316, + "learning_rate": 1.4805985052969902e-05, + "loss": 0.0561, "step": 163305 }, { "epoch": 7.62, - "learning_rate": 4.781773006422578e-06, - "loss": 0.0625, + "learning_rate": 1.4805516983133884e-05, + "loss": 0.0784, "step": 163310 }, { "epoch": 7.62, - "learning_rate": 4.781304205147438e-06, - "loss": 0.1421, + "learning_rate": 1.4805048913297864e-05, + "loss": 0.1618, "step": 163315 }, { "epoch": 7.62, - "learning_rate": 4.780835403872299e-06, - "loss": 0.2979, + "learning_rate": 1.4804580843461845e-05, + "loss": 0.1754, "step": 163320 }, { "epoch": 7.62, - "learning_rate": 4.78036660259716e-06, - "loss": 0.0114, + "learning_rate": 1.4804112773625825e-05, + "loss": 0.014, "step": 163325 }, { "epoch": 7.62, - "learning_rate": 4.77989780132202e-06, - "loss": 0.0007, + "learning_rate": 1.4803644703789807e-05, + "loss": 0.0265, "step": 163330 }, { "epoch": 7.62, - "learning_rate": 4.77942900004688e-06, - "loss": 0.0078, + "learning_rate": 1.4803176633953785e-05, + "loss": 0.0077, "step": 163335 }, { "epoch": 7.62, - "learning_rate": 4.778960198771741e-06, - "loss": 0.0304, + "learning_rate": 1.4802708564117766e-05, + "loss": 0.0179, "step": 163340 }, { "epoch": 7.62, - "learning_rate": 4.778491397496602e-06, - "loss": 0.0657, + "learning_rate": 1.4802240494281748e-05, + "loss": 0.0108, "step": 163345 }, { "epoch": 7.62, - "learning_rate": 4.778022596221462e-06, - "loss": 0.0529, + "learning_rate": 1.4801772424445728e-05, + "loss": 0.0828, "step": 163350 }, { "epoch": 7.62, - "learning_rate": 4.777553794946323e-06, - "loss": 0.059, + "learning_rate": 1.4801304354609709e-05, + "loss": 0.0417, "step": 163355 }, { "epoch": 7.62, - "learning_rate": 4.7770849936711835e-06, - "loss": 0.071, + "learning_rate": 1.4800836284773689e-05, + "loss": 0.072, "step": 163360 }, { "epoch": 7.62, - "learning_rate": 4.776616192396043e-06, - "loss": 0.138, + "learning_rate": 1.4800368214937669e-05, + "loss": 0.1636, "step": 163365 }, { "epoch": 7.62, - "learning_rate": 4.776147391120904e-06, - "loss": 0.0807, + "learning_rate": 1.4799900145101649e-05, + "loss": 0.2012, "step": 163370 }, { "epoch": 7.62, - "learning_rate": 4.775678589845765e-06, - "loss": 0.01, + "learning_rate": 1.479943207526563e-05, + "loss": 0.0053, "step": 163375 }, { "epoch": 7.62, - "learning_rate": 4.775209788570626e-06, - "loss": 0.0298, + "learning_rate": 1.479896400542961e-05, + "loss": 0.0304, "step": 163380 }, { "epoch": 7.62, - "learning_rate": 4.774740987295486e-06, - "loss": 0.0031, + "learning_rate": 1.4798495935593591e-05, + "loss": 0.0079, "step": 163385 }, { "epoch": 7.62, - "learning_rate": 4.7742721860203464e-06, - "loss": 0.0171, + "learning_rate": 1.4798027865757571e-05, + "loss": 0.0298, "step": 163390 }, { "epoch": 7.62, - "learning_rate": 4.773803384745207e-06, - "loss": 0.0231, + "learning_rate": 1.4797559795921553e-05, + "loss": 0.0262, "step": 163395 }, { "epoch": 7.62, - "learning_rate": 4.773334583470067e-06, - "loss": 0.052, + "learning_rate": 1.4797091726085531e-05, + "loss": 0.0144, "step": 163400 }, { "epoch": 7.62, - "learning_rate": 4.772865782194928e-06, - "loss": 0.0311, + "learning_rate": 1.4796623656249512e-05, + "loss": 0.0294, "step": 163405 }, { "epoch": 7.62, - "learning_rate": 4.772396980919789e-06, - "loss": 0.2027, + "learning_rate": 1.4796155586413494e-05, + "loss": 0.1678, "step": 163410 }, { "epoch": 7.63, - "learning_rate": 4.7719281796446495e-06, - "loss": 0.0992, + "learning_rate": 1.4795687516577474e-05, + "loss": 0.072, "step": 163415 }, { "epoch": 7.63, - "learning_rate": 4.7714593783695094e-06, - "loss": 0.1105, + "learning_rate": 1.4795219446741455e-05, + "loss": 0.185, "step": 163420 }, { "epoch": 7.63, - "learning_rate": 4.77099057709437e-06, - "loss": 0.0222, + "learning_rate": 1.4794751376905435e-05, + "loss": 0.3412, "step": 163425 }, { "epoch": 7.63, - "learning_rate": 4.770521775819231e-06, - "loss": 0.0018, + "learning_rate": 1.4794283307069415e-05, + "loss": 0.0091, "step": 163430 }, { "epoch": 7.63, - "learning_rate": 4.770052974544091e-06, - "loss": 0.0075, + "learning_rate": 1.4793815237233395e-05, + "loss": 0.0497, "step": 163435 }, { "epoch": 7.63, - "learning_rate": 4.769584173268952e-06, - "loss": 0.0422, + "learning_rate": 1.4793347167397376e-05, + "loss": 0.0857, "step": 163440 }, { "epoch": 7.63, - "learning_rate": 4.7691153719938125e-06, - "loss": 0.0428, + "learning_rate": 1.4792879097561356e-05, + "loss": 0.034, "step": 163445 }, { "epoch": 7.63, - "learning_rate": 4.768646570718673e-06, - "loss": 0.0295, + "learning_rate": 1.4792411027725338e-05, + "loss": 0.0612, "step": 163450 }, { "epoch": 7.63, - "learning_rate": 4.768177769443533e-06, - "loss": 0.0824, + "learning_rate": 1.4791942957889317e-05, + "loss": 0.116, "step": 163455 }, { "epoch": 7.63, - "learning_rate": 4.767708968168394e-06, - "loss": 0.0658, + "learning_rate": 1.4791474888053297e-05, + "loss": 0.0495, "step": 163460 }, { "epoch": 7.63, - "learning_rate": 4.767240166893255e-06, - "loss": 0.2444, + "learning_rate": 1.4791006818217279e-05, + "loss": 0.1735, "step": 163465 }, { "epoch": 7.63, - "learning_rate": 4.766771365618115e-06, - "loss": 0.2416, + "learning_rate": 1.4790538748381258e-05, + "loss": 0.1571, "step": 163470 }, { "epoch": 7.63, - "learning_rate": 4.7663025643429755e-06, - "loss": 0.0362, + "learning_rate": 1.479007067854524e-05, + "loss": 0.0347, "step": 163475 }, { "epoch": 7.63, - "learning_rate": 4.765833763067836e-06, - "loss": 0.0172, + "learning_rate": 1.478960260870922e-05, + "loss": 0.0375, "step": 163480 }, { "epoch": 7.63, - "learning_rate": 4.765364961792697e-06, - "loss": 0.0051, + "learning_rate": 1.4789134538873201e-05, + "loss": 0.0184, "step": 163485 }, { "epoch": 7.63, - "learning_rate": 4.764896160517557e-06, - "loss": 0.0266, + "learning_rate": 1.4788666469037181e-05, + "loss": 0.0216, "step": 163490 }, { "epoch": 7.63, - "learning_rate": 4.764427359242417e-06, - "loss": 0.0196, + "learning_rate": 1.4788198399201161e-05, + "loss": 0.0523, "step": 163495 }, { "epoch": 7.63, - "learning_rate": 4.7639585579672785e-06, - "loss": 0.0446, + "learning_rate": 1.478773032936514e-05, + "loss": 0.0483, "step": 163500 }, { "epoch": 7.63, - "learning_rate": 4.7634897566921385e-06, - "loss": 0.079, + "learning_rate": 1.4787262259529122e-05, + "loss": 0.0785, "step": 163505 }, { "epoch": 7.63, - "learning_rate": 4.763020955416999e-06, - "loss": 0.1361, + "learning_rate": 1.4786794189693102e-05, + "loss": 0.131, "step": 163510 }, { "epoch": 7.63, - "learning_rate": 4.762552154141859e-06, - "loss": 0.0782, + "learning_rate": 1.4786326119857084e-05, + "loss": 0.2943, "step": 163515 }, { "epoch": 7.63, - "learning_rate": 4.762083352866721e-06, - "loss": 0.2041, + "learning_rate": 1.4785858050021063e-05, + "loss": 0.2467, "step": 163520 }, { "epoch": 7.63, - "learning_rate": 4.761614551591581e-06, - "loss": 0.0231, + "learning_rate": 1.4785389980185043e-05, + "loss": 0.0365, "step": 163525 }, { "epoch": 7.63, - "learning_rate": 4.761145750316441e-06, - "loss": 0.0107, + "learning_rate": 1.4784921910349025e-05, + "loss": 0.0179, "step": 163530 }, { "epoch": 7.63, - "learning_rate": 4.7606769490413015e-06, - "loss": 0.0088, + "learning_rate": 1.4784453840513005e-05, + "loss": 0.0139, "step": 163535 }, { "epoch": 7.63, - "learning_rate": 4.760208147766162e-06, - "loss": 0.0129, + "learning_rate": 1.4783985770676986e-05, + "loss": 0.0735, "step": 163540 }, { "epoch": 7.63, - "learning_rate": 4.759739346491023e-06, - "loss": 0.037, + "learning_rate": 1.4783517700840966e-05, + "loss": 0.052, "step": 163545 }, { "epoch": 7.63, - "learning_rate": 4.759270545215883e-06, - "loss": 0.0264, + "learning_rate": 1.4783049631004947e-05, + "loss": 0.0588, "step": 163550 }, { "epoch": 7.63, - "learning_rate": 4.758801743940744e-06, - "loss": 0.0732, + "learning_rate": 1.4782581561168926e-05, + "loss": 0.0685, "step": 163555 }, { "epoch": 7.63, - "learning_rate": 4.7583329426656045e-06, - "loss": 0.081, + "learning_rate": 1.4782113491332907e-05, + "loss": 0.1489, "step": 163560 }, { "epoch": 7.63, - "learning_rate": 4.7578641413904645e-06, - "loss": 0.0745, + "learning_rate": 1.4781645421496887e-05, + "loss": 0.1623, "step": 163565 }, { "epoch": 7.63, - "learning_rate": 4.757395340115325e-06, - "loss": 0.0746, + "learning_rate": 1.4781177351660868e-05, + "loss": 0.1671, "step": 163570 }, { "epoch": 7.63, - "learning_rate": 4.756926538840186e-06, - "loss": 0.0069, + "learning_rate": 1.4780709281824848e-05, + "loss": 0.013, "step": 163575 }, { "epoch": 7.63, - "learning_rate": 4.756457737565047e-06, - "loss": 0.0139, + "learning_rate": 1.478024121198883e-05, + "loss": 0.0149, "step": 163580 }, { "epoch": 7.63, - "learning_rate": 4.755988936289907e-06, - "loss": 0.0186, + "learning_rate": 1.4779773142152811e-05, + "loss": 0.0424, "step": 163585 }, { "epoch": 7.63, - "learning_rate": 4.7555201350147675e-06, - "loss": 0.0206, + "learning_rate": 1.477930507231679e-05, + "loss": 0.0106, "step": 163590 }, { "epoch": 7.63, - "learning_rate": 4.755051333739628e-06, - "loss": 0.07, + "learning_rate": 1.4778837002480771e-05, + "loss": 0.0409, "step": 163595 }, { "epoch": 7.63, - "learning_rate": 4.754582532464488e-06, - "loss": 0.0359, + "learning_rate": 1.477836893264475e-05, + "loss": 0.0068, "step": 163600 }, { "epoch": 7.63, - "learning_rate": 4.754113731189349e-06, - "loss": 0.0699, + "learning_rate": 1.4777900862808732e-05, + "loss": 0.0853, "step": 163605 }, { "epoch": 7.63, - "learning_rate": 4.75364492991421e-06, - "loss": 0.045, + "learning_rate": 1.4777432792972712e-05, + "loss": 0.1149, "step": 163610 }, { "epoch": 7.63, - "learning_rate": 4.7531761286390706e-06, - "loss": 0.1508, + "learning_rate": 1.4776964723136694e-05, + "loss": 0.2472, "step": 163615 }, { "epoch": 7.63, - "learning_rate": 4.7527073273639305e-06, - "loss": 0.2175, + "learning_rate": 1.4776496653300672e-05, + "loss": 0.3247, "step": 163620 }, { "epoch": 7.63, - "learning_rate": 4.752238526088791e-06, - "loss": 0.0032, + "learning_rate": 1.4776028583464653e-05, + "loss": 0.0282, "step": 163625 }, { "epoch": 7.64, - "learning_rate": 4.751769724813652e-06, - "loss": 0.0234, + "learning_rate": 1.4775560513628633e-05, + "loss": 0.0484, "step": 163630 }, { "epoch": 7.64, - "learning_rate": 4.751300923538513e-06, - "loss": 0.0204, + "learning_rate": 1.4775092443792614e-05, + "loss": 0.0135, "step": 163635 }, { "epoch": 7.64, - "learning_rate": 4.750832122263373e-06, - "loss": 0.0208, + "learning_rate": 1.4774624373956594e-05, + "loss": 0.0285, "step": 163640 }, { "epoch": 7.64, - "learning_rate": 4.7503633209882336e-06, - "loss": 0.028, + "learning_rate": 1.4774156304120576e-05, + "loss": 0.0724, "step": 163645 }, { "epoch": 7.64, - "learning_rate": 4.749894519713094e-06, - "loss": 0.0697, + "learning_rate": 1.4773688234284556e-05, + "loss": 0.0439, "step": 163650 }, { "epoch": 7.64, - "learning_rate": 4.749425718437954e-06, - "loss": 0.0202, + "learning_rate": 1.4773220164448535e-05, + "loss": 0.0258, "step": 163655 }, { "epoch": 7.64, - "learning_rate": 4.748956917162815e-06, - "loss": 0.1124, + "learning_rate": 1.4772752094612517e-05, + "loss": 0.0818, "step": 163660 }, { "epoch": 7.64, - "learning_rate": 4.748488115887676e-06, - "loss": 0.2057, + "learning_rate": 1.4772284024776497e-05, + "loss": 0.1675, "step": 163665 }, { "epoch": 7.64, - "learning_rate": 4.748019314612537e-06, - "loss": 0.3194, + "learning_rate": 1.4771815954940478e-05, + "loss": 0.1673, "step": 163670 }, { "epoch": 7.64, - "learning_rate": 4.7475505133373965e-06, - "loss": 0.058, + "learning_rate": 1.4771347885104458e-05, + "loss": 0.0297, "step": 163675 }, { "epoch": 7.64, - "learning_rate": 4.747081712062257e-06, - "loss": 0.0338, + "learning_rate": 1.477087981526844e-05, + "loss": 0.0076, "step": 163680 }, { "epoch": 7.64, - "learning_rate": 4.746612910787118e-06, - "loss": 0.017, + "learning_rate": 1.4770411745432418e-05, + "loss": 0.0435, "step": 163685 }, { "epoch": 7.64, - "learning_rate": 4.746144109511978e-06, - "loss": 0.0031, + "learning_rate": 1.47699436755964e-05, + "loss": 0.012, "step": 163690 }, { "epoch": 7.64, - "learning_rate": 4.745675308236839e-06, - "loss": 0.0681, + "learning_rate": 1.4769475605760379e-05, + "loss": 0.0416, "step": 163695 }, { "epoch": 7.64, - "learning_rate": 4.7452065069617e-06, - "loss": 0.0137, + "learning_rate": 1.476900753592436e-05, + "loss": 0.043, "step": 163700 }, { "epoch": 7.64, - "learning_rate": 4.74473770568656e-06, - "loss": 0.058, + "learning_rate": 1.476853946608834e-05, + "loss": 0.0732, "step": 163705 }, { "epoch": 7.64, - "learning_rate": 4.74426890441142e-06, - "loss": 0.1624, + "learning_rate": 1.4768071396252322e-05, + "loss": 0.0591, "step": 163710 }, { "epoch": 7.64, - "learning_rate": 4.743800103136281e-06, - "loss": 0.1535, + "learning_rate": 1.4767603326416302e-05, + "loss": 0.0572, "step": 163715 }, { "epoch": 7.64, - "learning_rate": 4.743331301861142e-06, - "loss": 0.1966, + "learning_rate": 1.4767135256580282e-05, + "loss": 0.184, "step": 163720 }, { "epoch": 7.64, - "learning_rate": 4.742862500586002e-06, - "loss": 0.0365, + "learning_rate": 1.4766667186744263e-05, + "loss": 0.0606, "step": 163725 }, { "epoch": 7.64, - "learning_rate": 4.742393699310863e-06, - "loss": 0.0368, + "learning_rate": 1.4766199116908243e-05, + "loss": 0.0418, "step": 163730 }, { "epoch": 7.64, - "learning_rate": 4.741924898035723e-06, - "loss": 0.0286, + "learning_rate": 1.4765731047072224e-05, + "loss": 0.045, "step": 163735 }, { "epoch": 7.64, - "learning_rate": 4.741456096760584e-06, - "loss": 0.0659, + "learning_rate": 1.4765262977236204e-05, + "loss": 0.0236, "step": 163740 }, { "epoch": 7.64, - "learning_rate": 4.740987295485444e-06, - "loss": 0.039, + "learning_rate": 1.4764794907400184e-05, + "loss": 0.0149, "step": 163745 }, { "epoch": 7.64, - "learning_rate": 4.740518494210304e-06, - "loss": 0.0169, + "learning_rate": 1.4764326837564164e-05, + "loss": 0.0354, "step": 163750 }, { "epoch": 7.64, - "learning_rate": 4.740049692935166e-06, - "loss": 0.0432, + "learning_rate": 1.4763858767728145e-05, + "loss": 0.0503, "step": 163755 }, { "epoch": 7.64, - "learning_rate": 4.739580891660026e-06, - "loss": 0.0777, + "learning_rate": 1.4763390697892125e-05, + "loss": 0.1204, "step": 163760 }, { "epoch": 7.64, - "learning_rate": 4.739112090384886e-06, - "loss": 0.1571, + "learning_rate": 1.4762922628056107e-05, + "loss": 0.1576, "step": 163765 }, { "epoch": 7.64, - "learning_rate": 4.738643289109746e-06, - "loss": 0.1452, + "learning_rate": 1.4762454558220088e-05, + "loss": 0.2205, "step": 163770 }, { "epoch": 7.64, - "learning_rate": 4.738174487834608e-06, - "loss": 0.0072, + "learning_rate": 1.4761986488384068e-05, + "loss": 0.0127, "step": 163775 }, { "epoch": 7.64, - "learning_rate": 4.737705686559468e-06, - "loss": 0.0061, + "learning_rate": 1.4761518418548048e-05, + "loss": 0.0097, "step": 163780 }, { "epoch": 7.64, - "learning_rate": 4.737236885284328e-06, - "loss": 0.0182, + "learning_rate": 1.4761050348712028e-05, + "loss": 0.0245, "step": 163785 }, { "epoch": 7.64, - "learning_rate": 4.7367680840091886e-06, - "loss": 0.0157, + "learning_rate": 1.476058227887601e-05, + "loss": 0.0187, "step": 163790 }, { "epoch": 7.64, - "learning_rate": 4.736299282734049e-06, - "loss": 0.035, + "learning_rate": 1.4760114209039989e-05, + "loss": 0.0281, "step": 163795 }, { "epoch": 7.64, - "learning_rate": 4.73583048145891e-06, - "loss": 0.0628, + "learning_rate": 1.475964613920397e-05, + "loss": 0.0229, "step": 163800 }, { "epoch": 7.64, - "learning_rate": 4.73536168018377e-06, - "loss": 0.1154, + "learning_rate": 1.475917806936795e-05, + "loss": 0.0719, "step": 163805 }, { "epoch": 7.64, - "learning_rate": 4.734892878908631e-06, - "loss": 0.1044, + "learning_rate": 1.475870999953193e-05, + "loss": 0.0192, "step": 163810 }, { "epoch": 7.64, - "learning_rate": 4.734424077633492e-06, - "loss": 0.1267, + "learning_rate": 1.475824192969591e-05, + "loss": 0.0475, "step": 163815 }, { "epoch": 7.64, - "learning_rate": 4.7339552763583516e-06, - "loss": 0.1958, + "learning_rate": 1.4757773859859891e-05, + "loss": 0.0762, "step": 163820 }, { "epoch": 7.64, - "learning_rate": 4.733486475083212e-06, - "loss": 0.0048, + "learning_rate": 1.4757305790023871e-05, + "loss": 0.0281, "step": 163825 }, { "epoch": 7.64, - "learning_rate": 4.733017673808073e-06, - "loss": 0.0284, + "learning_rate": 1.4756837720187853e-05, + "loss": 0.0501, "step": 163830 }, { "epoch": 7.64, - "learning_rate": 4.732548872532934e-06, - "loss": 0.0011, + "learning_rate": 1.4756369650351834e-05, + "loss": 0.0096, "step": 163835 }, { "epoch": 7.65, - "learning_rate": 4.732080071257794e-06, - "loss": 0.0512, + "learning_rate": 1.4755901580515812e-05, + "loss": 0.0419, "step": 163840 }, { "epoch": 7.65, - "learning_rate": 4.731611269982655e-06, - "loss": 0.0508, + "learning_rate": 1.4755433510679794e-05, + "loss": 0.0597, "step": 163845 }, { "epoch": 7.65, - "learning_rate": 4.731142468707515e-06, - "loss": 0.1059, + "learning_rate": 1.4754965440843774e-05, + "loss": 0.0326, "step": 163850 }, { "epoch": 7.65, - "learning_rate": 4.730673667432375e-06, - "loss": 0.0544, + "learning_rate": 1.4754497371007755e-05, + "loss": 0.0991, "step": 163855 }, { "epoch": 7.65, - "learning_rate": 4.730204866157236e-06, - "loss": 0.0282, + "learning_rate": 1.4754029301171735e-05, + "loss": 0.1892, "step": 163860 }, { "epoch": 7.65, - "learning_rate": 4.729736064882097e-06, - "loss": 0.0656, + "learning_rate": 1.4753561231335717e-05, + "loss": 0.1563, "step": 163865 }, { "epoch": 7.65, - "learning_rate": 4.729267263606958e-06, - "loss": 0.2103, + "learning_rate": 1.4753093161499696e-05, + "loss": 0.1192, "step": 163870 }, { "epoch": 7.65, - "learning_rate": 4.728798462331818e-06, - "loss": 0.0513, + "learning_rate": 1.4752625091663676e-05, + "loss": 0.0074, "step": 163875 }, { "epoch": 7.65, - "learning_rate": 4.728329661056678e-06, - "loss": 0.0157, + "learning_rate": 1.4752157021827656e-05, + "loss": 0.0108, "step": 163880 }, { "epoch": 7.65, - "learning_rate": 4.727860859781539e-06, - "loss": 0.0128, + "learning_rate": 1.4751688951991638e-05, + "loss": 0.0614, "step": 163885 }, { "epoch": 7.65, - "learning_rate": 4.727392058506399e-06, - "loss": 0.0372, + "learning_rate": 1.4751220882155619e-05, + "loss": 0.0212, "step": 163890 }, { "epoch": 7.65, - "learning_rate": 4.72692325723126e-06, - "loss": 0.0298, + "learning_rate": 1.4750752812319599e-05, + "loss": 0.0242, "step": 163895 }, { "epoch": 7.65, - "learning_rate": 4.726454455956121e-06, - "loss": 0.042, + "learning_rate": 1.475028474248358e-05, + "loss": 0.0255, "step": 163900 }, { "epoch": 7.65, - "learning_rate": 4.7259856546809814e-06, - "loss": 0.0459, + "learning_rate": 1.4749816672647559e-05, + "loss": 0.1002, "step": 163905 }, { "epoch": 7.65, - "learning_rate": 4.725516853405841e-06, - "loss": 0.1937, + "learning_rate": 1.474934860281154e-05, + "loss": 0.1094, "step": 163910 }, { "epoch": 7.65, - "learning_rate": 4.725048052130702e-06, - "loss": 0.1323, + "learning_rate": 1.474888053297552e-05, + "loss": 0.0934, "step": 163915 }, { "epoch": 7.65, - "learning_rate": 4.724579250855563e-06, - "loss": 0.1888, + "learning_rate": 1.4748412463139501e-05, + "loss": 0.2892, "step": 163920 }, { "epoch": 7.65, - "learning_rate": 4.724110449580423e-06, - "loss": 0.0088, + "learning_rate": 1.4747944393303481e-05, + "loss": 0.0082, "step": 163925 }, { "epoch": 7.65, - "learning_rate": 4.723641648305284e-06, - "loss": 0.0098, + "learning_rate": 1.4747476323467463e-05, + "loss": 0.0226, "step": 163930 }, { "epoch": 7.65, - "learning_rate": 4.7231728470301444e-06, - "loss": 0.0166, + "learning_rate": 1.474700825363144e-05, + "loss": 0.0084, "step": 163935 }, { "epoch": 7.65, - "learning_rate": 4.722704045755005e-06, - "loss": 0.0123, + "learning_rate": 1.4746540183795422e-05, + "loss": 0.034, "step": 163940 }, { "epoch": 7.65, - "learning_rate": 4.722235244479865e-06, - "loss": 0.0449, + "learning_rate": 1.4746072113959402e-05, + "loss": 0.0239, "step": 163945 }, { "epoch": 7.65, - "learning_rate": 4.721766443204726e-06, - "loss": 0.0364, + "learning_rate": 1.4745604044123384e-05, + "loss": 0.0494, "step": 163950 }, { "epoch": 7.65, - "learning_rate": 4.721297641929587e-06, - "loss": 0.0919, + "learning_rate": 1.4745135974287365e-05, + "loss": 0.0774, "step": 163955 }, { "epoch": 7.65, - "learning_rate": 4.7208288406544475e-06, - "loss": 0.0997, + "learning_rate": 1.4744667904451345e-05, + "loss": 0.061, "step": 163960 }, { "epoch": 7.65, - "learning_rate": 4.7203600393793074e-06, - "loss": 0.1445, + "learning_rate": 1.4744199834615325e-05, + "loss": 0.2427, "step": 163965 }, { "epoch": 7.65, - "learning_rate": 4.719891238104168e-06, - "loss": 0.2567, + "learning_rate": 1.4743731764779305e-05, + "loss": 0.2827, "step": 163970 }, { "epoch": 7.65, - "learning_rate": 4.719422436829029e-06, - "loss": 0.0201, + "learning_rate": 1.4743263694943286e-05, + "loss": 0.017, "step": 163975 }, { "epoch": 7.65, - "learning_rate": 4.718953635553889e-06, - "loss": 0.031, + "learning_rate": 1.4742795625107266e-05, + "loss": 0.0245, "step": 163980 }, { "epoch": 7.65, - "learning_rate": 4.71848483427875e-06, - "loss": 0.0439, + "learning_rate": 1.4742327555271247e-05, + "loss": 0.0145, "step": 163985 }, { "epoch": 7.65, - "learning_rate": 4.7180160330036105e-06, - "loss": 0.0788, + "learning_rate": 1.4741859485435227e-05, + "loss": 0.0505, "step": 163990 }, { "epoch": 7.65, - "learning_rate": 4.717547231728471e-06, - "loss": 0.0605, + "learning_rate": 1.4741391415599209e-05, + "loss": 0.0433, "step": 163995 }, { "epoch": 7.65, - "learning_rate": 4.717078430453331e-06, - "loss": 0.0745, + "learning_rate": 1.4740923345763187e-05, + "loss": 0.081, "step": 164000 }, { "epoch": 7.65, - "learning_rate": 4.716609629178191e-06, - "loss": 0.0359, + "learning_rate": 1.4740455275927168e-05, + "loss": 0.018, "step": 164005 }, { "epoch": 7.65, - "learning_rate": 4.716140827903053e-06, - "loss": 0.0963, + "learning_rate": 1.4739987206091148e-05, + "loss": 0.1194, "step": 164010 }, { "epoch": 7.65, - "learning_rate": 4.715672026627913e-06, - "loss": 0.1279, + "learning_rate": 1.473951913625513e-05, + "loss": 0.2048, "step": 164015 }, { "epoch": 7.65, - "learning_rate": 4.7152032253527735e-06, - "loss": 0.1814, + "learning_rate": 1.4739051066419111e-05, + "loss": 0.1677, "step": 164020 }, { "epoch": 7.65, - "learning_rate": 4.714734424077633e-06, - "loss": 0.012, + "learning_rate": 1.4738582996583091e-05, + "loss": 0.0196, "step": 164025 }, { "epoch": 7.65, - "learning_rate": 4.714265622802495e-06, - "loss": 0.0118, + "learning_rate": 1.4738114926747071e-05, + "loss": 0.0185, "step": 164030 }, { "epoch": 7.65, - "learning_rate": 4.713796821527355e-06, - "loss": 0.0419, + "learning_rate": 1.473764685691105e-05, + "loss": 0.0141, "step": 164035 }, { "epoch": 7.65, - "learning_rate": 4.713328020252215e-06, - "loss": 0.0543, + "learning_rate": 1.4737178787075032e-05, + "loss": 0.0042, "step": 164040 }, { "epoch": 7.65, - "learning_rate": 4.712859218977076e-06, - "loss": 0.038, + "learning_rate": 1.4736710717239012e-05, + "loss": 0.0278, "step": 164045 }, { "epoch": 7.65, - "learning_rate": 4.7123904177019365e-06, - "loss": 0.0475, + "learning_rate": 1.4736242647402994e-05, + "loss": 0.0262, "step": 164050 }, { "epoch": 7.66, - "learning_rate": 4.711921616426797e-06, - "loss": 0.033, + "learning_rate": 1.4735774577566973e-05, + "loss": 0.0931, "step": 164055 }, { "epoch": 7.66, - "learning_rate": 4.711452815151657e-06, - "loss": 0.0966, + "learning_rate": 1.4735306507730953e-05, + "loss": 0.2287, "step": 164060 }, { "epoch": 7.66, - "learning_rate": 4.710984013876518e-06, - "loss": 0.2836, + "learning_rate": 1.4734838437894933e-05, + "loss": 0.1138, "step": 164065 }, { "epoch": 7.66, - "learning_rate": 4.710515212601379e-06, - "loss": 0.1613, + "learning_rate": 1.4734370368058915e-05, + "loss": 0.1755, "step": 164070 }, { "epoch": 7.66, - "learning_rate": 4.710046411326239e-06, - "loss": 0.0044, + "learning_rate": 1.4733902298222896e-05, + "loss": 0.0144, "step": 164075 }, { "epoch": 7.66, - "learning_rate": 4.7095776100510995e-06, - "loss": 0.0218, + "learning_rate": 1.4733434228386876e-05, + "loss": 0.0326, "step": 164080 }, { "epoch": 7.66, - "learning_rate": 4.70910880877596e-06, - "loss": 0.0352, + "learning_rate": 1.4732966158550857e-05, + "loss": 0.0081, "step": 164085 }, { "epoch": 7.66, - "learning_rate": 4.708640007500821e-06, - "loss": 0.0388, + "learning_rate": 1.4732498088714837e-05, + "loss": 0.0393, "step": 164090 }, { "epoch": 7.66, - "learning_rate": 4.708171206225681e-06, - "loss": 0.0329, + "learning_rate": 1.4732030018878817e-05, + "loss": 0.0831, "step": 164095 }, { "epoch": 7.66, - "learning_rate": 4.707702404950542e-06, - "loss": 0.0434, + "learning_rate": 1.4731561949042797e-05, + "loss": 0.0358, "step": 164100 }, { "epoch": 7.66, - "learning_rate": 4.7072336036754025e-06, - "loss": 0.0772, + "learning_rate": 1.4731093879206778e-05, + "loss": 0.0384, "step": 164105 }, { "epoch": 7.66, - "learning_rate": 4.7067648024002624e-06, - "loss": 0.1298, + "learning_rate": 1.4730625809370758e-05, + "loss": 0.0357, "step": 164110 }, { "epoch": 7.66, - "learning_rate": 4.706296001125123e-06, - "loss": 0.123, + "learning_rate": 1.473015773953474e-05, + "loss": 0.1029, "step": 164115 }, { "epoch": 7.66, - "learning_rate": 4.705827199849984e-06, - "loss": 0.2835, + "learning_rate": 1.472968966969872e-05, + "loss": 0.2675, "step": 164120 }, { "epoch": 7.66, - "learning_rate": 4.705358398574845e-06, - "loss": 0.009, + "learning_rate": 1.47292215998627e-05, + "loss": 0.0264, "step": 164125 }, { "epoch": 7.66, - "learning_rate": 4.704889597299705e-06, - "loss": 0.0166, + "learning_rate": 1.4728753530026679e-05, + "loss": 0.03, "step": 164130 }, { "epoch": 7.66, - "learning_rate": 4.7044207960245655e-06, - "loss": 0.0069, + "learning_rate": 1.472828546019066e-05, + "loss": 0.0207, "step": 164135 }, { "epoch": 7.66, - "learning_rate": 4.703951994749426e-06, - "loss": 0.0788, + "learning_rate": 1.4727817390354642e-05, + "loss": 0.0679, "step": 164140 }, { "epoch": 7.66, - "learning_rate": 4.703483193474286e-06, - "loss": 0.0553, + "learning_rate": 1.4727349320518622e-05, + "loss": 0.0205, "step": 164145 }, { "epoch": 7.66, - "learning_rate": 4.703014392199147e-06, - "loss": 0.103, + "learning_rate": 1.4726881250682603e-05, + "loss": 0.1021, "step": 164150 }, { "epoch": 7.66, - "learning_rate": 4.702545590924008e-06, - "loss": 0.009, + "learning_rate": 1.4726413180846582e-05, + "loss": 0.0729, "step": 164155 }, { "epoch": 7.66, - "learning_rate": 4.7020767896488686e-06, - "loss": 0.0482, + "learning_rate": 1.4725945111010563e-05, + "loss": 0.074, "step": 164160 }, { "epoch": 7.66, - "learning_rate": 4.7016079883737285e-06, - "loss": 0.199, + "learning_rate": 1.4725477041174543e-05, + "loss": 0.1293, "step": 164165 }, { "epoch": 7.66, - "learning_rate": 4.701139187098589e-06, - "loss": 0.3331, + "learning_rate": 1.4725008971338524e-05, + "loss": 0.2886, "step": 164170 }, { "epoch": 7.66, - "learning_rate": 4.70067038582345e-06, - "loss": 0.0302, + "learning_rate": 1.4724540901502504e-05, + "loss": 0.0429, "step": 164175 }, { "epoch": 7.66, - "learning_rate": 4.70020158454831e-06, - "loss": 0.0091, + "learning_rate": 1.4724072831666486e-05, + "loss": 0.0065, "step": 164180 }, { "epoch": 7.66, - "learning_rate": 4.699732783273171e-06, - "loss": 0.0176, + "learning_rate": 1.4723604761830466e-05, + "loss": 0.0157, "step": 164185 }, { "epoch": 7.66, - "learning_rate": 4.6992639819980315e-06, - "loss": 0.0126, + "learning_rate": 1.4723136691994445e-05, + "loss": 0.0438, "step": 164190 }, { "epoch": 7.66, - "learning_rate": 4.698795180722892e-06, - "loss": 0.0258, + "learning_rate": 1.4722668622158427e-05, + "loss": 0.0559, "step": 164195 }, { "epoch": 7.66, - "learning_rate": 4.698326379447752e-06, - "loss": 0.024, + "learning_rate": 1.4722200552322407e-05, + "loss": 0.0297, "step": 164200 }, { "epoch": 7.66, - "learning_rate": 4.697857578172613e-06, - "loss": 0.0335, + "learning_rate": 1.4721732482486388e-05, + "loss": 0.1217, "step": 164205 }, { "epoch": 7.66, - "learning_rate": 4.697388776897474e-06, - "loss": 0.0431, + "learning_rate": 1.4721264412650368e-05, + "loss": 0.0811, "step": 164210 }, { "epoch": 7.66, - "learning_rate": 4.696919975622334e-06, - "loss": 0.1902, + "learning_rate": 1.472079634281435e-05, + "loss": 0.0905, "step": 164215 }, { "epoch": 7.66, - "learning_rate": 4.6964511743471945e-06, - "loss": 0.1451, + "learning_rate": 1.4720328272978328e-05, + "loss": 0.1642, "step": 164220 }, { "epoch": 7.66, - "learning_rate": 4.695982373072055e-06, - "loss": 0.0583, + "learning_rate": 1.471986020314231e-05, + "loss": 0.0337, "step": 164225 }, { "epoch": 7.66, - "learning_rate": 4.695513571796916e-06, - "loss": 0.0528, + "learning_rate": 1.4719392133306289e-05, + "loss": 0.0015, "step": 164230 }, { "epoch": 7.66, - "learning_rate": 4.695044770521776e-06, - "loss": 0.0146, + "learning_rate": 1.471892406347027e-05, + "loss": 0.0164, "step": 164235 }, { "epoch": 7.66, - "learning_rate": 4.694575969246637e-06, - "loss": 0.023, + "learning_rate": 1.471845599363425e-05, + "loss": 0.069, "step": 164240 }, { "epoch": 7.66, - "learning_rate": 4.694107167971498e-06, - "loss": 0.0495, + "learning_rate": 1.4717987923798232e-05, + "loss": 0.0915, "step": 164245 }, { "epoch": 7.66, - "learning_rate": 4.6936383666963575e-06, - "loss": 0.032, + "learning_rate": 1.471751985396221e-05, + "loss": 0.0801, "step": 164250 }, { "epoch": 7.66, - "learning_rate": 4.693169565421218e-06, - "loss": 0.0471, + "learning_rate": 1.4717051784126191e-05, + "loss": 0.0818, "step": 164255 }, { "epoch": 7.66, - "learning_rate": 4.692700764146079e-06, - "loss": 0.1117, + "learning_rate": 1.4716583714290173e-05, + "loss": 0.1226, "step": 164260 }, { "epoch": 7.66, - "learning_rate": 4.69223196287094e-06, - "loss": 0.17, + "learning_rate": 1.4716115644454153e-05, + "loss": 0.1488, "step": 164265 }, { "epoch": 7.67, - "learning_rate": 4.6917631615958e-06, - "loss": 0.2361, + "learning_rate": 1.4715647574618134e-05, + "loss": 0.0939, "step": 164270 }, { "epoch": 7.67, - "learning_rate": 4.691294360320661e-06, - "loss": 0.028, + "learning_rate": 1.4715179504782114e-05, + "loss": 0.023, "step": 164275 }, { "epoch": 7.67, - "learning_rate": 4.690825559045521e-06, - "loss": 0.0064, + "learning_rate": 1.4714711434946096e-05, + "loss": 0.0144, "step": 164280 }, { "epoch": 7.67, - "learning_rate": 4.690356757770382e-06, - "loss": 0.0069, + "learning_rate": 1.4714243365110074e-05, + "loss": 0.0092, "step": 164285 }, { "epoch": 7.67, - "learning_rate": 4.689887956495242e-06, - "loss": 0.0246, + "learning_rate": 1.4713775295274055e-05, + "loss": 0.032, "step": 164290 }, { "epoch": 7.67, - "learning_rate": 4.689419155220102e-06, - "loss": 0.0124, + "learning_rate": 1.4713307225438035e-05, + "loss": 0.0181, "step": 164295 }, { "epoch": 7.67, - "learning_rate": 4.688950353944964e-06, - "loss": 0.048, + "learning_rate": 1.4712839155602017e-05, + "loss": 0.0362, "step": 164300 }, { "epoch": 7.67, - "learning_rate": 4.6884815526698236e-06, - "loss": 0.1454, + "learning_rate": 1.4712371085765996e-05, + "loss": 0.0475, "step": 164305 }, { "epoch": 7.67, - "learning_rate": 4.688012751394684e-06, - "loss": 0.1143, + "learning_rate": 1.4711903015929978e-05, + "loss": 0.0893, "step": 164310 }, { "epoch": 7.67, - "learning_rate": 4.687543950119544e-06, - "loss": 0.0953, + "learning_rate": 1.4711434946093956e-05, + "loss": 0.1114, "step": 164315 }, { "epoch": 7.67, - "learning_rate": 4.687075148844406e-06, - "loss": 0.1795, + "learning_rate": 1.4710966876257938e-05, + "loss": 0.2267, "step": 164320 }, { "epoch": 7.67, - "learning_rate": 4.686606347569266e-06, - "loss": 0.0294, + "learning_rate": 1.4710498806421919e-05, + "loss": 0.0295, "step": 164325 }, { "epoch": 7.67, - "learning_rate": 4.686137546294126e-06, - "loss": 0.0085, + "learning_rate": 1.4710030736585899e-05, + "loss": 0.0051, "step": 164330 }, { "epoch": 7.67, - "learning_rate": 4.6856687450189866e-06, - "loss": 0.0107, + "learning_rate": 1.470956266674988e-05, + "loss": 0.0125, "step": 164335 }, { "epoch": 7.67, - "learning_rate": 4.685199943743847e-06, - "loss": 0.029, + "learning_rate": 1.470909459691386e-05, + "loss": 0.0122, "step": 164340 }, { "epoch": 7.67, - "learning_rate": 4.684731142468708e-06, - "loss": 0.112, + "learning_rate": 1.470862652707784e-05, + "loss": 0.0369, "step": 164345 }, { "epoch": 7.67, - "learning_rate": 4.684262341193568e-06, - "loss": 0.0569, + "learning_rate": 1.470815845724182e-05, + "loss": 0.0353, "step": 164350 }, { "epoch": 7.67, - "learning_rate": 4.683793539918429e-06, - "loss": 0.0188, + "learning_rate": 1.4707690387405801e-05, + "loss": 0.0497, "step": 164355 }, { "epoch": 7.67, - "learning_rate": 4.68332473864329e-06, - "loss": 0.0814, + "learning_rate": 1.4707222317569781e-05, + "loss": 0.1194, "step": 164360 }, { "epoch": 7.67, - "learning_rate": 4.6828559373681496e-06, - "loss": 0.0877, + "learning_rate": 1.4706754247733763e-05, + "loss": 0.208, "step": 164365 }, { "epoch": 7.67, - "learning_rate": 4.68238713609301e-06, - "loss": 0.1093, + "learning_rate": 1.4706286177897743e-05, + "loss": 0.1168, "step": 164370 }, { "epoch": 7.67, - "learning_rate": 4.681918334817871e-06, - "loss": 0.0204, + "learning_rate": 1.4705818108061724e-05, + "loss": 0.0408, "step": 164375 }, { "epoch": 7.67, - "learning_rate": 4.681449533542732e-06, - "loss": 0.0622, + "learning_rate": 1.4705350038225704e-05, + "loss": 0.0295, "step": 164380 }, { "epoch": 7.67, - "learning_rate": 4.680980732267592e-06, - "loss": 0.0129, + "learning_rate": 1.4704881968389684e-05, + "loss": 0.0439, "step": 164385 }, { "epoch": 7.67, - "learning_rate": 4.680511930992453e-06, - "loss": 0.0097, + "learning_rate": 1.4704413898553665e-05, + "loss": 0.0342, "step": 164390 }, { "epoch": 7.67, - "learning_rate": 4.680043129717313e-06, - "loss": 0.0182, + "learning_rate": 1.4703945828717645e-05, + "loss": 0.0195, "step": 164395 }, { "epoch": 7.67, - "learning_rate": 4.679574328442173e-06, - "loss": 0.0628, + "learning_rate": 1.4703477758881627e-05, + "loss": 0.0446, "step": 164400 }, { "epoch": 7.67, - "learning_rate": 4.679105527167034e-06, - "loss": 0.1548, + "learning_rate": 1.4703009689045606e-05, + "loss": 0.0587, "step": 164405 }, { "epoch": 7.67, - "learning_rate": 4.678636725891895e-06, - "loss": 0.1319, + "learning_rate": 1.4702541619209586e-05, + "loss": 0.0566, "step": 164410 }, { "epoch": 7.67, - "learning_rate": 4.678167924616756e-06, - "loss": 0.087, + "learning_rate": 1.4702073549373566e-05, + "loss": 0.0865, "step": 164415 }, { "epoch": 7.67, - "learning_rate": 4.677699123341616e-06, - "loss": 0.2183, + "learning_rate": 1.4701605479537547e-05, + "loss": 0.1835, "step": 164420 }, { "epoch": 7.67, - "learning_rate": 4.677230322066476e-06, - "loss": 0.0364, + "learning_rate": 1.4701137409701527e-05, + "loss": 0.0991, "step": 164425 }, { "epoch": 7.67, - "learning_rate": 4.676761520791337e-06, - "loss": 0.0132, + "learning_rate": 1.4700669339865509e-05, + "loss": 0.0352, "step": 164430 }, { "epoch": 7.67, - "learning_rate": 4.676292719516197e-06, - "loss": 0.0042, + "learning_rate": 1.4700201270029489e-05, + "loss": 0.0148, "step": 164435 }, { "epoch": 7.67, - "learning_rate": 4.675823918241058e-06, - "loss": 0.0343, + "learning_rate": 1.4699733200193468e-05, + "loss": 0.0125, "step": 164440 }, { "epoch": 7.67, - "learning_rate": 4.675355116965919e-06, - "loss": 0.0198, + "learning_rate": 1.469926513035745e-05, + "loss": 0.0419, "step": 164445 }, { "epoch": 7.67, - "learning_rate": 4.6748863156907794e-06, - "loss": 0.0999, + "learning_rate": 1.469879706052143e-05, + "loss": 0.0142, "step": 164450 }, { "epoch": 7.67, - "learning_rate": 4.674417514415639e-06, - "loss": 0.1011, + "learning_rate": 1.4698328990685411e-05, + "loss": 0.0279, "step": 164455 }, { "epoch": 7.67, - "learning_rate": 4.6739487131405e-06, - "loss": 0.1382, + "learning_rate": 1.4697860920849391e-05, + "loss": 0.0538, "step": 164460 }, { "epoch": 7.67, - "learning_rate": 4.673479911865361e-06, - "loss": 0.1123, + "learning_rate": 1.4697392851013373e-05, + "loss": 0.068, "step": 164465 }, { "epoch": 7.67, - "learning_rate": 4.673011110590221e-06, - "loss": 0.1507, + "learning_rate": 1.4696924781177352e-05, + "loss": 0.3545, "step": 164470 }, { "epoch": 7.67, - "learning_rate": 4.672542309315082e-06, - "loss": 0.0586, + "learning_rate": 1.4696456711341332e-05, + "loss": 0.0095, "step": 164475 }, { "epoch": 7.67, - "learning_rate": 4.6720735080399424e-06, - "loss": 0.0419, + "learning_rate": 1.4695988641505312e-05, + "loss": 0.0247, "step": 164480 }, { "epoch": 7.68, - "learning_rate": 4.671604706764803e-06, - "loss": 0.0358, + "learning_rate": 1.4695520571669294e-05, + "loss": 0.0229, "step": 164485 }, { "epoch": 7.68, - "learning_rate": 4.671135905489663e-06, - "loss": 0.0429, + "learning_rate": 1.4695052501833273e-05, + "loss": 0.0205, "step": 164490 }, { "epoch": 7.68, - "learning_rate": 4.670667104214524e-06, - "loss": 0.0432, + "learning_rate": 1.4694584431997255e-05, + "loss": 0.0154, "step": 164495 }, { "epoch": 7.68, - "learning_rate": 4.670198302939385e-06, - "loss": 0.018, + "learning_rate": 1.4694116362161235e-05, + "loss": 0.0665, "step": 164500 }, { "epoch": 7.68, - "learning_rate": 4.669729501664245e-06, - "loss": 0.0393, + "learning_rate": 1.4693648292325215e-05, + "loss": 0.0547, "step": 164505 }, { "epoch": 7.68, - "learning_rate": 4.669260700389105e-06, - "loss": 0.041, + "learning_rate": 1.4693180222489196e-05, + "loss": 0.0531, "step": 164510 }, { "epoch": 7.68, - "learning_rate": 4.668791899113966e-06, - "loss": 0.1012, + "learning_rate": 1.4692712152653176e-05, + "loss": 0.0973, "step": 164515 }, { "epoch": 7.68, - "learning_rate": 4.668323097838827e-06, - "loss": 0.234, + "learning_rate": 1.4692244082817157e-05, + "loss": 0.08, "step": 164520 }, { "epoch": 7.68, - "learning_rate": 4.667854296563687e-06, - "loss": 0.0196, + "learning_rate": 1.4691776012981137e-05, + "loss": 0.0096, "step": 164525 }, { "epoch": 7.68, - "learning_rate": 4.667385495288548e-06, - "loss": 0.025, + "learning_rate": 1.4691307943145119e-05, + "loss": 0.0205, "step": 164530 }, { "epoch": 7.68, - "learning_rate": 4.6669166940134085e-06, - "loss": 0.0376, + "learning_rate": 1.4690839873309097e-05, + "loss": 0.0461, "step": 164535 }, { "epoch": 7.68, - "learning_rate": 4.666447892738268e-06, - "loss": 0.0233, + "learning_rate": 1.4690371803473078e-05, + "loss": 0.05, "step": 164540 }, { "epoch": 7.68, - "learning_rate": 4.665979091463129e-06, - "loss": 0.0153, + "learning_rate": 1.4689903733637058e-05, + "loss": 0.0234, "step": 164545 }, { "epoch": 7.68, - "learning_rate": 4.665510290187989e-06, - "loss": 0.0727, + "learning_rate": 1.468943566380104e-05, + "loss": 0.0338, "step": 164550 }, { "epoch": 7.68, - "learning_rate": 4.665041488912851e-06, - "loss": 0.051, + "learning_rate": 1.468896759396502e-05, + "loss": 0.0691, "step": 164555 }, { "epoch": 7.68, - "learning_rate": 4.664572687637711e-06, - "loss": 0.0557, + "learning_rate": 1.4688499524129001e-05, + "loss": 0.0821, "step": 164560 }, { "epoch": 7.68, - "learning_rate": 4.6641038863625715e-06, - "loss": 0.1794, + "learning_rate": 1.468803145429298e-05, + "loss": 0.2876, "step": 164565 }, { "epoch": 7.68, - "learning_rate": 4.663635085087431e-06, - "loss": 0.1711, + "learning_rate": 1.468756338445696e-05, + "loss": 0.1778, "step": 164570 }, { "epoch": 7.68, - "learning_rate": 4.663166283812292e-06, - "loss": 0.0121, + "learning_rate": 1.4687095314620942e-05, + "loss": 0.0152, "step": 164575 }, { "epoch": 7.68, - "learning_rate": 4.662697482537153e-06, - "loss": 0.0131, + "learning_rate": 1.4686627244784922e-05, + "loss": 0.0273, "step": 164580 }, { "epoch": 7.68, - "learning_rate": 4.662228681262013e-06, - "loss": 0.031, + "learning_rate": 1.4686159174948904e-05, + "loss": 0.0036, "step": 164585 }, { "epoch": 7.68, - "learning_rate": 4.661759879986874e-06, - "loss": 0.0565, + "learning_rate": 1.4685691105112883e-05, + "loss": 0.0269, "step": 164590 }, { "epoch": 7.68, - "learning_rate": 4.6612910787117345e-06, - "loss": 0.0922, + "learning_rate": 1.4685223035276865e-05, + "loss": 0.0377, "step": 164595 }, { "epoch": 7.68, - "learning_rate": 4.660822277436595e-06, - "loss": 0.048, + "learning_rate": 1.4684754965440843e-05, + "loss": 0.0354, "step": 164600 }, { "epoch": 7.68, - "learning_rate": 4.660353476161455e-06, - "loss": 0.0702, + "learning_rate": 1.4684286895604824e-05, + "loss": 0.1183, "step": 164605 }, { "epoch": 7.68, - "learning_rate": 4.659884674886316e-06, - "loss": 0.0817, + "learning_rate": 1.4683818825768804e-05, + "loss": 0.1075, "step": 164610 }, { "epoch": 7.68, - "learning_rate": 4.659415873611177e-06, - "loss": 0.1334, + "learning_rate": 1.4683350755932786e-05, + "loss": 0.1354, "step": 164615 }, { "epoch": 7.68, - "learning_rate": 4.658947072336037e-06, - "loss": 0.2507, + "learning_rate": 1.4682882686096766e-05, + "loss": 0.2122, "step": 164620 }, { "epoch": 7.68, - "learning_rate": 4.6584782710608974e-06, - "loss": 0.0159, + "learning_rate": 1.4682414616260747e-05, + "loss": 0.0228, "step": 164625 }, { "epoch": 7.68, - "learning_rate": 4.658009469785758e-06, - "loss": 0.0197, + "learning_rate": 1.4681946546424727e-05, + "loss": 0.0378, "step": 164630 }, { "epoch": 7.68, - "learning_rate": 4.657540668510619e-06, - "loss": 0.0159, + "learning_rate": 1.4681478476588707e-05, + "loss": 0.0105, "step": 164635 }, { "epoch": 7.68, - "learning_rate": 4.657071867235479e-06, - "loss": 0.029, + "learning_rate": 1.4681010406752688e-05, + "loss": 0.0572, "step": 164640 }, { "epoch": 7.68, - "learning_rate": 4.65660306596034e-06, - "loss": 0.0278, + "learning_rate": 1.4680542336916668e-05, + "loss": 0.0442, "step": 164645 }, { "epoch": 7.68, - "learning_rate": 4.6561342646852005e-06, - "loss": 0.0206, + "learning_rate": 1.468007426708065e-05, + "loss": 0.0322, "step": 164650 }, { "epoch": 7.68, - "learning_rate": 4.6556654634100604e-06, - "loss": 0.039, + "learning_rate": 1.467960619724463e-05, + "loss": 0.0484, "step": 164655 }, { "epoch": 7.68, - "learning_rate": 4.655196662134921e-06, - "loss": 0.0606, + "learning_rate": 1.467913812740861e-05, + "loss": 0.082, "step": 164660 }, { "epoch": 7.68, - "learning_rate": 4.654727860859782e-06, - "loss": 0.0807, + "learning_rate": 1.4678670057572589e-05, + "loss": 0.1086, "step": 164665 }, { "epoch": 7.68, - "learning_rate": 4.654259059584643e-06, - "loss": 0.1156, + "learning_rate": 1.467820198773657e-05, + "loss": 0.1416, "step": 164670 }, { "epoch": 7.68, - "learning_rate": 4.653790258309503e-06, - "loss": 0.0141, + "learning_rate": 1.467773391790055e-05, + "loss": 0.0314, "step": 164675 }, { "epoch": 7.68, - "learning_rate": 4.6533214570343635e-06, - "loss": 0.016, + "learning_rate": 1.4677265848064532e-05, + "loss": 0.0097, "step": 164680 }, { "epoch": 7.68, - "learning_rate": 4.652852655759224e-06, - "loss": 0.0251, + "learning_rate": 1.4676797778228513e-05, + "loss": 0.0206, "step": 164685 }, { "epoch": 7.68, - "learning_rate": 4.652383854484084e-06, - "loss": 0.0244, + "learning_rate": 1.4676329708392493e-05, + "loss": 0.0217, "step": 164690 }, { "epoch": 7.68, - "learning_rate": 4.651915053208945e-06, - "loss": 0.0615, + "learning_rate": 1.4675861638556473e-05, + "loss": 0.0106, "step": 164695 }, { "epoch": 7.69, - "learning_rate": 4.651446251933806e-06, - "loss": 0.0318, + "learning_rate": 1.4675393568720453e-05, + "loss": 0.0558, "step": 164700 }, { "epoch": 7.69, - "learning_rate": 4.6509774506586665e-06, - "loss": 0.0513, + "learning_rate": 1.4674925498884434e-05, + "loss": 0.0429, "step": 164705 }, { "epoch": 7.69, - "learning_rate": 4.6505086493835265e-06, - "loss": 0.0632, + "learning_rate": 1.4674457429048414e-05, + "loss": 0.0961, "step": 164710 }, { "epoch": 7.69, - "learning_rate": 4.650039848108387e-06, - "loss": 0.1861, + "learning_rate": 1.4673989359212396e-05, + "loss": 0.1296, "step": 164715 }, { "epoch": 7.69, - "learning_rate": 4.649571046833248e-06, - "loss": 0.1746, + "learning_rate": 1.4673521289376376e-05, + "loss": 0.1895, "step": 164720 }, { "epoch": 7.69, - "learning_rate": 4.649102245558108e-06, - "loss": 0.0401, + "learning_rate": 1.4673053219540355e-05, + "loss": 0.0101, "step": 164725 }, { "epoch": 7.69, - "learning_rate": 4.648633444282969e-06, - "loss": 0.0249, + "learning_rate": 1.4672585149704335e-05, + "loss": 0.0154, "step": 164730 }, { "epoch": 7.69, - "learning_rate": 4.6481646430078295e-06, - "loss": 0.0227, + "learning_rate": 1.4672117079868317e-05, + "loss": 0.0125, "step": 164735 }, { "epoch": 7.69, - "learning_rate": 4.64769584173269e-06, - "loss": 0.0343, + "learning_rate": 1.4671649010032296e-05, + "loss": 0.0365, "step": 164740 }, { "epoch": 7.69, - "learning_rate": 4.64722704045755e-06, - "loss": 0.1005, + "learning_rate": 1.4671180940196278e-05, + "loss": 0.0172, "step": 164745 }, { "epoch": 7.69, - "learning_rate": 4.646758239182411e-06, - "loss": 0.0692, + "learning_rate": 1.467071287036026e-05, + "loss": 0.0359, "step": 164750 }, { "epoch": 7.69, - "learning_rate": 4.646289437907272e-06, - "loss": 0.0293, + "learning_rate": 1.4670244800524238e-05, + "loss": 0.0795, "step": 164755 }, { "epoch": 7.69, - "learning_rate": 4.645820636632132e-06, - "loss": 0.0984, + "learning_rate": 1.4669776730688219e-05, + "loss": 0.0118, "step": 164760 }, { "epoch": 7.69, - "learning_rate": 4.6453518353569925e-06, - "loss": 0.2069, + "learning_rate": 1.4669308660852199e-05, + "loss": 0.2083, "step": 164765 }, { "epoch": 7.69, - "learning_rate": 4.644883034081853e-06, - "loss": 0.1431, + "learning_rate": 1.466884059101618e-05, + "loss": 0.1836, "step": 164770 }, { "epoch": 7.69, - "learning_rate": 4.644414232806714e-06, - "loss": 0.0198, + "learning_rate": 1.466837252118016e-05, + "loss": 0.0833, "step": 164775 }, { "epoch": 7.69, - "learning_rate": 4.643945431531574e-06, - "loss": 0.0251, + "learning_rate": 1.4667904451344142e-05, + "loss": 0.0068, "step": 164780 }, { "epoch": 7.69, - "learning_rate": 4.643476630256435e-06, - "loss": 0.0033, + "learning_rate": 1.4667436381508122e-05, + "loss": 0.0092, "step": 164785 }, { "epoch": 7.69, - "learning_rate": 4.643007828981296e-06, - "loss": 0.0959, + "learning_rate": 1.4666968311672101e-05, + "loss": 0.0186, "step": 164790 }, { "epoch": 7.69, - "learning_rate": 4.6425390277061555e-06, - "loss": 0.037, + "learning_rate": 1.4666500241836081e-05, + "loss": 0.0147, "step": 164795 }, { "epoch": 7.69, - "learning_rate": 4.642070226431016e-06, - "loss": 0.0832, + "learning_rate": 1.4666032172000063e-05, + "loss": 0.0201, "step": 164800 }, { "epoch": 7.69, - "learning_rate": 4.641601425155876e-06, - "loss": 0.0485, + "learning_rate": 1.4665564102164043e-05, + "loss": 0.0633, "step": 164805 }, { "epoch": 7.69, - "learning_rate": 4.641132623880738e-06, - "loss": 0.0733, + "learning_rate": 1.4665096032328024e-05, + "loss": 0.0621, "step": 164810 }, { "epoch": 7.69, - "learning_rate": 4.640663822605598e-06, - "loss": 0.1461, + "learning_rate": 1.4664627962492006e-05, + "loss": 0.1616, "step": 164815 }, { "epoch": 7.69, - "learning_rate": 4.6401950213304586e-06, - "loss": 0.1614, + "learning_rate": 1.4664159892655984e-05, + "loss": 0.0938, "step": 164820 }, { "epoch": 7.69, - "learning_rate": 4.6397262200553185e-06, - "loss": 0.0013, + "learning_rate": 1.4663691822819965e-05, + "loss": 0.0054, "step": 164825 }, { "epoch": 7.69, - "learning_rate": 4.639257418780179e-06, - "loss": 0.0216, + "learning_rate": 1.4663223752983945e-05, + "loss": 0.0011, "step": 164830 }, { "epoch": 7.69, - "learning_rate": 4.63878861750504e-06, - "loss": 0.0239, + "learning_rate": 1.4662755683147927e-05, + "loss": 0.0056, "step": 164835 }, { "epoch": 7.69, - "learning_rate": 4.6383198162299e-06, - "loss": 0.0138, + "learning_rate": 1.4662287613311906e-05, + "loss": 0.007, "step": 164840 }, { "epoch": 7.69, - "learning_rate": 4.637851014954761e-06, - "loss": 0.0529, + "learning_rate": 1.4661819543475888e-05, + "loss": 0.0203, "step": 164845 }, { "epoch": 7.69, - "learning_rate": 4.6373822136796216e-06, - "loss": 0.1808, + "learning_rate": 1.4661351473639866e-05, + "loss": 0.112, "step": 164850 }, { "epoch": 7.69, - "learning_rate": 4.636913412404482e-06, - "loss": 0.0858, + "learning_rate": 1.4660883403803848e-05, + "loss": 0.0664, "step": 164855 }, { "epoch": 7.69, - "learning_rate": 4.636444611129342e-06, - "loss": 0.1227, + "learning_rate": 1.4660415333967827e-05, + "loss": 0.0288, "step": 164860 }, { "epoch": 7.69, - "learning_rate": 4.635975809854203e-06, - "loss": 0.1846, + "learning_rate": 1.4659947264131809e-05, + "loss": 0.0929, "step": 164865 }, { "epoch": 7.69, - "learning_rate": 4.635507008579064e-06, - "loss": 0.1676, + "learning_rate": 1.465947919429579e-05, + "loss": 0.1641, "step": 164870 }, { "epoch": 7.69, - "learning_rate": 4.635038207303924e-06, - "loss": 0.0204, + "learning_rate": 1.465901112445977e-05, + "loss": 0.003, "step": 164875 }, { "epoch": 7.69, - "learning_rate": 4.6345694060287846e-06, - "loss": 0.0099, + "learning_rate": 1.4658543054623752e-05, + "loss": 0.0427, "step": 164880 }, { "epoch": 7.69, - "learning_rate": 4.634100604753645e-06, - "loss": 0.0158, + "learning_rate": 1.465807498478773e-05, + "loss": 0.0108, "step": 164885 }, { "epoch": 7.69, - "learning_rate": 4.633631803478506e-06, - "loss": 0.0096, + "learning_rate": 1.4657606914951711e-05, + "loss": 0.0112, "step": 164890 }, { "epoch": 7.69, - "learning_rate": 4.633163002203366e-06, - "loss": 0.0407, + "learning_rate": 1.4657138845115691e-05, + "loss": 0.0267, "step": 164895 }, { "epoch": 7.69, - "learning_rate": 4.632694200928227e-06, - "loss": 0.0133, + "learning_rate": 1.4656670775279673e-05, + "loss": 0.0258, "step": 164900 }, { "epoch": 7.69, - "learning_rate": 4.632225399653088e-06, - "loss": 0.0829, + "learning_rate": 1.4656202705443652e-05, + "loss": 0.0994, "step": 164905 }, { "epoch": 7.69, - "learning_rate": 4.6317565983779475e-06, - "loss": 0.0592, + "learning_rate": 1.4655734635607634e-05, + "loss": 0.0905, "step": 164910 }, { "epoch": 7.7, - "learning_rate": 4.631287797102808e-06, - "loss": 0.0784, + "learning_rate": 1.4655266565771612e-05, + "loss": 0.149, "step": 164915 }, { "epoch": 7.7, - "learning_rate": 4.630818995827669e-06, - "loss": 0.1181, + "learning_rate": 1.4654798495935594e-05, + "loss": 0.2901, "step": 164920 }, { "epoch": 7.7, - "learning_rate": 4.63035019455253e-06, - "loss": 0.045, + "learning_rate": 1.4654330426099573e-05, + "loss": 0.015, "step": 164925 }, { "epoch": 7.7, - "learning_rate": 4.62988139327739e-06, - "loss": 0.0325, + "learning_rate": 1.4653862356263555e-05, + "loss": 0.0153, "step": 164930 }, { "epoch": 7.7, - "learning_rate": 4.629412592002251e-06, - "loss": 0.0422, + "learning_rate": 1.4653394286427536e-05, + "loss": 0.0155, "step": 164935 }, { "epoch": 7.7, - "learning_rate": 4.628943790727111e-06, - "loss": 0.008, + "learning_rate": 1.4652926216591516e-05, + "loss": 0.0285, "step": 164940 }, { "epoch": 7.7, - "learning_rate": 4.628474989451971e-06, - "loss": 0.0429, + "learning_rate": 1.4652458146755496e-05, + "loss": 0.0553, "step": 164945 }, { "epoch": 7.7, - "learning_rate": 4.628006188176832e-06, - "loss": 0.0394, + "learning_rate": 1.4651990076919476e-05, + "loss": 0.0419, "step": 164950 }, { "epoch": 7.7, - "learning_rate": 4.627537386901693e-06, - "loss": 0.0566, + "learning_rate": 1.4651522007083457e-05, + "loss": 0.0868, "step": 164955 }, { "epoch": 7.7, - "learning_rate": 4.627068585626554e-06, - "loss": 0.0707, + "learning_rate": 1.4651053937247437e-05, + "loss": 0.1297, "step": 164960 }, { "epoch": 7.7, - "learning_rate": 4.626599784351414e-06, - "loss": 0.1486, + "learning_rate": 1.4650585867411419e-05, + "loss": 0.1384, "step": 164965 }, { "epoch": 7.7, - "learning_rate": 4.626130983076274e-06, - "loss": 0.1189, + "learning_rate": 1.4650117797575399e-05, + "loss": 0.2316, "step": 164970 }, { "epoch": 7.7, - "learning_rate": 4.625662181801135e-06, - "loss": 0.0249, + "learning_rate": 1.464964972773938e-05, + "loss": 0.0564, "step": 164975 }, { "epoch": 7.7, - "learning_rate": 4.625193380525995e-06, - "loss": 0.0151, + "learning_rate": 1.4649181657903358e-05, + "loss": 0.0298, "step": 164980 }, { "epoch": 7.7, - "learning_rate": 4.624724579250856e-06, - "loss": 0.031, + "learning_rate": 1.464871358806734e-05, + "loss": 0.005, "step": 164985 }, { "epoch": 7.7, - "learning_rate": 4.624255777975717e-06, - "loss": 0.0456, + "learning_rate": 1.4648245518231321e-05, + "loss": 0.0378, "step": 164990 }, { "epoch": 7.7, - "learning_rate": 4.6237869767005774e-06, - "loss": 0.0366, + "learning_rate": 1.4647777448395301e-05, + "loss": 0.0193, "step": 164995 }, { "epoch": 7.7, - "learning_rate": 4.623318175425437e-06, - "loss": 0.0337, + "learning_rate": 1.4647309378559283e-05, + "loss": 0.0422, "step": 165000 }, { "epoch": 7.7, - "learning_rate": 4.622849374150298e-06, - "loss": 0.036, + "learning_rate": 1.4646841308723262e-05, + "loss": 0.026, "step": 165005 }, { "epoch": 7.7, - "learning_rate": 4.622380572875159e-06, - "loss": 0.2071, + "learning_rate": 1.4646373238887242e-05, + "loss": 0.06, "step": 165010 }, { "epoch": 7.7, - "learning_rate": 4.621911771600019e-06, - "loss": 0.264, + "learning_rate": 1.4645905169051222e-05, + "loss": 0.1348, "step": 165015 }, { "epoch": 7.7, - "learning_rate": 4.62144297032488e-06, - "loss": 0.1715, + "learning_rate": 1.4645437099215204e-05, + "loss": 0.1196, "step": 165020 }, { "epoch": 7.7, - "learning_rate": 4.62097416904974e-06, - "loss": 0.0063, + "learning_rate": 1.4644969029379183e-05, + "loss": 0.013, "step": 165025 }, { "epoch": 7.7, - "learning_rate": 4.620505367774601e-06, - "loss": 0.011, + "learning_rate": 1.4644500959543165e-05, + "loss": 0.0109, "step": 165030 }, { "epoch": 7.7, - "learning_rate": 4.620036566499461e-06, - "loss": 0.0127, + "learning_rate": 1.4644032889707145e-05, + "loss": 0.0465, "step": 165035 }, { "epoch": 7.7, - "learning_rate": 4.619567765224322e-06, - "loss": 0.0211, + "learning_rate": 1.4643564819871125e-05, + "loss": 0.026, "step": 165040 }, { "epoch": 7.7, - "learning_rate": 4.619098963949183e-06, - "loss": 0.0103, + "learning_rate": 1.4643096750035104e-05, + "loss": 0.031, "step": 165045 }, { "epoch": 7.7, - "learning_rate": 4.618630162674043e-06, - "loss": 0.0727, + "learning_rate": 1.4642628680199086e-05, + "loss": 0.0549, "step": 165050 }, { "epoch": 7.7, - "learning_rate": 4.618161361398903e-06, - "loss": 0.0753, + "learning_rate": 1.4642160610363067e-05, + "loss": 0.1018, "step": 165055 }, { "epoch": 7.7, - "learning_rate": 4.617692560123764e-06, - "loss": 0.0934, + "learning_rate": 1.4641692540527047e-05, + "loss": 0.0962, "step": 165060 }, { "epoch": 7.7, - "learning_rate": 4.617223758848625e-06, - "loss": 0.1112, + "learning_rate": 1.4641224470691029e-05, + "loss": 0.0673, "step": 165065 }, { "epoch": 7.7, - "learning_rate": 4.616754957573485e-06, - "loss": 0.0826, + "learning_rate": 1.4640756400855008e-05, + "loss": 0.2059, "step": 165070 }, { "epoch": 7.7, - "learning_rate": 4.616286156298346e-06, - "loss": 0.0184, + "learning_rate": 1.4640288331018988e-05, + "loss": 0.0521, "step": 165075 }, { "epoch": 7.7, - "learning_rate": 4.6158173550232065e-06, - "loss": 0.0046, + "learning_rate": 1.4639820261182968e-05, + "loss": 0.0461, "step": 165080 }, { "epoch": 7.7, - "learning_rate": 4.615348553748066e-06, - "loss": 0.0157, + "learning_rate": 1.463935219134695e-05, + "loss": 0.0648, "step": 165085 }, { "epoch": 7.7, - "learning_rate": 4.614879752472927e-06, - "loss": 0.0112, + "learning_rate": 1.463888412151093e-05, + "loss": 0.0115, "step": 165090 }, { "epoch": 7.7, - "learning_rate": 4.614410951197787e-06, - "loss": 0.0434, + "learning_rate": 1.4638416051674911e-05, + "loss": 0.0623, "step": 165095 }, { "epoch": 7.7, - "learning_rate": 4.613942149922649e-06, - "loss": 0.0415, + "learning_rate": 1.463794798183889e-05, + "loss": 0.0563, "step": 165100 }, { "epoch": 7.7, - "learning_rate": 4.613473348647509e-06, - "loss": 0.0529, + "learning_rate": 1.463747991200287e-05, + "loss": 0.0657, "step": 165105 }, { "epoch": 7.7, - "learning_rate": 4.6130045473723695e-06, - "loss": 0.064, + "learning_rate": 1.463701184216685e-05, + "loss": 0.0482, "step": 165110 }, { "epoch": 7.7, - "learning_rate": 4.612535746097229e-06, - "loss": 0.1181, + "learning_rate": 1.4636543772330832e-05, + "loss": 0.1219, "step": 165115 }, { "epoch": 7.7, - "learning_rate": 4.61206694482209e-06, - "loss": 0.2085, + "learning_rate": 1.4636075702494813e-05, + "loss": 0.1802, "step": 165120 }, { "epoch": 7.7, - "learning_rate": 4.611598143546951e-06, - "loss": 0.0346, + "learning_rate": 1.4635607632658793e-05, + "loss": 0.0144, "step": 165125 }, { "epoch": 7.71, - "learning_rate": 4.611129342271811e-06, - "loss": 0.0127, + "learning_rate": 1.4635139562822775e-05, + "loss": 0.0382, "step": 165130 }, { "epoch": 7.71, - "learning_rate": 4.610660540996672e-06, - "loss": 0.0821, + "learning_rate": 1.4634671492986753e-05, + "loss": 0.0095, "step": 165135 }, { "epoch": 7.71, - "learning_rate": 4.6101917397215324e-06, - "loss": 0.021, + "learning_rate": 1.4634203423150734e-05, + "loss": 0.0418, "step": 165140 }, { "epoch": 7.71, - "learning_rate": 4.609722938446393e-06, - "loss": 0.0014, + "learning_rate": 1.4633735353314714e-05, + "loss": 0.0472, "step": 165145 }, { "epoch": 7.71, - "learning_rate": 4.609254137171253e-06, - "loss": 0.0692, + "learning_rate": 1.4633267283478696e-05, + "loss": 0.1436, "step": 165150 }, { "epoch": 7.71, - "learning_rate": 4.608785335896114e-06, - "loss": 0.0934, + "learning_rate": 1.4632799213642676e-05, + "loss": 0.0953, "step": 165155 }, { "epoch": 7.71, - "learning_rate": 4.608316534620975e-06, - "loss": 0.0478, + "learning_rate": 1.4632331143806657e-05, + "loss": 0.0982, "step": 165160 }, { "epoch": 7.71, - "learning_rate": 4.607847733345835e-06, - "loss": 0.1162, + "learning_rate": 1.4631863073970637e-05, + "loss": 0.0439, "step": 165165 }, { "epoch": 7.71, - "learning_rate": 4.6073789320706954e-06, - "loss": 0.1554, + "learning_rate": 1.4631395004134617e-05, + "loss": 0.1341, "step": 165170 }, { "epoch": 7.71, - "learning_rate": 4.606910130795556e-06, - "loss": 0.0203, + "learning_rate": 1.4630926934298598e-05, + "loss": 0.0299, "step": 165175 }, { "epoch": 7.71, - "learning_rate": 4.606441329520417e-06, - "loss": 0.0132, + "learning_rate": 1.4630458864462578e-05, + "loss": 0.0372, "step": 165180 }, { "epoch": 7.71, - "learning_rate": 4.605972528245277e-06, - "loss": 0.0774, + "learning_rate": 1.462999079462656e-05, + "loss": 0.0093, "step": 165185 }, { "epoch": 7.71, - "learning_rate": 4.605503726970138e-06, - "loss": 0.0398, + "learning_rate": 1.462952272479054e-05, + "loss": 0.0152, "step": 165190 }, { "epoch": 7.71, - "learning_rate": 4.6050349256949985e-06, - "loss": 0.0305, + "learning_rate": 1.4629054654954521e-05, + "loss": 0.0643, "step": 165195 }, { "epoch": 7.71, - "learning_rate": 4.604566124419858e-06, - "loss": 0.0235, + "learning_rate": 1.4628586585118499e-05, + "loss": 0.0851, "step": 165200 }, { "epoch": 7.71, - "learning_rate": 4.604097323144719e-06, - "loss": 0.0253, + "learning_rate": 1.462811851528248e-05, + "loss": 0.0703, "step": 165205 }, { "epoch": 7.71, - "learning_rate": 4.60362852186958e-06, - "loss": 0.1201, + "learning_rate": 1.462765044544646e-05, + "loss": 0.0904, "step": 165210 }, { "epoch": 7.71, - "learning_rate": 4.603159720594441e-06, - "loss": 0.1268, + "learning_rate": 1.4627182375610442e-05, + "loss": 0.1152, "step": 165215 }, { "epoch": 7.71, - "learning_rate": 4.602690919319301e-06, - "loss": 0.1932, + "learning_rate": 1.4626714305774422e-05, + "loss": 0.1151, "step": 165220 }, { "epoch": 7.71, - "learning_rate": 4.6022221180441615e-06, - "loss": 0.0365, + "learning_rate": 1.4626246235938403e-05, + "loss": 0.0049, "step": 165225 }, { "epoch": 7.71, - "learning_rate": 4.601753316769022e-06, - "loss": 0.0189, + "learning_rate": 1.4625778166102381e-05, + "loss": 0.0188, "step": 165230 }, { "epoch": 7.71, - "learning_rate": 4.601284515493882e-06, - "loss": 0.002, + "learning_rate": 1.4625310096266363e-05, + "loss": 0.0171, "step": 165235 }, { "epoch": 7.71, - "learning_rate": 4.600815714218743e-06, - "loss": 0.0221, + "learning_rate": 1.4624842026430344e-05, + "loss": 0.0298, "step": 165240 }, { "epoch": 7.71, - "learning_rate": 4.600346912943604e-06, - "loss": 0.0358, + "learning_rate": 1.4624373956594324e-05, + "loss": 0.0226, "step": 165245 }, { "epoch": 7.71, - "learning_rate": 4.5998781116684645e-06, - "loss": 0.0803, + "learning_rate": 1.4623905886758306e-05, + "loss": 0.008, "step": 165250 }, { "epoch": 7.71, - "learning_rate": 4.5994093103933245e-06, - "loss": 0.0428, + "learning_rate": 1.4623437816922285e-05, + "loss": 0.0523, "step": 165255 }, { "epoch": 7.71, - "learning_rate": 4.598940509118185e-06, - "loss": 0.0732, + "learning_rate": 1.4622969747086265e-05, + "loss": 0.0646, "step": 165260 }, { "epoch": 7.71, - "learning_rate": 4.598471707843046e-06, - "loss": 0.0892, + "learning_rate": 1.4622501677250245e-05, + "loss": 0.1271, "step": 165265 }, { "epoch": 7.71, - "learning_rate": 4.598002906567906e-06, - "loss": 0.2499, + "learning_rate": 1.4622033607414227e-05, + "loss": 0.1609, "step": 165270 }, { "epoch": 7.71, - "learning_rate": 4.597534105292767e-06, - "loss": 0.0401, + "learning_rate": 1.4621565537578206e-05, + "loss": 0.0156, "step": 165275 }, { "epoch": 7.71, - "learning_rate": 4.5970653040176275e-06, - "loss": 0.0057, + "learning_rate": 1.4621097467742188e-05, + "loss": 0.0186, "step": 165280 }, { "epoch": 7.71, - "learning_rate": 4.596596502742488e-06, - "loss": 0.084, + "learning_rate": 1.4620629397906168e-05, + "loss": 0.0864, "step": 165285 }, { "epoch": 7.71, - "learning_rate": 4.596127701467348e-06, - "loss": 0.014, + "learning_rate": 1.462016132807015e-05, + "loss": 0.0232, "step": 165290 }, { "epoch": 7.71, - "learning_rate": 4.595658900192209e-06, - "loss": 0.0326, + "learning_rate": 1.4619693258234129e-05, + "loss": 0.0424, "step": 165295 }, { "epoch": 7.71, - "learning_rate": 4.59519009891707e-06, - "loss": 0.0554, + "learning_rate": 1.4619225188398109e-05, + "loss": 0.0508, "step": 165300 }, { "epoch": 7.71, - "learning_rate": 4.59472129764193e-06, - "loss": 0.0729, + "learning_rate": 1.461875711856209e-05, + "loss": 0.0284, "step": 165305 }, { "epoch": 7.71, - "learning_rate": 4.5942524963667905e-06, - "loss": 0.134, + "learning_rate": 1.461828904872607e-05, + "loss": 0.0783, "step": 165310 }, { "epoch": 7.71, - "learning_rate": 4.593783695091651e-06, - "loss": 0.2043, + "learning_rate": 1.4617820978890052e-05, + "loss": 0.0836, "step": 165315 }, { "epoch": 7.71, - "learning_rate": 4.593314893816512e-06, - "loss": 0.1657, + "learning_rate": 1.4617352909054032e-05, + "loss": 0.1426, "step": 165320 }, { "epoch": 7.71, - "learning_rate": 4.592846092541372e-06, - "loss": 0.0161, + "learning_rate": 1.4616884839218011e-05, + "loss": 0.0423, "step": 165325 }, { "epoch": 7.71, - "learning_rate": 4.592377291266233e-06, - "loss": 0.012, + "learning_rate": 1.4616416769381991e-05, + "loss": 0.0107, "step": 165330 }, { "epoch": 7.71, - "learning_rate": 4.5919084899910936e-06, - "loss": 0.0171, + "learning_rate": 1.4615948699545973e-05, + "loss": 0.0038, "step": 165335 }, { "epoch": 7.71, - "learning_rate": 4.5914396887159535e-06, - "loss": 0.0734, + "learning_rate": 1.4615480629709953e-05, + "loss": 0.0155, "step": 165340 }, { "epoch": 7.72, - "learning_rate": 4.590970887440814e-06, - "loss": 0.0649, + "learning_rate": 1.4615012559873934e-05, + "loss": 0.0302, "step": 165345 }, { "epoch": 7.72, - "learning_rate": 4.590502086165674e-06, - "loss": 0.0416, + "learning_rate": 1.4614544490037914e-05, + "loss": 0.0376, "step": 165350 }, { "epoch": 7.72, - "learning_rate": 4.590033284890536e-06, - "loss": 0.0357, + "learning_rate": 1.4614076420201894e-05, + "loss": 0.0847, "step": 165355 }, { "epoch": 7.72, - "learning_rate": 4.589564483615396e-06, - "loss": 0.0971, + "learning_rate": 1.4613608350365875e-05, + "loss": 0.037, "step": 165360 }, { "epoch": 7.72, - "learning_rate": 4.5890956823402566e-06, - "loss": 0.0497, + "learning_rate": 1.4613140280529855e-05, + "loss": 0.0528, "step": 165365 }, { "epoch": 7.72, - "learning_rate": 4.5886268810651165e-06, - "loss": 0.1205, + "learning_rate": 1.4612672210693837e-05, + "loss": 0.148, "step": 165370 }, { "epoch": 7.72, - "learning_rate": 4.588158079789977e-06, - "loss": 0.0179, + "learning_rate": 1.4612204140857816e-05, + "loss": 0.0207, "step": 165375 }, { "epoch": 7.72, - "learning_rate": 4.587689278514838e-06, - "loss": 0.0008, + "learning_rate": 1.4611736071021798e-05, + "loss": 0.0462, "step": 165380 }, { "epoch": 7.72, - "learning_rate": 4.587220477239698e-06, - "loss": 0.0456, + "learning_rate": 1.4611268001185778e-05, + "loss": 0.0109, "step": 165385 }, { "epoch": 7.72, - "learning_rate": 4.586751675964559e-06, - "loss": 0.0159, + "learning_rate": 1.4610799931349757e-05, + "loss": 0.0289, "step": 165390 }, { "epoch": 7.72, - "learning_rate": 4.5862828746894196e-06, - "loss": 0.0352, + "learning_rate": 1.4610331861513737e-05, + "loss": 0.0861, "step": 165395 }, { "epoch": 7.72, - "learning_rate": 4.58581407341428e-06, - "loss": 0.0321, + "learning_rate": 1.4609863791677719e-05, + "loss": 0.0773, "step": 165400 }, { "epoch": 7.72, - "learning_rate": 4.58534527213914e-06, - "loss": 0.0764, + "learning_rate": 1.4609395721841699e-05, + "loss": 0.0769, "step": 165405 }, { "epoch": 7.72, - "learning_rate": 4.584876470864001e-06, - "loss": 0.0571, + "learning_rate": 1.460892765200568e-05, + "loss": 0.0826, "step": 165410 }, { "epoch": 7.72, - "learning_rate": 4.584407669588862e-06, - "loss": 0.068, + "learning_rate": 1.460845958216966e-05, + "loss": 0.114, "step": 165415 }, { "epoch": 7.72, - "learning_rate": 4.583938868313722e-06, - "loss": 0.2186, + "learning_rate": 1.460799151233364e-05, + "loss": 0.1682, "step": 165420 }, { "epoch": 7.72, - "learning_rate": 4.5834700670385825e-06, - "loss": 0.0258, + "learning_rate": 1.4607523442497621e-05, + "loss": 0.0192, "step": 165425 }, { "epoch": 7.72, - "learning_rate": 4.583001265763443e-06, - "loss": 0.0077, + "learning_rate": 1.4607055372661601e-05, + "loss": 0.0101, "step": 165430 }, { "epoch": 7.72, - "learning_rate": 4.582532464488304e-06, - "loss": 0.0156, + "learning_rate": 1.4606587302825583e-05, + "loss": 0.0142, "step": 165435 }, { "epoch": 7.72, - "learning_rate": 4.582063663213164e-06, - "loss": 0.0526, + "learning_rate": 1.4606119232989562e-05, + "loss": 0.0047, "step": 165440 }, { "epoch": 7.72, - "learning_rate": 4.581594861938025e-06, - "loss": 0.0385, + "learning_rate": 1.4605651163153544e-05, + "loss": 0.0145, "step": 165445 }, { "epoch": 7.72, - "learning_rate": 4.581126060662886e-06, - "loss": 0.034, + "learning_rate": 1.4605183093317522e-05, + "loss": 0.0639, "step": 165450 }, { "epoch": 7.72, - "learning_rate": 4.5806572593877455e-06, - "loss": 0.0405, + "learning_rate": 1.4604715023481504e-05, + "loss": 0.0275, "step": 165455 }, { "epoch": 7.72, - "learning_rate": 4.580188458112606e-06, - "loss": 0.0307, + "learning_rate": 1.4604246953645483e-05, + "loss": 0.0454, "step": 165460 }, { "epoch": 7.72, - "learning_rate": 4.579719656837467e-06, - "loss": 0.158, + "learning_rate": 1.4603778883809465e-05, + "loss": 0.1424, "step": 165465 }, { "epoch": 7.72, - "learning_rate": 4.579250855562328e-06, - "loss": 0.2328, + "learning_rate": 1.4603310813973445e-05, + "loss": 0.1365, "step": 165470 }, { "epoch": 7.72, - "learning_rate": 4.578782054287188e-06, - "loss": 0.0131, + "learning_rate": 1.4602842744137426e-05, + "loss": 0.0105, "step": 165475 }, { "epoch": 7.72, - "learning_rate": 4.578313253012049e-06, - "loss": 0.0277, + "learning_rate": 1.4602374674301408e-05, + "loss": 0.0149, "step": 165480 }, { "epoch": 7.72, - "learning_rate": 4.577844451736909e-06, - "loss": 0.0073, + "learning_rate": 1.4601906604465386e-05, + "loss": 0.0046, "step": 165485 }, { "epoch": 7.72, - "learning_rate": 4.577375650461769e-06, - "loss": 0.0237, + "learning_rate": 1.4601438534629367e-05, + "loss": 0.0236, "step": 165490 }, { "epoch": 7.72, - "learning_rate": 4.57690684918663e-06, - "loss": 0.0242, + "learning_rate": 1.4600970464793347e-05, + "loss": 0.0393, "step": 165495 }, { "epoch": 7.72, - "learning_rate": 4.576438047911491e-06, - "loss": 0.068, + "learning_rate": 1.4600502394957329e-05, + "loss": 0.0262, "step": 165500 }, { "epoch": 7.72, - "learning_rate": 4.575969246636352e-06, - "loss": 0.111, + "learning_rate": 1.4600034325121309e-05, + "loss": 0.0985, "step": 165505 }, { "epoch": 7.72, - "learning_rate": 4.575500445361212e-06, - "loss": 0.0823, + "learning_rate": 1.459956625528529e-05, + "loss": 0.063, "step": 165510 }, { "epoch": 7.72, - "learning_rate": 4.575031644086072e-06, - "loss": 0.2003, + "learning_rate": 1.4599098185449268e-05, + "loss": 0.0632, "step": 165515 }, { "epoch": 7.72, - "learning_rate": 4.574562842810933e-06, - "loss": 0.1486, + "learning_rate": 1.459863011561325e-05, + "loss": 0.1234, "step": 165520 }, { "epoch": 7.72, - "learning_rate": 4.574094041535793e-06, - "loss": 0.0104, + "learning_rate": 1.459816204577723e-05, + "loss": 0.07, "step": 165525 }, { "epoch": 7.72, - "learning_rate": 4.573625240260654e-06, - "loss": 0.009, + "learning_rate": 1.4597693975941211e-05, + "loss": 0.0113, "step": 165530 }, { "epoch": 7.72, - "learning_rate": 4.573156438985515e-06, - "loss": 0.0118, + "learning_rate": 1.459722590610519e-05, + "loss": 0.0229, "step": 165535 }, { "epoch": 7.72, - "learning_rate": 4.572687637710375e-06, - "loss": 0.0162, + "learning_rate": 1.4596757836269172e-05, + "loss": 0.0235, "step": 165540 }, { "epoch": 7.72, - "learning_rate": 4.572218836435235e-06, - "loss": 0.1351, + "learning_rate": 1.4596289766433152e-05, + "loss": 0.0754, "step": 165545 }, { "epoch": 7.72, - "learning_rate": 4.571750035160096e-06, - "loss": 0.1191, + "learning_rate": 1.4595821696597132e-05, + "loss": 0.0385, "step": 165550 }, { "epoch": 7.73, - "learning_rate": 4.571281233884957e-06, - "loss": 0.0335, + "learning_rate": 1.4595353626761113e-05, + "loss": 0.0649, "step": 165555 }, { "epoch": 7.73, - "learning_rate": 4.570812432609817e-06, - "loss": 0.0781, + "learning_rate": 1.4594885556925093e-05, + "loss": 0.0483, "step": 165560 }, { "epoch": 7.73, - "learning_rate": 4.570343631334678e-06, - "loss": 0.1072, + "learning_rate": 1.4594417487089075e-05, + "loss": 0.153, "step": 165565 }, { "epoch": 7.73, - "learning_rate": 4.569874830059538e-06, - "loss": 0.1285, + "learning_rate": 1.4593949417253055e-05, + "loss": 0.1662, "step": 165570 }, { "epoch": 7.73, - "learning_rate": 4.569406028784399e-06, - "loss": 0.0219, + "learning_rate": 1.4593481347417036e-05, + "loss": 0.0016, "step": 165575 }, { "epoch": 7.73, - "learning_rate": 4.568937227509259e-06, - "loss": 0.0394, + "learning_rate": 1.4593013277581014e-05, + "loss": 0.0334, "step": 165580 }, { "epoch": 7.73, - "learning_rate": 4.568468426234119e-06, - "loss": 0.0222, + "learning_rate": 1.4592545207744996e-05, + "loss": 0.0285, "step": 165585 }, { "epoch": 7.73, - "learning_rate": 4.567999624958981e-06, - "loss": 0.0148, + "learning_rate": 1.4592077137908976e-05, + "loss": 0.054, "step": 165590 }, { "epoch": 7.73, - "learning_rate": 4.567530823683841e-06, - "loss": 0.0309, + "learning_rate": 1.4591609068072957e-05, + "loss": 0.0358, "step": 165595 }, { "epoch": 7.73, - "learning_rate": 4.567062022408701e-06, - "loss": 0.04, + "learning_rate": 1.4591140998236939e-05, + "loss": 0.0126, "step": 165600 }, { "epoch": 7.73, - "learning_rate": 4.566593221133561e-06, - "loss": 0.0253, + "learning_rate": 1.4590672928400918e-05, + "loss": 0.0152, "step": 165605 }, { "epoch": 7.73, - "learning_rate": 4.566124419858423e-06, - "loss": 0.1922, + "learning_rate": 1.4590204858564898e-05, + "loss": 0.1482, "step": 165610 }, { "epoch": 7.73, - "learning_rate": 4.565655618583283e-06, - "loss": 0.1703, + "learning_rate": 1.4589736788728878e-05, + "loss": 0.2024, "step": 165615 }, { "epoch": 7.73, - "learning_rate": 4.565186817308144e-06, - "loss": 0.2069, + "learning_rate": 1.458926871889286e-05, + "loss": 0.1657, "step": 165620 }, { "epoch": 7.73, - "learning_rate": 4.564718016033004e-06, - "loss": 0.0189, + "learning_rate": 1.458880064905684e-05, + "loss": 0.0518, "step": 165625 }, { "epoch": 7.73, - "learning_rate": 4.564249214757864e-06, - "loss": 0.0372, + "learning_rate": 1.4588332579220821e-05, + "loss": 0.0173, "step": 165630 }, { "epoch": 7.73, - "learning_rate": 4.563780413482725e-06, - "loss": 0.0379, + "learning_rate": 1.45878645093848e-05, + "loss": 0.0132, "step": 165635 }, { "epoch": 7.73, - "learning_rate": 4.563311612207585e-06, - "loss": 0.0163, + "learning_rate": 1.458739643954878e-05, + "loss": 0.0425, "step": 165640 }, { "epoch": 7.73, - "learning_rate": 4.562842810932446e-06, - "loss": 0.0529, + "learning_rate": 1.458692836971276e-05, + "loss": 0.0347, "step": 165645 }, { "epoch": 7.73, - "learning_rate": 4.562374009657307e-06, - "loss": 0.0579, + "learning_rate": 1.4586460299876742e-05, + "loss": 0.0776, "step": 165650 }, { "epoch": 7.73, - "learning_rate": 4.5619052083821674e-06, - "loss": 0.0478, + "learning_rate": 1.4585992230040722e-05, + "loss": 0.0353, "step": 165655 }, { "epoch": 7.73, - "learning_rate": 4.561436407107027e-06, - "loss": 0.097, + "learning_rate": 1.4585524160204703e-05, + "loss": 0.0677, "step": 165660 }, { "epoch": 7.73, - "learning_rate": 4.560967605831888e-06, - "loss": 0.0435, + "learning_rate": 1.4585056090368685e-05, + "loss": 0.0679, "step": 165665 }, { "epoch": 7.73, - "learning_rate": 4.560498804556749e-06, - "loss": 0.2444, + "learning_rate": 1.4584588020532665e-05, + "loss": 0.1671, "step": 165670 }, { "epoch": 7.73, - "learning_rate": 4.560030003281609e-06, - "loss": 0.087, + "learning_rate": 1.4584119950696644e-05, + "loss": 0.0027, "step": 165675 }, { "epoch": 7.73, - "learning_rate": 4.55956120200647e-06, - "loss": 0.0312, + "learning_rate": 1.4583651880860624e-05, + "loss": 0.0096, "step": 165680 }, { "epoch": 7.73, - "learning_rate": 4.5590924007313304e-06, - "loss": 0.017, + "learning_rate": 1.4583183811024606e-05, + "loss": 0.0415, "step": 165685 }, { "epoch": 7.73, - "learning_rate": 4.558623599456191e-06, - "loss": 0.0747, + "learning_rate": 1.4582715741188586e-05, + "loss": 0.0511, "step": 165690 }, { "epoch": 7.73, - "learning_rate": 4.558154798181051e-06, - "loss": 0.0155, + "learning_rate": 1.4582247671352567e-05, + "loss": 0.0355, "step": 165695 }, { "epoch": 7.73, - "learning_rate": 4.557685996905912e-06, - "loss": 0.0826, + "learning_rate": 1.4581779601516547e-05, + "loss": 0.0416, "step": 165700 }, { "epoch": 7.73, - "learning_rate": 4.557217195630773e-06, - "loss": 0.0613, + "learning_rate": 1.4581311531680527e-05, + "loss": 0.095, "step": 165705 }, { "epoch": 7.73, - "learning_rate": 4.556748394355633e-06, + "learning_rate": 1.4580843461844506e-05, "loss": 0.063, "step": 165710 }, { "epoch": 7.73, - "learning_rate": 4.556279593080493e-06, - "loss": 0.1011, + "learning_rate": 1.4580375392008488e-05, + "loss": 0.1456, "step": 165715 }, { "epoch": 7.73, - "learning_rate": 4.555810791805354e-06, - "loss": 0.1207, + "learning_rate": 1.4579907322172468e-05, + "loss": 0.1697, "step": 165720 }, { "epoch": 7.73, - "learning_rate": 4.555341990530215e-06, - "loss": 0.0218, + "learning_rate": 1.457943925233645e-05, + "loss": 0.0329, "step": 165725 }, { "epoch": 7.73, - "learning_rate": 4.554873189255075e-06, - "loss": 0.0742, + "learning_rate": 1.457897118250043e-05, + "loss": 0.011, "step": 165730 }, { "epoch": 7.73, - "learning_rate": 4.554404387979936e-06, - "loss": 0.0064, + "learning_rate": 1.4578503112664409e-05, + "loss": 0.0288, "step": 165735 }, { "epoch": 7.73, - "learning_rate": 4.5539355867047965e-06, - "loss": 0.0089, + "learning_rate": 1.457803504282839e-05, + "loss": 0.0553, "step": 165740 }, { "epoch": 7.73, - "learning_rate": 4.553466785429656e-06, - "loss": 0.0222, + "learning_rate": 1.457756697299237e-05, + "loss": 0.0533, "step": 165745 }, { "epoch": 7.73, - "learning_rate": 4.552997984154517e-06, - "loss": 0.114, + "learning_rate": 1.4577098903156352e-05, + "loss": 0.1149, "step": 165750 }, { "epoch": 7.73, - "learning_rate": 4.552529182879378e-06, - "loss": 0.0704, + "learning_rate": 1.4576630833320332e-05, + "loss": 0.0906, "step": 165755 }, { "epoch": 7.73, - "learning_rate": 4.552060381604239e-06, - "loss": 0.1174, + "learning_rate": 1.4576162763484313e-05, + "loss": 0.1295, "step": 165760 }, { "epoch": 7.73, - "learning_rate": 4.551591580329099e-06, - "loss": 0.1456, + "learning_rate": 1.4575694693648293e-05, + "loss": 0.1209, "step": 165765 }, { "epoch": 7.74, - "learning_rate": 4.5511227790539595e-06, - "loss": 0.1323, + "learning_rate": 1.4575226623812273e-05, + "loss": 0.2836, "step": 165770 }, { "epoch": 7.74, - "learning_rate": 4.55065397777882e-06, - "loss": 0.0199, + "learning_rate": 1.4574758553976253e-05, + "loss": 0.0215, "step": 165775 }, { "epoch": 7.74, - "learning_rate": 4.55018517650368e-06, - "loss": 0.0293, + "learning_rate": 1.4574290484140234e-05, + "loss": 0.0163, "step": 165780 }, { "epoch": 7.74, - "learning_rate": 4.549716375228541e-06, - "loss": 0.0314, + "learning_rate": 1.4573822414304216e-05, + "loss": 0.0052, "step": 165785 }, { "epoch": 7.74, - "learning_rate": 4.549247573953402e-06, - "loss": 0.0736, + "learning_rate": 1.4573354344468195e-05, + "loss": 0.0211, "step": 165790 }, { "epoch": 7.74, - "learning_rate": 4.5487787726782625e-06, - "loss": 0.1408, + "learning_rate": 1.4572886274632177e-05, + "loss": 0.021, "step": 165795 }, { "epoch": 7.74, - "learning_rate": 4.5483099714031225e-06, - "loss": 0.0303, + "learning_rate": 1.4572418204796155e-05, + "loss": 0.0203, "step": 165800 }, { "epoch": 7.74, - "learning_rate": 4.547841170127983e-06, + "learning_rate": 1.4571950134960137e-05, "loss": 0.0805, "step": 165805 }, { "epoch": 7.74, - "learning_rate": 4.547372368852844e-06, - "loss": 0.0494, + "learning_rate": 1.4571482065124116e-05, + "loss": 0.0349, "step": 165810 }, { "epoch": 7.74, - "learning_rate": 4.546903567577704e-06, - "loss": 0.1691, + "learning_rate": 1.4571013995288098e-05, + "loss": 0.1511, "step": 165815 }, { "epoch": 7.74, - "learning_rate": 4.546434766302565e-06, - "loss": 0.115, + "learning_rate": 1.4570545925452078e-05, + "loss": 0.0696, "step": 165820 }, { "epoch": 7.74, - "learning_rate": 4.5459659650274255e-06, - "loss": 0.0491, + "learning_rate": 1.457007785561606e-05, + "loss": 0.0295, "step": 165825 }, { "epoch": 7.74, - "learning_rate": 4.545497163752286e-06, - "loss": 0.0156, + "learning_rate": 1.4569609785780037e-05, + "loss": 0.0172, "step": 165830 }, { "epoch": 7.74, - "learning_rate": 4.545028362477146e-06, - "loss": 0.0251, + "learning_rate": 1.4569141715944019e-05, + "loss": 0.0106, "step": 165835 }, { "epoch": 7.74, - "learning_rate": 4.544559561202007e-06, - "loss": 0.033, + "learning_rate": 1.4568673646107999e-05, + "loss": 0.0189, "step": 165840 }, { "epoch": 7.74, - "learning_rate": 4.544090759926868e-06, - "loss": 0.0819, + "learning_rate": 1.456820557627198e-05, + "loss": 0.0402, "step": 165845 }, { "epoch": 7.74, - "learning_rate": 4.543621958651728e-06, - "loss": 0.0274, + "learning_rate": 1.4567737506435962e-05, + "loss": 0.056, "step": 165850 }, { "epoch": 7.74, - "learning_rate": 4.5431531573765885e-06, - "loss": 0.1054, + "learning_rate": 1.4567269436599942e-05, + "loss": 0.0929, "step": 165855 }, { "epoch": 7.74, - "learning_rate": 4.542684356101449e-06, - "loss": 0.0492, + "learning_rate": 1.4566801366763921e-05, + "loss": 0.0683, "step": 165860 }, { "epoch": 7.74, - "learning_rate": 4.54221555482631e-06, - "loss": 0.1337, + "learning_rate": 1.4566333296927901e-05, + "loss": 0.1351, "step": 165865 }, { "epoch": 7.74, - "learning_rate": 4.54174675355117e-06, - "loss": 0.1391, + "learning_rate": 1.4565865227091883e-05, + "loss": 0.2055, "step": 165870 }, { "epoch": 7.74, - "learning_rate": 4.54127795227603e-06, - "loss": 0.0056, + "learning_rate": 1.4565397157255862e-05, + "loss": 0.0179, "step": 165875 }, { "epoch": 7.74, - "learning_rate": 4.5408091510008916e-06, - "loss": 0.0227, + "learning_rate": 1.4564929087419844e-05, + "loss": 0.0589, "step": 165880 }, { "epoch": 7.74, - "learning_rate": 4.5403403497257515e-06, - "loss": 0.0245, + "learning_rate": 1.4564461017583824e-05, + "loss": 0.0145, "step": 165885 }, { "epoch": 7.74, - "learning_rate": 4.539871548450612e-06, - "loss": 0.0239, + "learning_rate": 1.4563992947747805e-05, + "loss": 0.0344, "step": 165890 }, { "epoch": 7.74, - "learning_rate": 4.539402747175472e-06, - "loss": 0.0948, + "learning_rate": 1.4563524877911783e-05, + "loss": 0.0341, "step": 165895 }, { "epoch": 7.74, - "learning_rate": 4.538933945900334e-06, - "loss": 0.0193, + "learning_rate": 1.4563056808075765e-05, + "loss": 0.0702, "step": 165900 }, { "epoch": 7.74, - "learning_rate": 4.538465144625194e-06, - "loss": 0.0797, + "learning_rate": 1.4562588738239746e-05, + "loss": 0.0442, "step": 165905 }, { "epoch": 7.74, - "learning_rate": 4.537996343350054e-06, - "loss": 0.1656, + "learning_rate": 1.4562120668403726e-05, + "loss": 0.0543, "step": 165910 }, { "epoch": 7.74, - "learning_rate": 4.5375275420749145e-06, - "loss": 0.1864, + "learning_rate": 1.4561652598567708e-05, + "loss": 0.0794, "step": 165915 }, { "epoch": 7.74, - "learning_rate": 4.537058740799775e-06, - "loss": 0.2205, + "learning_rate": 1.4561184528731688e-05, + "loss": 0.2016, "step": 165920 }, { "epoch": 7.74, - "learning_rate": 4.536589939524636e-06, - "loss": 0.0071, + "learning_rate": 1.4560716458895667e-05, + "loss": 0.0136, "step": 165925 }, { "epoch": 7.74, - "learning_rate": 4.536121138249496e-06, - "loss": 0.0136, + "learning_rate": 1.4560248389059647e-05, + "loss": 0.0342, "step": 165930 }, { "epoch": 7.74, - "learning_rate": 4.535652336974357e-06, - "loss": 0.0187, + "learning_rate": 1.4559780319223629e-05, + "loss": 0.0059, "step": 165935 }, { "epoch": 7.74, - "learning_rate": 4.5351835356992175e-06, - "loss": 0.0308, + "learning_rate": 1.4559312249387609e-05, + "loss": 0.0114, "step": 165940 }, { "epoch": 7.74, - "learning_rate": 4.534714734424078e-06, - "loss": 0.0491, + "learning_rate": 1.455884417955159e-05, + "loss": 0.0587, "step": 165945 }, { "epoch": 7.74, - "learning_rate": 4.534245933148938e-06, - "loss": 0.1242, + "learning_rate": 1.455837610971557e-05, + "loss": 0.0562, "step": 165950 }, { "epoch": 7.74, - "learning_rate": 4.533777131873799e-06, - "loss": 0.0732, + "learning_rate": 1.455790803987955e-05, + "loss": 0.078, "step": 165955 }, { "epoch": 7.74, - "learning_rate": 4.53330833059866e-06, - "loss": 0.1065, + "learning_rate": 1.455743997004353e-05, + "loss": 0.0716, "step": 165960 }, { "epoch": 7.74, - "learning_rate": 4.53283952932352e-06, - "loss": 0.164, + "learning_rate": 1.4556971900207511e-05, + "loss": 0.1721, "step": 165965 }, { "epoch": 7.74, - "learning_rate": 4.5323707280483805e-06, - "loss": 0.1692, + "learning_rate": 1.4556503830371493e-05, + "loss": 0.1525, "step": 165970 }, { "epoch": 7.74, - "learning_rate": 4.531901926773241e-06, - "loss": 0.0464, + "learning_rate": 1.4556035760535472e-05, + "loss": 0.0454, "step": 165975 }, { "epoch": 7.74, - "learning_rate": 4.531433125498102e-06, - "loss": 0.0042, + "learning_rate": 1.4555567690699454e-05, + "loss": 0.0121, "step": 165980 }, { "epoch": 7.75, - "learning_rate": 4.530964324222962e-06, - "loss": 0.0251, + "learning_rate": 1.4555099620863434e-05, + "loss": 0.0365, "step": 165985 }, { "epoch": 7.75, - "learning_rate": 4.530495522947823e-06, - "loss": 0.0277, + "learning_rate": 1.4554631551027414e-05, + "loss": 0.0156, "step": 165990 }, { "epoch": 7.75, - "learning_rate": 4.530026721672684e-06, - "loss": 0.0309, + "learning_rate": 1.4554163481191393e-05, + "loss": 0.0388, "step": 165995 }, { "epoch": 7.75, - "learning_rate": 4.5295579203975435e-06, - "loss": 0.0795, + "learning_rate": 1.4553695411355375e-05, + "loss": 0.0275, "step": 166000 }, { "epoch": 7.75, - "learning_rate": 4.529089119122404e-06, - "loss": 0.031, + "learning_rate": 1.4553227341519355e-05, + "loss": 0.0301, "step": 166005 }, { "epoch": 7.75, - "learning_rate": 4.528620317847265e-06, - "loss": 0.134, + "learning_rate": 1.4552759271683336e-05, + "loss": 0.0716, "step": 166010 }, { "epoch": 7.75, - "learning_rate": 4.528151516572126e-06, - "loss": 0.2913, + "learning_rate": 1.4552291201847316e-05, + "loss": 0.2078, "step": 166015 }, { "epoch": 7.75, - "learning_rate": 4.527682715296986e-06, - "loss": 0.1626, + "learning_rate": 1.4551823132011296e-05, + "loss": 0.2282, "step": 166020 }, { "epoch": 7.75, - "learning_rate": 4.5272139140218466e-06, - "loss": 0.0212, + "learning_rate": 1.4551355062175276e-05, + "loss": 0.02, "step": 166025 }, { "epoch": 7.75, - "learning_rate": 4.526745112746707e-06, - "loss": 0.015, + "learning_rate": 1.4550886992339257e-05, + "loss": 0.0172, "step": 166030 }, { "epoch": 7.75, - "learning_rate": 4.526276311471567e-06, - "loss": 0.0053, + "learning_rate": 1.4550418922503239e-05, + "loss": 0.0136, "step": 166035 }, { "epoch": 7.75, - "learning_rate": 4.525807510196428e-06, - "loss": 0.0097, + "learning_rate": 1.4549950852667218e-05, + "loss": 0.0202, "step": 166040 }, { "epoch": 7.75, - "learning_rate": 4.525338708921289e-06, - "loss": 0.0299, + "learning_rate": 1.45494827828312e-05, + "loss": 0.0182, "step": 166045 }, { "epoch": 7.75, - "learning_rate": 4.52486990764615e-06, - "loss": 0.0581, + "learning_rate": 1.4549014712995178e-05, + "loss": 0.0923, "step": 166050 }, { "epoch": 7.75, - "learning_rate": 4.5244011063710096e-06, - "loss": 0.0566, + "learning_rate": 1.454854664315916e-05, + "loss": 0.0477, "step": 166055 }, { "epoch": 7.75, - "learning_rate": 4.52393230509587e-06, - "loss": 0.1064, + "learning_rate": 1.454807857332314e-05, + "loss": 0.0811, "step": 166060 }, { "epoch": 7.75, - "learning_rate": 4.523463503820731e-06, - "loss": 0.0374, + "learning_rate": 1.4547610503487121e-05, + "loss": 0.1721, "step": 166065 }, { "epoch": 7.75, - "learning_rate": 4.522994702545591e-06, - "loss": 0.162, + "learning_rate": 1.45471424336511e-05, + "loss": 0.1604, "step": 166070 }, { "epoch": 7.75, - "learning_rate": 4.522525901270452e-06, - "loss": 0.034, + "learning_rate": 1.4546674363815082e-05, + "loss": 0.0297, "step": 166075 }, { "epoch": 7.75, - "learning_rate": 4.522057099995313e-06, - "loss": 0.0017, + "learning_rate": 1.4546206293979062e-05, + "loss": 0.0229, "step": 166080 }, { "epoch": 7.75, - "learning_rate": 4.521588298720173e-06, - "loss": 0.0194, + "learning_rate": 1.4545738224143042e-05, + "loss": 0.0141, "step": 166085 }, { "epoch": 7.75, - "learning_rate": 4.521119497445033e-06, - "loss": 0.0182, + "learning_rate": 1.4545270154307023e-05, + "loss": 0.051, "step": 166090 }, { "epoch": 7.75, - "learning_rate": 4.520650696169894e-06, - "loss": 0.0251, + "learning_rate": 1.4544802084471003e-05, + "loss": 0.0194, "step": 166095 }, { "epoch": 7.75, - "learning_rate": 4.520181894894755e-06, - "loss": 0.0691, + "learning_rate": 1.4544334014634985e-05, + "loss": 0.0624, "step": 166100 }, { "epoch": 7.75, - "learning_rate": 4.519713093619615e-06, - "loss": 0.0722, + "learning_rate": 1.4543865944798965e-05, + "loss": 0.0415, "step": 166105 }, { "epoch": 7.75, - "learning_rate": 4.519244292344476e-06, - "loss": 0.0892, + "learning_rate": 1.4543397874962946e-05, + "loss": 0.0675, "step": 166110 }, { "epoch": 7.75, - "learning_rate": 4.518775491069336e-06, - "loss": 0.1178, + "learning_rate": 1.4542929805126924e-05, + "loss": 0.1184, "step": 166115 }, { "epoch": 7.75, - "learning_rate": 4.518306689794197e-06, - "loss": 0.2381, + "learning_rate": 1.4542461735290906e-05, + "loss": 0.1557, "step": 166120 }, { "epoch": 7.75, - "learning_rate": 4.517837888519057e-06, - "loss": 0.0759, + "learning_rate": 1.4541993665454886e-05, + "loss": 0.0317, "step": 166125 }, { "epoch": 7.75, - "learning_rate": 4.517369087243917e-06, - "loss": 0.0412, + "learning_rate": 1.4541525595618867e-05, + "loss": 0.0211, "step": 166130 }, { "epoch": 7.75, - "learning_rate": 4.516900285968779e-06, - "loss": 0.0277, + "learning_rate": 1.4541057525782847e-05, + "loss": 0.0096, "step": 166135 }, { "epoch": 7.75, - "learning_rate": 4.516431484693639e-06, - "loss": 0.0633, + "learning_rate": 1.4540589455946828e-05, + "loss": 0.009, "step": 166140 }, { "epoch": 7.75, - "learning_rate": 4.515962683418499e-06, - "loss": 0.0628, + "learning_rate": 1.4540121386110807e-05, + "loss": 0.0034, "step": 166145 }, { "epoch": 7.75, - "learning_rate": 4.515493882143359e-06, - "loss": 0.0199, + "learning_rate": 1.4539653316274788e-05, + "loss": 0.0502, "step": 166150 }, { "epoch": 7.75, - "learning_rate": 4.515025080868221e-06, - "loss": 0.0587, + "learning_rate": 1.453918524643877e-05, + "loss": 0.0808, "step": 166155 }, { "epoch": 7.75, - "learning_rate": 4.514556279593081e-06, - "loss": 0.0999, + "learning_rate": 1.453871717660275e-05, + "loss": 0.0622, "step": 166160 }, { "epoch": 7.75, - "learning_rate": 4.514087478317941e-06, - "loss": 0.1685, + "learning_rate": 1.4538249106766731e-05, + "loss": 0.1639, "step": 166165 }, { "epoch": 7.75, - "learning_rate": 4.513618677042802e-06, - "loss": 0.1656, + "learning_rate": 1.453778103693071e-05, + "loss": 0.2209, "step": 166170 }, { "epoch": 7.75, - "learning_rate": 4.513149875767662e-06, - "loss": 0.0525, + "learning_rate": 1.4537312967094692e-05, + "loss": 0.0176, "step": 166175 }, { "epoch": 7.75, - "learning_rate": 4.512681074492523e-06, - "loss": 0.017, + "learning_rate": 1.453684489725867e-05, + "loss": 0.0238, "step": 166180 }, { "epoch": 7.75, - "learning_rate": 4.512212273217383e-06, - "loss": 0.0041, + "learning_rate": 1.4536376827422652e-05, + "loss": 0.0285, "step": 166185 }, { "epoch": 7.75, - "learning_rate": 4.511743471942244e-06, - "loss": 0.0309, + "learning_rate": 1.4535908757586632e-05, + "loss": 0.0179, "step": 166190 }, { "epoch": 7.75, - "learning_rate": 4.511274670667105e-06, - "loss": 0.0097, + "learning_rate": 1.4535440687750613e-05, + "loss": 0.0448, "step": 166195 }, { "epoch": 7.76, - "learning_rate": 4.510805869391965e-06, - "loss": 0.0465, + "learning_rate": 1.4534972617914593e-05, + "loss": 0.0128, "step": 166200 }, { "epoch": 7.76, - "learning_rate": 4.510337068116825e-06, - "loss": 0.0549, + "learning_rate": 1.4534504548078574e-05, + "loss": 0.1144, "step": 166205 }, { "epoch": 7.76, - "learning_rate": 4.509868266841686e-06, - "loss": 0.1288, + "learning_rate": 1.4534036478242553e-05, + "loss": 0.137, "step": 166210 }, { "epoch": 7.76, - "learning_rate": 4.509399465566547e-06, - "loss": 0.073, + "learning_rate": 1.4533568408406534e-05, + "loss": 0.2579, "step": 166215 }, { "epoch": 7.76, - "learning_rate": 4.508930664291407e-06, - "loss": 0.1467, + "learning_rate": 1.4533100338570516e-05, + "loss": 0.2096, "step": 166220 }, { "epoch": 7.76, - "learning_rate": 4.508461863016268e-06, - "loss": 0.0319, + "learning_rate": 1.4532632268734495e-05, + "loss": 0.0069, "step": 166225 }, { "epoch": 7.76, - "learning_rate": 4.507993061741128e-06, - "loss": 0.0227, + "learning_rate": 1.4532164198898477e-05, + "loss": 0.0254, "step": 166230 }, { "epoch": 7.76, - "learning_rate": 4.507524260465988e-06, - "loss": 0.0131, + "learning_rate": 1.4531696129062457e-05, + "loss": 0.0074, "step": 166235 }, { "epoch": 7.76, - "learning_rate": 4.507055459190849e-06, - "loss": 0.0286, + "learning_rate": 1.4531228059226437e-05, + "loss": 0.0474, "step": 166240 }, { "epoch": 7.76, - "learning_rate": 4.50658665791571e-06, - "loss": 0.0942, + "learning_rate": 1.4530759989390416e-05, + "loss": 0.0326, "step": 166245 }, { "epoch": 7.76, - "learning_rate": 4.506117856640571e-06, - "loss": 0.0591, + "learning_rate": 1.4530291919554398e-05, + "loss": 0.0711, "step": 166250 }, { "epoch": 7.76, - "learning_rate": 4.505649055365431e-06, - "loss": 0.0729, + "learning_rate": 1.4529823849718378e-05, + "loss": 0.0964, "step": 166255 }, { "epoch": 7.76, - "learning_rate": 4.505180254090291e-06, - "loss": 0.0959, + "learning_rate": 1.452935577988236e-05, + "loss": 0.1256, "step": 166260 }, { "epoch": 7.76, - "learning_rate": 4.504711452815152e-06, - "loss": 0.1949, + "learning_rate": 1.4528887710046339e-05, + "loss": 0.25, "step": 166265 }, { "epoch": 7.76, - "learning_rate": 4.504242651540013e-06, - "loss": 0.1961, + "learning_rate": 1.452841964021032e-05, + "loss": 0.1714, "step": 166270 }, { "epoch": 7.76, - "learning_rate": 4.503773850264873e-06, - "loss": 0.0073, + "learning_rate": 1.45279515703743e-05, + "loss": 0.0199, "step": 166275 }, { "epoch": 7.76, - "learning_rate": 4.503305048989734e-06, - "loss": 0.0401, + "learning_rate": 1.452748350053828e-05, + "loss": 0.0192, "step": 166280 }, { "epoch": 7.76, - "learning_rate": 4.5028362477145945e-06, - "loss": 0.0807, + "learning_rate": 1.4527015430702262e-05, + "loss": 0.0261, "step": 166285 }, { "epoch": 7.76, - "learning_rate": 4.502367446439454e-06, - "loss": 0.0645, + "learning_rate": 1.4526547360866242e-05, + "loss": 0.0491, "step": 166290 }, { "epoch": 7.76, - "learning_rate": 4.501898645164315e-06, - "loss": 0.0413, + "learning_rate": 1.4526079291030223e-05, + "loss": 0.0219, "step": 166295 }, { "epoch": 7.76, - "learning_rate": 4.501429843889176e-06, - "loss": 0.0937, + "learning_rate": 1.4525611221194203e-05, + "loss": 0.0769, "step": 166300 }, { "epoch": 7.76, - "learning_rate": 4.500961042614037e-06, - "loss": 0.0392, + "learning_rate": 1.4525143151358183e-05, + "loss": 0.0796, "step": 166305 }, { "epoch": 7.76, - "learning_rate": 4.500492241338897e-06, - "loss": 0.0737, + "learning_rate": 1.4524675081522163e-05, + "loss": 0.0728, "step": 166310 }, { "epoch": 7.76, - "learning_rate": 4.5000234400637575e-06, - "loss": 0.2251, + "learning_rate": 1.4524207011686144e-05, + "loss": 0.0534, "step": 166315 }, { "epoch": 7.76, - "learning_rate": 4.499554638788618e-06, - "loss": 0.1782, + "learning_rate": 1.4523738941850124e-05, + "loss": 0.1073, "step": 166320 }, { "epoch": 7.76, - "learning_rate": 4.499085837513478e-06, - "loss": 0.0159, + "learning_rate": 1.4523270872014105e-05, + "loss": 0.0122, "step": 166325 }, { "epoch": 7.76, - "learning_rate": 4.498617036238339e-06, - "loss": 0.0024, + "learning_rate": 1.4522802802178085e-05, + "loss": 0.0205, "step": 166330 }, { "epoch": 7.76, - "learning_rate": 4.4981482349632e-06, - "loss": 0.0178, + "learning_rate": 1.4522334732342065e-05, + "loss": 0.0167, "step": 166335 }, { "epoch": 7.76, - "learning_rate": 4.4976794336880605e-06, - "loss": 0.0211, + "learning_rate": 1.4521866662506047e-05, + "loss": 0.0299, "step": 166340 }, { "epoch": 7.76, - "learning_rate": 4.4972106324129204e-06, - "loss": 0.0449, + "learning_rate": 1.4521398592670026e-05, + "loss": 0.0356, "step": 166345 }, { "epoch": 7.76, - "learning_rate": 4.496741831137781e-06, - "loss": 0.0923, + "learning_rate": 1.4520930522834008e-05, + "loss": 0.0496, "step": 166350 }, { "epoch": 7.76, - "learning_rate": 4.496273029862642e-06, - "loss": 0.11, + "learning_rate": 1.4520462452997988e-05, + "loss": 0.092, "step": 166355 }, { "epoch": 7.76, - "learning_rate": 4.495804228587502e-06, - "loss": 0.0712, + "learning_rate": 1.451999438316197e-05, + "loss": 0.0303, "step": 166360 }, { "epoch": 7.76, - "learning_rate": 4.495335427312363e-06, - "loss": 0.2153, + "learning_rate": 1.4519526313325949e-05, + "loss": 0.0859, "step": 166365 }, { "epoch": 7.76, - "learning_rate": 4.4948666260372235e-06, - "loss": 0.1355, + "learning_rate": 1.4519058243489929e-05, + "loss": 0.1352, "step": 166370 }, { "epoch": 7.76, - "learning_rate": 4.494397824762084e-06, - "loss": 0.0208, + "learning_rate": 1.4518590173653909e-05, + "loss": 0.0356, "step": 166375 }, { "epoch": 7.76, - "learning_rate": 4.493929023486944e-06, - "loss": 0.0282, + "learning_rate": 1.451812210381789e-05, + "loss": 0.0084, "step": 166380 }, { "epoch": 7.76, - "learning_rate": 4.493460222211804e-06, - "loss": 0.0206, + "learning_rate": 1.451765403398187e-05, + "loss": 0.0399, "step": 166385 }, { "epoch": 7.76, - "learning_rate": 4.492991420936666e-06, - "loss": 0.0092, + "learning_rate": 1.4517185964145851e-05, + "loss": 0.0109, "step": 166390 }, { "epoch": 7.76, - "learning_rate": 4.492522619661526e-06, - "loss": 0.0605, + "learning_rate": 1.4516717894309833e-05, + "loss": 0.0552, "step": 166395 }, { "epoch": 7.76, - "learning_rate": 4.4920538183863865e-06, - "loss": 0.0397, + "learning_rate": 1.4516249824473811e-05, + "loss": 0.0094, "step": 166400 }, { "epoch": 7.76, - "learning_rate": 4.4915850171112464e-06, - "loss": 0.1554, + "learning_rate": 1.4515781754637793e-05, + "loss": 0.0136, "step": 166405 }, { "epoch": 7.76, - "learning_rate": 4.491116215836108e-06, - "loss": 0.1143, + "learning_rate": 1.4515313684801772e-05, + "loss": 0.0329, "step": 166410 }, { "epoch": 7.77, - "learning_rate": 4.490647414560968e-06, - "loss": 0.1448, + "learning_rate": 1.4514845614965754e-05, + "loss": 0.1549, "step": 166415 }, { "epoch": 7.77, - "learning_rate": 4.490178613285828e-06, - "loss": 0.1757, + "learning_rate": 1.4514377545129734e-05, + "loss": 0.2847, "step": 166420 }, { "epoch": 7.77, - "learning_rate": 4.489709812010689e-06, - "loss": 0.0117, + "learning_rate": 1.4513909475293715e-05, + "loss": 0.0226, "step": 166425 }, { "epoch": 7.77, - "learning_rate": 4.4892410107355495e-06, - "loss": 0.0131, + "learning_rate": 1.4513441405457693e-05, + "loss": 0.0078, "step": 166430 }, { "epoch": 7.77, - "learning_rate": 4.48877220946041e-06, - "loss": 0.0212, + "learning_rate": 1.4512973335621675e-05, + "loss": 0.0224, "step": 166435 }, { "epoch": 7.77, - "learning_rate": 4.48830340818527e-06, - "loss": 0.0404, + "learning_rate": 1.4512505265785655e-05, + "loss": 0.0239, "step": 166440 }, { "epoch": 7.77, - "learning_rate": 4.487834606910131e-06, - "loss": 0.0297, + "learning_rate": 1.4512037195949636e-05, + "loss": 0.0208, "step": 166445 }, { "epoch": 7.77, - "learning_rate": 4.487365805634992e-06, - "loss": 0.0281, + "learning_rate": 1.4511569126113616e-05, + "loss": 0.0424, "step": 166450 }, { "epoch": 7.77, - "learning_rate": 4.486897004359852e-06, - "loss": 0.0797, + "learning_rate": 1.4511101056277598e-05, + "loss": 0.0694, "step": 166455 }, { "epoch": 7.77, - "learning_rate": 4.4864282030847125e-06, - "loss": 0.149, + "learning_rate": 1.4510632986441577e-05, + "loss": 0.0774, "step": 166460 }, { "epoch": 7.77, - "learning_rate": 4.485959401809573e-06, - "loss": 0.135, + "learning_rate": 1.4510164916605557e-05, + "loss": 0.1787, "step": 166465 }, { "epoch": 7.77, - "learning_rate": 4.485490600534434e-06, - "loss": 0.2816, + "learning_rate": 1.4509696846769539e-05, + "loss": 0.1616, "step": 166470 }, { "epoch": 7.77, - "learning_rate": 4.485021799259294e-06, - "loss": 0.003, + "learning_rate": 1.4509228776933519e-05, + "loss": 0.0274, "step": 166475 }, { "epoch": 7.77, - "learning_rate": 4.484552997984155e-06, - "loss": 0.0164, + "learning_rate": 1.45087607070975e-05, + "loss": 0.0132, "step": 166480 }, { "epoch": 7.77, - "learning_rate": 4.4840841967090155e-06, - "loss": 0.0289, + "learning_rate": 1.450829263726148e-05, + "loss": 0.0211, "step": 166485 }, { "epoch": 7.77, - "learning_rate": 4.4836153954338755e-06, - "loss": 0.0381, + "learning_rate": 1.4507824567425461e-05, + "loss": 0.0316, "step": 166490 }, { "epoch": 7.77, - "learning_rate": 4.483146594158736e-06, - "loss": 0.0162, + "learning_rate": 1.450735649758944e-05, + "loss": 0.0269, "step": 166495 }, { "epoch": 7.77, - "learning_rate": 4.482677792883597e-06, - "loss": 0.067, + "learning_rate": 1.4506888427753421e-05, + "loss": 0.0467, "step": 166500 }, { "epoch": 7.77, - "learning_rate": 4.482208991608458e-06, - "loss": 0.0759, + "learning_rate": 1.45064203579174e-05, + "loss": 0.0419, "step": 166505 }, { "epoch": 7.77, - "learning_rate": 4.481740190333318e-06, - "loss": 0.0692, + "learning_rate": 1.4505952288081382e-05, + "loss": 0.0268, "step": 166510 }, { "epoch": 7.77, - "learning_rate": 4.4812713890581785e-06, - "loss": 0.1666, + "learning_rate": 1.4505484218245362e-05, + "loss": 0.1438, "step": 166515 }, { "epoch": 7.77, - "learning_rate": 4.480802587783039e-06, - "loss": 0.1563, + "learning_rate": 1.4505016148409344e-05, + "loss": 0.2128, "step": 166520 }, { "epoch": 7.77, - "learning_rate": 4.480333786507899e-06, - "loss": 0.0547, + "learning_rate": 1.4504548078573323e-05, + "loss": 0.0032, "step": 166525 }, { "epoch": 7.77, - "learning_rate": 4.47986498523276e-06, - "loss": 0.018, + "learning_rate": 1.4504080008737303e-05, + "loss": 0.0454, "step": 166530 }, { "epoch": 7.77, - "learning_rate": 4.479396183957621e-06, - "loss": 0.0366, + "learning_rate": 1.4503611938901285e-05, + "loss": 0.0095, "step": 166535 }, { "epoch": 7.77, - "learning_rate": 4.4789273826824816e-06, - "loss": 0.0118, + "learning_rate": 1.4503143869065265e-05, + "loss": 0.0281, "step": 166540 }, { "epoch": 7.77, - "learning_rate": 4.4784585814073415e-06, - "loss": 0.0397, + "learning_rate": 1.4502675799229246e-05, + "loss": 0.0307, "step": 166545 }, { "epoch": 7.77, - "learning_rate": 4.477989780132202e-06, - "loss": 0.031, + "learning_rate": 1.4502207729393226e-05, + "loss": 0.1219, "step": 166550 }, { "epoch": 7.77, - "learning_rate": 4.477520978857063e-06, - "loss": 0.0328, + "learning_rate": 1.4501739659557206e-05, + "loss": 0.1135, "step": 166555 }, { "epoch": 7.77, - "learning_rate": 4.477052177581923e-06, - "loss": 0.0799, + "learning_rate": 1.4501271589721186e-05, + "loss": 0.1554, "step": 166560 }, { "epoch": 7.77, - "learning_rate": 4.476583376306784e-06, - "loss": 0.1734, + "learning_rate": 1.4500803519885167e-05, + "loss": 0.1074, "step": 166565 }, { "epoch": 7.77, - "learning_rate": 4.4761145750316446e-06, - "loss": 0.0948, + "learning_rate": 1.4500335450049147e-05, + "loss": 0.1054, "step": 166570 }, { "epoch": 7.77, - "learning_rate": 4.475645773756505e-06, - "loss": 0.0293, + "learning_rate": 1.4499867380213128e-05, + "loss": 0.0288, "step": 166575 }, { "epoch": 7.77, - "learning_rate": 4.475176972481365e-06, - "loss": 0.0332, + "learning_rate": 1.449939931037711e-05, + "loss": 0.0063, "step": 166580 }, { "epoch": 7.77, - "learning_rate": 4.474708171206226e-06, - "loss": 0.0223, + "learning_rate": 1.449893124054109e-05, + "loss": 0.0094, "step": 166585 }, { "epoch": 7.77, - "learning_rate": 4.474239369931087e-06, + "learning_rate": 1.449846317070507e-05, "loss": 0.0351, "step": 166590 }, { "epoch": 7.77, - "learning_rate": 4.473770568655947e-06, - "loss": 0.0151, + "learning_rate": 1.449799510086905e-05, + "loss": 0.0218, "step": 166595 }, { "epoch": 7.77, - "learning_rate": 4.4733017673808076e-06, - "loss": 0.0412, + "learning_rate": 1.4497527031033031e-05, + "loss": 0.0511, "step": 166600 }, { "epoch": 7.77, - "learning_rate": 4.472832966105668e-06, - "loss": 0.0281, + "learning_rate": 1.449705896119701e-05, + "loss": 0.038, "step": 166605 }, { "epoch": 7.77, - "learning_rate": 4.472364164830529e-06, - "loss": 0.0676, + "learning_rate": 1.4496590891360992e-05, + "loss": 0.1208, "step": 166610 }, { "epoch": 7.77, - "learning_rate": 4.471895363555389e-06, - "loss": 0.0935, + "learning_rate": 1.4496122821524972e-05, + "loss": 0.1205, "step": 166615 }, { "epoch": 7.77, - "learning_rate": 4.47142656228025e-06, - "loss": 0.2202, + "learning_rate": 1.4495654751688952e-05, + "loss": 0.0812, "step": 166620 }, { "epoch": 7.77, - "learning_rate": 4.470957761005111e-06, - "loss": 0.004, + "learning_rate": 1.4495186681852932e-05, + "loss": 0.0721, "step": 166625 }, { "epoch": 7.78, - "learning_rate": 4.470488959729971e-06, - "loss": 0.0056, + "learning_rate": 1.4494718612016913e-05, + "loss": 0.0146, "step": 166630 }, { "epoch": 7.78, - "learning_rate": 4.470020158454831e-06, - "loss": 0.034, + "learning_rate": 1.4494250542180893e-05, + "loss": 0.0365, "step": 166635 }, { "epoch": 7.78, - "learning_rate": 4.469551357179692e-06, - "loss": 0.0405, + "learning_rate": 1.4493782472344875e-05, + "loss": 0.0399, "step": 166640 }, { "epoch": 7.78, - "learning_rate": 4.469082555904553e-06, - "loss": 0.025, + "learning_rate": 1.4493314402508856e-05, + "loss": 0.0909, "step": 166645 }, { "epoch": 7.78, - "learning_rate": 4.468613754629413e-06, - "loss": 0.0705, + "learning_rate": 1.4492846332672834e-05, + "loss": 0.0664, "step": 166650 }, { "epoch": 7.78, - "learning_rate": 4.468144953354274e-06, - "loss": 0.0405, + "learning_rate": 1.4492378262836816e-05, + "loss": 0.0466, "step": 166655 }, { "epoch": 7.78, - "learning_rate": 4.467676152079134e-06, - "loss": 0.1203, + "learning_rate": 1.4491910193000795e-05, + "loss": 0.049, "step": 166660 }, { "epoch": 7.78, - "learning_rate": 4.467207350803995e-06, - "loss": 0.157, + "learning_rate": 1.4491442123164777e-05, + "loss": 0.2146, "step": 166665 }, { "epoch": 7.78, - "learning_rate": 4.466738549528855e-06, - "loss": 0.2092, + "learning_rate": 1.4490974053328757e-05, + "loss": 0.1493, "step": 166670 }, { "epoch": 7.78, - "learning_rate": 4.466269748253715e-06, - "loss": 0.0052, + "learning_rate": 1.4490505983492738e-05, + "loss": 0.0314, "step": 166675 }, { "epoch": 7.78, - "learning_rate": 4.465800946978577e-06, - "loss": 0.0141, + "learning_rate": 1.4490037913656718e-05, + "loss": 0.0286, "step": 166680 }, { "epoch": 7.78, - "learning_rate": 4.465332145703437e-06, - "loss": 0.0061, + "learning_rate": 1.4489569843820698e-05, + "loss": 0.037, "step": 166685 }, { "epoch": 7.78, - "learning_rate": 4.464863344428297e-06, - "loss": 0.0059, + "learning_rate": 1.4489101773984678e-05, + "loss": 0.022, "step": 166690 }, { "epoch": 7.78, - "learning_rate": 4.464394543153157e-06, - "loss": 0.0601, + "learning_rate": 1.448863370414866e-05, + "loss": 0.0159, "step": 166695 }, { "epoch": 7.78, - "learning_rate": 4.463925741878019e-06, - "loss": 0.0256, + "learning_rate": 1.448816563431264e-05, + "loss": 0.0035, "step": 166700 }, { "epoch": 7.78, - "learning_rate": 4.463456940602879e-06, - "loss": 0.0233, + "learning_rate": 1.448769756447662e-05, + "loss": 0.0629, "step": 166705 }, { "epoch": 7.78, - "learning_rate": 4.462988139327739e-06, - "loss": 0.0537, + "learning_rate": 1.4487229494640602e-05, + "loss": 0.0881, "step": 166710 }, { "epoch": 7.78, - "learning_rate": 4.4625193380526e-06, - "loss": 0.1706, + "learning_rate": 1.448676142480458e-05, + "loss": 0.2355, "step": 166715 }, { "epoch": 7.78, - "learning_rate": 4.46205053677746e-06, - "loss": 0.1758, + "learning_rate": 1.4486293354968562e-05, + "loss": 0.0874, "step": 166720 }, { "epoch": 7.78, - "learning_rate": 4.461581735502321e-06, - "loss": 0.0614, + "learning_rate": 1.4485825285132542e-05, + "loss": 0.047, "step": 166725 }, { "epoch": 7.78, - "learning_rate": 4.461112934227181e-06, - "loss": 0.0127, + "learning_rate": 1.4485357215296523e-05, + "loss": 0.0052, "step": 166730 }, { "epoch": 7.78, - "learning_rate": 4.460644132952042e-06, - "loss": 0.0552, + "learning_rate": 1.4484889145460503e-05, + "loss": 0.0079, "step": 166735 }, { "epoch": 7.78, - "learning_rate": 4.460175331676903e-06, - "loss": 0.0324, + "learning_rate": 1.4484421075624484e-05, + "loss": 0.0121, "step": 166740 }, { "epoch": 7.78, - "learning_rate": 4.4597065304017626e-06, - "loss": 0.0607, + "learning_rate": 1.4483953005788463e-05, + "loss": 0.0586, "step": 166745 }, { "epoch": 7.78, - "learning_rate": 4.459237729126623e-06, - "loss": 0.0498, + "learning_rate": 1.4483484935952444e-05, + "loss": 0.0787, "step": 166750 }, { "epoch": 7.78, - "learning_rate": 4.458768927851484e-06, - "loss": 0.0338, + "learning_rate": 1.4483016866116424e-05, + "loss": 0.0835, "step": 166755 }, { "epoch": 7.78, - "learning_rate": 4.458300126576345e-06, - "loss": 0.0498, + "learning_rate": 1.4482548796280405e-05, + "loss": 0.0408, "step": 166760 }, { "epoch": 7.78, - "learning_rate": 4.457831325301205e-06, - "loss": 0.1544, + "learning_rate": 1.4482080726444387e-05, + "loss": 0.0984, "step": 166765 }, { "epoch": 7.78, - "learning_rate": 4.457362524026066e-06, - "loss": 0.1423, + "learning_rate": 1.4481612656608367e-05, + "loss": 0.12, "step": 166770 }, { "epoch": 7.78, - "learning_rate": 4.456893722750926e-06, - "loss": 0.0229, + "learning_rate": 1.4481144586772348e-05, + "loss": 0.0371, "step": 166775 }, { "epoch": 7.78, - "learning_rate": 4.456424921475786e-06, - "loss": 0.0432, + "learning_rate": 1.4480676516936326e-05, + "loss": 0.0306, "step": 166780 }, { "epoch": 7.78, - "learning_rate": 4.455956120200647e-06, - "loss": 0.0228, + "learning_rate": 1.4480208447100308e-05, + "loss": 0.0217, "step": 166785 }, { "epoch": 7.78, - "learning_rate": 4.455487318925508e-06, - "loss": 0.0137, + "learning_rate": 1.4479740377264288e-05, + "loss": 0.02, "step": 166790 }, { "epoch": 7.78, - "learning_rate": 4.455018517650369e-06, - "loss": 0.0238, + "learning_rate": 1.447927230742827e-05, + "loss": 0.0454, "step": 166795 }, { "epoch": 7.78, - "learning_rate": 4.454549716375229e-06, - "loss": 0.0372, + "learning_rate": 1.4478804237592249e-05, + "loss": 0.0502, "step": 166800 }, { "epoch": 7.78, - "learning_rate": 4.454080915100089e-06, - "loss": 0.0401, + "learning_rate": 1.447833616775623e-05, + "loss": 0.0682, "step": 166805 }, { "epoch": 7.78, - "learning_rate": 4.45361211382495e-06, - "loss": 0.0592, + "learning_rate": 1.4477868097920209e-05, + "loss": 0.0555, "step": 166810 }, { "epoch": 7.78, - "learning_rate": 4.45314331254981e-06, - "loss": 0.2137, + "learning_rate": 1.447740002808419e-05, + "loss": 0.1431, "step": 166815 }, { "epoch": 7.78, - "learning_rate": 4.452674511274671e-06, - "loss": 0.1909, + "learning_rate": 1.447693195824817e-05, + "loss": 0.2013, "step": 166820 }, { "epoch": 7.78, - "learning_rate": 4.452205709999532e-06, - "loss": 0.0541, + "learning_rate": 1.4476463888412152e-05, + "loss": 0.0412, "step": 166825 }, { "epoch": 7.78, - "learning_rate": 4.4517369087243925e-06, - "loss": 0.0218, + "learning_rate": 1.4475995818576133e-05, + "loss": 0.0034, "step": 166830 }, { "epoch": 7.78, - "learning_rate": 4.451268107449252e-06, - "loss": 0.0427, + "learning_rate": 1.4475527748740113e-05, + "loss": 0.0469, "step": 166835 }, { "epoch": 7.78, - "learning_rate": 4.450799306174113e-06, - "loss": 0.0216, + "learning_rate": 1.4475059678904093e-05, + "loss": 0.0235, "step": 166840 }, { "epoch": 7.79, - "learning_rate": 4.450330504898974e-06, - "loss": 0.0272, + "learning_rate": 1.4474591609068072e-05, + "loss": 0.0204, "step": 166845 }, { "epoch": 7.79, - "learning_rate": 4.449861703623834e-06, - "loss": 0.0256, + "learning_rate": 1.4474123539232054e-05, + "loss": 0.0162, "step": 166850 }, { "epoch": 7.79, - "learning_rate": 4.449392902348695e-06, - "loss": 0.0245, + "learning_rate": 1.4473655469396034e-05, + "loss": 0.0375, "step": 166855 }, { "epoch": 7.79, - "learning_rate": 4.4489241010735554e-06, - "loss": 0.1216, + "learning_rate": 1.4473187399560015e-05, + "loss": 0.0544, "step": 166860 }, { "epoch": 7.79, - "learning_rate": 4.448455299798416e-06, - "loss": 0.1365, + "learning_rate": 1.4472719329723995e-05, + "loss": 0.1083, "step": 166865 }, { "epoch": 7.79, - "learning_rate": 4.447986498523276e-06, - "loss": 0.2059, + "learning_rate": 1.4472251259887977e-05, + "loss": 0.1508, "step": 166870 }, { "epoch": 7.79, - "learning_rate": 4.447517697248137e-06, - "loss": 0.0335, + "learning_rate": 1.4471783190051955e-05, + "loss": 0.0724, "step": 166875 }, { "epoch": 7.79, - "learning_rate": 4.447048895972998e-06, - "loss": 0.0141, + "learning_rate": 1.4471315120215936e-05, + "loss": 0.0091, "step": 166880 }, { "epoch": 7.79, - "learning_rate": 4.446580094697858e-06, - "loss": 0.0141, + "learning_rate": 1.4470847050379918e-05, + "loss": 0.0145, "step": 166885 }, { "epoch": 7.79, - "learning_rate": 4.4461112934227184e-06, - "loss": 0.005, + "learning_rate": 1.4470378980543898e-05, + "loss": 0.0551, "step": 166890 }, { "epoch": 7.79, - "learning_rate": 4.445642492147579e-06, - "loss": 0.0476, + "learning_rate": 1.4469910910707879e-05, + "loss": 0.1019, "step": 166895 }, { "epoch": 7.79, - "learning_rate": 4.44517369087244e-06, - "loss": 0.0276, + "learning_rate": 1.4469442840871859e-05, + "loss": 0.0389, "step": 166900 }, { "epoch": 7.79, - "learning_rate": 4.4447048895973e-06, - "loss": 0.0608, + "learning_rate": 1.4468974771035839e-05, + "loss": 0.0227, "step": 166905 }, { "epoch": 7.79, - "learning_rate": 4.444236088322161e-06, - "loss": 0.0698, + "learning_rate": 1.4468506701199819e-05, + "loss": 0.1095, "step": 166910 }, { "epoch": 7.79, - "learning_rate": 4.4437672870470215e-06, - "loss": 0.1506, + "learning_rate": 1.44680386313638e-05, + "loss": 0.1951, "step": 166915 }, { "epoch": 7.79, - "learning_rate": 4.4432984857718814e-06, - "loss": 0.2063, + "learning_rate": 1.446757056152778e-05, + "loss": 0.1662, "step": 166920 }, { "epoch": 7.79, - "learning_rate": 4.442829684496742e-06, - "loss": 0.0634, + "learning_rate": 1.4467102491691761e-05, + "loss": 0.0137, "step": 166925 }, { "epoch": 7.79, - "learning_rate": 4.442360883221602e-06, - "loss": 0.0099, + "learning_rate": 1.4466634421855741e-05, + "loss": 0.0237, "step": 166930 }, { "epoch": 7.79, - "learning_rate": 4.441892081946464e-06, - "loss": 0.0157, + "learning_rate": 1.4466166352019721e-05, + "loss": 0.0253, "step": 166935 }, { "epoch": 7.79, - "learning_rate": 4.441423280671324e-06, - "loss": 0.0216, + "learning_rate": 1.4465698282183701e-05, + "loss": 0.0335, "step": 166940 }, { "epoch": 7.79, - "learning_rate": 4.4409544793961845e-06, - "loss": 0.0316, + "learning_rate": 1.4465230212347682e-05, + "loss": 0.0574, "step": 166945 }, { "epoch": 7.79, - "learning_rate": 4.440485678121044e-06, - "loss": 0.0138, + "learning_rate": 1.4464762142511664e-05, + "loss": 0.0726, "step": 166950 }, { "epoch": 7.79, - "learning_rate": 4.440016876845906e-06, - "loss": 0.129, + "learning_rate": 1.4464294072675644e-05, + "loss": 0.0634, "step": 166955 }, { "epoch": 7.79, - "learning_rate": 4.439548075570766e-06, - "loss": 0.0886, + "learning_rate": 1.4463826002839625e-05, + "loss": 0.0801, "step": 166960 }, { "epoch": 7.79, - "learning_rate": 4.439079274295626e-06, - "loss": 0.2321, + "learning_rate": 1.4463357933003605e-05, + "loss": 0.2019, "step": 166965 }, { "epoch": 7.79, - "learning_rate": 4.438610473020487e-06, - "loss": 0.2407, + "learning_rate": 1.4462889863167585e-05, + "loss": 0.2736, "step": 166970 }, { "epoch": 7.79, - "learning_rate": 4.4381416717453475e-06, - "loss": 0.0203, + "learning_rate": 1.4462421793331565e-05, + "loss": 0.0059, "step": 166975 }, { "epoch": 7.79, - "learning_rate": 4.437672870470208e-06, - "loss": 0.025, + "learning_rate": 1.4461953723495546e-05, + "loss": 0.0103, "step": 166980 }, { "epoch": 7.79, - "learning_rate": 4.437204069195068e-06, - "loss": 0.0149, + "learning_rate": 1.4461485653659526e-05, + "loss": 0.0043, "step": 166985 }, { "epoch": 7.79, - "learning_rate": 4.436735267919929e-06, - "loss": 0.0393, + "learning_rate": 1.4461017583823508e-05, + "loss": 0.0503, "step": 166990 }, { "epoch": 7.79, - "learning_rate": 4.43626646664479e-06, - "loss": 0.0417, + "learning_rate": 1.4460549513987487e-05, + "loss": 0.0279, "step": 166995 }, { "epoch": 7.79, - "learning_rate": 4.43579766536965e-06, - "loss": 0.0043, + "learning_rate": 1.4460081444151467e-05, + "loss": 0.0172, "step": 167000 }, { "epoch": 7.79, - "learning_rate": 4.4353288640945105e-06, - "loss": 0.0783, + "learning_rate": 1.4459613374315449e-05, + "loss": 0.051, "step": 167005 }, { "epoch": 7.79, - "learning_rate": 4.434860062819371e-06, - "loss": 0.1281, + "learning_rate": 1.4459145304479428e-05, + "loss": 0.0976, "step": 167010 }, { "epoch": 7.79, - "learning_rate": 4.434391261544232e-06, - "loss": 0.1435, + "learning_rate": 1.445867723464341e-05, + "loss": 0.0672, "step": 167015 }, { "epoch": 7.79, - "learning_rate": 4.433922460269092e-06, - "loss": 0.1686, + "learning_rate": 1.445820916480739e-05, + "loss": 0.1117, "step": 167020 }, { "epoch": 7.79, - "learning_rate": 4.433453658993953e-06, - "loss": 0.0322, + "learning_rate": 1.4457741094971371e-05, + "loss": 0.0029, "step": 167025 }, { "epoch": 7.79, - "learning_rate": 4.4329848577188135e-06, - "loss": 0.3229, + "learning_rate": 1.445727302513535e-05, + "loss": 0.0073, "step": 167030 }, { "epoch": 7.79, - "learning_rate": 4.4325160564436735e-06, - "loss": 0.0197, + "learning_rate": 1.4456804955299331e-05, + "loss": 0.0349, "step": 167035 }, { "epoch": 7.79, - "learning_rate": 4.432047255168534e-06, - "loss": 0.0273, + "learning_rate": 1.445633688546331e-05, + "loss": 0.0199, "step": 167040 }, { "epoch": 7.79, - "learning_rate": 4.431578453893395e-06, - "loss": 0.0334, + "learning_rate": 1.4455868815627292e-05, + "loss": 0.0336, "step": 167045 }, { "epoch": 7.79, - "learning_rate": 4.431109652618256e-06, - "loss": 0.0255, + "learning_rate": 1.4455400745791272e-05, + "loss": 0.0281, "step": 167050 }, { "epoch": 7.8, - "learning_rate": 4.430640851343116e-06, - "loss": 0.0362, + "learning_rate": 1.4454932675955254e-05, + "loss": 0.0476, "step": 167055 }, { "epoch": 7.8, - "learning_rate": 4.4301720500679765e-06, - "loss": 0.1108, + "learning_rate": 1.4454464606119233e-05, + "loss": 0.0552, "step": 167060 }, { "epoch": 7.8, - "learning_rate": 4.429703248792837e-06, - "loss": 0.1484, + "learning_rate": 1.4453996536283213e-05, + "loss": 0.2543, "step": 167065 }, { "epoch": 7.8, - "learning_rate": 4.429234447517697e-06, - "loss": 0.1669, + "learning_rate": 1.4453528466447195e-05, + "loss": 0.2595, "step": 167070 }, { "epoch": 7.8, - "learning_rate": 4.428765646242558e-06, - "loss": 0.0817, + "learning_rate": 1.4453060396611175e-05, + "loss": 0.0286, "step": 167075 }, { "epoch": 7.8, - "learning_rate": 4.428296844967419e-06, - "loss": 0.0081, + "learning_rate": 1.4452592326775156e-05, + "loss": 0.002, "step": 167080 }, { "epoch": 7.8, - "learning_rate": 4.4278280436922796e-06, - "loss": 0.0431, + "learning_rate": 1.4452124256939136e-05, + "loss": 0.0354, "step": 167085 }, { "epoch": 7.8, - "learning_rate": 4.4273592424171395e-06, - "loss": 0.0303, + "learning_rate": 1.4451656187103117e-05, + "loss": 0.0049, "step": 167090 }, { "epoch": 7.8, - "learning_rate": 4.426890441142e-06, - "loss": 0.0279, + "learning_rate": 1.4451188117267096e-05, + "loss": 0.0553, "step": 167095 }, { "epoch": 7.8, - "learning_rate": 4.426421639866861e-06, - "loss": 0.0619, + "learning_rate": 1.4450720047431077e-05, + "loss": 0.1479, "step": 167100 }, { "epoch": 7.8, - "learning_rate": 4.425952838591721e-06, - "loss": 0.0635, + "learning_rate": 1.4450251977595057e-05, + "loss": 0.0802, "step": 167105 }, { "epoch": 7.8, - "learning_rate": 4.425484037316582e-06, + "learning_rate": 1.4449783907759038e-05, "loss": 0.0452, "step": 167110 }, { "epoch": 7.8, - "learning_rate": 4.4250152360414426e-06, - "loss": 0.1246, + "learning_rate": 1.4449315837923018e-05, + "loss": 0.1301, "step": 167115 }, { "epoch": 7.8, - "learning_rate": 4.424546434766303e-06, - "loss": 0.227, + "learning_rate": 1.4448847768087e-05, + "loss": 0.1977, "step": 167120 }, { "epoch": 7.8, - "learning_rate": 4.424077633491163e-06, - "loss": 0.0159, + "learning_rate": 1.4448379698250978e-05, + "loss": 0.0234, "step": 167125 }, { "epoch": 7.8, - "learning_rate": 4.423608832216024e-06, - "loss": 0.0234, + "learning_rate": 1.444791162841496e-05, + "loss": 0.0281, "step": 167130 }, { "epoch": 7.8, - "learning_rate": 4.423140030940885e-06, - "loss": 0.0169, + "learning_rate": 1.4447443558578941e-05, + "loss": 0.0262, "step": 167135 }, { "epoch": 7.8, - "learning_rate": 4.422671229665745e-06, - "loss": 0.0581, + "learning_rate": 1.444697548874292e-05, + "loss": 0.0542, "step": 167140 }, { "epoch": 7.8, - "learning_rate": 4.4222024283906055e-06, - "loss": 0.0275, + "learning_rate": 1.4446507418906902e-05, + "loss": 0.0362, "step": 167145 }, { "epoch": 7.8, - "learning_rate": 4.421733627115466e-06, - "loss": 0.04, + "learning_rate": 1.4446039349070882e-05, + "loss": 0.0678, "step": 167150 }, { "epoch": 7.8, - "learning_rate": 4.421264825840327e-06, - "loss": 0.0934, + "learning_rate": 1.4445571279234862e-05, + "loss": 0.0578, "step": 167155 }, { "epoch": 7.8, - "learning_rate": 4.420796024565187e-06, - "loss": 0.1271, + "learning_rate": 1.4445103209398842e-05, + "loss": 0.0119, "step": 167160 }, { "epoch": 7.8, - "learning_rate": 4.420327223290048e-06, - "loss": 0.1598, + "learning_rate": 1.4444635139562823e-05, + "loss": 0.0707, "step": 167165 }, { "epoch": 7.8, - "learning_rate": 4.419858422014909e-06, - "loss": 0.2894, + "learning_rate": 1.4444167069726803e-05, + "loss": 0.1371, "step": 167170 }, { "epoch": 7.8, - "learning_rate": 4.4193896207397685e-06, - "loss": 0.0482, + "learning_rate": 1.4443698999890784e-05, + "loss": 0.0558, "step": 167175 }, { "epoch": 7.8, - "learning_rate": 4.418920819464629e-06, - "loss": 0.0276, + "learning_rate": 1.4443230930054764e-05, + "loss": 0.0397, "step": 167180 }, { "epoch": 7.8, - "learning_rate": 4.418452018189489e-06, - "loss": 0.0122, + "learning_rate": 1.4442762860218746e-05, + "loss": 0.0106, "step": 167185 }, { "epoch": 7.8, - "learning_rate": 4.417983216914351e-06, - "loss": 0.0132, + "learning_rate": 1.4442294790382726e-05, + "loss": 0.0168, "step": 167190 }, { "epoch": 7.8, - "learning_rate": 4.417514415639211e-06, - "loss": 0.0522, + "learning_rate": 1.4441826720546705e-05, + "loss": 0.0236, "step": 167195 }, { "epoch": 7.8, - "learning_rate": 4.417045614364072e-06, - "loss": 0.0304, + "learning_rate": 1.4441358650710687e-05, + "loss": 0.0272, "step": 167200 }, { "epoch": 7.8, - "learning_rate": 4.4165768130889315e-06, - "loss": 0.0391, + "learning_rate": 1.4440890580874667e-05, + "loss": 0.0531, "step": 167205 }, { "epoch": 7.8, - "learning_rate": 4.416108011813792e-06, - "loss": 0.1088, + "learning_rate": 1.4440422511038648e-05, + "loss": 0.1043, "step": 167210 }, { "epoch": 7.8, - "learning_rate": 4.415639210538653e-06, - "loss": 0.0891, + "learning_rate": 1.4439954441202628e-05, + "loss": 0.1009, "step": 167215 }, { "epoch": 7.8, - "learning_rate": 4.415170409263513e-06, - "loss": 0.1919, + "learning_rate": 1.4439486371366608e-05, + "loss": 0.1585, "step": 167220 }, { "epoch": 7.8, - "learning_rate": 4.414701607988374e-06, - "loss": 0.0193, + "learning_rate": 1.4439018301530588e-05, + "loss": 0.0253, "step": 167225 }, { "epoch": 7.8, - "learning_rate": 4.414232806713235e-06, - "loss": 0.0038, + "learning_rate": 1.443855023169457e-05, + "loss": 0.0229, "step": 167230 }, { "epoch": 7.8, - "learning_rate": 4.413764005438095e-06, - "loss": 0.0133, + "learning_rate": 1.4438082161858549e-05, + "loss": 0.0152, "step": 167235 }, { "epoch": 7.8, - "learning_rate": 4.413295204162955e-06, - "loss": 0.0724, + "learning_rate": 1.443761409202253e-05, + "loss": 0.0455, "step": 167240 }, { "epoch": 7.8, - "learning_rate": 4.412826402887816e-06, - "loss": 0.0512, + "learning_rate": 1.443714602218651e-05, + "loss": 0.0242, "step": 167245 }, { "epoch": 7.8, - "learning_rate": 4.412357601612677e-06, - "loss": 0.0026, + "learning_rate": 1.443667795235049e-05, + "loss": 0.0762, "step": 167250 }, { "epoch": 7.8, - "learning_rate": 4.411888800337537e-06, - "loss": 0.0498, + "learning_rate": 1.4436209882514472e-05, + "loss": 0.0937, "step": 167255 }, { "epoch": 7.8, - "learning_rate": 4.4114199990623976e-06, - "loss": 0.1063, + "learning_rate": 1.4435741812678452e-05, + "loss": 0.0155, "step": 167260 }, { "epoch": 7.8, - "learning_rate": 4.410951197787258e-06, - "loss": 0.1817, + "learning_rate": 1.4435273742842433e-05, + "loss": 0.2715, "step": 167265 }, { "epoch": 7.81, - "learning_rate": 4.410482396512119e-06, - "loss": 0.1458, + "learning_rate": 1.4434805673006413e-05, + "loss": 0.151, "step": 167270 }, { "epoch": 7.81, - "learning_rate": 4.410013595236979e-06, - "loss": 0.0222, + "learning_rate": 1.4434337603170394e-05, + "loss": 0.0026, "step": 167275 }, { "epoch": 7.81, - "learning_rate": 4.40954479396184e-06, - "loss": 0.0265, + "learning_rate": 1.4433869533334374e-05, + "loss": 0.0173, "step": 167280 }, { "epoch": 7.81, - "learning_rate": 4.409075992686701e-06, - "loss": 0.011, + "learning_rate": 1.4433401463498354e-05, + "loss": 0.0269, "step": 167285 }, { "epoch": 7.81, - "learning_rate": 4.4086071914115606e-06, - "loss": 0.0184, + "learning_rate": 1.4432933393662334e-05, + "loss": 0.0179, "step": 167290 }, { "epoch": 7.81, - "learning_rate": 4.408138390136421e-06, - "loss": 0.0413, + "learning_rate": 1.4432465323826315e-05, + "loss": 0.0326, "step": 167295 }, { "epoch": 7.81, - "learning_rate": 4.407669588861282e-06, - "loss": 0.0452, + "learning_rate": 1.4431997253990295e-05, + "loss": 0.0296, "step": 167300 }, { "epoch": 7.81, - "learning_rate": 4.407200787586143e-06, - "loss": 0.0538, + "learning_rate": 1.4431529184154277e-05, + "loss": 0.0338, "step": 167305 }, { "epoch": 7.81, - "learning_rate": 4.406731986311003e-06, - "loss": 0.0732, + "learning_rate": 1.4431061114318258e-05, + "loss": 0.1313, "step": 167310 }, { "epoch": 7.81, - "learning_rate": 4.406263185035864e-06, - "loss": 0.1081, + "learning_rate": 1.4430593044482236e-05, + "loss": 0.1325, "step": 167315 }, { "epoch": 7.81, - "learning_rate": 4.405794383760724e-06, - "loss": 0.2629, + "learning_rate": 1.4430124974646218e-05, + "loss": 0.2485, "step": 167320 }, { "epoch": 7.81, - "learning_rate": 4.405325582485584e-06, - "loss": 0.0076, + "learning_rate": 1.4429656904810198e-05, + "loss": 0.0172, "step": 167325 }, { "epoch": 7.81, - "learning_rate": 4.404856781210445e-06, - "loss": 0.0099, + "learning_rate": 1.442918883497418e-05, + "loss": 0.0068, "step": 167330 }, { "epoch": 7.81, - "learning_rate": 4.404387979935306e-06, - "loss": 0.0146, + "learning_rate": 1.4428720765138159e-05, + "loss": 0.0094, "step": 167335 }, { "epoch": 7.81, - "learning_rate": 4.403919178660167e-06, - "loss": 0.0437, + "learning_rate": 1.442825269530214e-05, + "loss": 0.0564, "step": 167340 }, { "epoch": 7.81, - "learning_rate": 4.403450377385027e-06, - "loss": 0.0314, + "learning_rate": 1.4427784625466119e-05, + "loss": 0.0432, "step": 167345 }, { "epoch": 7.81, - "learning_rate": 4.402981576109887e-06, - "loss": 0.0591, + "learning_rate": 1.44273165556301e-05, + "loss": 0.0347, "step": 167350 }, { "epoch": 7.81, - "learning_rate": 4.402512774834748e-06, - "loss": 0.077, + "learning_rate": 1.442684848579408e-05, + "loss": 0.0451, "step": 167355 }, { "epoch": 7.81, - "learning_rate": 4.402043973559608e-06, - "loss": 0.1351, + "learning_rate": 1.4426380415958061e-05, + "loss": 0.0362, "step": 167360 }, { "epoch": 7.81, - "learning_rate": 4.401575172284469e-06, - "loss": 0.0737, + "learning_rate": 1.4425912346122041e-05, + "loss": 0.1077, "step": 167365 }, { "epoch": 7.81, - "learning_rate": 4.40110637100933e-06, - "loss": 0.1145, + "learning_rate": 1.4425444276286023e-05, + "loss": 0.1782, "step": 167370 }, { "epoch": 7.81, - "learning_rate": 4.4006375697341904e-06, - "loss": 0.009, + "learning_rate": 1.4424976206450004e-05, + "loss": 0.0106, "step": 167375 }, { "epoch": 7.81, - "learning_rate": 4.40016876845905e-06, - "loss": 0.0418, + "learning_rate": 1.4424508136613982e-05, + "loss": 0.0044, "step": 167380 }, { "epoch": 7.81, - "learning_rate": 4.399699967183911e-06, - "loss": 0.0387, + "learning_rate": 1.4424040066777964e-05, + "loss": 0.0143, "step": 167385 }, { "epoch": 7.81, - "learning_rate": 4.399231165908772e-06, - "loss": 0.039, + "learning_rate": 1.4423571996941944e-05, + "loss": 0.0075, "step": 167390 }, { "epoch": 7.81, - "learning_rate": 4.398762364633632e-06, - "loss": 0.053, + "learning_rate": 1.4423103927105925e-05, + "loss": 0.0177, "step": 167395 }, { "epoch": 7.81, - "learning_rate": 4.398293563358493e-06, - "loss": 0.0294, + "learning_rate": 1.4422635857269905e-05, + "loss": 0.0525, "step": 167400 }, { "epoch": 7.81, - "learning_rate": 4.3978247620833534e-06, - "loss": 0.0843, + "learning_rate": 1.4422167787433887e-05, + "loss": 0.0336, "step": 167405 }, { "epoch": 7.81, - "learning_rate": 4.397355960808214e-06, - "loss": 0.0643, + "learning_rate": 1.4421699717597865e-05, + "loss": 0.0554, "step": 167410 }, { "epoch": 7.81, - "learning_rate": 4.396887159533074e-06, - "loss": 0.1428, + "learning_rate": 1.4421231647761846e-05, + "loss": 0.0612, "step": 167415 }, { "epoch": 7.81, - "learning_rate": 4.396418358257935e-06, - "loss": 0.1134, + "learning_rate": 1.4420763577925826e-05, + "loss": 0.1554, "step": 167420 }, { "epoch": 7.81, - "learning_rate": 4.395949556982796e-06, - "loss": 0.0819, + "learning_rate": 1.4420295508089808e-05, + "loss": 0.0202, "step": 167425 }, { "epoch": 7.81, - "learning_rate": 4.395480755707656e-06, - "loss": 0.0105, + "learning_rate": 1.4419827438253787e-05, + "loss": 0.0184, "step": 167430 }, { "epoch": 7.81, - "learning_rate": 4.3950119544325164e-06, - "loss": 0.0168, + "learning_rate": 1.4419359368417769e-05, + "loss": 0.04, "step": 167435 }, { "epoch": 7.81, - "learning_rate": 4.394543153157377e-06, - "loss": 0.0014, + "learning_rate": 1.4418891298581749e-05, + "loss": 0.0209, "step": 167440 }, { "epoch": 7.81, - "learning_rate": 4.394074351882238e-06, - "loss": 0.0551, + "learning_rate": 1.4418423228745729e-05, + "loss": 0.0125, "step": 167445 }, { "epoch": 7.81, - "learning_rate": 4.393605550607098e-06, - "loss": 0.0395, + "learning_rate": 1.441795515890971e-05, + "loss": 0.0482, "step": 167450 }, { "epoch": 7.81, - "learning_rate": 4.393136749331959e-06, - "loss": 0.0257, + "learning_rate": 1.441748708907369e-05, + "loss": 0.0925, "step": 167455 }, { "epoch": 7.81, - "learning_rate": 4.3926679480568195e-06, - "loss": 0.067, + "learning_rate": 1.4417019019237671e-05, + "loss": 0.1205, "step": 167460 }, { "epoch": 7.81, - "learning_rate": 4.392199146781679e-06, - "loss": 0.1088, + "learning_rate": 1.4416550949401651e-05, + "loss": 0.0881, "step": 167465 }, { "epoch": 7.81, - "learning_rate": 4.39173034550654e-06, - "loss": 0.1272, + "learning_rate": 1.4416082879565633e-05, + "loss": 0.162, "step": 167470 }, { "epoch": 7.81, - "learning_rate": 4.3912615442314e-06, - "loss": 0.0262, + "learning_rate": 1.441561480972961e-05, + "loss": 0.0437, "step": 167475 }, { "epoch": 7.81, - "learning_rate": 4.390792742956262e-06, - "loss": 0.0145, + "learning_rate": 1.4415146739893592e-05, + "loss": 0.0197, "step": 167480 }, { "epoch": 7.82, - "learning_rate": 4.390323941681122e-06, - "loss": 0.0233, + "learning_rate": 1.4414678670057572e-05, + "loss": 0.0045, "step": 167485 }, { "epoch": 7.82, - "learning_rate": 4.3898551404059825e-06, - "loss": 0.036, + "learning_rate": 1.4414210600221554e-05, + "loss": 0.0194, "step": 167490 }, { "epoch": 7.82, - "learning_rate": 4.389386339130842e-06, - "loss": 0.0103, + "learning_rate": 1.4413742530385535e-05, + "loss": 0.0466, "step": 167495 }, { "epoch": 7.82, - "learning_rate": 4.388917537855703e-06, - "loss": 0.0462, + "learning_rate": 1.4413274460549515e-05, + "loss": 0.0039, "step": 167500 }, { "epoch": 7.82, - "learning_rate": 4.388448736580564e-06, - "loss": 0.1625, + "learning_rate": 1.4412806390713495e-05, + "loss": 0.0958, "step": 167505 }, { "epoch": 7.82, - "learning_rate": 4.387979935305424e-06, - "loss": 0.0939, + "learning_rate": 1.4412338320877475e-05, + "loss": 0.1663, "step": 167510 }, { "epoch": 7.82, - "learning_rate": 4.387511134030285e-06, - "loss": 0.1638, + "learning_rate": 1.4411870251041456e-05, + "loss": 0.1609, "step": 167515 }, { "epoch": 7.82, - "learning_rate": 4.3870423327551455e-06, - "loss": 0.1769, + "learning_rate": 1.4411402181205436e-05, + "loss": 0.1776, "step": 167520 }, { "epoch": 7.82, - "learning_rate": 4.386573531480006e-06, - "loss": 0.0398, + "learning_rate": 1.4410934111369417e-05, + "loss": 0.0423, "step": 167525 }, { "epoch": 7.82, - "learning_rate": 4.386104730204866e-06, - "loss": 0.0101, + "learning_rate": 1.4410466041533397e-05, + "loss": 0.0149, "step": 167530 }, { "epoch": 7.82, - "learning_rate": 4.385635928929727e-06, - "loss": 0.0175, + "learning_rate": 1.4409997971697377e-05, + "loss": 0.039, "step": 167535 }, { "epoch": 7.82, - "learning_rate": 4.385167127654588e-06, - "loss": 0.023, + "learning_rate": 1.4409529901861357e-05, + "loss": 0.0541, "step": 167540 }, { "epoch": 7.82, - "learning_rate": 4.384698326379448e-06, - "loss": 0.0232, + "learning_rate": 1.4409061832025338e-05, + "loss": 0.0279, "step": 167545 }, { "epoch": 7.82, - "learning_rate": 4.3842295251043084e-06, - "loss": 0.1159, + "learning_rate": 1.4408593762189318e-05, + "loss": 0.0732, "step": 167550 }, { "epoch": 7.82, - "learning_rate": 4.383760723829169e-06, - "loss": 0.079, + "learning_rate": 1.44081256923533e-05, + "loss": 0.0746, "step": 167555 }, { "epoch": 7.82, - "learning_rate": 4.38329192255403e-06, - "loss": 0.0764, + "learning_rate": 1.4407657622517281e-05, + "loss": 0.0746, "step": 167560 }, { "epoch": 7.82, - "learning_rate": 4.38282312127889e-06, - "loss": 0.1254, + "learning_rate": 1.4407189552681261e-05, + "loss": 0.1061, "step": 167565 }, { "epoch": 7.82, - "learning_rate": 4.382354320003751e-06, - "loss": 0.1164, + "learning_rate": 1.4406721482845241e-05, + "loss": 0.0969, "step": 167570 }, { "epoch": 7.82, - "learning_rate": 4.3818855187286115e-06, - "loss": 0.0492, + "learning_rate": 1.440625341300922e-05, + "loss": 0.0151, "step": 167575 }, { "epoch": 7.82, - "learning_rate": 4.3814167174534714e-06, - "loss": 0.0348, + "learning_rate": 1.4405785343173202e-05, + "loss": 0.0221, "step": 167580 }, { "epoch": 7.82, - "learning_rate": 4.380947916178332e-06, - "loss": 0.0318, + "learning_rate": 1.4405317273337182e-05, + "loss": 0.0376, "step": 167585 }, { "epoch": 7.82, - "learning_rate": 4.380479114903193e-06, - "loss": 0.0187, + "learning_rate": 1.4404849203501164e-05, + "loss": 0.0103, "step": 167590 }, { "epoch": 7.82, - "learning_rate": 4.380010313628054e-06, - "loss": 0.0335, + "learning_rate": 1.4404381133665143e-05, + "loss": 0.0649, "step": 167595 }, { "epoch": 7.82, - "learning_rate": 4.379541512352914e-06, - "loss": 0.0689, + "learning_rate": 1.4403913063829123e-05, + "loss": 0.0538, "step": 167600 }, { "epoch": 7.82, - "learning_rate": 4.3790727110777745e-06, - "loss": 0.0703, + "learning_rate": 1.4403444993993103e-05, + "loss": 0.0679, "step": 167605 }, { "epoch": 7.82, - "learning_rate": 4.378603909802635e-06, - "loss": 0.0556, + "learning_rate": 1.4402976924157085e-05, + "loss": 0.1543, "step": 167610 }, { "epoch": 7.82, - "learning_rate": 4.378135108527495e-06, - "loss": 0.0959, + "learning_rate": 1.4402508854321066e-05, + "loss": 0.0622, "step": 167615 }, { "epoch": 7.82, - "learning_rate": 4.377666307252356e-06, - "loss": 0.1667, + "learning_rate": 1.4402040784485046e-05, + "loss": 0.2029, "step": 167620 }, { "epoch": 7.82, - "learning_rate": 4.377197505977217e-06, - "loss": 0.0631, + "learning_rate": 1.4401572714649027e-05, + "loss": 0.0062, "step": 167625 }, { "epoch": 7.82, - "learning_rate": 4.3767287047020776e-06, - "loss": 0.0057, + "learning_rate": 1.4401104644813005e-05, + "loss": 0.003, "step": 167630 }, { "epoch": 7.82, - "learning_rate": 4.3762599034269375e-06, - "loss": 0.0095, + "learning_rate": 1.4400636574976987e-05, + "loss": 0.0277, "step": 167635 }, { "epoch": 7.82, - "learning_rate": 4.375791102151798e-06, - "loss": 0.0559, + "learning_rate": 1.4400168505140967e-05, + "loss": 0.0316, "step": 167640 }, { "epoch": 7.82, - "learning_rate": 4.375322300876659e-06, - "loss": 0.0181, + "learning_rate": 1.4399700435304948e-05, + "loss": 0.0205, "step": 167645 }, { "epoch": 7.82, - "learning_rate": 4.374853499601519e-06, - "loss": 0.0463, + "learning_rate": 1.4399232365468928e-05, + "loss": 0.0065, "step": 167650 }, { "epoch": 7.82, - "learning_rate": 4.37438469832638e-06, - "loss": 0.0728, + "learning_rate": 1.439876429563291e-05, + "loss": 0.0608, "step": 167655 }, { "epoch": 7.82, - "learning_rate": 4.3739158970512405e-06, - "loss": 0.1238, + "learning_rate": 1.439829622579689e-05, + "loss": 0.1044, "step": 167660 }, { "epoch": 7.82, - "learning_rate": 4.373447095776101e-06, - "loss": 0.1457, + "learning_rate": 1.439782815596087e-05, + "loss": 0.1461, "step": 167665 }, { "epoch": 7.82, - "learning_rate": 4.372978294500961e-06, - "loss": 0.1249, + "learning_rate": 1.4397360086124849e-05, + "loss": 0.2347, "step": 167670 }, { "epoch": 7.82, - "learning_rate": 4.372509493225822e-06, - "loss": 0.0361, + "learning_rate": 1.439689201628883e-05, + "loss": 0.0227, "step": 167675 }, { "epoch": 7.82, - "learning_rate": 4.372040691950683e-06, - "loss": 0.0053, + "learning_rate": 1.4396423946452812e-05, + "loss": 0.0166, "step": 167680 }, { "epoch": 7.82, - "learning_rate": 4.371571890675543e-06, - "loss": 0.0238, + "learning_rate": 1.4395955876616792e-05, + "loss": 0.011, "step": 167685 }, { "epoch": 7.82, - "learning_rate": 4.3711030894004035e-06, + "learning_rate": 1.4395487806780773e-05, "loss": 0.0203, "step": 167690 }, { "epoch": 7.82, - "learning_rate": 4.370634288125264e-06, - "loss": 0.0104, + "learning_rate": 1.4395019736944752e-05, + "loss": 0.011, "step": 167695 }, { "epoch": 7.83, - "learning_rate": 4.370165486850125e-06, - "loss": 0.019, + "learning_rate": 1.4394551667108733e-05, + "loss": 0.0613, "step": 167700 }, { "epoch": 7.83, - "learning_rate": 4.369696685574985e-06, - "loss": 0.1438, + "learning_rate": 1.4394083597272713e-05, + "loss": 0.0905, "step": 167705 }, { "epoch": 7.83, - "learning_rate": 4.369227884299846e-06, - "loss": 0.1405, + "learning_rate": 1.4393615527436694e-05, + "loss": 0.1707, "step": 167710 }, { "epoch": 7.83, - "learning_rate": 4.368759083024707e-06, - "loss": 0.1288, + "learning_rate": 1.4393147457600674e-05, + "loss": 0.1667, "step": 167715 }, { "epoch": 7.83, - "learning_rate": 4.3682902817495665e-06, - "loss": 0.1894, + "learning_rate": 1.4392679387764656e-05, + "loss": 0.2035, "step": 167720 }, { "epoch": 7.83, - "learning_rate": 4.367821480474427e-06, - "loss": 0.0227, + "learning_rate": 1.4392211317928634e-05, + "loss": 0.0147, "step": 167725 }, { "epoch": 7.83, - "learning_rate": 4.367352679199287e-06, - "loss": 0.0039, + "learning_rate": 1.4391743248092615e-05, + "loss": 0.0065, "step": 167730 }, { "epoch": 7.83, - "learning_rate": 4.366883877924149e-06, - "loss": 0.0269, + "learning_rate": 1.4391275178256595e-05, + "loss": 0.0103, "step": 167735 }, { "epoch": 7.83, - "learning_rate": 4.366415076649009e-06, - "loss": 0.0186, + "learning_rate": 1.4390807108420577e-05, + "loss": 0.0374, "step": 167740 }, { "epoch": 7.83, - "learning_rate": 4.36594627537387e-06, - "loss": 0.0293, + "learning_rate": 1.4390339038584558e-05, + "loss": 0.0475, "step": 167745 }, { "epoch": 7.83, - "learning_rate": 4.3654774740987295e-06, - "loss": 0.0914, + "learning_rate": 1.4389870968748538e-05, + "loss": 0.0093, "step": 167750 }, { "epoch": 7.83, - "learning_rate": 4.36500867282359e-06, - "loss": 0.0644, + "learning_rate": 1.4389402898912518e-05, + "loss": 0.1153, "step": 167755 }, { "epoch": 7.83, - "learning_rate": 4.364539871548451e-06, - "loss": 0.0169, + "learning_rate": 1.4388934829076498e-05, + "loss": 0.0405, "step": 167760 }, { "epoch": 7.83, - "learning_rate": 4.364071070273311e-06, - "loss": 0.0956, + "learning_rate": 1.438846675924048e-05, + "loss": 0.1578, "step": 167765 }, { "epoch": 7.83, - "learning_rate": 4.363602268998172e-06, - "loss": 0.1448, + "learning_rate": 1.4387998689404459e-05, + "loss": 0.162, "step": 167770 }, { "epoch": 7.83, - "learning_rate": 4.3631334677230326e-06, - "loss": 0.0436, + "learning_rate": 1.438753061956844e-05, + "loss": 0.0397, "step": 167775 }, { "epoch": 7.83, - "learning_rate": 4.362664666447893e-06, - "loss": 0.0125, + "learning_rate": 1.438706254973242e-05, + "loss": 0.0262, "step": 167780 }, { "epoch": 7.83, - "learning_rate": 4.362195865172753e-06, - "loss": 0.0144, + "learning_rate": 1.4386594479896402e-05, + "loss": 0.009, "step": 167785 }, { "epoch": 7.83, - "learning_rate": 4.361727063897614e-06, - "loss": 0.018, + "learning_rate": 1.438612641006038e-05, + "loss": 0.0488, "step": 167790 }, { "epoch": 7.83, - "learning_rate": 4.361258262622475e-06, - "loss": 0.0539, + "learning_rate": 1.4385658340224361e-05, + "loss": 0.0369, "step": 167795 }, { "epoch": 7.83, - "learning_rate": 4.360789461347335e-06, - "loss": 0.0167, + "learning_rate": 1.4385190270388343e-05, + "loss": 0.0583, "step": 167800 }, { "epoch": 7.83, - "learning_rate": 4.3603206600721956e-06, - "loss": 0.1098, + "learning_rate": 1.4384722200552323e-05, + "loss": 0.044, "step": 167805 }, { "epoch": 7.83, - "learning_rate": 4.359851858797056e-06, - "loss": 0.106, + "learning_rate": 1.4384254130716304e-05, + "loss": 0.0572, "step": 167810 }, { "epoch": 7.83, - "learning_rate": 4.359383057521917e-06, - "loss": 0.085, + "learning_rate": 1.4383786060880284e-05, + "loss": 0.1138, "step": 167815 }, { "epoch": 7.83, - "learning_rate": 4.358914256246777e-06, - "loss": 0.1707, + "learning_rate": 1.4383317991044264e-05, + "loss": 0.1775, "step": 167820 }, { "epoch": 7.83, - "learning_rate": 4.358445454971638e-06, - "loss": 0.0173, + "learning_rate": 1.4382849921208244e-05, + "loss": 0.013, "step": 167825 }, { "epoch": 7.83, - "learning_rate": 4.357976653696499e-06, - "loss": 0.0848, + "learning_rate": 1.4382381851372225e-05, + "loss": 0.0266, "step": 167830 }, { "epoch": 7.83, - "learning_rate": 4.3575078524213585e-06, - "loss": 0.0121, + "learning_rate": 1.4381913781536205e-05, + "loss": 0.0166, "step": 167835 }, { "epoch": 7.83, - "learning_rate": 4.357039051146219e-06, - "loss": 0.0186, + "learning_rate": 1.4381445711700187e-05, + "loss": 0.0448, "step": 167840 }, { "epoch": 7.83, - "learning_rate": 4.35657024987108e-06, - "loss": 0.0439, + "learning_rate": 1.4380977641864166e-05, + "loss": 0.0577, "step": 167845 }, { "epoch": 7.83, - "learning_rate": 4.356101448595941e-06, - "loss": 0.0626, + "learning_rate": 1.4380509572028146e-05, + "loss": 0.0179, "step": 167850 }, { "epoch": 7.83, - "learning_rate": 4.355632647320801e-06, - "loss": 0.0424, + "learning_rate": 1.4380041502192126e-05, + "loss": 0.0693, "step": 167855 }, { "epoch": 7.83, - "learning_rate": 4.355163846045662e-06, - "loss": 0.1064, + "learning_rate": 1.4379573432356108e-05, + "loss": 0.1076, "step": 167860 }, { "epoch": 7.83, - "learning_rate": 4.354695044770522e-06, - "loss": 0.1137, + "learning_rate": 1.4379105362520089e-05, + "loss": 0.0818, "step": 167865 }, { "epoch": 7.83, - "learning_rate": 4.354226243495382e-06, - "loss": 0.1669, + "learning_rate": 1.4378637292684069e-05, + "loss": 0.1196, "step": 167870 }, { "epoch": 7.83, - "learning_rate": 4.353757442220243e-06, - "loss": 0.0589, + "learning_rate": 1.437816922284805e-05, + "loss": 0.0354, "step": 167875 }, { "epoch": 7.83, - "learning_rate": 4.353288640945104e-06, - "loss": 0.0226, + "learning_rate": 1.437770115301203e-05, + "loss": 0.0135, "step": 167880 }, { "epoch": 7.83, - "learning_rate": 4.352819839669965e-06, - "loss": 0.0494, + "learning_rate": 1.437723308317601e-05, + "loss": 0.0256, "step": 167885 }, { "epoch": 7.83, - "learning_rate": 4.352351038394825e-06, - "loss": 0.0242, + "learning_rate": 1.437676501333999e-05, + "loss": 0.0488, "step": 167890 }, { "epoch": 7.83, - "learning_rate": 4.351882237119685e-06, - "loss": 0.0483, + "learning_rate": 1.4376296943503971e-05, + "loss": 0.0544, "step": 167895 }, { "epoch": 7.83, - "learning_rate": 4.351413435844546e-06, - "loss": 0.0049, + "learning_rate": 1.4375828873667951e-05, + "loss": 0.061, "step": 167900 }, { "epoch": 7.83, - "learning_rate": 4.350944634569406e-06, - "loss": 0.0231, + "learning_rate": 1.4375360803831933e-05, + "loss": 0.0246, "step": 167905 }, { "epoch": 7.83, - "learning_rate": 4.350475833294267e-06, - "loss": 0.0936, + "learning_rate": 1.4374892733995913e-05, + "loss": 0.0447, "step": 167910 }, { "epoch": 7.84, - "learning_rate": 4.350007032019128e-06, - "loss": 0.1947, + "learning_rate": 1.4374424664159892e-05, + "loss": 0.1009, "step": 167915 }, { "epoch": 7.84, - "learning_rate": 4.3495382307439884e-06, - "loss": 0.1305, + "learning_rate": 1.4373956594323872e-05, + "loss": 0.1242, "step": 167920 }, { "epoch": 7.84, - "learning_rate": 4.349069429468848e-06, - "loss": 0.0457, + "learning_rate": 1.4373488524487854e-05, + "loss": 0.0065, "step": 167925 }, { "epoch": 7.84, - "learning_rate": 4.348600628193709e-06, - "loss": 0.0125, + "learning_rate": 1.4373020454651835e-05, + "loss": 0.056, "step": 167930 }, { "epoch": 7.84, - "learning_rate": 4.34813182691857e-06, - "loss": 0.0041, + "learning_rate": 1.4372552384815815e-05, + "loss": 0.0386, "step": 167935 }, { "epoch": 7.84, - "learning_rate": 4.34766302564343e-06, - "loss": 0.0144, + "learning_rate": 1.4372084314979797e-05, + "loss": 0.0275, "step": 167940 }, { "epoch": 7.84, - "learning_rate": 4.347194224368291e-06, - "loss": 0.1586, + "learning_rate": 1.4371616245143775e-05, + "loss": 0.0424, "step": 167945 }, { "epoch": 7.84, - "learning_rate": 4.346725423093151e-06, - "loss": 0.0214, + "learning_rate": 1.4371148175307756e-05, + "loss": 0.0332, "step": 167950 }, { "epoch": 7.84, - "learning_rate": 4.346256621818012e-06, - "loss": 0.0907, + "learning_rate": 1.4370680105471736e-05, + "loss": 0.0675, "step": 167955 }, { "epoch": 7.84, - "learning_rate": 4.345787820542872e-06, - "loss": 0.0504, + "learning_rate": 1.4370212035635717e-05, + "loss": 0.0559, "step": 167960 }, { "epoch": 7.84, - "learning_rate": 4.345319019267733e-06, - "loss": 0.106, + "learning_rate": 1.4369743965799697e-05, + "loss": 0.1802, "step": 167965 }, { "epoch": 7.84, - "learning_rate": 4.344850217992594e-06, - "loss": 0.2242, + "learning_rate": 1.4369275895963679e-05, + "loss": 0.0399, "step": 167970 }, { "epoch": 7.84, - "learning_rate": 4.344381416717454e-06, - "loss": 0.027, + "learning_rate": 1.4368807826127659e-05, + "loss": 0.0143, "step": 167975 }, { "epoch": 7.84, - "learning_rate": 4.343912615442314e-06, - "loss": 0.0116, + "learning_rate": 1.4368339756291638e-05, + "loss": 0.0063, "step": 167980 }, { "epoch": 7.84, - "learning_rate": 4.343443814167174e-06, - "loss": 0.0222, + "learning_rate": 1.436787168645562e-05, + "loss": 0.0568, "step": 167985 }, { "epoch": 7.84, - "learning_rate": 4.342975012892036e-06, - "loss": 0.0239, + "learning_rate": 1.43674036166196e-05, + "loss": 0.0312, "step": 167990 }, { "epoch": 7.84, - "learning_rate": 4.342506211616896e-06, - "loss": 0.0458, + "learning_rate": 1.4366935546783581e-05, + "loss": 0.0148, "step": 167995 }, { "epoch": 7.84, - "learning_rate": 4.342037410341757e-06, - "loss": 0.0253, + "learning_rate": 1.4366467476947561e-05, + "loss": 0.1382, "step": 168000 }, { "epoch": 7.84, - "learning_rate": 4.341568609066617e-06, - "loss": 0.1041, + "learning_rate": 1.4365999407111543e-05, + "loss": 0.0705, "step": 168005 }, { "epoch": 7.84, - "learning_rate": 4.341099807791477e-06, - "loss": 0.0802, + "learning_rate": 1.436553133727552e-05, + "loss": 0.0806, "step": 168010 }, { "epoch": 7.84, - "learning_rate": 4.340631006516338e-06, - "loss": 0.0606, + "learning_rate": 1.4365063267439502e-05, + "loss": 0.1365, "step": 168015 }, { "epoch": 7.84, - "learning_rate": 4.340162205241198e-06, - "loss": 0.2154, + "learning_rate": 1.4364595197603482e-05, + "loss": 0.2363, "step": 168020 }, { "epoch": 7.84, - "learning_rate": 4.339693403966059e-06, - "loss": 0.0155, + "learning_rate": 1.4364127127767464e-05, + "loss": 0.0248, "step": 168025 }, { "epoch": 7.84, - "learning_rate": 4.33922460269092e-06, - "loss": 0.0317, + "learning_rate": 1.4363659057931443e-05, + "loss": 0.0062, "step": 168030 }, { "epoch": 7.84, - "learning_rate": 4.3387558014157805e-06, - "loss": 0.0206, + "learning_rate": 1.4363190988095425e-05, + "loss": 0.0515, "step": 168035 }, { "epoch": 7.84, - "learning_rate": 4.33828700014064e-06, - "loss": 0.0711, + "learning_rate": 1.4362722918259403e-05, + "loss": 0.018, "step": 168040 }, { "epoch": 7.84, - "learning_rate": 4.337818198865501e-06, - "loss": 0.0603, + "learning_rate": 1.4362254848423385e-05, + "loss": 0.0511, "step": 168045 }, { "epoch": 7.84, - "learning_rate": 4.337349397590362e-06, - "loss": 0.0787, + "learning_rate": 1.4361786778587366e-05, + "loss": 0.028, "step": 168050 }, { "epoch": 7.84, - "learning_rate": 4.336880596315222e-06, - "loss": 0.0459, + "learning_rate": 1.4361318708751346e-05, + "loss": 0.0115, "step": 168055 }, { "epoch": 7.84, - "learning_rate": 4.336411795040083e-06, - "loss": 0.0995, + "learning_rate": 1.4360850638915327e-05, + "loss": 0.153, "step": 168060 }, { "epoch": 7.84, - "learning_rate": 4.3359429937649434e-06, - "loss": 0.2205, + "learning_rate": 1.4360382569079307e-05, + "loss": 0.184, "step": 168065 }, { "epoch": 7.84, - "learning_rate": 4.335474192489804e-06, - "loss": 0.1568, + "learning_rate": 1.4359914499243289e-05, + "loss": 0.1784, "step": 168070 }, { "epoch": 7.84, - "learning_rate": 4.335005391214664e-06, - "loss": 0.0431, + "learning_rate": 1.4359446429407267e-05, + "loss": 0.0017, "step": 168075 }, { "epoch": 7.84, - "learning_rate": 4.334536589939525e-06, - "loss": 0.0534, + "learning_rate": 1.4358978359571248e-05, + "loss": 0.0202, "step": 168080 }, { "epoch": 7.84, - "learning_rate": 4.334067788664386e-06, - "loss": 0.0142, + "learning_rate": 1.4358510289735228e-05, + "loss": 0.0192, "step": 168085 }, { "epoch": 7.84, - "learning_rate": 4.333598987389246e-06, - "loss": 0.0221, + "learning_rate": 1.435804221989921e-05, + "loss": 0.026, "step": 168090 }, { "epoch": 7.84, - "learning_rate": 4.3331301861141064e-06, - "loss": 0.0306, + "learning_rate": 1.435757415006319e-05, + "loss": 0.0406, "step": 168095 }, { "epoch": 7.84, - "learning_rate": 4.332661384838967e-06, - "loss": 0.0695, + "learning_rate": 1.4357106080227171e-05, + "loss": 0.0608, "step": 168100 }, { "epoch": 7.84, - "learning_rate": 4.332192583563828e-06, - "loss": 0.0839, + "learning_rate": 1.4356638010391151e-05, + "loss": 0.0737, "step": 168105 }, { "epoch": 7.84, - "learning_rate": 4.331723782288688e-06, - "loss": 0.0718, + "learning_rate": 1.435616994055513e-05, + "loss": 0.0685, "step": 168110 }, { "epoch": 7.84, - "learning_rate": 4.331254981013549e-06, - "loss": 0.2482, + "learning_rate": 1.4355701870719112e-05, + "loss": 0.1732, "step": 168115 }, { "epoch": 7.84, - "learning_rate": 4.3307861797384095e-06, - "loss": 0.2165, + "learning_rate": 1.4355233800883092e-05, + "loss": 0.1328, "step": 168120 }, { "epoch": 7.84, - "learning_rate": 4.3303173784632694e-06, - "loss": 0.0429, + "learning_rate": 1.4354765731047074e-05, + "loss": 0.0168, "step": 168125 }, { "epoch": 7.85, - "learning_rate": 4.32984857718813e-06, - "loss": 0.0091, + "learning_rate": 1.4354297661211053e-05, + "loss": 0.0007, "step": 168130 }, { "epoch": 7.85, - "learning_rate": 4.329379775912991e-06, - "loss": 0.0153, + "learning_rate": 1.4353829591375033e-05, + "loss": 0.0146, "step": 168135 }, { "epoch": 7.85, - "learning_rate": 4.328910974637852e-06, - "loss": 0.0335, + "learning_rate": 1.4353361521539013e-05, + "loss": 0.0331, "step": 168140 }, { "epoch": 7.85, - "learning_rate": 4.328442173362712e-06, - "loss": 0.0154, + "learning_rate": 1.4352893451702994e-05, + "loss": 0.029, "step": 168145 }, { "epoch": 7.85, - "learning_rate": 4.3279733720875725e-06, - "loss": 0.0202, + "learning_rate": 1.4352425381866974e-05, + "loss": 0.0402, "step": 168150 }, { "epoch": 7.85, - "learning_rate": 4.327504570812433e-06, - "loss": 0.0562, + "learning_rate": 1.4351957312030956e-05, + "loss": 0.0451, "step": 168155 }, { "epoch": 7.85, - "learning_rate": 4.327035769537293e-06, - "loss": 0.127, + "learning_rate": 1.4351489242194936e-05, + "loss": 0.1041, "step": 168160 }, { "epoch": 7.85, - "learning_rate": 4.326566968262154e-06, - "loss": 0.11, + "learning_rate": 1.4351021172358917e-05, + "loss": 0.1706, "step": 168165 }, { "epoch": 7.85, - "learning_rate": 4.326098166987015e-06, - "loss": 0.1143, + "learning_rate": 1.4350553102522897e-05, + "loss": 0.1667, "step": 168170 }, { "epoch": 7.85, - "learning_rate": 4.3256293657118755e-06, - "loss": 0.0205, + "learning_rate": 1.4350085032686877e-05, + "loss": 0.0329, "step": 168175 }, { "epoch": 7.85, - "learning_rate": 4.3251605644367355e-06, - "loss": 0.0123, + "learning_rate": 1.4349616962850858e-05, + "loss": 0.0471, "step": 168180 }, { "epoch": 7.85, - "learning_rate": 4.324691763161596e-06, - "loss": 0.046, + "learning_rate": 1.4349148893014838e-05, + "loss": 0.0074, "step": 168185 }, { "epoch": 7.85, - "learning_rate": 4.324222961886457e-06, - "loss": 0.0345, + "learning_rate": 1.434868082317882e-05, + "loss": 0.0084, "step": 168190 }, { "epoch": 7.85, - "learning_rate": 4.323754160611317e-06, - "loss": 0.0855, + "learning_rate": 1.43482127533428e-05, + "loss": 0.0453, "step": 168195 }, { "epoch": 7.85, - "learning_rate": 4.323285359336178e-06, - "loss": 0.0284, + "learning_rate": 1.434774468350678e-05, + "loss": 0.0421, "step": 168200 }, { "epoch": 7.85, - "learning_rate": 4.3228165580610385e-06, - "loss": 0.1183, + "learning_rate": 1.4347276613670759e-05, + "loss": 0.0552, "step": 168205 }, { "epoch": 7.85, - "learning_rate": 4.322347756785899e-06, - "loss": 0.1265, + "learning_rate": 1.434680854383474e-05, + "loss": 0.0864, "step": 168210 }, { "epoch": 7.85, - "learning_rate": 4.321878955510759e-06, - "loss": 0.2559, + "learning_rate": 1.434634047399872e-05, + "loss": 0.0847, "step": 168215 }, { "epoch": 7.85, - "learning_rate": 4.32141015423562e-06, - "loss": 0.351, + "learning_rate": 1.4345872404162702e-05, + "loss": 0.1498, "step": 168220 }, { "epoch": 7.85, - "learning_rate": 4.320941352960481e-06, - "loss": 0.0825, + "learning_rate": 1.4345404334326682e-05, + "loss": 0.0277, "step": 168225 }, { "epoch": 7.85, - "learning_rate": 4.320472551685341e-06, - "loss": 0.0097, + "learning_rate": 1.4344936264490662e-05, + "loss": 0.0245, "step": 168230 }, { "epoch": 7.85, - "learning_rate": 4.3200037504102015e-06, - "loss": 0.0071, + "learning_rate": 1.4344468194654643e-05, + "loss": 0.0042, "step": 168235 }, { "epoch": 7.85, - "learning_rate": 4.319534949135062e-06, - "loss": 0.0575, + "learning_rate": 1.4344000124818623e-05, + "loss": 0.0166, "step": 168240 }, { "epoch": 7.85, - "learning_rate": 4.319066147859923e-06, - "loss": 0.0714, + "learning_rate": 1.4343532054982604e-05, + "loss": 0.0455, "step": 168245 }, { "epoch": 7.85, - "learning_rate": 4.318597346584783e-06, - "loss": 0.0238, + "learning_rate": 1.4343063985146584e-05, + "loss": 0.0231, "step": 168250 }, { "epoch": 7.85, - "learning_rate": 4.318128545309644e-06, - "loss": 0.0925, + "learning_rate": 1.4342595915310566e-05, + "loss": 0.0461, "step": 168255 }, { "epoch": 7.85, - "learning_rate": 4.317659744034505e-06, - "loss": 0.023, + "learning_rate": 1.4342127845474546e-05, + "loss": 0.0751, "step": 168260 }, { "epoch": 7.85, - "learning_rate": 4.3171909427593645e-06, - "loss": 0.1082, + "learning_rate": 1.4341659775638525e-05, + "loss": 0.1025, "step": 168265 }, { "epoch": 7.85, - "learning_rate": 4.316722141484225e-06, - "loss": 0.0811, + "learning_rate": 1.4341191705802505e-05, + "loss": 0.1607, "step": 168270 }, { "epoch": 7.85, - "learning_rate": 4.316253340209085e-06, - "loss": 0.0182, + "learning_rate": 1.4340723635966487e-05, + "loss": 0.0098, "step": 168275 }, { "epoch": 7.85, - "learning_rate": 4.315784538933947e-06, - "loss": 0.0103, + "learning_rate": 1.4340255566130466e-05, + "loss": 0.0086, "step": 168280 }, { "epoch": 7.85, - "learning_rate": 4.315315737658807e-06, - "loss": 0.0069, + "learning_rate": 1.4339787496294448e-05, + "loss": 0.0053, "step": 168285 }, { "epoch": 7.85, - "learning_rate": 4.3148469363836676e-06, - "loss": 0.0555, + "learning_rate": 1.433931942645843e-05, + "loss": 0.0296, "step": 168290 }, { "epoch": 7.85, - "learning_rate": 4.3143781351085275e-06, - "loss": 0.0237, + "learning_rate": 1.4338851356622408e-05, + "loss": 0.0136, "step": 168295 }, { "epoch": 7.85, - "learning_rate": 4.313909333833388e-06, - "loss": 0.0805, + "learning_rate": 1.4338383286786389e-05, + "loss": 0.0587, "step": 168300 }, { "epoch": 7.85, - "learning_rate": 4.313440532558249e-06, - "loss": 0.0598, + "learning_rate": 1.4337915216950369e-05, + "loss": 0.0381, "step": 168305 }, { "epoch": 7.85, - "learning_rate": 4.312971731283109e-06, - "loss": 0.0819, + "learning_rate": 1.433744714711435e-05, + "loss": 0.2118, "step": 168310 }, { "epoch": 7.85, - "learning_rate": 4.31250293000797e-06, - "loss": 0.2036, + "learning_rate": 1.433697907727833e-05, + "loss": 0.1968, "step": 168315 }, { "epoch": 7.85, - "learning_rate": 4.3120341287328306e-06, - "loss": 0.1435, + "learning_rate": 1.4336511007442312e-05, + "loss": 0.2089, "step": 168320 }, { "epoch": 7.85, - "learning_rate": 4.311565327457691e-06, - "loss": 0.0299, + "learning_rate": 1.433604293760629e-05, + "loss": 0.0065, "step": 168325 }, { "epoch": 7.85, - "learning_rate": 4.311096526182551e-06, - "loss": 0.0045, + "learning_rate": 1.4335574867770271e-05, + "loss": 0.0319, "step": 168330 }, { "epoch": 7.85, - "learning_rate": 4.310627724907412e-06, - "loss": 0.0087, + "learning_rate": 1.4335106797934251e-05, + "loss": 0.0949, "step": 168335 }, { "epoch": 7.85, - "learning_rate": 4.310158923632273e-06, - "loss": 0.0175, + "learning_rate": 1.4334638728098233e-05, + "loss": 0.0334, "step": 168340 }, { "epoch": 7.86, - "learning_rate": 4.309690122357133e-06, - "loss": 0.0506, + "learning_rate": 1.4334170658262213e-05, + "loss": 0.0388, "step": 168345 }, { "epoch": 7.86, - "learning_rate": 4.3092213210819935e-06, - "loss": 0.0554, + "learning_rate": 1.4333702588426194e-05, + "loss": 0.0509, "step": 168350 }, { "epoch": 7.86, - "learning_rate": 4.308752519806854e-06, - "loss": 0.0603, + "learning_rate": 1.4333234518590174e-05, + "loss": 0.0546, "step": 168355 }, { "epoch": 7.86, - "learning_rate": 4.308283718531715e-06, - "loss": 0.091, + "learning_rate": 1.4332766448754154e-05, + "loss": 0.0497, "step": 168360 }, { "epoch": 7.86, - "learning_rate": 4.307814917256575e-06, - "loss": 0.212, + "learning_rate": 1.4332298378918135e-05, + "loss": 0.104, "step": 168365 }, { "epoch": 7.86, - "learning_rate": 4.307346115981436e-06, - "loss": 0.2274, + "learning_rate": 1.4331830309082115e-05, + "loss": 0.1949, "step": 168370 }, { "epoch": 7.86, - "learning_rate": 4.306877314706297e-06, - "loss": 0.0377, + "learning_rate": 1.4331362239246097e-05, + "loss": 0.0125, "step": 168375 }, { "epoch": 7.86, - "learning_rate": 4.3064085134311565e-06, - "loss": 0.007, + "learning_rate": 1.4330894169410076e-05, + "loss": 0.0283, "step": 168380 }, { "epoch": 7.86, - "learning_rate": 4.305939712156017e-06, - "loss": 0.0119, + "learning_rate": 1.4330426099574058e-05, + "loss": 0.0026, "step": 168385 }, { "epoch": 7.86, - "learning_rate": 4.305470910880878e-06, - "loss": 0.0058, + "learning_rate": 1.4329958029738036e-05, + "loss": 0.1066, "step": 168390 }, { "epoch": 7.86, - "learning_rate": 4.305002109605739e-06, - "loss": 0.0614, + "learning_rate": 1.4329489959902018e-05, + "loss": 0.0414, "step": 168395 }, { "epoch": 7.86, - "learning_rate": 4.304533308330599e-06, - "loss": 0.0545, + "learning_rate": 1.4329021890065997e-05, + "loss": 0.0493, "step": 168400 }, { "epoch": 7.86, - "learning_rate": 4.30406450705546e-06, - "loss": 0.0637, + "learning_rate": 1.4328553820229979e-05, + "loss": 0.0563, "step": 168405 }, { "epoch": 7.86, - "learning_rate": 4.30359570578032e-06, - "loss": 0.0693, + "learning_rate": 1.432808575039396e-05, + "loss": 0.0915, "step": 168410 }, { "epoch": 7.86, - "learning_rate": 4.30312690450518e-06, - "loss": 0.0879, + "learning_rate": 1.432761768055794e-05, + "loss": 0.1736, "step": 168415 }, { "epoch": 7.86, - "learning_rate": 4.302658103230041e-06, - "loss": 0.1527, + "learning_rate": 1.432714961072192e-05, + "loss": 0.1877, "step": 168420 }, { "epoch": 7.86, - "learning_rate": 4.302189301954902e-06, - "loss": 0.0524, + "learning_rate": 1.43266815408859e-05, + "loss": 0.0102, "step": 168425 }, { "epoch": 7.86, - "learning_rate": 4.301720500679763e-06, - "loss": 0.0019, + "learning_rate": 1.4326213471049881e-05, + "loss": 0.041, "step": 168430 }, { "epoch": 7.86, - "learning_rate": 4.301251699404623e-06, - "loss": 0.0432, + "learning_rate": 1.4325745401213861e-05, + "loss": 0.0124, "step": 168435 }, { "epoch": 7.86, - "learning_rate": 4.300782898129483e-06, - "loss": 0.0809, + "learning_rate": 1.4325277331377843e-05, + "loss": 0.0464, "step": 168440 }, { "epoch": 7.86, - "learning_rate": 4.300314096854344e-06, - "loss": 0.029, + "learning_rate": 1.4324809261541822e-05, + "loss": 0.0182, "step": 168445 }, { "epoch": 7.86, - "learning_rate": 4.299845295579204e-06, - "loss": 0.0623, + "learning_rate": 1.4324341191705802e-05, + "loss": 0.0908, "step": 168450 }, { "epoch": 7.86, - "learning_rate": 4.299376494304065e-06, - "loss": 0.0347, + "learning_rate": 1.4323873121869782e-05, + "loss": 0.0399, "step": 168455 }, { "epoch": 7.86, - "learning_rate": 4.298907693028926e-06, - "loss": 0.1299, + "learning_rate": 1.4323405052033764e-05, + "loss": 0.1194, "step": 168460 }, { "epoch": 7.86, - "learning_rate": 4.298438891753786e-06, - "loss": 0.1557, + "learning_rate": 1.4322936982197743e-05, + "loss": 0.0737, "step": 168465 }, { "epoch": 7.86, - "learning_rate": 4.297970090478646e-06, - "loss": 0.1699, + "learning_rate": 1.4322468912361725e-05, + "loss": 0.2016, "step": 168470 }, { "epoch": 7.86, - "learning_rate": 4.297501289203507e-06, - "loss": 0.029, + "learning_rate": 1.4322000842525706e-05, + "loss": 0.007, "step": 168475 }, { "epoch": 7.86, - "learning_rate": 4.297032487928368e-06, - "loss": 0.0024, + "learning_rate": 1.4321532772689686e-05, + "loss": 0.0103, "step": 168480 }, { "epoch": 7.86, - "learning_rate": 4.296563686653228e-06, - "loss": 0.0279, + "learning_rate": 1.4321064702853666e-05, + "loss": 0.0183, "step": 168485 }, { "epoch": 7.86, - "learning_rate": 4.296094885378089e-06, - "loss": 0.0123, + "learning_rate": 1.4320596633017646e-05, + "loss": 0.0217, "step": 168490 }, { "epoch": 7.86, - "learning_rate": 4.295626084102949e-06, - "loss": 0.0645, + "learning_rate": 1.4320128563181627e-05, + "loss": 0.0219, "step": 168495 }, { "epoch": 7.86, - "learning_rate": 4.29515728282781e-06, - "loss": 0.0283, + "learning_rate": 1.4319660493345607e-05, + "loss": 0.0392, "step": 168500 }, { "epoch": 7.86, - "learning_rate": 4.29468848155267e-06, - "loss": 0.0655, + "learning_rate": 1.4319192423509589e-05, + "loss": 0.0515, "step": 168505 }, { "epoch": 7.86, - "learning_rate": 4.29421968027753e-06, - "loss": 0.0213, + "learning_rate": 1.4318724353673569e-05, + "loss": 0.0871, "step": 168510 }, { "epoch": 7.86, - "learning_rate": 4.293750879002392e-06, - "loss": 0.1315, + "learning_rate": 1.4318256283837548e-05, + "loss": 0.1736, "step": 168515 }, { "epoch": 7.86, - "learning_rate": 4.293282077727252e-06, - "loss": 0.1786, + "learning_rate": 1.4317788214001528e-05, + "loss": 0.1631, "step": 168520 }, { "epoch": 7.86, - "learning_rate": 4.292813276452112e-06, - "loss": 0.0336, + "learning_rate": 1.431732014416551e-05, + "loss": 0.0147, "step": 168525 }, { "epoch": 7.86, - "learning_rate": 4.292344475176972e-06, - "loss": 0.0077, + "learning_rate": 1.431685207432949e-05, + "loss": 0.0156, "step": 168530 }, { "epoch": 7.86, - "learning_rate": 4.291875673901834e-06, - "loss": 0.0224, + "learning_rate": 1.4316384004493471e-05, + "loss": 0.0228, "step": 168535 }, { "epoch": 7.86, - "learning_rate": 4.291406872626694e-06, - "loss": 0.0418, + "learning_rate": 1.4315915934657453e-05, + "loss": 0.1026, "step": 168540 }, { "epoch": 7.86, - "learning_rate": 4.290938071351554e-06, - "loss": 0.0675, + "learning_rate": 1.431544786482143e-05, + "loss": 0.0176, "step": 168545 }, { "epoch": 7.86, - "learning_rate": 4.290469270076415e-06, - "loss": 0.0349, + "learning_rate": 1.4314979794985412e-05, + "loss": 0.0381, "step": 168550 }, { "epoch": 7.87, - "learning_rate": 4.290000468801275e-06, - "loss": 0.1098, + "learning_rate": 1.4314511725149392e-05, + "loss": 0.0337, "step": 168555 }, { "epoch": 7.87, - "learning_rate": 4.289531667526136e-06, - "loss": 0.0939, + "learning_rate": 1.4314043655313374e-05, + "loss": 0.0971, "step": 168560 }, { "epoch": 7.87, - "learning_rate": 4.289062866250996e-06, - "loss": 0.0722, + "learning_rate": 1.4313575585477353e-05, + "loss": 0.1553, "step": 168565 }, { "epoch": 7.87, - "learning_rate": 4.288594064975857e-06, - "loss": 0.1079, + "learning_rate": 1.4313107515641335e-05, + "loss": 0.1645, "step": 168570 }, { "epoch": 7.87, - "learning_rate": 4.288125263700718e-06, - "loss": 0.0175, + "learning_rate": 1.4312639445805315e-05, + "loss": 0.0128, "step": 168575 }, { "epoch": 7.87, - "learning_rate": 4.2876564624255784e-06, - "loss": 0.038, + "learning_rate": 1.4312171375969295e-05, + "loss": 0.0054, "step": 168580 }, { "epoch": 7.87, - "learning_rate": 4.287187661150438e-06, - "loss": 0.031, + "learning_rate": 1.4311703306133274e-05, + "loss": 0.0164, "step": 168585 }, { "epoch": 7.87, - "learning_rate": 4.286718859875299e-06, - "loss": 0.0402, + "learning_rate": 1.4311235236297256e-05, + "loss": 0.0193, "step": 168590 }, { "epoch": 7.87, - "learning_rate": 4.28625005860016e-06, - "loss": 0.017, + "learning_rate": 1.4310767166461237e-05, + "loss": 0.0275, "step": 168595 }, { "epoch": 7.87, - "learning_rate": 4.28578125732502e-06, - "loss": 0.1015, + "learning_rate": 1.4310299096625217e-05, + "loss": 0.0513, "step": 168600 }, { "epoch": 7.87, - "learning_rate": 4.285312456049881e-06, - "loss": 0.0653, + "learning_rate": 1.4309831026789199e-05, + "loss": 0.0374, "step": 168605 }, { "epoch": 7.87, - "learning_rate": 4.2848436547747414e-06, - "loss": 0.0628, + "learning_rate": 1.4309362956953177e-05, + "loss": 0.0629, "step": 168610 }, { "epoch": 7.87, - "learning_rate": 4.284374853499602e-06, - "loss": 0.2802, + "learning_rate": 1.4308894887117158e-05, + "loss": 0.164, "step": 168615 }, { "epoch": 7.87, - "learning_rate": 4.283906052224462e-06, - "loss": 0.2606, + "learning_rate": 1.4308426817281138e-05, + "loss": 0.1714, "step": 168620 }, { "epoch": 7.87, - "learning_rate": 4.283437250949323e-06, - "loss": 0.0017, + "learning_rate": 1.430795874744512e-05, + "loss": 0.0188, "step": 168625 }, { "epoch": 7.87, - "learning_rate": 4.282968449674184e-06, - "loss": 0.0145, + "learning_rate": 1.43074906776091e-05, + "loss": 0.018, "step": 168630 }, { "epoch": 7.87, - "learning_rate": 4.282499648399044e-06, - "loss": 0.0263, + "learning_rate": 1.4307022607773081e-05, + "loss": 0.0494, "step": 168635 }, { "epoch": 7.87, - "learning_rate": 4.2820308471239044e-06, - "loss": 0.0058, + "learning_rate": 1.4306554537937059e-05, + "loss": 0.0749, "step": 168640 }, { "epoch": 7.87, - "learning_rate": 4.281562045848765e-06, - "loss": 0.0209, + "learning_rate": 1.430608646810104e-05, + "loss": 0.0424, "step": 168645 }, { "epoch": 7.87, - "learning_rate": 4.281093244573626e-06, - "loss": 0.0242, + "learning_rate": 1.430561839826502e-05, + "loss": 0.0256, "step": 168650 }, { "epoch": 7.87, - "learning_rate": 4.280624443298486e-06, - "loss": 0.0621, + "learning_rate": 1.4305150328429002e-05, + "loss": 0.0316, "step": 168655 }, { "epoch": 7.87, - "learning_rate": 4.280155642023347e-06, - "loss": 0.1231, + "learning_rate": 1.4304682258592983e-05, + "loss": 0.1013, "step": 168660 }, { "epoch": 7.87, - "learning_rate": 4.2796868407482075e-06, - "loss": 0.1369, + "learning_rate": 1.4304214188756963e-05, + "loss": 0.0591, "step": 168665 }, { "epoch": 7.87, - "learning_rate": 4.279218039473067e-06, - "loss": 0.1725, + "learning_rate": 1.4303746118920945e-05, + "loss": 0.1162, "step": 168670 }, { "epoch": 7.87, - "learning_rate": 4.278749238197928e-06, - "loss": 0.0272, + "learning_rate": 1.4303278049084923e-05, + "loss": 0.0401, "step": 168675 }, { "epoch": 7.87, - "learning_rate": 4.278280436922789e-06, - "loss": 0.0048, + "learning_rate": 1.4302809979248904e-05, + "loss": 0.0082, "step": 168680 }, { "epoch": 7.87, - "learning_rate": 4.27781163564765e-06, - "loss": 0.0094, + "learning_rate": 1.4302341909412884e-05, + "loss": 0.0119, "step": 168685 }, { "epoch": 7.87, - "learning_rate": 4.27734283437251e-06, - "loss": 0.0254, + "learning_rate": 1.4301873839576866e-05, + "loss": 0.0143, "step": 168690 }, { "epoch": 7.87, - "learning_rate": 4.2768740330973705e-06, - "loss": 0.0119, + "learning_rate": 1.4301405769740846e-05, + "loss": 0.0405, "step": 168695 }, { "epoch": 7.87, - "learning_rate": 4.276405231822231e-06, - "loss": 0.0234, + "learning_rate": 1.4300937699904827e-05, + "loss": 0.0337, "step": 168700 }, { "epoch": 7.87, - "learning_rate": 4.275936430547091e-06, - "loss": 0.0467, + "learning_rate": 1.4300469630068805e-05, + "loss": 0.075, "step": 168705 }, { "epoch": 7.87, - "learning_rate": 4.275467629271952e-06, - "loss": 0.0556, + "learning_rate": 1.4300001560232787e-05, + "loss": 0.0669, "step": 168710 }, { "epoch": 7.87, - "learning_rate": 4.274998827996813e-06, - "loss": 0.1093, + "learning_rate": 1.4299533490396768e-05, + "loss": 0.233, "step": 168715 }, { "epoch": 7.87, - "learning_rate": 4.2745300267216735e-06, - "loss": 0.1735, + "learning_rate": 1.4299065420560748e-05, + "loss": 0.1384, "step": 168720 }, { "epoch": 7.87, - "learning_rate": 4.2740612254465335e-06, - "loss": 0.0042, + "learning_rate": 1.429859735072473e-05, + "loss": 0.0357, "step": 168725 }, { "epoch": 7.87, - "learning_rate": 4.273592424171394e-06, - "loss": 0.0389, + "learning_rate": 1.429812928088871e-05, + "loss": 0.014, "step": 168730 }, { "epoch": 7.87, - "learning_rate": 4.273123622896255e-06, - "loss": 0.0092, + "learning_rate": 1.429766121105269e-05, + "loss": 0.0088, "step": 168735 }, { "epoch": 7.87, - "learning_rate": 4.272654821621115e-06, - "loss": 0.0702, + "learning_rate": 1.4297193141216669e-05, + "loss": 0.0058, "step": 168740 }, { "epoch": 7.87, - "learning_rate": 4.272186020345976e-06, - "loss": 0.06, + "learning_rate": 1.429672507138065e-05, + "loss": 0.0406, "step": 168745 }, { "epoch": 7.87, - "learning_rate": 4.2717172190708365e-06, - "loss": 0.0788, + "learning_rate": 1.429625700154463e-05, + "loss": 0.0495, "step": 168750 }, { "epoch": 7.87, - "learning_rate": 4.271248417795697e-06, - "loss": 0.0233, + "learning_rate": 1.4295788931708612e-05, + "loss": 0.0499, "step": 168755 }, { "epoch": 7.87, - "learning_rate": 4.270779616520557e-06, - "loss": 0.1058, + "learning_rate": 1.4295320861872592e-05, + "loss": 0.0511, "step": 168760 }, { "epoch": 7.87, - "learning_rate": 4.270310815245417e-06, - "loss": 0.151, + "learning_rate": 1.4294852792036573e-05, + "loss": 0.0496, "step": 168765 }, { "epoch": 7.88, - "learning_rate": 4.269842013970279e-06, - "loss": 0.1454, + "learning_rate": 1.4294384722200551e-05, + "loss": 0.1869, "step": 168770 }, { "epoch": 7.88, - "learning_rate": 4.269373212695139e-06, - "loss": 0.0255, + "learning_rate": 1.4293916652364533e-05, + "loss": 0.033, "step": 168775 }, { "epoch": 7.88, - "learning_rate": 4.2689044114199995e-06, - "loss": 0.0098, + "learning_rate": 1.4293448582528514e-05, + "loss": 0.0181, "step": 168780 }, { "epoch": 7.88, - "learning_rate": 4.2684356101448594e-06, - "loss": 0.0199, + "learning_rate": 1.4292980512692494e-05, + "loss": 0.0056, "step": 168785 }, { "epoch": 7.88, - "learning_rate": 4.267966808869721e-06, - "loss": 0.033, + "learning_rate": 1.4292512442856476e-05, + "loss": 0.0498, "step": 168790 }, { "epoch": 7.88, - "learning_rate": 4.267498007594581e-06, - "loss": 0.0905, + "learning_rate": 1.4292044373020455e-05, + "loss": 0.0304, "step": 168795 }, { "epoch": 7.88, - "learning_rate": 4.267029206319441e-06, - "loss": 0.0729, + "learning_rate": 1.4291576303184435e-05, + "loss": 0.0064, "step": 168800 }, { "epoch": 7.88, - "learning_rate": 4.266560405044302e-06, - "loss": 0.0492, + "learning_rate": 1.4291108233348415e-05, + "loss": 0.0618, "step": 168805 }, { "epoch": 7.88, - "learning_rate": 4.2660916037691625e-06, - "loss": 0.0431, + "learning_rate": 1.4290640163512397e-05, + "loss": 0.0914, "step": 168810 }, { "epoch": 7.88, - "learning_rate": 4.265622802494023e-06, - "loss": 0.1371, + "learning_rate": 1.4290172093676376e-05, + "loss": 0.1788, "step": 168815 }, { "epoch": 7.88, - "learning_rate": 4.265154001218883e-06, - "loss": 0.174, + "learning_rate": 1.4289704023840358e-05, + "loss": 0.1843, "step": 168820 }, { "epoch": 7.88, - "learning_rate": 4.264685199943744e-06, - "loss": 0.0287, + "learning_rate": 1.4289235954004338e-05, + "loss": 0.0342, "step": 168825 }, { "epoch": 7.88, - "learning_rate": 4.264216398668605e-06, - "loss": 0.0342, + "learning_rate": 1.4288767884168318e-05, + "loss": 0.0096, "step": 168830 }, { "epoch": 7.88, - "learning_rate": 4.263747597393465e-06, - "loss": 0.0245, + "learning_rate": 1.4288299814332297e-05, + "loss": 0.0394, "step": 168835 }, { "epoch": 7.88, - "learning_rate": 4.2632787961183255e-06, - "loss": 0.0667, + "learning_rate": 1.4287831744496279e-05, + "loss": 0.0484, "step": 168840 }, { "epoch": 7.88, - "learning_rate": 4.262809994843186e-06, - "loss": 0.015, + "learning_rate": 1.428736367466026e-05, + "loss": 0.011, "step": 168845 }, { "epoch": 7.88, - "learning_rate": 4.262341193568047e-06, - "loss": 0.0788, + "learning_rate": 1.428689560482424e-05, + "loss": 0.0084, "step": 168850 }, { "epoch": 7.88, - "learning_rate": 4.261872392292907e-06, - "loss": 0.056, + "learning_rate": 1.4286427534988222e-05, + "loss": 0.0434, "step": 168855 }, { "epoch": 7.88, - "learning_rate": 4.261403591017768e-06, - "loss": 0.0783, + "learning_rate": 1.4285959465152202e-05, + "loss": 0.0616, "step": 168860 }, { "epoch": 7.88, - "learning_rate": 4.2609347897426285e-06, - "loss": 0.1247, + "learning_rate": 1.4285491395316181e-05, + "loss": 0.2006, "step": 168865 }, { "epoch": 7.88, - "learning_rate": 4.2604659884674885e-06, - "loss": 0.2674, + "learning_rate": 1.4285023325480161e-05, + "loss": 0.1944, "step": 168870 }, { "epoch": 7.88, - "learning_rate": 4.259997187192349e-06, - "loss": 0.024, + "learning_rate": 1.4284555255644143e-05, + "loss": 0.0178, "step": 168875 }, { "epoch": 7.88, - "learning_rate": 4.25952838591721e-06, - "loss": 0.0029, + "learning_rate": 1.4284087185808123e-05, + "loss": 0.0012, "step": 168880 }, { "epoch": 7.88, - "learning_rate": 4.259059584642071e-06, - "loss": 0.0054, + "learning_rate": 1.4283619115972104e-05, + "loss": 0.0289, "step": 168885 }, { "epoch": 7.88, - "learning_rate": 4.258590783366931e-06, - "loss": 0.0221, + "learning_rate": 1.4283151046136084e-05, + "loss": 0.0169, "step": 168890 }, { "epoch": 7.88, - "learning_rate": 4.2581219820917915e-06, - "loss": 0.0256, + "learning_rate": 1.4282682976300064e-05, + "loss": 0.0462, "step": 168895 }, { "epoch": 7.88, - "learning_rate": 4.257653180816652e-06, - "loss": 0.0613, + "learning_rate": 1.4282214906464045e-05, + "loss": 0.0182, "step": 168900 }, { "epoch": 7.88, - "learning_rate": 4.257184379541512e-06, - "loss": 0.0367, + "learning_rate": 1.4281746836628025e-05, + "loss": 0.0974, "step": 168905 }, { "epoch": 7.88, - "learning_rate": 4.256715578266373e-06, - "loss": 0.0618, + "learning_rate": 1.4281278766792007e-05, + "loss": 0.0776, "step": 168910 }, { "epoch": 7.88, - "learning_rate": 4.256246776991234e-06, - "loss": 0.087, + "learning_rate": 1.4280810696955986e-05, + "loss": 0.2962, "step": 168915 }, { "epoch": 7.88, - "learning_rate": 4.255777975716095e-06, - "loss": 0.3263, + "learning_rate": 1.4280342627119968e-05, + "loss": 0.1977, "step": 168920 }, { "epoch": 7.88, - "learning_rate": 4.2553091744409545e-06, - "loss": 0.0258, + "learning_rate": 1.4279874557283946e-05, + "loss": 0.0322, "step": 168925 }, { "epoch": 7.88, - "learning_rate": 4.254840373165815e-06, - "loss": 0.0182, + "learning_rate": 1.4279406487447927e-05, + "loss": 0.0067, "step": 168930 }, { "epoch": 7.88, - "learning_rate": 4.254371571890676e-06, - "loss": 0.014, + "learning_rate": 1.4278938417611907e-05, + "loss": 0.002, "step": 168935 }, { "epoch": 7.88, - "learning_rate": 4.253902770615537e-06, - "loss": 0.0426, + "learning_rate": 1.4278470347775889e-05, + "loss": 0.0032, "step": 168940 }, { "epoch": 7.88, - "learning_rate": 4.253433969340397e-06, - "loss": 0.0559, + "learning_rate": 1.4278002277939869e-05, + "loss": 0.0697, "step": 168945 }, { "epoch": 7.88, - "learning_rate": 4.252965168065258e-06, - "loss": 0.0248, + "learning_rate": 1.427753420810385e-05, + "loss": 0.0022, "step": 168950 }, { "epoch": 7.88, - "learning_rate": 4.252496366790118e-06, - "loss": 0.0744, + "learning_rate": 1.427706613826783e-05, + "loss": 0.0664, "step": 168955 }, { "epoch": 7.88, - "learning_rate": 4.252027565514978e-06, - "loss": 0.0943, + "learning_rate": 1.427659806843181e-05, + "loss": 0.0316, "step": 168960 }, { "epoch": 7.88, - "learning_rate": 4.251558764239839e-06, - "loss": 0.1795, + "learning_rate": 1.4276129998595791e-05, + "loss": 0.1129, "step": 168965 }, { "epoch": 7.88, - "learning_rate": 4.2510899629647e-06, - "loss": 0.2325, + "learning_rate": 1.4275661928759771e-05, + "loss": 0.1576, "step": 168970 }, { "epoch": 7.88, - "learning_rate": 4.250621161689561e-06, - "loss": 0.0335, + "learning_rate": 1.4275193858923753e-05, + "loss": 0.0025, "step": 168975 }, { "epoch": 7.88, - "learning_rate": 4.2501523604144206e-06, - "loss": 0.0017, + "learning_rate": 1.4274725789087732e-05, + "loss": 0.011, "step": 168980 }, { "epoch": 7.89, - "learning_rate": 4.249683559139281e-06, - "loss": 0.0291, + "learning_rate": 1.4274257719251714e-05, + "loss": 0.0307, "step": 168985 }, { "epoch": 7.89, - "learning_rate": 4.249214757864142e-06, - "loss": 0.0094, + "learning_rate": 1.4273789649415692e-05, + "loss": 0.0184, "step": 168990 }, { "epoch": 7.89, - "learning_rate": 4.248745956589002e-06, - "loss": 0.0263, + "learning_rate": 1.4273321579579674e-05, + "loss": 0.0125, "step": 168995 }, { "epoch": 7.89, - "learning_rate": 4.248277155313863e-06, - "loss": 0.0261, + "learning_rate": 1.4272853509743653e-05, + "loss": 0.0335, "step": 169000 }, { "epoch": 7.89, - "learning_rate": 4.247808354038724e-06, - "loss": 0.084, + "learning_rate": 1.4272385439907635e-05, + "loss": 0.0265, "step": 169005 }, { "epoch": 7.89, - "learning_rate": 4.247339552763584e-06, - "loss": 0.0413, + "learning_rate": 1.4271917370071615e-05, + "loss": 0.0571, "step": 169010 }, { "epoch": 7.89, - "learning_rate": 4.246870751488444e-06, - "loss": 0.1586, + "learning_rate": 1.4271449300235596e-05, + "loss": 0.1325, "step": 169015 }, { "epoch": 7.89, - "learning_rate": 4.246401950213305e-06, - "loss": 0.1493, + "learning_rate": 1.4270981230399576e-05, + "loss": 0.067, "step": 169020 }, { "epoch": 7.89, - "learning_rate": 4.245933148938166e-06, - "loss": 0.0058, + "learning_rate": 1.4270513160563556e-05, + "loss": 0.0299, "step": 169025 }, { "epoch": 7.89, - "learning_rate": 4.245464347663026e-06, - "loss": 0.0543, + "learning_rate": 1.4270045090727537e-05, + "loss": 0.0511, "step": 169030 }, { "epoch": 7.89, - "learning_rate": 4.244995546387887e-06, - "loss": 0.0185, + "learning_rate": 1.4269577020891517e-05, + "loss": 0.0064, "step": 169035 }, { "epoch": 7.89, - "learning_rate": 4.244526745112747e-06, - "loss": 0.0856, + "learning_rate": 1.4269108951055499e-05, + "loss": 0.0055, "step": 169040 }, { "epoch": 7.89, - "learning_rate": 4.244057943837608e-06, - "loss": 0.0232, + "learning_rate": 1.4268640881219479e-05, + "loss": 0.0149, "step": 169045 }, { "epoch": 7.89, - "learning_rate": 4.243589142562468e-06, - "loss": 0.0785, + "learning_rate": 1.4268172811383458e-05, + "loss": 0.0433, "step": 169050 }, { "epoch": 7.89, - "learning_rate": 4.243120341287328e-06, - "loss": 0.0907, + "learning_rate": 1.4267704741547438e-05, + "loss": 0.0434, "step": 169055 }, { "epoch": 7.89, - "learning_rate": 4.24265154001219e-06, - "loss": 0.092, + "learning_rate": 1.426723667171142e-05, + "loss": 0.0778, "step": 169060 }, { "epoch": 7.89, - "learning_rate": 4.24218273873705e-06, - "loss": 0.1473, + "learning_rate": 1.42667686018754e-05, + "loss": 0.1068, "step": 169065 }, { "epoch": 7.89, - "learning_rate": 4.24171393746191e-06, - "loss": 0.2602, + "learning_rate": 1.4266300532039381e-05, + "loss": 0.228, "step": 169070 }, { "epoch": 7.89, - "learning_rate": 4.24124513618677e-06, - "loss": 0.0367, + "learning_rate": 1.426583246220336e-05, + "loss": 0.0231, "step": 169075 }, { "epoch": 7.89, - "learning_rate": 4.240776334911632e-06, - "loss": 0.0632, + "learning_rate": 1.4265364392367342e-05, + "loss": 0.0349, "step": 169080 }, { "epoch": 7.89, - "learning_rate": 4.240307533636492e-06, - "loss": 0.0177, + "learning_rate": 1.4264896322531322e-05, + "loss": 0.019, "step": 169085 }, { "epoch": 7.89, - "learning_rate": 4.239838732361352e-06, - "loss": 0.0119, + "learning_rate": 1.4264428252695302e-05, + "loss": 0.0014, "step": 169090 }, { "epoch": 7.89, - "learning_rate": 4.239369931086213e-06, - "loss": 0.0509, + "learning_rate": 1.4263960182859283e-05, + "loss": 0.0668, "step": 169095 }, { "epoch": 7.89, - "learning_rate": 4.238901129811073e-06, - "loss": 0.0559, + "learning_rate": 1.4263492113023263e-05, + "loss": 0.0491, "step": 169100 }, { "epoch": 7.89, - "learning_rate": 4.238432328535934e-06, - "loss": 0.0734, + "learning_rate": 1.4263024043187245e-05, + "loss": 0.0932, "step": 169105 }, { "epoch": 7.89, - "learning_rate": 4.237963527260794e-06, - "loss": 0.0724, + "learning_rate": 1.4262555973351225e-05, + "loss": 0.0868, "step": 169110 }, { "epoch": 7.89, - "learning_rate": 4.237494725985655e-06, - "loss": 0.2283, + "learning_rate": 1.4262087903515204e-05, + "loss": 0.1378, "step": 169115 }, { "epoch": 7.89, - "learning_rate": 4.237025924710516e-06, - "loss": 0.1418, + "learning_rate": 1.4261619833679184e-05, + "loss": 0.2035, "step": 169120 }, { "epoch": 7.89, - "learning_rate": 4.236557123435376e-06, - "loss": 0.0025, + "learning_rate": 1.4261151763843166e-05, + "loss": 0.0306, "step": 169125 }, { "epoch": 7.89, - "learning_rate": 4.236088322160236e-06, - "loss": 0.0011, + "learning_rate": 1.4260683694007146e-05, + "loss": 0.023, "step": 169130 }, { "epoch": 7.89, - "learning_rate": 4.235619520885097e-06, - "loss": 0.0309, + "learning_rate": 1.4260215624171127e-05, + "loss": 0.0283, "step": 169135 }, { "epoch": 7.89, - "learning_rate": 4.235150719609958e-06, - "loss": 0.0131, + "learning_rate": 1.4259747554335107e-05, + "loss": 0.011, "step": 169140 }, { "epoch": 7.89, - "learning_rate": 4.234681918334818e-06, - "loss": 0.0202, + "learning_rate": 1.4259279484499087e-05, + "loss": 0.0033, "step": 169145 }, { "epoch": 7.89, - "learning_rate": 4.234213117059679e-06, - "loss": 0.1491, + "learning_rate": 1.4258811414663068e-05, + "loss": 0.015, "step": 169150 }, { "epoch": 7.89, - "learning_rate": 4.2337443157845394e-06, - "loss": 0.0379, + "learning_rate": 1.4258343344827048e-05, + "loss": 0.0433, "step": 169155 }, { "epoch": 7.89, - "learning_rate": 4.233275514509399e-06, - "loss": 0.0636, + "learning_rate": 1.425787527499103e-05, + "loss": 0.0606, "step": 169160 }, { "epoch": 7.89, - "learning_rate": 4.23280671323426e-06, - "loss": 0.0811, + "learning_rate": 1.425740720515501e-05, + "loss": 0.1532, "step": 169165 }, { "epoch": 7.89, - "learning_rate": 4.232337911959121e-06, - "loss": 0.2218, + "learning_rate": 1.4256939135318991e-05, + "loss": 0.1092, "step": 169170 }, { "epoch": 7.89, - "learning_rate": 4.231869110683982e-06, - "loss": 0.0068, + "learning_rate": 1.425647106548297e-05, + "loss": 0.0139, "step": 169175 }, { "epoch": 7.89, - "learning_rate": 4.231400309408842e-06, - "loss": 0.0485, + "learning_rate": 1.425600299564695e-05, + "loss": 0.0219, "step": 169180 }, { "epoch": 7.89, - "learning_rate": 4.230931508133702e-06, - "loss": 0.0227, + "learning_rate": 1.425553492581093e-05, + "loss": 0.0367, "step": 169185 }, { "epoch": 7.89, - "learning_rate": 4.230462706858563e-06, - "loss": 0.0299, + "learning_rate": 1.4255066855974912e-05, + "loss": 0.0186, "step": 169190 }, { "epoch": 7.89, - "learning_rate": 4.229993905583423e-06, - "loss": 0.0297, + "learning_rate": 1.4254598786138892e-05, + "loss": 0.0567, "step": 169195 }, { "epoch": 7.9, - "learning_rate": 4.229525104308284e-06, - "loss": 0.0548, + "learning_rate": 1.4254130716302873e-05, + "loss": 0.1161, "step": 169200 }, { "epoch": 7.9, - "learning_rate": 4.229056303033145e-06, - "loss": 0.0614, + "learning_rate": 1.4253662646466855e-05, + "loss": 0.0381, "step": 169205 }, { "epoch": 7.9, - "learning_rate": 4.2285875017580055e-06, - "loss": 0.0374, + "learning_rate": 1.4253194576630833e-05, + "loss": 0.1105, "step": 169210 }, { "epoch": 7.9, - "learning_rate": 4.228118700482865e-06, - "loss": 0.1684, + "learning_rate": 1.4252726506794814e-05, + "loss": 0.2184, "step": 169215 }, { "epoch": 7.9, - "learning_rate": 4.227649899207726e-06, - "loss": 0.1932, + "learning_rate": 1.4252258436958794e-05, + "loss": 0.2139, "step": 169220 }, { "epoch": 7.9, - "learning_rate": 4.227181097932587e-06, - "loss": 0.0181, + "learning_rate": 1.4251790367122776e-05, + "loss": 0.0141, "step": 169225 }, { "epoch": 7.9, - "learning_rate": 4.226712296657447e-06, - "loss": 0.0493, + "learning_rate": 1.4251322297286756e-05, + "loss": 0.0354, "step": 169230 }, { "epoch": 7.9, - "learning_rate": 4.226243495382308e-06, - "loss": 0.0246, + "learning_rate": 1.4250854227450737e-05, + "loss": 0.0184, "step": 169235 }, { "epoch": 7.9, - "learning_rate": 4.2257746941071685e-06, - "loss": 0.0321, + "learning_rate": 1.4250386157614715e-05, + "loss": 0.0074, "step": 169240 }, { "epoch": 7.9, - "learning_rate": 4.225305892832029e-06, - "loss": 0.0063, + "learning_rate": 1.4249918087778697e-05, + "loss": 0.0094, "step": 169245 }, { "epoch": 7.9, - "learning_rate": 4.224837091556889e-06, - "loss": 0.0592, + "learning_rate": 1.4249450017942676e-05, + "loss": 0.1195, "step": 169250 }, { "epoch": 7.9, - "learning_rate": 4.22436829028175e-06, - "loss": 0.0718, + "learning_rate": 1.4248981948106658e-05, + "loss": 0.1098, "step": 169255 }, { "epoch": 7.9, - "learning_rate": 4.223899489006611e-06, - "loss": 0.1441, + "learning_rate": 1.4248513878270638e-05, + "loss": 0.0378, "step": 169260 }, { "epoch": 7.9, - "learning_rate": 4.2234306877314715e-06, - "loss": 0.186, + "learning_rate": 1.424804580843462e-05, + "loss": 0.1194, "step": 169265 }, { "epoch": 7.9, - "learning_rate": 4.2229618864563315e-06, - "loss": 0.0941, + "learning_rate": 1.42475777385986e-05, + "loss": 0.1865, "step": 169270 }, { "epoch": 7.9, - "learning_rate": 4.222493085181192e-06, - "loss": 0.0268, + "learning_rate": 1.4247109668762579e-05, + "loss": 0.0021, "step": 169275 }, { "epoch": 7.9, - "learning_rate": 4.222024283906053e-06, - "loss": 0.004, + "learning_rate": 1.424664159892656e-05, + "loss": 0.025, "step": 169280 }, { "epoch": 7.9, - "learning_rate": 4.221555482630913e-06, - "loss": 0.0009, + "learning_rate": 1.424617352909054e-05, + "loss": 0.0366, "step": 169285 }, { "epoch": 7.9, - "learning_rate": 4.221086681355774e-06, - "loss": 0.028, + "learning_rate": 1.4245705459254522e-05, + "loss": 0.0225, "step": 169290 }, { "epoch": 7.9, - "learning_rate": 4.2206178800806345e-06, - "loss": 0.0436, + "learning_rate": 1.4245237389418502e-05, + "loss": 0.0464, "step": 169295 }, { "epoch": 7.9, - "learning_rate": 4.220149078805495e-06, - "loss": 0.0228, + "learning_rate": 1.4244769319582483e-05, + "loss": 0.0812, "step": 169300 }, { "epoch": 7.9, - "learning_rate": 4.219680277530355e-06, - "loss": 0.0387, + "learning_rate": 1.4244301249746461e-05, + "loss": 0.0989, "step": 169305 }, { "epoch": 7.9, - "learning_rate": 4.219211476255215e-06, - "loss": 0.1916, + "learning_rate": 1.4243833179910443e-05, + "loss": 0.0917, "step": 169310 }, { "epoch": 7.9, - "learning_rate": 4.218742674980077e-06, - "loss": 0.1993, + "learning_rate": 1.4243365110074423e-05, + "loss": 0.149, "step": 169315 }, { "epoch": 7.9, - "learning_rate": 4.218273873704937e-06, - "loss": 0.1035, + "learning_rate": 1.4242897040238404e-05, + "loss": 0.1643, "step": 169320 }, { "epoch": 7.9, - "learning_rate": 4.2178050724297975e-06, - "loss": 0.0424, + "learning_rate": 1.4242428970402384e-05, + "loss": 0.0153, "step": 169325 }, { "epoch": 7.9, - "learning_rate": 4.2173362711546574e-06, - "loss": 0.0303, + "learning_rate": 1.4241960900566365e-05, + "loss": 0.0049, "step": 169330 }, { "epoch": 7.9, - "learning_rate": 4.216867469879519e-06, - "loss": 0.0086, + "learning_rate": 1.4241492830730345e-05, + "loss": 0.0241, "step": 169335 }, { "epoch": 7.9, - "learning_rate": 4.216398668604379e-06, - "loss": 0.0104, + "learning_rate": 1.4241024760894325e-05, + "loss": 0.0565, "step": 169340 }, { "epoch": 7.9, - "learning_rate": 4.215929867329239e-06, - "loss": 0.0207, + "learning_rate": 1.4240556691058307e-05, + "loss": 0.0549, "step": 169345 }, { "epoch": 7.9, - "learning_rate": 4.2154610660541e-06, - "loss": 0.0429, + "learning_rate": 1.4240088621222286e-05, + "loss": 0.0298, "step": 169350 }, { "epoch": 7.9, - "learning_rate": 4.2149922647789605e-06, - "loss": 0.0823, + "learning_rate": 1.4239620551386268e-05, + "loss": 0.0707, "step": 169355 }, { "epoch": 7.9, - "learning_rate": 4.214523463503821e-06, - "loss": 0.0334, + "learning_rate": 1.4239152481550248e-05, + "loss": 0.0992, "step": 169360 }, { "epoch": 7.9, - "learning_rate": 4.214054662228681e-06, - "loss": 0.1578, + "learning_rate": 1.423868441171423e-05, + "loss": 0.0889, "step": 169365 }, { "epoch": 7.9, - "learning_rate": 4.213585860953542e-06, - "loss": 0.167, + "learning_rate": 1.4238216341878207e-05, + "loss": 0.1562, "step": 169370 }, { "epoch": 7.9, - "learning_rate": 4.213117059678403e-06, - "loss": 0.0168, + "learning_rate": 1.4237748272042189e-05, + "loss": 0.043, "step": 169375 }, { "epoch": 7.9, - "learning_rate": 4.212648258403263e-06, - "loss": 0.0216, + "learning_rate": 1.4237280202206169e-05, + "loss": 0.025, "step": 169380 }, { "epoch": 7.9, - "learning_rate": 4.2121794571281235e-06, - "loss": 0.014, + "learning_rate": 1.423681213237015e-05, + "loss": 0.0088, "step": 169385 }, { "epoch": 7.9, - "learning_rate": 4.211710655852984e-06, - "loss": 0.0265, + "learning_rate": 1.4236344062534132e-05, + "loss": 0.0499, "step": 169390 }, { "epoch": 7.9, - "learning_rate": 4.211241854577845e-06, - "loss": 0.0134, + "learning_rate": 1.4235875992698112e-05, + "loss": 0.0486, "step": 169395 }, { "epoch": 7.9, - "learning_rate": 4.210773053302705e-06, - "loss": 0.0746, + "learning_rate": 1.4235407922862091e-05, + "loss": 0.0107, "step": 169400 }, { "epoch": 7.9, - "learning_rate": 4.210304252027566e-06, - "loss": 0.0688, + "learning_rate": 1.4234939853026071e-05, + "loss": 0.0263, "step": 169405 }, { "epoch": 7.9, - "learning_rate": 4.2098354507524265e-06, - "loss": 0.0936, + "learning_rate": 1.4234471783190053e-05, + "loss": 0.1129, "step": 169410 }, { "epoch": 7.91, - "learning_rate": 4.2093666494772865e-06, - "loss": 0.1367, + "learning_rate": 1.4234003713354032e-05, + "loss": 0.1112, "step": 169415 }, { "epoch": 7.91, - "learning_rate": 4.208897848202147e-06, - "loss": 0.1399, + "learning_rate": 1.4233535643518014e-05, + "loss": 0.1384, "step": 169420 }, { "epoch": 7.91, - "learning_rate": 4.208429046927008e-06, - "loss": 0.0213, + "learning_rate": 1.4233067573681994e-05, + "loss": 0.0184, "step": 169425 }, { "epoch": 7.91, - "learning_rate": 4.207960245651869e-06, - "loss": 0.0142, + "learning_rate": 1.4232599503845974e-05, + "loss": 0.0461, "step": 169430 }, { "epoch": 7.91, - "learning_rate": 4.207491444376729e-06, - "loss": 0.0136, + "learning_rate": 1.4232131434009953e-05, + "loss": 0.0043, "step": 169435 }, { "epoch": 7.91, - "learning_rate": 4.2070226431015895e-06, - "loss": 0.0253, + "learning_rate": 1.4231663364173935e-05, + "loss": 0.0387, "step": 169440 }, { "epoch": 7.91, - "learning_rate": 4.20655384182645e-06, - "loss": 0.0129, + "learning_rate": 1.4231195294337915e-05, + "loss": 0.0426, "step": 169445 }, { "epoch": 7.91, - "learning_rate": 4.20608504055131e-06, - "loss": 0.1484, + "learning_rate": 1.4230727224501896e-05, + "loss": 0.0638, "step": 169450 }, { "epoch": 7.91, - "learning_rate": 4.205616239276171e-06, - "loss": 0.0565, + "learning_rate": 1.4230259154665878e-05, + "loss": 0.0484, "step": 169455 }, { "epoch": 7.91, - "learning_rate": 4.205147438001032e-06, - "loss": 0.0649, + "learning_rate": 1.4229791084829858e-05, + "loss": 0.0521, "step": 169460 }, { "epoch": 7.91, - "learning_rate": 4.204678636725893e-06, - "loss": 0.1295, + "learning_rate": 1.4229323014993837e-05, + "loss": 0.1699, "step": 169465 }, { "epoch": 7.91, - "learning_rate": 4.2042098354507525e-06, - "loss": 0.1742, + "learning_rate": 1.4228854945157817e-05, + "loss": 0.2367, "step": 169470 }, { "epoch": 7.91, - "learning_rate": 4.203741034175613e-06, - "loss": 0.0147, + "learning_rate": 1.4228386875321799e-05, + "loss": 0.0179, "step": 169475 }, { "epoch": 7.91, - "learning_rate": 4.203272232900474e-06, - "loss": 0.0149, + "learning_rate": 1.4227918805485779e-05, + "loss": 0.0074, "step": 169480 }, { "epoch": 7.91, - "learning_rate": 4.202803431625334e-06, - "loss": 0.0575, + "learning_rate": 1.422745073564976e-05, + "loss": 0.0567, "step": 169485 }, { "epoch": 7.91, - "learning_rate": 4.202334630350195e-06, - "loss": 0.0224, + "learning_rate": 1.422698266581374e-05, + "loss": 0.016, "step": 169490 }, { "epoch": 7.91, - "learning_rate": 4.2018658290750556e-06, - "loss": 0.1344, + "learning_rate": 1.422651459597772e-05, + "loss": 0.0632, "step": 169495 }, { "epoch": 7.91, - "learning_rate": 4.201397027799916e-06, - "loss": 0.0568, + "learning_rate": 1.42260465261417e-05, + "loss": 0.0331, "step": 169500 }, { "epoch": 7.91, - "learning_rate": 4.200928226524776e-06, - "loss": 0.0977, + "learning_rate": 1.4225578456305681e-05, + "loss": 0.0494, "step": 169505 }, { "epoch": 7.91, - "learning_rate": 4.200459425249637e-06, - "loss": 0.0969, + "learning_rate": 1.4225110386469663e-05, + "loss": 0.0742, "step": 169510 }, { "epoch": 7.91, - "learning_rate": 4.199990623974498e-06, - "loss": 0.1581, + "learning_rate": 1.4224642316633642e-05, + "loss": 0.1612, "step": 169515 }, { "epoch": 7.91, - "learning_rate": 4.199521822699358e-06, - "loss": 0.1901, + "learning_rate": 1.4224174246797624e-05, + "loss": 0.1182, "step": 169520 }, { "epoch": 7.91, - "learning_rate": 4.1990530214242186e-06, - "loss": 0.0159, + "learning_rate": 1.4223706176961602e-05, + "loss": 0.0226, "step": 169525 }, { "epoch": 7.91, - "learning_rate": 4.198584220149079e-06, - "loss": 0.0343, + "learning_rate": 1.4223238107125584e-05, + "loss": 0.0221, "step": 169530 }, { "epoch": 7.91, - "learning_rate": 4.19811541887394e-06, - "loss": 0.0222, + "learning_rate": 1.4222770037289563e-05, + "loss": 0.0205, "step": 169535 }, { "epoch": 7.91, - "learning_rate": 4.1976466175988e-06, - "loss": 0.054, + "learning_rate": 1.4222301967453545e-05, + "loss": 0.0256, "step": 169540 }, { "epoch": 7.91, - "learning_rate": 4.197177816323661e-06, - "loss": 0.0673, + "learning_rate": 1.4221833897617525e-05, + "loss": 0.0545, "step": 169545 }, { "epoch": 7.91, - "learning_rate": 4.196709015048522e-06, - "loss": 0.048, + "learning_rate": 1.4221365827781506e-05, + "loss": 0.0546, "step": 169550 }, { "epoch": 7.91, - "learning_rate": 4.1962402137733816e-06, - "loss": 0.0437, + "learning_rate": 1.4220897757945486e-05, + "loss": 0.0734, "step": 169555 }, { "epoch": 7.91, - "learning_rate": 4.195771412498242e-06, - "loss": 0.0724, + "learning_rate": 1.4220429688109466e-05, + "loss": 0.0938, "step": 169560 }, { "epoch": 7.91, - "learning_rate": 4.195302611223102e-06, - "loss": 0.1265, + "learning_rate": 1.4219961618273446e-05, + "loss": 0.1567, "step": 169565 }, { "epoch": 7.91, - "learning_rate": 4.194833809947964e-06, - "loss": 0.1208, + "learning_rate": 1.4219493548437427e-05, + "loss": 0.1117, "step": 169570 }, { "epoch": 7.91, - "learning_rate": 4.194365008672824e-06, - "loss": 0.0268, + "learning_rate": 1.4219025478601409e-05, + "loss": 0.0044, "step": 169575 }, { "epoch": 7.91, - "learning_rate": 4.193896207397685e-06, - "loss": 0.0171, + "learning_rate": 1.4218557408765388e-05, + "loss": 0.0491, "step": 169580 }, { "epoch": 7.91, - "learning_rate": 4.1934274061225445e-06, - "loss": 0.015, + "learning_rate": 1.421808933892937e-05, + "loss": 0.0152, "step": 169585 }, { "epoch": 7.91, - "learning_rate": 4.192958604847406e-06, - "loss": 0.0057, + "learning_rate": 1.4217621269093348e-05, + "loss": 0.0394, "step": 169590 }, { "epoch": 7.91, - "learning_rate": 4.192489803572266e-06, - "loss": 0.0166, + "learning_rate": 1.421715319925733e-05, + "loss": 0.0425, "step": 169595 }, { "epoch": 7.91, - "learning_rate": 4.192021002297126e-06, - "loss": 0.0181, + "learning_rate": 1.421668512942131e-05, + "loss": 0.0227, "step": 169600 }, { "epoch": 7.91, - "learning_rate": 4.191552201021987e-06, - "loss": 0.0603, + "learning_rate": 1.4216217059585291e-05, + "loss": 0.1057, "step": 169605 }, { "epoch": 7.91, - "learning_rate": 4.191083399746848e-06, - "loss": 0.1623, + "learning_rate": 1.421574898974927e-05, + "loss": 0.0623, "step": 169610 }, { "epoch": 7.91, - "learning_rate": 4.190614598471708e-06, - "loss": 0.1333, + "learning_rate": 1.4215280919913252e-05, + "loss": 0.1672, "step": 169615 }, { "epoch": 7.91, - "learning_rate": 4.190145797196568e-06, - "loss": 0.1127, + "learning_rate": 1.421481285007723e-05, + "loss": 0.2064, "step": 169620 }, { "epoch": 7.91, - "learning_rate": 4.189676995921429e-06, - "loss": 0.015, + "learning_rate": 1.4214344780241212e-05, + "loss": 0.0186, "step": 169625 }, { "epoch": 7.92, - "learning_rate": 4.18920819464629e-06, - "loss": 0.0208, + "learning_rate": 1.4213876710405192e-05, + "loss": 0.0116, "step": 169630 }, { "epoch": 7.92, - "learning_rate": 4.18873939337115e-06, - "loss": 0.0404, + "learning_rate": 1.4213408640569173e-05, + "loss": 0.0287, "step": 169635 }, { "epoch": 7.92, - "learning_rate": 4.188270592096011e-06, - "loss": 0.0043, + "learning_rate": 1.4212940570733155e-05, + "loss": 0.0379, "step": 169640 }, { "epoch": 7.92, - "learning_rate": 4.187801790820871e-06, - "loss": 0.0596, + "learning_rate": 1.4212472500897135e-05, + "loss": 0.0268, "step": 169645 }, { "epoch": 7.92, - "learning_rate": 4.187332989545732e-06, - "loss": 0.031, + "learning_rate": 1.4212004431061114e-05, + "loss": 0.0211, "step": 169650 }, { "epoch": 7.92, - "learning_rate": 4.186864188270592e-06, - "loss": 0.0866, + "learning_rate": 1.4211536361225094e-05, + "loss": 0.0341, "step": 169655 }, { "epoch": 7.92, - "learning_rate": 4.186395386995453e-06, - "loss": 0.1089, + "learning_rate": 1.4211068291389076e-05, + "loss": 0.0734, "step": 169660 }, { "epoch": 7.92, - "learning_rate": 4.185926585720314e-06, - "loss": 0.1468, + "learning_rate": 1.4210600221553056e-05, + "loss": 0.165, "step": 169665 }, { "epoch": 7.92, - "learning_rate": 4.185457784445174e-06, - "loss": 0.1847, + "learning_rate": 1.4210132151717037e-05, + "loss": 0.1486, "step": 169670 }, { "epoch": 7.92, - "learning_rate": 4.184988983170034e-06, - "loss": 0.0266, + "learning_rate": 1.4209664081881017e-05, + "loss": 0.0147, "step": 169675 }, { "epoch": 7.92, - "learning_rate": 4.184520181894895e-06, - "loss": 0.0044, + "learning_rate": 1.4209196012044998e-05, + "loss": 0.0052, "step": 169680 }, { "epoch": 7.92, - "learning_rate": 4.184051380619756e-06, - "loss": 0.0261, + "learning_rate": 1.4208727942208977e-05, + "loss": 0.0326, "step": 169685 }, { "epoch": 7.92, - "learning_rate": 4.183582579344616e-06, - "loss": 0.0564, + "learning_rate": 1.4208259872372958e-05, + "loss": 0.0219, "step": 169690 }, { "epoch": 7.92, - "learning_rate": 4.183113778069477e-06, - "loss": 0.0299, + "learning_rate": 1.420779180253694e-05, + "loss": 0.0641, "step": 169695 }, { "epoch": 7.92, - "learning_rate": 4.182644976794337e-06, - "loss": 0.0088, + "learning_rate": 1.420732373270092e-05, + "loss": 0.0597, "step": 169700 }, { "epoch": 7.92, - "learning_rate": 4.182176175519197e-06, - "loss": 0.0745, + "learning_rate": 1.4206855662864901e-05, + "loss": 0.0285, "step": 169705 }, { "epoch": 7.92, - "learning_rate": 4.181707374244058e-06, - "loss": 0.2249, + "learning_rate": 1.420638759302888e-05, + "loss": 0.0853, "step": 169710 }, { "epoch": 7.92, - "learning_rate": 4.181238572968919e-06, - "loss": 0.1528, + "learning_rate": 1.420591952319286e-05, + "loss": 0.08, "step": 169715 }, { "epoch": 7.92, - "learning_rate": 4.18076977169378e-06, - "loss": 0.259, + "learning_rate": 1.420545145335684e-05, + "loss": 0.1281, "step": 169720 }, { "epoch": 7.92, - "learning_rate": 4.18030097041864e-06, - "loss": 0.0804, + "learning_rate": 1.4204983383520822e-05, + "loss": 0.0358, "step": 169725 }, { "epoch": 7.92, - "learning_rate": 4.1798321691435e-06, - "loss": 0.0231, + "learning_rate": 1.4204515313684802e-05, + "loss": 0.0665, "step": 169730 }, { "epoch": 7.92, - "learning_rate": 4.179363367868361e-06, - "loss": 0.0158, + "learning_rate": 1.4204047243848783e-05, + "loss": 0.0128, "step": 169735 }, { "epoch": 7.92, - "learning_rate": 4.178894566593221e-06, - "loss": 0.023, + "learning_rate": 1.4203579174012763e-05, + "loss": 0.004, "step": 169740 }, { "epoch": 7.92, - "learning_rate": 4.178425765318082e-06, - "loss": 0.0728, + "learning_rate": 1.4203111104176743e-05, + "loss": 0.0147, "step": 169745 }, { "epoch": 7.92, - "learning_rate": 4.177956964042943e-06, - "loss": 0.1863, + "learning_rate": 1.4202643034340723e-05, + "loss": 0.0719, "step": 169750 }, { "epoch": 7.92, - "learning_rate": 4.1774881627678035e-06, - "loss": 0.0725, + "learning_rate": 1.4202174964504704e-05, + "loss": 0.0366, "step": 169755 }, { "epoch": 7.92, - "learning_rate": 4.177019361492663e-06, - "loss": 0.0451, + "learning_rate": 1.4201706894668686e-05, + "loss": 0.0315, "step": 169760 }, { "epoch": 7.92, - "learning_rate": 4.176550560217524e-06, - "loss": 0.0929, + "learning_rate": 1.4201238824832665e-05, + "loss": 0.1283, "step": 169765 }, { "epoch": 7.92, - "learning_rate": 4.176081758942385e-06, - "loss": 0.1799, + "learning_rate": 1.4200770754996647e-05, + "loss": 0.1969, "step": 169770 }, { "epoch": 7.92, - "learning_rate": 4.175612957667245e-06, - "loss": 0.0017, + "learning_rate": 1.4200302685160627e-05, + "loss": 0.0181, "step": 169775 }, { "epoch": 7.92, - "learning_rate": 4.175144156392106e-06, - "loss": 0.0071, + "learning_rate": 1.4199834615324607e-05, + "loss": 0.0116, "step": 169780 }, { "epoch": 7.92, - "learning_rate": 4.1746753551169665e-06, - "loss": 0.0314, + "learning_rate": 1.4199366545488586e-05, + "loss": 0.0189, "step": 169785 }, { "epoch": 7.92, - "learning_rate": 4.174206553841827e-06, - "loss": 0.0895, + "learning_rate": 1.4198898475652568e-05, + "loss": 0.0442, "step": 169790 }, { "epoch": 7.92, - "learning_rate": 4.173737752566687e-06, - "loss": 0.047, + "learning_rate": 1.4198430405816548e-05, + "loss": 0.0163, "step": 169795 }, { "epoch": 7.92, - "learning_rate": 4.173268951291548e-06, - "loss": 0.0833, + "learning_rate": 1.419796233598053e-05, + "loss": 0.0304, "step": 169800 }, { "epoch": 7.92, - "learning_rate": 4.172800150016409e-06, - "loss": 0.1158, + "learning_rate": 1.4197494266144509e-05, + "loss": 0.0485, "step": 169805 }, { "epoch": 7.92, - "learning_rate": 4.172331348741269e-06, - "loss": 0.1082, + "learning_rate": 1.4197026196308489e-05, + "loss": 0.0795, "step": 169810 }, { "epoch": 7.92, - "learning_rate": 4.1718625474661294e-06, - "loss": 0.1358, + "learning_rate": 1.419655812647247e-05, + "loss": 0.0922, "step": 169815 }, { "epoch": 7.92, - "learning_rate": 4.17139374619099e-06, - "loss": 0.0923, + "learning_rate": 1.419609005663645e-05, + "loss": 0.2317, "step": 169820 }, { "epoch": 7.92, - "learning_rate": 4.170924944915851e-06, - "loss": 0.023, + "learning_rate": 1.4195621986800432e-05, + "loss": 0.068, "step": 169825 }, { "epoch": 7.92, - "learning_rate": 4.170456143640711e-06, - "loss": 0.0279, + "learning_rate": 1.4195153916964412e-05, + "loss": 0.0281, "step": 169830 }, { "epoch": 7.92, - "learning_rate": 4.169987342365572e-06, - "loss": 0.0199, + "learning_rate": 1.4194685847128393e-05, + "loss": 0.0245, "step": 169835 }, { "epoch": 7.92, - "learning_rate": 4.1695185410904325e-06, - "loss": 0.0095, + "learning_rate": 1.4194217777292371e-05, + "loss": 0.0393, "step": 169840 }, { "epoch": 7.93, - "learning_rate": 4.1690497398152924e-06, - "loss": 0.0237, + "learning_rate": 1.4193749707456353e-05, + "loss": 0.0112, "step": 169845 }, { "epoch": 7.93, - "learning_rate": 4.168580938540153e-06, - "loss": 0.0201, + "learning_rate": 1.4193281637620333e-05, + "loss": 0.0315, "step": 169850 }, { "epoch": 7.93, - "learning_rate": 4.168112137265013e-06, - "loss": 0.0497, + "learning_rate": 1.4192813567784314e-05, + "loss": 0.032, "step": 169855 }, { "epoch": 7.93, - "learning_rate": 4.167643335989875e-06, - "loss": 0.0739, + "learning_rate": 1.4192345497948294e-05, + "loss": 0.0694, "step": 169860 }, { "epoch": 7.93, - "learning_rate": 4.167174534714735e-06, - "loss": 0.1654, + "learning_rate": 1.4191877428112275e-05, + "loss": 0.0649, "step": 169865 }, { "epoch": 7.93, - "learning_rate": 4.1667057334395955e-06, - "loss": 0.1472, + "learning_rate": 1.4191409358276255e-05, + "loss": 0.1722, "step": 169870 }, { "epoch": 7.93, - "learning_rate": 4.166236932164455e-06, - "loss": 0.0166, + "learning_rate": 1.4190941288440235e-05, + "loss": 0.0273, "step": 169875 }, { "epoch": 7.93, - "learning_rate": 4.165768130889316e-06, - "loss": 0.0229, + "learning_rate": 1.4190473218604217e-05, + "loss": 0.0524, "step": 169880 }, { "epoch": 7.93, - "learning_rate": 4.165299329614177e-06, - "loss": 0.0321, + "learning_rate": 1.4190005148768196e-05, + "loss": 0.029, "step": 169885 }, { "epoch": 7.93, - "learning_rate": 4.164830528339037e-06, - "loss": 0.0278, + "learning_rate": 1.4189537078932178e-05, + "loss": 0.0372, "step": 169890 }, { "epoch": 7.93, - "learning_rate": 4.164361727063898e-06, - "loss": 0.0161, + "learning_rate": 1.4189069009096158e-05, + "loss": 0.0232, "step": 169895 }, { "epoch": 7.93, - "learning_rate": 4.1638929257887585e-06, - "loss": 0.0269, + "learning_rate": 1.418860093926014e-05, + "loss": 0.046, "step": 169900 }, { "epoch": 7.93, - "learning_rate": 4.163424124513619e-06, - "loss": 0.0463, + "learning_rate": 1.4188132869424117e-05, + "loss": 0.0199, "step": 169905 }, { "epoch": 7.93, - "learning_rate": 4.162955323238479e-06, - "loss": 0.0704, + "learning_rate": 1.4187664799588099e-05, + "loss": 0.0509, "step": 169910 }, { "epoch": 7.93, - "learning_rate": 4.16248652196334e-06, - "loss": 0.2355, + "learning_rate": 1.4187196729752079e-05, + "loss": 0.244, "step": 169915 }, { "epoch": 7.93, - "learning_rate": 4.162017720688201e-06, - "loss": 0.1404, + "learning_rate": 1.418672865991606e-05, + "loss": 0.1059, "step": 169920 }, { "epoch": 7.93, - "learning_rate": 4.161548919413061e-06, - "loss": 0.0271, + "learning_rate": 1.418626059008004e-05, + "loss": 0.0181, "step": 169925 }, { "epoch": 7.93, - "learning_rate": 4.1610801181379215e-06, - "loss": 0.036, + "learning_rate": 1.4185792520244021e-05, + "loss": 0.0107, "step": 169930 }, { "epoch": 7.93, - "learning_rate": 4.160611316862782e-06, - "loss": 0.0083, + "learning_rate": 1.4185324450408e-05, + "loss": 0.0258, "step": 169935 }, { "epoch": 7.93, - "learning_rate": 4.160142515587643e-06, - "loss": 0.0192, + "learning_rate": 1.4184856380571981e-05, + "loss": 0.0643, "step": 169940 }, { "epoch": 7.93, - "learning_rate": 4.159673714312503e-06, - "loss": 0.0509, + "learning_rate": 1.4184388310735963e-05, + "loss": 0.0373, "step": 169945 }, { "epoch": 7.93, - "learning_rate": 4.159204913037364e-06, - "loss": 0.0281, + "learning_rate": 1.4183920240899942e-05, + "loss": 0.0432, "step": 169950 }, { "epoch": 7.93, - "learning_rate": 4.1587361117622245e-06, - "loss": 0.0537, + "learning_rate": 1.4183452171063924e-05, + "loss": 0.0689, "step": 169955 }, { "epoch": 7.93, - "learning_rate": 4.1582673104870845e-06, - "loss": 0.1194, + "learning_rate": 1.4182984101227904e-05, + "loss": 0.0593, "step": 169960 }, { "epoch": 7.93, - "learning_rate": 4.157798509211945e-06, - "loss": 0.2366, + "learning_rate": 1.4182516031391885e-05, + "loss": 0.1325, "step": 169965 }, { "epoch": 7.93, - "learning_rate": 4.157329707936806e-06, - "loss": 0.2241, + "learning_rate": 1.4182047961555863e-05, + "loss": 0.1373, "step": 169970 }, { "epoch": 7.93, - "learning_rate": 4.156860906661667e-06, - "loss": 0.0231, + "learning_rate": 1.4181579891719845e-05, + "loss": 0.0166, "step": 169975 }, { "epoch": 7.93, - "learning_rate": 4.156392105386527e-06, - "loss": 0.0421, + "learning_rate": 1.4181111821883825e-05, + "loss": 0.0478, "step": 169980 }, { "epoch": 7.93, - "learning_rate": 4.1559233041113875e-06, - "loss": 0.0222, + "learning_rate": 1.4180643752047806e-05, + "loss": 0.0256, "step": 169985 }, { "epoch": 7.93, - "learning_rate": 4.155454502836248e-06, - "loss": 0.0828, + "learning_rate": 1.4180175682211786e-05, + "loss": 0.0544, "step": 169990 }, { "epoch": 7.93, - "learning_rate": 4.154985701561108e-06, - "loss": 0.012, + "learning_rate": 1.4179707612375768e-05, + "loss": 0.0489, "step": 169995 }, { "epoch": 7.93, - "learning_rate": 4.154516900285969e-06, - "loss": 0.0059, + "learning_rate": 1.4179239542539747e-05, + "loss": 0.0191, "step": 170000 }, { "epoch": 7.93, - "learning_rate": 4.15404809901083e-06, - "loss": 0.0729, + "learning_rate": 1.4178771472703727e-05, + "loss": 0.0613, "step": 170005 }, { "epoch": 7.93, - "learning_rate": 4.1535792977356906e-06, - "loss": 0.0288, + "learning_rate": 1.4178303402867709e-05, + "loss": 0.093, "step": 170010 }, { "epoch": 7.93, - "learning_rate": 4.1531104964605505e-06, - "loss": 0.1906, + "learning_rate": 1.4177835333031689e-05, + "loss": 0.2091, "step": 170015 }, { "epoch": 7.93, - "learning_rate": 4.152641695185411e-06, - "loss": 0.1276, + "learning_rate": 1.417736726319567e-05, + "loss": 0.1201, "step": 170020 }, { "epoch": 7.93, - "learning_rate": 4.152172893910272e-06, - "loss": 0.0111, + "learning_rate": 1.417689919335965e-05, + "loss": 0.0339, "step": 170025 }, { "epoch": 7.93, - "learning_rate": 4.151704092635132e-06, - "loss": 0.0129, + "learning_rate": 1.417643112352363e-05, + "loss": 0.0287, "step": 170030 }, { "epoch": 7.93, - "learning_rate": 4.151235291359993e-06, - "loss": 0.0675, + "learning_rate": 1.417596305368761e-05, + "loss": 0.0251, "step": 170035 }, { "epoch": 7.93, - "learning_rate": 4.1507664900848536e-06, - "loss": 0.0555, + "learning_rate": 1.4175494983851591e-05, + "loss": 0.0316, "step": 170040 }, { "epoch": 7.93, - "learning_rate": 4.150297688809714e-06, - "loss": 0.0496, + "learning_rate": 1.417502691401557e-05, + "loss": 0.0632, "step": 170045 }, { "epoch": 7.93, - "learning_rate": 4.149828887534574e-06, - "loss": 0.1202, + "learning_rate": 1.4174558844179552e-05, + "loss": 0.0313, "step": 170050 }, { "epoch": 7.94, - "learning_rate": 4.149360086259435e-06, - "loss": 0.0764, + "learning_rate": 1.4174090774343532e-05, + "loss": 0.036, "step": 170055 }, { "epoch": 7.94, - "learning_rate": 4.148891284984296e-06, - "loss": 0.0628, + "learning_rate": 1.4173622704507514e-05, + "loss": 0.0875, "step": 170060 }, { "epoch": 7.94, - "learning_rate": 4.148422483709156e-06, - "loss": 0.109, + "learning_rate": 1.4173154634671493e-05, + "loss": 0.1398, "step": 170065 }, { "epoch": 7.94, - "learning_rate": 4.1479536824340166e-06, - "loss": 0.3167, + "learning_rate": 1.4172686564835473e-05, + "loss": 0.3945, "step": 170070 }, { "epoch": 7.94, - "learning_rate": 4.147484881158877e-06, - "loss": 0.0294, + "learning_rate": 1.4172218494999455e-05, + "loss": 0.0289, "step": 170075 }, { "epoch": 7.94, - "learning_rate": 4.147016079883738e-06, - "loss": 0.0289, + "learning_rate": 1.4171750425163435e-05, + "loss": 0.0086, "step": 170080 }, { "epoch": 7.94, - "learning_rate": 4.146547278608598e-06, - "loss": 0.0187, + "learning_rate": 1.4171282355327416e-05, + "loss": 0.0408, "step": 170085 }, { "epoch": 7.94, - "learning_rate": 4.146078477333459e-06, - "loss": 0.0212, + "learning_rate": 1.4170814285491396e-05, + "loss": 0.0173, "step": 170090 }, { "epoch": 7.94, - "learning_rate": 4.14560967605832e-06, - "loss": 0.0462, + "learning_rate": 1.4170346215655376e-05, + "loss": 0.0533, "step": 170095 }, { "epoch": 7.94, - "learning_rate": 4.1451408747831795e-06, - "loss": 0.0251, + "learning_rate": 1.4169878145819356e-05, + "loss": 0.071, "step": 170100 }, { "epoch": 7.94, - "learning_rate": 4.14467207350804e-06, - "loss": 0.0341, + "learning_rate": 1.4169410075983337e-05, + "loss": 0.1269, "step": 170105 }, { "epoch": 7.94, - "learning_rate": 4.1442032722329e-06, - "loss": 0.1667, + "learning_rate": 1.4168942006147317e-05, + "loss": 0.0733, "step": 170110 }, { "epoch": 7.94, - "learning_rate": 4.143734470957762e-06, - "loss": 0.1383, + "learning_rate": 1.4168473936311298e-05, + "loss": 0.1649, "step": 170115 }, { "epoch": 7.94, - "learning_rate": 4.143265669682622e-06, - "loss": 0.1238, + "learning_rate": 1.416800586647528e-05, + "loss": 0.2442, "step": 170120 }, { "epoch": 7.94, - "learning_rate": 4.142796868407483e-06, - "loss": 0.0317, + "learning_rate": 1.4167537796639258e-05, + "loss": 0.0147, "step": 170125 }, { "epoch": 7.94, - "learning_rate": 4.1423280671323425e-06, - "loss": 0.0084, + "learning_rate": 1.416706972680324e-05, + "loss": 0.0152, "step": 170130 }, { "epoch": 7.94, - "learning_rate": 4.141859265857203e-06, - "loss": 0.0156, + "learning_rate": 1.416660165696722e-05, + "loss": 0.0057, "step": 170135 }, { "epoch": 7.94, - "learning_rate": 4.141390464582064e-06, - "loss": 0.0167, + "learning_rate": 1.4166133587131201e-05, + "loss": 0.0142, "step": 170140 }, { "epoch": 7.94, - "learning_rate": 4.140921663306924e-06, - "loss": 0.0385, + "learning_rate": 1.416566551729518e-05, + "loss": 0.0121, "step": 170145 }, { "epoch": 7.94, - "learning_rate": 4.140452862031785e-06, - "loss": 0.0337, + "learning_rate": 1.4165197447459162e-05, + "loss": 0.011, "step": 170150 }, { "epoch": 7.94, - "learning_rate": 4.139984060756646e-06, - "loss": 0.0866, + "learning_rate": 1.4164729377623142e-05, + "loss": 0.0735, "step": 170155 }, { "epoch": 7.94, - "learning_rate": 4.139515259481506e-06, - "loss": 0.1173, + "learning_rate": 1.4164261307787122e-05, + "loss": 0.095, "step": 170160 }, { "epoch": 7.94, - "learning_rate": 4.139046458206366e-06, - "loss": 0.1932, + "learning_rate": 1.4163793237951102e-05, + "loss": 0.1414, "step": 170165 }, { "epoch": 7.94, - "learning_rate": 4.138577656931227e-06, - "loss": 0.1354, + "learning_rate": 1.4163325168115083e-05, + "loss": 0.1553, "step": 170170 }, { "epoch": 7.94, - "learning_rate": 4.138108855656088e-06, - "loss": 0.027, + "learning_rate": 1.4162857098279063e-05, + "loss": 0.0279, "step": 170175 }, { "epoch": 7.94, - "learning_rate": 4.137640054380948e-06, - "loss": 0.0143, + "learning_rate": 1.4162389028443045e-05, + "loss": 0.0158, "step": 170180 }, { "epoch": 7.94, - "learning_rate": 4.137171253105809e-06, - "loss": 0.0245, + "learning_rate": 1.4161920958607026e-05, + "loss": 0.0085, "step": 170185 }, { "epoch": 7.94, - "learning_rate": 4.136702451830669e-06, - "loss": 0.0326, + "learning_rate": 1.4161452888771004e-05, + "loss": 0.0184, "step": 170190 }, { "epoch": 7.94, - "learning_rate": 4.13623365055553e-06, - "loss": 0.0553, + "learning_rate": 1.4160984818934986e-05, + "loss": 0.0201, "step": 170195 }, { "epoch": 7.94, - "learning_rate": 4.13576484928039e-06, - "loss": 0.0432, + "learning_rate": 1.4160516749098966e-05, + "loss": 0.0316, "step": 170200 }, { "epoch": 7.94, - "learning_rate": 4.135296048005251e-06, - "loss": 0.0766, + "learning_rate": 1.4160048679262947e-05, + "loss": 0.0998, "step": 170205 }, { "epoch": 7.94, - "learning_rate": 4.134827246730112e-06, - "loss": 0.0881, + "learning_rate": 1.4159580609426927e-05, + "loss": 0.1697, "step": 170210 }, { "epoch": 7.94, - "learning_rate": 4.1343584454549716e-06, - "loss": 0.1198, + "learning_rate": 1.4159112539590908e-05, + "loss": 0.0701, "step": 170215 }, { "epoch": 7.94, - "learning_rate": 4.133889644179832e-06, - "loss": 0.1885, + "learning_rate": 1.4158644469754886e-05, + "loss": 0.2348, "step": 170220 }, { "epoch": 7.94, - "learning_rate": 4.133420842904693e-06, - "loss": 0.0115, + "learning_rate": 1.4158176399918868e-05, + "loss": 0.0301, "step": 170225 }, { "epoch": 7.94, - "learning_rate": 4.132952041629554e-06, - "loss": 0.0052, + "learning_rate": 1.4157708330082848e-05, + "loss": 0.0132, "step": 170230 }, { "epoch": 7.94, - "learning_rate": 4.132483240354414e-06, - "loss": 0.0274, + "learning_rate": 1.415724026024683e-05, + "loss": 0.0296, "step": 170235 }, { "epoch": 7.94, - "learning_rate": 4.132014439079275e-06, - "loss": 0.0374, + "learning_rate": 1.4156772190410809e-05, + "loss": 0.0434, "step": 170240 }, { "epoch": 7.94, - "learning_rate": 4.131545637804135e-06, - "loss": 0.0248, + "learning_rate": 1.415630412057479e-05, + "loss": 0.0265, "step": 170245 }, { "epoch": 7.94, - "learning_rate": 4.131076836528995e-06, - "loss": 0.0477, + "learning_rate": 1.415583605073877e-05, + "loss": 0.0555, "step": 170250 }, { "epoch": 7.94, - "learning_rate": 4.130608035253856e-06, - "loss": 0.036, + "learning_rate": 1.415536798090275e-05, + "loss": 0.0723, "step": 170255 }, { "epoch": 7.94, - "learning_rate": 4.130139233978717e-06, - "loss": 0.0809, + "learning_rate": 1.4154899911066732e-05, + "loss": 0.0458, "step": 170260 }, { "epoch": 7.94, - "learning_rate": 4.129670432703578e-06, - "loss": 0.1133, + "learning_rate": 1.4154431841230712e-05, + "loss": 0.0911, "step": 170265 }, { "epoch": 7.95, - "learning_rate": 4.129201631428438e-06, - "loss": 0.1498, + "learning_rate": 1.4153963771394693e-05, + "loss": 0.115, "step": 170270 }, { "epoch": 7.95, - "learning_rate": 4.128732830153298e-06, - "loss": 0.034, + "learning_rate": 1.4153495701558673e-05, + "loss": 0.0171, "step": 170275 }, { "epoch": 7.95, - "learning_rate": 4.128264028878159e-06, - "loss": 0.0082, + "learning_rate": 1.4153027631722654e-05, + "loss": 0.0052, "step": 170280 }, { "epoch": 7.95, - "learning_rate": 4.127795227603019e-06, - "loss": 0.0387, + "learning_rate": 1.4152559561886633e-05, + "loss": 0.0195, "step": 170285 }, { "epoch": 7.95, - "learning_rate": 4.12732642632788e-06, - "loss": 0.0271, + "learning_rate": 1.4152091492050614e-05, + "loss": 0.0091, "step": 170290 }, { "epoch": 7.95, - "learning_rate": 4.126857625052741e-06, - "loss": 0.039, + "learning_rate": 1.4151623422214594e-05, + "loss": 0.0326, "step": 170295 }, { "epoch": 7.95, - "learning_rate": 4.1263888237776014e-06, - "loss": 0.0212, + "learning_rate": 1.4151155352378575e-05, + "loss": 0.0605, "step": 170300 }, { "epoch": 7.95, - "learning_rate": 4.125920022502461e-06, - "loss": 0.0567, + "learning_rate": 1.4150687282542557e-05, + "loss": 0.0392, "step": 170305 }, { "epoch": 7.95, - "learning_rate": 4.125451221227322e-06, - "loss": 0.0592, + "learning_rate": 1.4150219212706537e-05, + "loss": 0.017, "step": 170310 }, { "epoch": 7.95, - "learning_rate": 4.124982419952183e-06, - "loss": 0.0712, + "learning_rate": 1.4149751142870517e-05, + "loss": 0.1193, "step": 170315 }, { "epoch": 7.95, - "learning_rate": 4.124513618677043e-06, - "loss": 0.1622, + "learning_rate": 1.4149283073034496e-05, + "loss": 0.1034, "step": 170320 }, { "epoch": 7.95, - "learning_rate": 4.124044817401904e-06, - "loss": 0.0311, + "learning_rate": 1.4148815003198478e-05, + "loss": 0.0261, "step": 170325 }, { "epoch": 7.95, - "learning_rate": 4.1235760161267644e-06, - "loss": 0.0052, + "learning_rate": 1.4148346933362458e-05, + "loss": 0.0085, "step": 170330 }, { "epoch": 7.95, - "learning_rate": 4.123107214851625e-06, - "loss": 0.0125, + "learning_rate": 1.414787886352644e-05, + "loss": 0.0095, "step": 170335 }, { "epoch": 7.95, - "learning_rate": 4.122638413576485e-06, - "loss": 0.0827, + "learning_rate": 1.4147410793690419e-05, + "loss": 0.0096, "step": 170340 }, { "epoch": 7.95, - "learning_rate": 4.122169612301346e-06, - "loss": 0.0289, + "learning_rate": 1.4146942723854399e-05, + "loss": 0.025, "step": 170345 }, { "epoch": 7.95, - "learning_rate": 4.121700811026207e-06, - "loss": 0.0506, + "learning_rate": 1.4146474654018379e-05, + "loss": 0.0808, "step": 170350 }, { "epoch": 7.95, - "learning_rate": 4.121232009751067e-06, - "loss": 0.1159, + "learning_rate": 1.414600658418236e-05, + "loss": 0.0592, "step": 170355 }, { "epoch": 7.95, - "learning_rate": 4.1207632084759274e-06, - "loss": 0.0658, + "learning_rate": 1.414553851434634e-05, + "loss": 0.0711, "step": 170360 }, { "epoch": 7.95, - "learning_rate": 4.120294407200787e-06, - "loss": 0.1321, + "learning_rate": 1.4145070444510322e-05, + "loss": 0.1664, "step": 170365 }, { "epoch": 7.95, - "learning_rate": 4.119825605925649e-06, - "loss": 0.1938, + "learning_rate": 1.4144602374674303e-05, + "loss": 0.189, "step": 170370 }, { "epoch": 7.95, - "learning_rate": 4.119356804650509e-06, - "loss": 0.013, + "learning_rate": 1.4144134304838283e-05, + "loss": 0.0279, "step": 170375 }, { "epoch": 7.95, - "learning_rate": 4.11888800337537e-06, - "loss": 0.015, + "learning_rate": 1.4143666235002263e-05, + "loss": 0.0154, "step": 170380 }, { "epoch": 7.95, - "learning_rate": 4.11841920210023e-06, - "loss": 0.0225, + "learning_rate": 1.4143198165166242e-05, + "loss": 0.0108, "step": 170385 }, { "epoch": 7.95, - "learning_rate": 4.11795040082509e-06, - "loss": 0.0442, + "learning_rate": 1.4142730095330224e-05, + "loss": 0.06, "step": 170390 }, { "epoch": 7.95, - "learning_rate": 4.117481599549951e-06, - "loss": 0.0514, + "learning_rate": 1.4142262025494204e-05, + "loss": 0.0164, "step": 170395 }, { "epoch": 7.95, - "learning_rate": 4.117012798274811e-06, - "loss": 0.031, + "learning_rate": 1.4141793955658185e-05, + "loss": 0.0624, "step": 170400 }, { "epoch": 7.95, - "learning_rate": 4.116543996999672e-06, - "loss": 0.0328, + "learning_rate": 1.4141325885822165e-05, + "loss": 0.0983, "step": 170405 }, { "epoch": 7.95, - "learning_rate": 4.116075195724533e-06, - "loss": 0.1181, + "learning_rate": 1.4140857815986145e-05, + "loss": 0.0629, "step": 170410 }, { "epoch": 7.95, - "learning_rate": 4.1156063944493935e-06, - "loss": 0.2264, + "learning_rate": 1.4140389746150125e-05, + "loss": 0.1881, "step": 170415 }, { "epoch": 7.95, - "learning_rate": 4.115137593174253e-06, - "loss": 0.219, + "learning_rate": 1.4139921676314106e-05, + "loss": 0.1303, "step": 170420 }, { "epoch": 7.95, - "learning_rate": 4.114668791899114e-06, - "loss": 0.0293, + "learning_rate": 1.4139453606478088e-05, + "loss": 0.0466, "step": 170425 }, { "epoch": 7.95, - "learning_rate": 4.114199990623975e-06, - "loss": 0.0106, + "learning_rate": 1.4138985536642068e-05, + "loss": 0.0341, "step": 170430 }, { "epoch": 7.95, - "learning_rate": 4.113731189348835e-06, - "loss": 0.0181, + "learning_rate": 1.4138517466806049e-05, + "loss": 0.0346, "step": 170435 }, { "epoch": 7.95, - "learning_rate": 4.113262388073696e-06, - "loss": 0.0468, + "learning_rate": 1.4138049396970027e-05, + "loss": 0.0278, "step": 170440 }, { "epoch": 7.95, - "learning_rate": 4.1127935867985565e-06, - "loss": 0.022, + "learning_rate": 1.4137581327134009e-05, + "loss": 0.0704, "step": 170445 }, { "epoch": 7.95, - "learning_rate": 4.112324785523417e-06, - "loss": 0.0406, + "learning_rate": 1.4137113257297989e-05, + "loss": 0.0964, "step": 170450 }, { "epoch": 7.95, - "learning_rate": 4.111855984248277e-06, - "loss": 0.1061, + "learning_rate": 1.413664518746197e-05, + "loss": 0.0471, "step": 170455 }, { "epoch": 7.95, - "learning_rate": 4.111387182973138e-06, - "loss": 0.1182, + "learning_rate": 1.413617711762595e-05, + "loss": 0.0904, "step": 170460 }, { "epoch": 7.95, - "learning_rate": 4.110918381697999e-06, - "loss": 0.1648, + "learning_rate": 1.4135709047789931e-05, + "loss": 0.0931, "step": 170465 }, { "epoch": 7.95, - "learning_rate": 4.110449580422859e-06, - "loss": 0.2513, + "learning_rate": 1.4135240977953911e-05, + "loss": 0.1057, "step": 170470 }, { "epoch": 7.95, - "learning_rate": 4.1099807791477195e-06, - "loss": 0.0074, + "learning_rate": 1.4134772908117891e-05, + "loss": 0.0119, "step": 170475 }, { "epoch": 7.95, - "learning_rate": 4.10951197787258e-06, - "loss": 0.012, + "learning_rate": 1.4134304838281871e-05, + "loss": 0.0337, "step": 170480 }, { "epoch": 7.96, - "learning_rate": 4.109043176597441e-06, - "loss": 0.0056, + "learning_rate": 1.4133836768445852e-05, + "loss": 0.014, "step": 170485 }, { "epoch": 7.96, - "learning_rate": 4.108574375322301e-06, - "loss": 0.1037, + "learning_rate": 1.4133368698609834e-05, + "loss": 0.05, "step": 170490 }, { "epoch": 7.96, - "learning_rate": 4.108105574047162e-06, - "loss": 0.1325, + "learning_rate": 1.4132900628773814e-05, + "loss": 0.0495, "step": 170495 }, { "epoch": 7.96, - "learning_rate": 4.1076367727720225e-06, - "loss": 0.0237, + "learning_rate": 1.4132432558937795e-05, + "loss": 0.0153, "step": 170500 }, { "epoch": 7.96, - "learning_rate": 4.1071679714968824e-06, - "loss": 0.0512, + "learning_rate": 1.4131964489101773e-05, + "loss": 0.0605, "step": 170505 }, { "epoch": 7.96, - "learning_rate": 4.106699170221743e-06, - "loss": 0.0412, + "learning_rate": 1.4131496419265755e-05, + "loss": 0.0995, "step": 170510 }, { "epoch": 7.96, - "learning_rate": 4.106230368946604e-06, - "loss": 0.1551, + "learning_rate": 1.4131028349429735e-05, + "loss": 0.0692, "step": 170515 }, { "epoch": 7.96, - "learning_rate": 4.105761567671465e-06, - "loss": 0.282, + "learning_rate": 1.4130560279593716e-05, + "loss": 0.1907, "step": 170520 }, { "epoch": 7.96, - "learning_rate": 4.105292766396325e-06, - "loss": 0.005, + "learning_rate": 1.4130092209757696e-05, + "loss": 0.0127, "step": 170525 }, { "epoch": 7.96, - "learning_rate": 4.1048239651211855e-06, - "loss": 0.0012, + "learning_rate": 1.4129624139921678e-05, + "loss": 0.0056, "step": 170530 }, { "epoch": 7.96, - "learning_rate": 4.104355163846046e-06, - "loss": 0.0377, + "learning_rate": 1.4129156070085656e-05, + "loss": 0.0167, "step": 170535 }, { "epoch": 7.96, - "learning_rate": 4.103886362570906e-06, - "loss": 0.0193, + "learning_rate": 1.4128688000249637e-05, + "loss": 0.0174, "step": 170540 }, { "epoch": 7.96, - "learning_rate": 4.103417561295767e-06, - "loss": 0.0113, + "learning_rate": 1.4128219930413617e-05, + "loss": 0.0175, "step": 170545 }, { "epoch": 7.96, - "learning_rate": 4.102948760020628e-06, - "loss": 0.0882, + "learning_rate": 1.4127751860577598e-05, + "loss": 0.0434, "step": 170550 }, { "epoch": 7.96, - "learning_rate": 4.1024799587454886e-06, - "loss": 0.0255, + "learning_rate": 1.412728379074158e-05, + "loss": 0.084, "step": 170555 }, { "epoch": 7.96, - "learning_rate": 4.1020111574703485e-06, - "loss": 0.0933, + "learning_rate": 1.412681572090556e-05, + "loss": 0.0812, "step": 170560 }, { "epoch": 7.96, - "learning_rate": 4.101542356195209e-06, - "loss": 0.1787, + "learning_rate": 1.4126347651069541e-05, + "loss": 0.1454, "step": 170565 }, { "epoch": 7.96, - "learning_rate": 4.10107355492007e-06, - "loss": 0.1964, + "learning_rate": 1.412587958123352e-05, + "loss": 0.1323, "step": 170570 }, { "epoch": 7.96, - "learning_rate": 4.10060475364493e-06, - "loss": 0.0057, + "learning_rate": 1.4125411511397501e-05, + "loss": 0.0136, "step": 170575 }, { "epoch": 7.96, - "learning_rate": 4.100135952369791e-06, - "loss": 0.0045, + "learning_rate": 1.412494344156148e-05, + "loss": 0.0098, "step": 170580 }, { "epoch": 7.96, - "learning_rate": 4.0996671510946515e-06, - "loss": 0.0378, + "learning_rate": 1.4124475371725462e-05, + "loss": 0.0253, "step": 170585 }, { "epoch": 7.96, - "learning_rate": 4.099198349819512e-06, - "loss": 0.0206, + "learning_rate": 1.4124007301889442e-05, + "loss": 0.0627, "step": 170590 }, { "epoch": 7.96, - "learning_rate": 4.098729548544372e-06, - "loss": 0.0384, + "learning_rate": 1.4123539232053424e-05, + "loss": 0.0632, "step": 170595 }, { "epoch": 7.96, - "learning_rate": 4.098260747269233e-06, - "loss": 0.0538, + "learning_rate": 1.4123071162217402e-05, + "loss": 0.0884, "step": 170600 }, { "epoch": 7.96, - "learning_rate": 4.097791945994094e-06, - "loss": 0.0496, + "learning_rate": 1.4122603092381383e-05, + "loss": 0.0947, "step": 170605 }, { "epoch": 7.96, - "learning_rate": 4.097323144718954e-06, - "loss": 0.062, + "learning_rate": 1.4122135022545365e-05, + "loss": 0.1037, "step": 170610 }, { "epoch": 7.96, - "learning_rate": 4.0968543434438145e-06, - "loss": 0.191, + "learning_rate": 1.4121666952709345e-05, + "loss": 0.1234, "step": 170615 }, { "epoch": 7.96, - "learning_rate": 4.096385542168675e-06, - "loss": 0.1332, + "learning_rate": 1.4121198882873326e-05, + "loss": 0.1116, "step": 170620 }, { "epoch": 7.96, - "learning_rate": 4.095916740893536e-06, - "loss": 0.0939, + "learning_rate": 1.4120730813037306e-05, + "loss": 0.021, "step": 170625 }, { "epoch": 7.96, - "learning_rate": 4.095447939618396e-06, - "loss": 0.0205, + "learning_rate": 1.4120262743201286e-05, + "loss": 0.0059, "step": 170630 }, { "epoch": 7.96, - "learning_rate": 4.094979138343257e-06, - "loss": 0.0262, + "learning_rate": 1.4119794673365266e-05, + "loss": 0.0304, "step": 170635 }, { "epoch": 7.96, - "learning_rate": 4.094510337068118e-06, - "loss": 0.0063, + "learning_rate": 1.4119326603529247e-05, + "loss": 0.02, "step": 170640 }, { "epoch": 7.96, - "learning_rate": 4.0940415357929775e-06, - "loss": 0.027, + "learning_rate": 1.4118858533693227e-05, + "loss": 0.0593, "step": 170645 }, { "epoch": 7.96, - "learning_rate": 4.093572734517838e-06, - "loss": 0.067, + "learning_rate": 1.4118390463857208e-05, + "loss": 0.0468, "step": 170650 }, { "epoch": 7.96, - "learning_rate": 4.093103933242698e-06, - "loss": 0.0676, + "learning_rate": 1.4117922394021188e-05, + "loss": 0.0565, "step": 170655 }, { "epoch": 7.96, - "learning_rate": 4.09263513196756e-06, - "loss": 0.0689, + "learning_rate": 1.411745432418517e-05, + "loss": 0.1429, "step": 170660 }, { "epoch": 7.96, - "learning_rate": 4.09216633069242e-06, - "loss": 0.1047, + "learning_rate": 1.4116986254349148e-05, + "loss": 0.0949, "step": 170665 }, { "epoch": 7.96, - "learning_rate": 4.091697529417281e-06, - "loss": 0.1222, + "learning_rate": 1.411651818451313e-05, + "loss": 0.1787, "step": 170670 }, { "epoch": 7.96, - "learning_rate": 4.0912287281421405e-06, - "loss": 0.0266, + "learning_rate": 1.4116050114677111e-05, + "loss": 0.0157, "step": 170675 }, { "epoch": 7.96, - "learning_rate": 4.090759926867001e-06, - "loss": 0.0079, + "learning_rate": 1.411558204484109e-05, + "loss": 0.0116, "step": 170680 }, { "epoch": 7.96, - "learning_rate": 4.090291125591862e-06, - "loss": 0.0086, + "learning_rate": 1.4115113975005072e-05, + "loss": 0.0147, "step": 170685 }, { "epoch": 7.96, - "learning_rate": 4.089822324316722e-06, - "loss": 0.012, + "learning_rate": 1.4114645905169052e-05, + "loss": 0.0171, "step": 170690 }, { "epoch": 7.96, - "learning_rate": 4.089353523041583e-06, - "loss": 0.0592, + "learning_rate": 1.4114177835333032e-05, + "loss": 0.0142, "step": 170695 }, { "epoch": 7.97, - "learning_rate": 4.0888847217664436e-06, - "loss": 0.0369, + "learning_rate": 1.4113709765497012e-05, + "loss": 0.0751, "step": 170700 }, { "epoch": 7.97, - "learning_rate": 4.088415920491304e-06, - "loss": 0.0382, + "learning_rate": 1.4113241695660993e-05, + "loss": 0.0346, "step": 170705 }, { "epoch": 7.97, - "learning_rate": 4.087947119216164e-06, - "loss": 0.0961, + "learning_rate": 1.4112773625824973e-05, + "loss": 0.0659, "step": 170710 }, { "epoch": 7.97, - "learning_rate": 4.087478317941025e-06, - "loss": 0.1386, + "learning_rate": 1.4112305555988954e-05, + "loss": 0.1441, "step": 170715 }, { "epoch": 7.97, - "learning_rate": 4.087009516665886e-06, - "loss": 0.1483, + "learning_rate": 1.4111837486152934e-05, + "loss": 0.3029, "step": 170720 }, { "epoch": 7.97, - "learning_rate": 4.086540715390746e-06, - "loss": 0.02, + "learning_rate": 1.4111369416316914e-05, + "loss": 0.0252, "step": 170725 }, { "epoch": 7.97, - "learning_rate": 4.0860719141156066e-06, - "loss": 0.027, + "learning_rate": 1.4110901346480896e-05, + "loss": 0.0321, "step": 170730 }, { "epoch": 7.97, - "learning_rate": 4.085603112840467e-06, - "loss": 0.0251, + "learning_rate": 1.4110433276644875e-05, + "loss": 0.0155, "step": 170735 }, { "epoch": 7.97, - "learning_rate": 4.085134311565328e-06, - "loss": 0.0254, + "learning_rate": 1.4109965206808857e-05, + "loss": 0.0168, "step": 170740 }, { "epoch": 7.97, - "learning_rate": 4.084665510290188e-06, - "loss": 0.0506, + "learning_rate": 1.4109497136972837e-05, + "loss": 0.0496, "step": 170745 }, { "epoch": 7.97, - "learning_rate": 4.084196709015049e-06, - "loss": 0.0446, + "learning_rate": 1.4109029067136818e-05, + "loss": 0.0367, "step": 170750 }, { "epoch": 7.97, - "learning_rate": 4.08372790773991e-06, - "loss": 0.0612, + "learning_rate": 1.4108560997300798e-05, + "loss": 0.0866, "step": 170755 }, { "epoch": 7.97, - "learning_rate": 4.0832591064647696e-06, - "loss": 0.0724, + "learning_rate": 1.4108092927464778e-05, + "loss": 0.0888, "step": 170760 }, { "epoch": 7.97, - "learning_rate": 4.08279030518963e-06, - "loss": 0.1437, + "learning_rate": 1.4107624857628758e-05, + "loss": 0.1215, "step": 170765 }, { "epoch": 7.97, - "learning_rate": 4.082321503914491e-06, - "loss": 0.1288, + "learning_rate": 1.410715678779274e-05, + "loss": 0.2038, "step": 170770 }, { "epoch": 7.97, - "learning_rate": 4.081852702639352e-06, - "loss": 0.028, + "learning_rate": 1.4106688717956719e-05, + "loss": 0.0081, "step": 170775 }, { "epoch": 7.97, - "learning_rate": 4.081383901364212e-06, - "loss": 0.0159, + "learning_rate": 1.41062206481207e-05, + "loss": 0.0125, "step": 170780 }, { "epoch": 7.97, - "learning_rate": 4.080915100089073e-06, - "loss": 0.0064, + "learning_rate": 1.410575257828468e-05, + "loss": 0.0611, "step": 170785 }, { "epoch": 7.97, - "learning_rate": 4.080446298813933e-06, - "loss": 0.0975, + "learning_rate": 1.410528450844866e-05, + "loss": 0.0162, "step": 170790 }, { "epoch": 7.97, - "learning_rate": 4.079977497538793e-06, - "loss": 0.0494, + "learning_rate": 1.4104816438612642e-05, + "loss": 0.0716, "step": 170795 }, { "epoch": 7.97, - "learning_rate": 4.079508696263654e-06, - "loss": 0.0134, + "learning_rate": 1.4104348368776622e-05, + "loss": 0.0402, "step": 170800 }, { "epoch": 7.97, - "learning_rate": 4.079039894988515e-06, - "loss": 0.0699, + "learning_rate": 1.4103880298940603e-05, + "loss": 0.0308, "step": 170805 }, { "epoch": 7.97, - "learning_rate": 4.078571093713376e-06, - "loss": 0.0531, + "learning_rate": 1.4103412229104583e-05, + "loss": 0.106, "step": 170810 }, { "epoch": 7.97, - "learning_rate": 4.078102292438236e-06, - "loss": 0.2027, + "learning_rate": 1.4102944159268564e-05, + "loss": 0.2219, "step": 170815 }, { "epoch": 7.97, - "learning_rate": 4.077633491163096e-06, - "loss": 0.2624, + "learning_rate": 1.4102476089432543e-05, + "loss": 0.1144, "step": 170820 }, { "epoch": 7.97, - "learning_rate": 4.077164689887957e-06, - "loss": 0.0192, + "learning_rate": 1.4102008019596524e-05, + "loss": 0.0163, "step": 170825 }, { "epoch": 7.97, - "learning_rate": 4.076695888612817e-06, - "loss": 0.0481, + "learning_rate": 1.4101539949760504e-05, + "loss": 0.0049, "step": 170830 }, { "epoch": 7.97, - "learning_rate": 4.076227087337678e-06, - "loss": 0.024, + "learning_rate": 1.4101071879924485e-05, + "loss": 0.0246, "step": 170835 }, { "epoch": 7.97, - "learning_rate": 4.075758286062539e-06, - "loss": 0.0514, + "learning_rate": 1.4100603810088465e-05, + "loss": 0.0321, "step": 170840 }, { "epoch": 7.97, - "learning_rate": 4.0752894847873994e-06, - "loss": 0.064, + "learning_rate": 1.4100135740252447e-05, + "loss": 0.0352, "step": 170845 }, { "epoch": 7.97, - "learning_rate": 4.074820683512259e-06, - "loss": 0.0134, + "learning_rate": 1.4099667670416427e-05, + "loss": 0.0485, "step": 170850 }, { "epoch": 7.97, - "learning_rate": 4.07435188223712e-06, - "loss": 0.0817, + "learning_rate": 1.4099199600580406e-05, + "loss": 0.0604, "step": 170855 }, { "epoch": 7.97, - "learning_rate": 4.073883080961981e-06, - "loss": 0.0328, + "learning_rate": 1.4098731530744388e-05, + "loss": 0.1209, "step": 170860 }, { "epoch": 7.97, - "learning_rate": 4.073414279686841e-06, - "loss": 0.0873, + "learning_rate": 1.4098263460908368e-05, + "loss": 0.1365, "step": 170865 }, { "epoch": 7.97, - "learning_rate": 4.072945478411702e-06, - "loss": 0.1017, + "learning_rate": 1.409779539107235e-05, + "loss": 0.1499, "step": 170870 }, { "epoch": 7.97, - "learning_rate": 4.0724766771365624e-06, - "loss": 0.0538, + "learning_rate": 1.4097327321236329e-05, + "loss": 0.0549, "step": 170875 }, { "epoch": 7.97, - "learning_rate": 4.072007875861423e-06, - "loss": 0.0086, + "learning_rate": 1.409685925140031e-05, + "loss": 0.0164, "step": 170880 }, { "epoch": 7.97, - "learning_rate": 4.071539074586283e-06, - "loss": 0.006, + "learning_rate": 1.4096391181564289e-05, + "loss": 0.0344, "step": 170885 }, { "epoch": 7.97, - "learning_rate": 4.071070273311144e-06, - "loss": 0.0148, + "learning_rate": 1.409592311172827e-05, + "loss": 0.0098, "step": 170890 }, { "epoch": 7.97, - "learning_rate": 4.070601472036005e-06, - "loss": 0.0637, + "learning_rate": 1.409545504189225e-05, + "loss": 0.0237, "step": 170895 }, { "epoch": 7.97, - "learning_rate": 4.070132670760865e-06, - "loss": 0.0834, + "learning_rate": 1.4094986972056231e-05, + "loss": 0.1034, "step": 170900 }, { "epoch": 7.97, - "learning_rate": 4.069663869485725e-06, - "loss": 0.1048, + "learning_rate": 1.4094518902220211e-05, + "loss": 0.0526, "step": 170905 }, { "epoch": 7.97, - "learning_rate": 4.069195068210585e-06, - "loss": 0.0848, + "learning_rate": 1.4094050832384193e-05, + "loss": 0.0655, "step": 170910 }, { "epoch": 7.98, - "learning_rate": 4.068726266935447e-06, - "loss": 0.0758, + "learning_rate": 1.4093582762548173e-05, + "loss": 0.1126, "step": 170915 }, { "epoch": 7.98, - "learning_rate": 4.068257465660307e-06, - "loss": 0.1047, + "learning_rate": 1.4093114692712152e-05, + "loss": 0.0937, "step": 170920 }, { "epoch": 7.98, - "learning_rate": 4.067788664385168e-06, - "loss": 0.0134, + "learning_rate": 1.4092646622876134e-05, + "loss": 0.0084, "step": 170925 }, { "epoch": 7.98, - "learning_rate": 4.067319863110028e-06, - "loss": 0.0263, + "learning_rate": 1.4092178553040114e-05, + "loss": 0.0295, "step": 170930 }, { "epoch": 7.98, - "learning_rate": 4.066851061834888e-06, - "loss": 0.0412, + "learning_rate": 1.4091710483204095e-05, + "loss": 0.0207, "step": 170935 }, { "epoch": 7.98, - "learning_rate": 4.066382260559749e-06, - "loss": 0.0376, + "learning_rate": 1.4091242413368075e-05, + "loss": 0.0515, "step": 170940 }, { "epoch": 7.98, - "learning_rate": 4.065913459284609e-06, - "loss": 0.0714, + "learning_rate": 1.4090774343532055e-05, + "loss": 0.0344, "step": 170945 }, { "epoch": 7.98, - "learning_rate": 4.06544465800947e-06, - "loss": 0.0787, + "learning_rate": 1.4090306273696035e-05, + "loss": 0.0411, "step": 170950 }, { "epoch": 7.98, - "learning_rate": 4.064975856734331e-06, - "loss": 0.0731, + "learning_rate": 1.4089838203860016e-05, + "loss": 0.0344, "step": 170955 }, { "epoch": 7.98, - "learning_rate": 4.0645070554591915e-06, - "loss": 0.0556, + "learning_rate": 1.4089370134023996e-05, + "loss": 0.0498, "step": 170960 }, { "epoch": 7.98, - "learning_rate": 4.064038254184051e-06, - "loss": 0.1424, + "learning_rate": 1.4088902064187978e-05, + "loss": 0.1855, "step": 170965 }, { "epoch": 7.98, - "learning_rate": 4.063569452908912e-06, - "loss": 0.1419, + "learning_rate": 1.4088433994351957e-05, + "loss": 0.1631, "step": 170970 }, { "epoch": 7.98, - "learning_rate": 4.063100651633773e-06, - "loss": 0.0461, + "learning_rate": 1.4087965924515939e-05, + "loss": 0.0228, "step": 170975 }, { "epoch": 7.98, - "learning_rate": 4.062631850358633e-06, - "loss": 0.0087, + "learning_rate": 1.4087497854679919e-05, + "loss": 0.0145, "step": 170980 }, { "epoch": 7.98, - "learning_rate": 4.062163049083494e-06, - "loss": 0.0123, + "learning_rate": 1.4087029784843899e-05, + "loss": 0.0183, "step": 170985 }, { "epoch": 7.98, - "learning_rate": 4.0616942478083545e-06, - "loss": 0.0312, + "learning_rate": 1.408656171500788e-05, + "loss": 0.0728, "step": 170990 }, { "epoch": 7.98, - "learning_rate": 4.061225446533215e-06, - "loss": 0.0582, + "learning_rate": 1.408609364517186e-05, + "loss": 0.0499, "step": 170995 }, { "epoch": 7.98, - "learning_rate": 4.060756645258075e-06, - "loss": 0.0479, + "learning_rate": 1.4085625575335841e-05, + "loss": 0.081, "step": 171000 }, { "epoch": 7.98, - "learning_rate": 4.060287843982936e-06, - "loss": 0.1042, + "learning_rate": 1.4085157505499821e-05, + "loss": 0.0569, "step": 171005 }, { "epoch": 7.98, - "learning_rate": 4.059819042707797e-06, - "loss": 0.0726, + "learning_rate": 1.4084689435663801e-05, + "loss": 0.0685, "step": 171010 }, { "epoch": 7.98, - "learning_rate": 4.059350241432657e-06, - "loss": 0.0893, + "learning_rate": 1.408422136582778e-05, + "loss": 0.0375, "step": 171015 }, { "epoch": 7.98, - "learning_rate": 4.0588814401575174e-06, - "loss": 0.1851, + "learning_rate": 1.4083753295991762e-05, + "loss": 0.1271, "step": 171020 }, { "epoch": 7.98, - "learning_rate": 4.058412638882378e-06, + "learning_rate": 1.4083285226155742e-05, "loss": 0.0036, "step": 171025 }, { "epoch": 7.98, - "learning_rate": 4.057943837607239e-06, - "loss": 0.0064, + "learning_rate": 1.4082817156319724e-05, + "loss": 0.0277, "step": 171030 }, { "epoch": 7.98, - "learning_rate": 4.057475036332099e-06, - "loss": 0.0143, + "learning_rate": 1.4082349086483703e-05, + "loss": 0.0179, "step": 171035 }, { "epoch": 7.98, - "learning_rate": 4.05700623505696e-06, - "loss": 0.0459, + "learning_rate": 1.4081881016647683e-05, + "loss": 0.0261, "step": 171040 }, { "epoch": 7.98, - "learning_rate": 4.0565374337818205e-06, - "loss": 0.0501, + "learning_rate": 1.4081412946811665e-05, + "loss": 0.0412, "step": 171045 }, { "epoch": 7.98, - "learning_rate": 4.0560686325066804e-06, - "loss": 0.0417, + "learning_rate": 1.4080944876975645e-05, + "loss": 0.0161, "step": 171050 }, { "epoch": 7.98, - "learning_rate": 4.055599831231541e-06, - "loss": 0.0798, + "learning_rate": 1.4080476807139626e-05, + "loss": 0.0629, "step": 171055 }, { "epoch": 7.98, - "learning_rate": 4.055131029956402e-06, - "loss": 0.0507, + "learning_rate": 1.4080008737303606e-05, + "loss": 0.1715, "step": 171060 }, { "epoch": 7.98, - "learning_rate": 4.054662228681263e-06, - "loss": 0.1143, + "learning_rate": 1.4079540667467587e-05, + "loss": 0.1458, "step": 171065 }, { "epoch": 7.98, - "learning_rate": 4.054193427406123e-06, - "loss": 0.1655, + "learning_rate": 1.4079072597631567e-05, + "loss": 0.1487, "step": 171070 }, { "epoch": 7.98, - "learning_rate": 4.0537246261309835e-06, - "loss": 0.0139, + "learning_rate": 1.4078604527795547e-05, + "loss": 0.0343, "step": 171075 }, { "epoch": 7.98, - "learning_rate": 4.053255824855844e-06, - "loss": 0.0163, + "learning_rate": 1.4078136457959527e-05, + "loss": 0.0051, "step": 171080 }, { "epoch": 7.98, - "learning_rate": 4.052787023580704e-06, - "loss": 0.0022, + "learning_rate": 1.4077668388123508e-05, + "loss": 0.0486, "step": 171085 }, { "epoch": 7.98, - "learning_rate": 4.052318222305565e-06, - "loss": 0.0339, + "learning_rate": 1.4077200318287488e-05, + "loss": 0.0869, "step": 171090 }, { "epoch": 7.98, - "learning_rate": 4.051849421030426e-06, - "loss": 0.0273, + "learning_rate": 1.407673224845147e-05, + "loss": 0.0275, "step": 171095 }, { "epoch": 7.98, - "learning_rate": 4.0513806197552865e-06, - "loss": 0.0724, + "learning_rate": 1.4076264178615451e-05, + "loss": 0.0354, "step": 171100 }, { "epoch": 7.98, - "learning_rate": 4.0509118184801465e-06, - "loss": 0.0323, + "learning_rate": 1.407579610877943e-05, + "loss": 0.0195, "step": 171105 }, { "epoch": 7.98, - "learning_rate": 4.050443017205007e-06, - "loss": 0.0835, + "learning_rate": 1.4075328038943411e-05, + "loss": 0.0602, "step": 171110 }, { "epoch": 7.98, - "learning_rate": 4.049974215929868e-06, - "loss": 0.1589, + "learning_rate": 1.407485996910739e-05, + "loss": 0.1439, "step": 171115 }, { "epoch": 7.98, - "learning_rate": 4.049505414654728e-06, - "loss": 0.1683, + "learning_rate": 1.4074391899271372e-05, + "loss": 0.1042, "step": 171120 }, { "epoch": 7.98, - "learning_rate": 4.049036613379589e-06, - "loss": 0.0112, + "learning_rate": 1.4073923829435352e-05, + "loss": 0.0356, "step": 171125 }, { "epoch": 7.99, - "learning_rate": 4.0485678121044495e-06, - "loss": 0.0309, + "learning_rate": 1.4073455759599334e-05, + "loss": 0.0268, "step": 171130 }, { "epoch": 7.99, - "learning_rate": 4.04809901082931e-06, - "loss": 0.0372, + "learning_rate": 1.4072987689763312e-05, + "loss": 0.0101, "step": 171135 }, { "epoch": 7.99, - "learning_rate": 4.04763020955417e-06, - "loss": 0.0042, + "learning_rate": 1.4072519619927293e-05, + "loss": 0.0723, "step": 171140 }, { "epoch": 7.99, - "learning_rate": 4.04716140827903e-06, - "loss": 0.0895, + "learning_rate": 1.4072051550091273e-05, + "loss": 0.0154, "step": 171145 }, { "epoch": 7.99, - "learning_rate": 4.046692607003892e-06, - "loss": 0.0378, + "learning_rate": 1.4071583480255255e-05, + "loss": 0.0452, "step": 171150 }, { "epoch": 7.99, - "learning_rate": 4.046223805728752e-06, - "loss": 0.018, + "learning_rate": 1.4071115410419234e-05, + "loss": 0.0295, "step": 171155 }, { "epoch": 7.99, - "learning_rate": 4.0457550044536125e-06, - "loss": 0.109, + "learning_rate": 1.4070647340583216e-05, + "loss": 0.0645, "step": 171160 }, { "epoch": 7.99, - "learning_rate": 4.0452862031784725e-06, - "loss": 0.137, + "learning_rate": 1.4070179270747197e-05, + "loss": 0.1374, "step": 171165 }, { "epoch": 7.99, - "learning_rate": 4.044817401903334e-06, - "loss": 0.1345, + "learning_rate": 1.4069711200911175e-05, + "loss": 0.1754, "step": 171170 }, { "epoch": 7.99, - "learning_rate": 4.044348600628194e-06, - "loss": 0.0126, + "learning_rate": 1.4069243131075157e-05, + "loss": 0.0388, "step": 171175 }, { "epoch": 7.99, - "learning_rate": 4.043879799353054e-06, - "loss": 0.0281, + "learning_rate": 1.4068775061239137e-05, + "loss": 0.0068, "step": 171180 }, { "epoch": 7.99, - "learning_rate": 4.043410998077915e-06, - "loss": 0.0486, + "learning_rate": 1.4068306991403118e-05, + "loss": 0.0037, "step": 171185 }, { "epoch": 7.99, - "learning_rate": 4.0429421968027755e-06, - "loss": 0.0167, + "learning_rate": 1.4067838921567098e-05, + "loss": 0.0397, "step": 171190 }, { "epoch": 7.99, - "learning_rate": 4.042473395527636e-06, - "loss": 0.0609, + "learning_rate": 1.406737085173108e-05, + "loss": 0.034, "step": 171195 }, { "epoch": 7.99, - "learning_rate": 4.042004594252496e-06, - "loss": 0.0897, + "learning_rate": 1.4066902781895058e-05, + "loss": 0.0196, "step": 171200 }, { "epoch": 7.99, - "learning_rate": 4.041535792977357e-06, - "loss": 0.0454, + "learning_rate": 1.406643471205904e-05, + "loss": 0.1096, "step": 171205 }, { "epoch": 7.99, - "learning_rate": 4.041066991702218e-06, - "loss": 0.0632, + "learning_rate": 1.4065966642223019e-05, + "loss": 0.0154, "step": 171210 }, { "epoch": 7.99, - "learning_rate": 4.040598190427078e-06, - "loss": 0.2014, + "learning_rate": 1.4065498572387e-05, + "loss": 0.0772, "step": 171215 }, { "epoch": 7.99, - "learning_rate": 4.0401293891519385e-06, - "loss": 0.4487, + "learning_rate": 1.4065030502550982e-05, + "loss": 0.2117, "step": 171220 }, { "epoch": 7.99, - "learning_rate": 4.039660587876799e-06, - "loss": 0.0322, + "learning_rate": 1.4064562432714962e-05, + "loss": 0.0114, "step": 171225 }, { "epoch": 7.99, - "learning_rate": 4.03919178660166e-06, - "loss": 0.0313, + "learning_rate": 1.4064094362878942e-05, + "loss": 0.0026, "step": 171230 }, { "epoch": 7.99, - "learning_rate": 4.03872298532652e-06, - "loss": 0.0383, + "learning_rate": 1.4063626293042922e-05, + "loss": 0.0265, "step": 171235 }, { "epoch": 7.99, - "learning_rate": 4.038254184051381e-06, - "loss": 0.0345, + "learning_rate": 1.4063158223206903e-05, + "loss": 0.0392, "step": 171240 }, { "epoch": 7.99, - "learning_rate": 4.0377853827762416e-06, - "loss": 0.2517, + "learning_rate": 1.4062690153370883e-05, + "loss": 0.0324, "step": 171245 }, { "epoch": 7.99, - "learning_rate": 4.037316581501102e-06, - "loss": 0.047, + "learning_rate": 1.4062222083534864e-05, + "loss": 0.0253, "step": 171250 }, { "epoch": 7.99, - "learning_rate": 4.036847780225962e-06, - "loss": 0.1324, + "learning_rate": 1.4061754013698844e-05, + "loss": 0.1109, "step": 171255 }, { "epoch": 7.99, - "learning_rate": 4.036378978950823e-06, - "loss": 0.1, + "learning_rate": 1.4061285943862826e-05, + "loss": 0.0215, "step": 171260 }, { "epoch": 7.99, - "learning_rate": 4.035910177675684e-06, - "loss": 0.2955, + "learning_rate": 1.4060817874026804e-05, + "loss": 0.3367, "step": 171265 }, { "epoch": 7.99, - "learning_rate": 4.035441376400544e-06, - "loss": 0.1861, + "learning_rate": 1.4060349804190785e-05, + "loss": 0.1925, "step": 171270 }, { "epoch": 7.99, - "learning_rate": 4.0349725751254046e-06, - "loss": 0.0234, + "learning_rate": 1.4059881734354765e-05, + "loss": 0.0209, "step": 171275 }, { "epoch": 7.99, - "learning_rate": 4.034503773850265e-06, - "loss": 0.0016, + "learning_rate": 1.4059413664518747e-05, + "loss": 0.0185, "step": 171280 }, { "epoch": 7.99, - "learning_rate": 4.034034972575126e-06, - "loss": 0.0262, + "learning_rate": 1.4058945594682728e-05, + "loss": 0.0169, "step": 171285 }, { "epoch": 7.99, - "learning_rate": 4.033566171299986e-06, - "loss": 0.0201, + "learning_rate": 1.4058477524846708e-05, + "loss": 0.0341, "step": 171290 }, { "epoch": 7.99, - "learning_rate": 4.033097370024847e-06, - "loss": 0.0532, + "learning_rate": 1.4058009455010688e-05, + "loss": 0.0066, "step": 171295 }, { "epoch": 7.99, - "learning_rate": 4.032628568749708e-06, - "loss": 0.0282, + "learning_rate": 1.4057541385174668e-05, + "loss": 0.019, "step": 171300 }, { "epoch": 7.99, - "learning_rate": 4.0321597674745675e-06, - "loss": 0.0561, + "learning_rate": 1.405707331533865e-05, + "loss": 0.0898, "step": 171305 }, { "epoch": 7.99, - "learning_rate": 4.031690966199428e-06, - "loss": 0.1074, + "learning_rate": 1.4056605245502629e-05, + "loss": 0.088, "step": 171310 }, { "epoch": 7.99, - "learning_rate": 4.031222164924289e-06, - "loss": 0.1725, + "learning_rate": 1.405613717566661e-05, + "loss": 0.0426, "step": 171315 }, { "epoch": 7.99, - "learning_rate": 4.03075336364915e-06, - "loss": 0.2808, + "learning_rate": 1.405566910583059e-05, + "loss": 0.1252, "step": 171320 }, { "epoch": 7.99, - "learning_rate": 4.03028456237401e-06, - "loss": 0.0478, + "learning_rate": 1.405520103599457e-05, + "loss": 0.0091, "step": 171325 }, { "epoch": 7.99, - "learning_rate": 4.029815761098871e-06, - "loss": 0.0055, + "learning_rate": 1.405473296615855e-05, + "loss": 0.01, "step": 171330 }, { "epoch": 7.99, - "learning_rate": 4.029346959823731e-06, - "loss": 0.0189, + "learning_rate": 1.4054264896322531e-05, + "loss": 0.0142, "step": 171335 }, { "epoch": 7.99, - "learning_rate": 4.028878158548591e-06, - "loss": 0.0248, + "learning_rate": 1.4053796826486511e-05, + "loss": 0.0307, "step": 171340 }, { "epoch": 8.0, - "learning_rate": 4.028409357273452e-06, - "loss": 0.0465, + "learning_rate": 1.4053328756650493e-05, + "loss": 0.0646, "step": 171345 }, { "epoch": 8.0, - "learning_rate": 4.027940555998313e-06, - "loss": 0.0944, + "learning_rate": 1.4052860686814474e-05, + "loss": 0.0269, "step": 171350 }, { "epoch": 8.0, - "learning_rate": 4.027471754723174e-06, - "loss": 0.0695, + "learning_rate": 1.4052392616978454e-05, + "loss": 0.0455, "step": 171355 }, { "epoch": 8.0, - "learning_rate": 4.027002953448034e-06, - "loss": 0.1055, + "learning_rate": 1.4051924547142434e-05, + "loss": 0.0582, "step": 171360 }, { "epoch": 8.0, - "learning_rate": 4.026534152172894e-06, - "loss": 0.1024, + "learning_rate": 1.4051456477306414e-05, + "loss": 0.159, "step": 171365 }, { "epoch": 8.0, - "learning_rate": 4.026065350897755e-06, - "loss": 0.2597, + "learning_rate": 1.4050988407470395e-05, + "loss": 0.1131, "step": 171370 }, { "epoch": 8.0, - "learning_rate": 4.025596549622615e-06, - "loss": 0.0397, + "learning_rate": 1.4050520337634375e-05, + "loss": 0.0527, "step": 171375 }, { "epoch": 8.0, - "learning_rate": 4.025127748347476e-06, - "loss": 0.0121, + "learning_rate": 1.4050052267798357e-05, + "loss": 0.0038, "step": 171380 }, { "epoch": 8.0, - "learning_rate": 4.024658947072337e-06, - "loss": 0.0234, + "learning_rate": 1.4049584197962336e-05, + "loss": 0.0187, "step": 171385 }, { "epoch": 8.0, - "learning_rate": 4.0241901457971974e-06, - "loss": 0.0723, + "learning_rate": 1.4049116128126316e-05, + "loss": 0.0144, "step": 171390 }, { "epoch": 8.0, - "learning_rate": 4.023721344522057e-06, - "loss": 0.0872, + "learning_rate": 1.4048648058290296e-05, + "loss": 0.0912, "step": 171395 }, { "epoch": 8.0, - "learning_rate": 4.023252543246918e-06, - "loss": 0.0199, + "learning_rate": 1.4048179988454278e-05, + "loss": 0.0457, "step": 171400 }, { "epoch": 8.0, - "learning_rate": 4.022783741971779e-06, - "loss": 0.0655, + "learning_rate": 1.4047711918618259e-05, + "loss": 0.0737, "step": 171405 }, { "epoch": 8.0, - "learning_rate": 4.022314940696639e-06, - "loss": 0.0825, + "learning_rate": 1.4047243848782239e-05, + "loss": 0.07, "step": 171410 }, { "epoch": 8.0, - "learning_rate": 4.0218461394215e-06, - "loss": 0.0664, + "learning_rate": 1.404677577894622e-05, + "loss": 0.0928, "step": 171415 }, { "epoch": 8.0, - "learning_rate": 4.02137733814636e-06, - "loss": 0.1971, + "learning_rate": 1.4046307709110199e-05, + "loss": 0.0986, "step": 171420 }, { "epoch": 8.0, - "learning_rate": 4.020908536871221e-06, - "loss": 0.014, + "learning_rate": 1.404583963927418e-05, + "loss": 0.0093, "step": 171425 }, { "epoch": 8.0, - "learning_rate": 4.020439735596081e-06, - "loss": 0.0192, + "learning_rate": 1.404537156943816e-05, + "loss": 0.022, "step": 171430 }, { "epoch": 8.0, - "learning_rate": 4.019970934320941e-06, - "loss": 0.0449, + "learning_rate": 1.4044903499602141e-05, + "loss": 0.0212, "step": 171435 }, { "epoch": 8.0, - "learning_rate": 4.019502133045803e-06, - "loss": 0.0371, + "learning_rate": 1.4044435429766121e-05, + "loss": 0.0397, "step": 171440 }, { "epoch": 8.0, - "learning_rate": 4.019033331770663e-06, - "loss": 0.0625, + "learning_rate": 1.4043967359930103e-05, + "loss": 0.1309, "step": 171445 }, { "epoch": 8.0, - "eval_cer": 0.009891667047584221, - "eval_loss": 0.039958395063877106, - "eval_runtime": 381.234, - "eval_samples_per_second": 49.969, - "eval_steps_per_second": 12.494, - "eval_wer": 0.07781175671083929, + "eval_cer": 0.007440448647948324, + "eval_loss": 0.017975328490138054, + "eval_runtime": 397.123, + "eval_samples_per_second": 47.97, + "eval_steps_per_second": 11.994, + "eval_wer": 0.059505587019745905, "step": 171448 } ], - "max_steps": 214310, - "num_train_epochs": 10, - "total_flos": 2.059505829305942e+19, + "max_steps": 321465, + "num_train_epochs": 15, + "total_flos": 2.062066464774775e+19, "trial_name": null, "trial_params": null }